/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
#ifdef CONFIG_DEBUG_TCG
TCGv _pc_is_current = 0;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
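
/*
 * Layout sketch (sizes are illustrative, not taken from any real host):
 *
 *   start start_aligned                                           end
 *     |     |<-- size -->|guard|<-- size -->|guard|<--- size --->|guard
 *     |.....|  region 0  |     |  region 1  |     |   region 2   |
 *           |<----- stride ---->|
 *
 * Region 0 additionally covers the unaligned bytes between .start and
 * .start_aligned, and the last region absorbs any leftover pages before
 * .end (see tcg_region_bounds() below).
 */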

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
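
/*
 * Example of how the emitters above behave (illustrative): on a host with
 * TCG_TARGET_INSN_UNIT_SIZE == 1, e.g. x86, tcg_out32() takes the memcpy
 * path and advances s->code_ptr by 4 / 1 == 4 units; on a host with 4-byte
 * insn units it stores the value directly and advances by a single unit.
 * Either way exactly four bytes of code are emitted:
 *
 *     tcg_out32(s, insn);    // writes 4 bytes at s->code_ptr
 */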

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
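
/*
 * Typical lifecycle of a forward branch (sketch; R_SOME_TYPE stands in for
 * a host-specific relocation type defined by tcg-target.inc.c):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_SOME_TYPE, l, 0); // record a fixup
 *     ...                                               // emit more code
 *     tcg_out_label(s, l, s->code_ptr);                 // bind the label
 *
 * tcg_resolve_relocs() then walks every label's reloc list and lets
 * patch_reloc() rewrite the recorded code locations with the final value.
 */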

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset. */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * In a lookup, one of the two keys has its .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
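
/*
 * Example: a TB occupies the half-open interval [tc.ptr, tc.ptr + tc.size),
 * and a lookup key carries { .ptr = host_pc, .size = 0 }.  With a TB at
 * 0x1000 of size 0x40 (hypothetical numbers), lookups for 0x1000 and 0x103f
 * both compare equal to it via ptr_cmp_tb_tc(), while 0x1040 does not.
 */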

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
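
/*
 * Worked example with hypothetical numbers: for region.stride == 16 MB and
 * region.n == 4, a pointer 33 MB past start_aligned gives offset / stride
 * == 2, i.e. the tree of region 2.  Pointers beyond the last region clamp
 * to region.n - 1, and pointers in the unaligned head below start_aligned
 * map to region 0.
 */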

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
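
/*
 * Worked example with hypothetical sizes: for a 1 GB code_gen_buffer and
 * max_cpus == 8, the first iteration (i == 8) already yields
 * 1 GB / (8 * 8) == 16 MB >= 2 MB, so we use 64 regions.  For a 16 MB
 * buffer the loop only succeeds at i == 1 (16 MB / 8 == 2 MB), i.e. one
 * region per vCPU thread.
 */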

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
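
/*
 * Worked example with hypothetical sizes: a 32 MB buffer that starts 1 KB
 * before a page boundary, with 4 KB pages and n_regions == 4.  Then
 * aligned - buf == 3 KB, so region_size == (32 MB - 3 KB) / 4 aligned down
 * to a page multiple == 8 MB - 4 KB.  Each region thus provides
 * region.size == 8 MB - 8 KB of code space plus one 4 KB guard page, and
 * the pages left over at the end of the buffer go to the last region.
 */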

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
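
/*
 * Example with hypothetical sizes: for 4 regions, a 4 KB guard page each
 * (guard_size == stride - size) and TCG_HIGHWATER == 1024, the result is
 * (end + 4 KB - start) - 4 * (4 KB + 1 KB): neither the guard pages nor
 * the per-region high-water slack count as usable code space.
 */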

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
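
/*
 * Allocation pattern (sketch): small requests are bump-allocated from
 * TCG_POOL_CHUNK_SIZE chunks that are kept across translations, while
 * oversized requests get a dedicated chunk that the next pool reset frees:
 *
 *     void *a = tcg_malloc(64);                      // served from a chunk
 *     void *b = tcg_malloc(TCG_POOL_CHUNK_SIZE + 1); // dedicated chunk
 *     tcg_pool_reset(s);   // frees b's chunk, rewinds and keeps the rest
 */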

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
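
/*
 * Example with hypothetical numbers: for a 64-byte icache line and
 * code_gen_ptr ending in 0xf0, the TB struct is placed at the next line
 * boundary (ending in 0x100) and code generation resumes at the first line
 * boundary past the struct, so a TB and its translated code never share
 * a cache line.
 */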

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer. */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require. */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer. */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely. */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
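
/*
 * Example: on a 32-bit big-endian host (assumed here for illustration), a
 * 64-bit global "reg" at env offset 0x10 is split into two I32 halves:
 * "reg_0" (the low part, at offset 0x14) and "reg_1" (the high part, at
 * offset 0x10).  On a little-endian host the two offsets are swapped.
 */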

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type. */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them. */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below. */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow. */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above. */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create. */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1866
static void tcg_reg_alloc_start(TCGContext *s)
1868 {
1869 int i, n;
1870 TCGTemp *ts;
1871
1872 for (i = 0, n = s->nb_globals; i < n; i++) {
1873 ts = &s->temps[i];
1874 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1875 }
1876 for (n = s->nb_temps; i < n; i++) {
1877 ts = &s->temps[i];
1878 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1879 ts->mem_allocated = 0;
1880 ts->fixed_reg = 0;
1881 }
1882
1883 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1884 }
1885
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
1888 {
1889 int idx = temp_idx(ts);
1890
1891 if (ts->temp_global) {
1892 pstrcpy(buf, buf_size, ts->name);
1893 } else if (ts->temp_local) {
1894 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1895 } else {
1896 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1897 }
1898 return buf;
1899 }
1900
static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
1903 {
1904 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1905 }
1906
1907 /* Find helper name. */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1909 {
1910 const char *ret = NULL;
1911 if (helper_table) {
1912 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1913 if (info) {
1914 ret = info->name;
1915 }
1916 }
1917 return ret;
1918 }
1919
1920 static const char * const cond_name[] =
1921 {
1922 [TCG_COND_NEVER] = "never",
1923 [TCG_COND_ALWAYS] = "always",
1924 [TCG_COND_EQ] = "eq",
1925 [TCG_COND_NE] = "ne",
1926 [TCG_COND_LT] = "lt",
1927 [TCG_COND_GE] = "ge",
1928 [TCG_COND_LE] = "le",
1929 [TCG_COND_GT] = "gt",
1930 [TCG_COND_LTU] = "ltu",
1931 [TCG_COND_GEU] = "geu",
1932 [TCG_COND_LEU] = "leu",
1933 [TCG_COND_GTU] = "gtu"
1934 };
1935
1936 static const char * const ldst_name[] =
1937 {
1938 [MO_UB] = "ub",
1939 [MO_SB] = "sb",
1940 [MO_LEUW] = "leuw",
1941 [MO_LESW] = "lesw",
1942 [MO_LEUL] = "leul",
1943 [MO_LESL] = "lesl",
1944 [MO_LEQ] = "leq",
1945 [MO_BEUW] = "beuw",
1946 [MO_BESW] = "besw",
1947 [MO_BEUL] = "beul",
1948 [MO_BESL] = "besl",
1949 [MO_BEQ] = "beq",
1950 };
1951
1952 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1953 #ifdef TARGET_ALIGNED_ONLY
1954 [MO_UNALN >> MO_ASHIFT] = "un+",
1955 [MO_ALIGN >> MO_ASHIFT] = "",
1956 #else
1957 [MO_UNALN >> MO_ASHIFT] = "",
1958 [MO_ALIGN >> MO_ASHIFT] = "al+",
1959 #endif
1960 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
1961 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
1962 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
1963 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1964 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1965 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1966 };
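/*
 * Example (illustrative): a little-endian 32-bit load that requires
 * 4-byte alignment, MO_LEUL | MO_ALIGN_4, is printed by tcg_dump_ops
 * below as "al4+leul", followed by the mmu index.
 */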
1967
static inline bool tcg_regset_single(TCGRegSet d)
1969 {
1970 return (d & (d - 1)) == 0;
1971 }
1972
static inline TCGReg tcg_regset_first(TCGRegSet d)
1974 {
1975 if (TCG_TARGET_NB_REGS <= 32) {
1976 return ctz32(d);
1977 } else {
1978 return ctz64(d);
1979 }
1980 }
1981
static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1983 {
1984 char buf[128];
1985 TCGOp *op;
1986
1987 QTAILQ_FOREACH(op, &s->ops, link) {
1988 int i, k, nb_oargs, nb_iargs, nb_cargs;
1989 const TCGOpDef *def;
1990 TCGOpcode c;
1991 int col = 0;
1992
1993 c = op->opc;
1994 def = &tcg_op_defs[c];
1995
1996 if (c == INDEX_op_insn_start) {
1997 nb_oargs = 0;
1998 col += qemu_log("\n ----");
1999
2000 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2001 target_ulong a;
2002 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2003 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2004 #else
2005 a = op->args[i];
2006 #endif
2007 col += qemu_log(" " TARGET_FMT_lx, a);
2008 }
2009 } else if (c == INDEX_op_call) {
2010 /* variable number of arguments */
2011 nb_oargs = TCGOP_CALLO(op);
2012 nb_iargs = TCGOP_CALLI(op);
2013 nb_cargs = def->nb_cargs;
2014
2015 /* function name, flags, out args */
2016 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2017 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2018 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2019 for (i = 0; i < nb_oargs; i++) {
2020 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2021 op->args[i]));
2022 }
2023 for (i = 0; i < nb_iargs; i++) {
2024 TCGArg arg = op->args[nb_oargs + i];
2025 const char *t = "<dummy>";
2026 if (arg != TCG_CALL_DUMMY_ARG) {
2027 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2028 }
2029 col += qemu_log(",%s", t);
2030 }
2031 } else {
2032 col += qemu_log(" %s ", def->name);
2033
2034 nb_oargs = def->nb_oargs;
2035 nb_iargs = def->nb_iargs;
2036 nb_cargs = def->nb_cargs;
2037
2038 if (def->flags & TCG_OPF_VECTOR) {
2039 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2040 8 << TCGOP_VECE(op));
2041 }
2042
2043 k = 0;
2044 for (i = 0; i < nb_oargs; i++) {
2045 if (k != 0) {
2046 col += qemu_log(",");
2047 }
2048 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2049 op->args[k++]));
2050 }
2051 for (i = 0; i < nb_iargs; i++) {
2052 if (k != 0) {
2053 col += qemu_log(",");
2054 }
2055 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2056 op->args[k++]));
2057 }
2058 switch (c) {
2059 case INDEX_op_brcond_i32:
2060 case INDEX_op_setcond_i32:
2061 case INDEX_op_movcond_i32:
2062 case INDEX_op_brcond2_i32:
2063 case INDEX_op_setcond2_i32:
2064 case INDEX_op_brcond_i64:
2065 case INDEX_op_setcond_i64:
2066 case INDEX_op_movcond_i64:
2067 case INDEX_op_cmp_vec:
2068 case INDEX_op_cmpsel_vec:
2069 if (op->args[k] < ARRAY_SIZE(cond_name)
2070 && cond_name[op->args[k]]) {
2071 col += qemu_log(",%s", cond_name[op->args[k++]]);
2072 } else {
2073 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2074 }
2075 i = 1;
2076 break;
2077 case INDEX_op_qemu_ld_i32:
2078 case INDEX_op_qemu_st_i32:
2079 case INDEX_op_qemu_ld_i64:
2080 case INDEX_op_qemu_st_i64:
2081 {
                TCGMemOpIdx oi = op->args[k++];
                /* Use 'mop' so as not to shadow the TCGOp above. */
                MemOp mop = get_memop(oi);
                unsigned ix = get_mmuidx(oi);

                if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                    col += qemu_log(",$0x%x,%u", mop, ix);
                } else {
                    const char *s_al, *s_op;
                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2092 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2093 }
2094 i = 1;
2095 }
2096 break;
2097 default:
2098 i = 0;
2099 break;
2100 }
2101 switch (c) {
2102 case INDEX_op_set_label:
2103 case INDEX_op_br:
2104 case INDEX_op_brcond_i32:
2105 case INDEX_op_brcond_i64:
2106 case INDEX_op_brcond2_i32:
2107 col += qemu_log("%s$L%d", k ? "," : "",
2108 arg_label(op->args[k])->id);
2109 i++, k++;
2110 break;
2111 default:
2112 break;
2113 }
2114 for (; i < nb_cargs; i++, k++) {
2115 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2116 }
2117 }
2118
2119 if (have_prefs || op->life) {
2120
2121 QemuLogFile *logfile;
2122
2123 rcu_read_lock();
2124 logfile = atomic_rcu_read(&qemu_logfile);
2125 if (logfile) {
2126 for (; col < 40; ++col) {
2127 putc(' ', logfile->fd);
2128 }
2129 }
2130 rcu_read_unlock();
2131 }
2132
2133 if (op->life) {
2134 unsigned life = op->life;
2135
2136 if (life & (SYNC_ARG * 3)) {
2137 qemu_log(" sync:");
2138 for (i = 0; i < 2; ++i) {
2139 if (life & (SYNC_ARG << i)) {
2140 qemu_log(" %d", i);
2141 }
2142 }
2143 }
2144 life /= DEAD_ARG;
2145 if (life) {
2146 qemu_log(" dead:");
2147 for (i = 0; life; ++i, life >>= 1) {
2148 if (life & 1) {
2149 qemu_log(" %d", i);
2150 }
2151 }
2152 }
2153 }
2154
2155 if (have_prefs) {
2156 for (i = 0; i < nb_oargs; ++i) {
2157 TCGRegSet set = op->output_pref[i];
2158
2159 if (i == 0) {
2160 qemu_log(" pref=");
2161 } else {
2162 qemu_log(",");
2163 }
2164 if (set == 0) {
2165 qemu_log("none");
2166 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2167 qemu_log("all");
2168 #ifdef CONFIG_DEBUG_TCG
2169 } else if (tcg_regset_single(set)) {
2170 TCGReg reg = tcg_regset_first(set);
2171 qemu_log("%s", tcg_target_reg_names[reg]);
2172 #endif
2173 } else if (TCG_TARGET_NB_REGS <= 32) {
2174 qemu_log("%#x", (uint32_t)set);
2175 } else {
2176 qemu_log("%#" PRIx64, (uint64_t)set);
2177 }
2178 }
2179 }
2180
2181 qemu_log("\n");
2182 }
2183 }
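
/*
 * Sample tcg_dump_ops output (illustrative; register names and
 * preference sets are host-specific):
 *
 *   add_i32 tmp2,tmp0,tmp1                  dead: 1 2  pref=all
 *
 * "sync:"/"dead:" list the argument indexes affected by liveness,
 * and "pref" shows the computed output register preferences.
 */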
2184
/* we give more priority to constraints with fewer registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
2187 {
2188 const TCGArgConstraint *arg_ct;
2189
2190 int i, n;
2191 arg_ct = &def->args_ct[k];
2192 if (arg_ct->ct & TCG_CT_ALIAS) {
2193 /* an alias is equivalent to a single register */
2194 n = 1;
2195 } else {
2196 if (!(arg_ct->ct & TCG_CT_REG))
2197 return 0;
2198 n = 0;
2199 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2200 if (tcg_regset_test_reg(arg_ct->u.regs, i))
2201 n++;
2202 }
2203 }
2204 return TCG_TARGET_NB_REGS - n + 1;
2205 }
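
/*
 * Example (illustrative): with 16 host registers, a constraint naming
 * a single register gets priority 16, one allowing any register gets 1,
 * and a constant-only constraint gets 0, so the most restrictive
 * operands are assigned registers first.
 */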
2206
2207 /* sort from highest priority to lowest */
static void sort_constraints(TCGOpDef *def, int start, int n)
2209 {
2210 int i, j, p1, p2, tmp;
2211
2212 for(i = 0; i < n; i++)
2213 def->sorted_args[start + i] = start + i;
2214 if (n <= 1)
2215 return;
2216 for(i = 0; i < n - 1; i++) {
2217 for(j = i + 1; j < n; j++) {
2218 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2219 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2220 if (p1 < p2) {
2221 tmp = def->sorted_args[start + i];
2222 def->sorted_args[start + i] = def->sorted_args[start + j];
2223 def->sorted_args[start + j] = tmp;
2224 }
2225 }
2226 }
2227 }
2228
static void process_op_defs(TCGContext *s)
2230 {
2231 TCGOpcode op;
2232
2233 for (op = 0; op < NB_OPS; op++) {
2234 TCGOpDef *def = &tcg_op_defs[op];
2235 const TCGTargetOpDef *tdefs;
2236 TCGType type;
2237 int i, nb_args;
2238
2239 if (def->flags & TCG_OPF_NOT_PRESENT) {
2240 continue;
2241 }
2242
2243 nb_args = def->nb_iargs + def->nb_oargs;
2244 if (nb_args == 0) {
2245 continue;
2246 }
2247
2248 tdefs = tcg_target_op_def(op);
2249 /* Missing TCGTargetOpDef entry. */
2250 tcg_debug_assert(tdefs != NULL);
2251
2252 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2253 for (i = 0; i < nb_args; i++) {
2254 const char *ct_str = tdefs->args_ct_str[i];
2255 /* Incomplete TCGTargetOpDef entry. */
2256 tcg_debug_assert(ct_str != NULL);
2257
2258 def->args_ct[i].u.regs = 0;
2259 def->args_ct[i].ct = 0;
2260 while (*ct_str != '\0') {
2261 switch(*ct_str) {
2262 case '0' ... '9':
2263 {
2264 int oarg = *ct_str - '0';
2265 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2266 tcg_debug_assert(oarg < def->nb_oargs);
2267 tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2268 /* TCG_CT_ALIAS is for the output arguments.
2269 The input is tagged with TCG_CT_IALIAS. */
2270 def->args_ct[i] = def->args_ct[oarg];
2271 def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2272 def->args_ct[oarg].alias_index = i;
2273 def->args_ct[i].ct |= TCG_CT_IALIAS;
2274 def->args_ct[i].alias_index = oarg;
2275 }
2276 ct_str++;
2277 break;
2278 case '&':
2279 def->args_ct[i].ct |= TCG_CT_NEWREG;
2280 ct_str++;
2281 break;
2282 case 'i':
2283 def->args_ct[i].ct |= TCG_CT_CONST;
2284 ct_str++;
2285 break;
2286 default:
2287 ct_str = target_parse_constraint(&def->args_ct[i],
2288 ct_str, type);
2289 /* Typo in TCGTargetOpDef constraint. */
2290 tcg_debug_assert(ct_str != NULL);
2291 }
2292 }
2293 }
2294
2295 /* TCGTargetOpDef entry with too much information? */
2296 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2297
        /* sort the constraints (XXX: this is just a heuristic) */
2299 sort_constraints(def, 0, def->nb_oargs);
2300 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2301 }
2302 }
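
/*
 * For reference, a typical (hypothetical) TCGTargetOpDef entry is
 * { INDEX_op_add_i32, { "r", "0", "ri" } }: output in any register,
 * first input aliased to output 0, second input either a register or
 * an immediate.  The digit case above wires up exactly that aliasing.
 */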
2303
void tcg_op_remove(TCGContext *s, TCGOp *op)
2305 {
2306 TCGLabel *label;
2307
2308 switch (op->opc) {
2309 case INDEX_op_br:
2310 label = arg_label(op->args[0]);
2311 label->refs--;
2312 break;
2313 case INDEX_op_brcond_i32:
2314 case INDEX_op_brcond_i64:
2315 label = arg_label(op->args[3]);
2316 label->refs--;
2317 break;
2318 case INDEX_op_brcond2_i32:
2319 label = arg_label(op->args[5]);
2320 label->refs--;
2321 break;
2322 default:
2323 break;
2324 }
2325
2326 QTAILQ_REMOVE(&s->ops, op, link);
2327 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2328 s->nb_ops--;
2329
2330 #ifdef CONFIG_PROFILER
2331 atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2332 #endif
2333 }
2334
static TCGOp *tcg_op_alloc(TCGOpcode opc)
2336 {
2337 TCGContext *s = tcg_ctx;
2338 TCGOp *op;
2339
2340 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2341 op = tcg_malloc(sizeof(TCGOp));
2342 } else {
2343 op = QTAILQ_FIRST(&s->free_ops);
2344 QTAILQ_REMOVE(&s->free_ops, op, link);
2345 }
2346 memset(op, 0, offsetof(TCGOp, link));
2347 op->opc = opc;
2348 s->nb_ops++;
2349
2350 return op;
2351 }
2352
TCGOp *tcg_emit_op(TCGOpcode opc)
2354 {
2355 TCGOp *op = tcg_op_alloc(opc);
2356 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2357 return op;
2358 }
2359
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2361 {
2362 TCGOp *new_op = tcg_op_alloc(opc);
2363 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2364 return new_op;
2365 }
2366
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2368 {
2369 TCGOp *new_op = tcg_op_alloc(opc);
2370 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2371 return new_op;
2372 }
2373
/* Reachability analysis: remove unreachable code. */
static void reachable_code_pass(TCGContext *s)
2376 {
2377 TCGOp *op, *op_next;
2378 bool dead = false;
2379
2380 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2381 bool remove = dead;
2382 TCGLabel *label;
2383 int call_flags;
2384
2385 switch (op->opc) {
2386 case INDEX_op_set_label:
2387 label = arg_label(op->args[0]);
2388 if (label->refs == 0) {
2389 /*
2390 * While there is an occasional backward branch, virtually
2391 * all branches generated by the translators are forward.
                 * Which means that by the time we reach a label with no
                 * remaining references, we will generally have already
                 * removed every branch to it, and there is little to be
                 * gained by iterating the pass.
2395 */
2396 remove = true;
2397 } else {
2398 /* Once we see a label, insns become live again. */
2399 dead = false;
2400 remove = false;
2401
2402 /*
2403 * Optimization can fold conditional branches to unconditional.
2404 * If we find a label with one reference which is preceded by
2405 * an unconditional branch to it, remove both. This needed to
2406 * wait until the dead code in between them was removed.
2407 */
2408 if (label->refs == 1) {
2409 TCGOp *op_prev = QTAILQ_PREV(op, link);
2410 if (op_prev->opc == INDEX_op_br &&
2411 label == arg_label(op_prev->args[0])) {
2412 tcg_op_remove(s, op_prev);
2413 remove = true;
2414 }
2415 }
2416 }
2417 break;
2418
2419 case INDEX_op_br:
2420 case INDEX_op_exit_tb:
2421 case INDEX_op_goto_ptr:
2422 /* Unconditional branches; everything following is dead. */
2423 dead = true;
2424 break;
2425
2426 case INDEX_op_call:
2427 /* Notice noreturn helper calls, raising exceptions. */
2428 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2429 if (call_flags & TCG_CALL_NO_RETURN) {
2430 dead = true;
2431 }
2432 break;
2433
2434 case INDEX_op_insn_start:
2435 /* Never remove -- we need to keep these for unwind. */
2436 remove = false;
2437 break;
2438
2439 default:
2440 break;
2441 }
2442
2443 if (remove) {
2444 tcg_op_remove(s, op);
2445 }
2446 }
2447 }
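
/*
 * Example (illustrative): once the optimizer folds a conditional branch
 * to an unconditional one,
 *     br $L1
 *     <dead ops>
 *   set_label $L1
 * the pass above first drops the dead ops, and then, $L1 having a
 * single reference, removes the br and the set_label as well.
 */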
2448
2449 #define TS_DEAD 1
2450 #define TS_MEM 2
2451
2452 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2453 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
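
/*
 * op->life packs two small bit arrays: (SYNC_ARG << n) means output n
 * must be synced back to memory at this op, and (DEAD_ARG << n) means
 * argument n dies here.  The macros above simply test those bits for a
 * single argument index.
 */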
2454
2455 /* For liveness_pass_1, the register preferences for a given temp. */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2457 {
2458 return ts->state_ptr;
2459 }
2460
2461 /* For liveness_pass_1, reset the preferences for a given temp to the
2462 * maximal regset for its type.
2463 */
static inline void la_reset_pref(TCGTemp *ts)
2465 {
2466 *la_temp_pref(ts)
2467 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2468 }
2469
2470 /* liveness analysis: end of function: all temps are dead, and globals
2471 should be in memory. */
static void la_func_end(TCGContext *s, int ng, int nt)
2473 {
2474 int i;
2475
2476 for (i = 0; i < ng; ++i) {
2477 s->temps[i].state = TS_DEAD | TS_MEM;
2478 la_reset_pref(&s->temps[i]);
2479 }
2480 for (i = ng; i < nt; ++i) {
2481 s->temps[i].state = TS_DEAD;
2482 la_reset_pref(&s->temps[i]);
2483 }
2484 }
2485
2486 /* liveness analysis: end of basic block: all temps are dead, globals
2487 and local temps should be in memory. */
static void la_bb_end(TCGContext *s, int ng, int nt)
2489 {
2490 int i;
2491
2492 for (i = 0; i < ng; ++i) {
2493 s->temps[i].state = TS_DEAD | TS_MEM;
2494 la_reset_pref(&s->temps[i]);
2495 }
2496 for (i = ng; i < nt; ++i) {
2497 s->temps[i].state = (s->temps[i].temp_local
2498 ? TS_DEAD | TS_MEM
2499 : TS_DEAD);
2500 la_reset_pref(&s->temps[i]);
2501 }
2502 }
2503
2504 /* liveness analysis: sync globals back to memory. */
static void la_global_sync(TCGContext *s, int ng)
2506 {
2507 int i;
2508
2509 for (i = 0; i < ng; ++i) {
2510 int state = s->temps[i].state;
2511 s->temps[i].state = state | TS_MEM;
2512 if (state == TS_DEAD) {
2513 /* If the global was previously dead, reset prefs. */
2514 la_reset_pref(&s->temps[i]);
2515 }
2516 }
2517 }
2518
2519 /* liveness analysis: sync globals back to memory and kill. */
static void la_global_kill(TCGContext *s, int ng)
2521 {
2522 int i;
2523
2524 for (i = 0; i < ng; i++) {
2525 s->temps[i].state = TS_DEAD | TS_MEM;
2526 la_reset_pref(&s->temps[i]);
2527 }
2528 }
2529
2530 /* liveness analysis: note live globals crossing calls. */
static void la_cross_call(TCGContext *s, int nt)
2532 {
2533 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2534 int i;
2535
2536 for (i = 0; i < nt; i++) {
2537 TCGTemp *ts = &s->temps[i];
2538 if (!(ts->state & TS_DEAD)) {
2539 TCGRegSet *pset = la_temp_pref(ts);
2540 TCGRegSet set = *pset;
2541
2542 set &= mask;
2543 /* If the combination is not possible, restart. */
2544 if (set == 0) {
2545 set = tcg_target_available_regs[ts->type] & mask;
2546 }
2547 *pset = set;
2548 }
2549 }
2550 }
2551
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
static void liveness_pass_1(TCGContext *s)
2556 {
2557 int nb_globals = s->nb_globals;
2558 int nb_temps = s->nb_temps;
2559 TCGOp *op, *op_prev;
2560 TCGRegSet *prefs;
2561 int i;
2562
2563 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2564 for (i = 0; i < nb_temps; ++i) {
2565 s->temps[i].state_ptr = prefs + i;
2566 }
2567
2568 /* ??? Should be redundant with the exit_tb that ends the TB. */
2569 la_func_end(s, nb_globals, nb_temps);
2570
2571 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2572 int nb_iargs, nb_oargs;
2573 TCGOpcode opc_new, opc_new2;
2574 bool have_opc_new2;
2575 TCGLifeData arg_life = 0;
2576 TCGTemp *ts;
2577 TCGOpcode opc = op->opc;
2578 const TCGOpDef *def = &tcg_op_defs[opc];
2579
2580 switch (opc) {
2581 case INDEX_op_call:
2582 {
2583 int call_flags;
2584 int nb_call_regs;
2585
2586 nb_oargs = TCGOP_CALLO(op);
2587 nb_iargs = TCGOP_CALLI(op);
2588 call_flags = op->args[nb_oargs + nb_iargs + 1];
2589
2590 /* pure functions can be removed if their result is unused */
2591 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2592 for (i = 0; i < nb_oargs; i++) {
2593 ts = arg_temp(op->args[i]);
2594 if (ts->state != TS_DEAD) {
2595 goto do_not_remove_call;
2596 }
2597 }
2598 goto do_remove;
2599 }
2600 do_not_remove_call:
2601
2602 /* Output args are dead. */
2603 for (i = 0; i < nb_oargs; i++) {
2604 ts = arg_temp(op->args[i]);
2605 if (ts->state & TS_DEAD) {
2606 arg_life |= DEAD_ARG << i;
2607 }
2608 if (ts->state & TS_MEM) {
2609 arg_life |= SYNC_ARG << i;
2610 }
2611 ts->state = TS_DEAD;
2612 la_reset_pref(ts);
2613
2614 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2615 op->output_pref[i] = 0;
2616 }
2617
2618 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2619 TCG_CALL_NO_READ_GLOBALS))) {
2620 la_global_kill(s, nb_globals);
2621 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2622 la_global_sync(s, nb_globals);
2623 }
2624
2625 /* Record arguments that die in this helper. */
2626 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2627 ts = arg_temp(op->args[i]);
2628 if (ts && ts->state & TS_DEAD) {
2629 arg_life |= DEAD_ARG << i;
2630 }
2631 }
2632
2633 /* For all live registers, remove call-clobbered prefs. */
2634 la_cross_call(s, nb_temps);
2635
2636 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2637
2638 /* Input arguments are live for preceding opcodes. */
2639 for (i = 0; i < nb_iargs; i++) {
2640 ts = arg_temp(op->args[i + nb_oargs]);
2641 if (ts && ts->state & TS_DEAD) {
2642 /* For those arguments that die, and will be allocated
2643 * in registers, clear the register set for that arg,
2644 * to be filled in below. For args that will be on
2645 * the stack, reset to any available reg.
2646 */
2647 *la_temp_pref(ts)
2648 = (i < nb_call_regs ? 0 :
2649 tcg_target_available_regs[ts->type]);
2650 ts->state &= ~TS_DEAD;
2651 }
2652 }
2653
2654 /* For each input argument, add its input register to prefs.
2655 If a temp is used once, this produces a single set bit. */
2656 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2657 ts = arg_temp(op->args[i + nb_oargs]);
2658 if (ts) {
2659 tcg_regset_set_reg(*la_temp_pref(ts),
2660 tcg_target_call_iarg_regs[i]);
2661 }
2662 }
2663 }
2664 break;
2665 case INDEX_op_insn_start:
2666 break;
2667 case INDEX_op_discard:
2668 /* mark the temporary as dead */
2669 ts = arg_temp(op->args[0]);
2670 ts->state = TS_DEAD;
2671 la_reset_pref(ts);
2672 break;
2673
2674 case INDEX_op_add2_i32:
2675 opc_new = INDEX_op_add_i32;
2676 goto do_addsub2;
2677 case INDEX_op_sub2_i32:
2678 opc_new = INDEX_op_sub_i32;
2679 goto do_addsub2;
2680 case INDEX_op_add2_i64:
2681 opc_new = INDEX_op_add_i64;
2682 goto do_addsub2;
2683 case INDEX_op_sub2_i64:
2684 opc_new = INDEX_op_sub_i64;
2685 do_addsub2:
2686 nb_iargs = 4;
2687 nb_oargs = 2;
2688 /* Test if the high part of the operation is dead, but not
2689 the low part. The result can be optimized to a simple
2690 add or sub. This happens often for x86_64 guest when the
2691 cpu mode is set to 32 bit. */
2692 if (arg_temp(op->args[1])->state == TS_DEAD) {
2693 if (arg_temp(op->args[0])->state == TS_DEAD) {
2694 goto do_remove;
2695 }
2696 /* Replace the opcode and adjust the args in place,
2697 leaving 3 unused args at the end. */
2698 op->opc = opc = opc_new;
2699 op->args[1] = op->args[2];
2700 op->args[2] = op->args[4];
2701 /* Fall through and mark the single-word operation live. */
2702 nb_iargs = 2;
2703 nb_oargs = 1;
2704 }
2705 goto do_not_remove;
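            /*
             * Example (illustrative): if only the low half of
             *     add2_i32 lo, hi, al, ah, bl, bh
             * is live, the rewrite above leaves
             *     add_i32 lo, al, bl
             * which is the common case noted above for x86_64 guests
             * running in 32-bit mode.
             */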
2706
2707 case INDEX_op_mulu2_i32:
2708 opc_new = INDEX_op_mul_i32;
2709 opc_new2 = INDEX_op_muluh_i32;
2710 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2711 goto do_mul2;
2712 case INDEX_op_muls2_i32:
2713 opc_new = INDEX_op_mul_i32;
2714 opc_new2 = INDEX_op_mulsh_i32;
2715 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2716 goto do_mul2;
2717 case INDEX_op_mulu2_i64:
2718 opc_new = INDEX_op_mul_i64;
2719 opc_new2 = INDEX_op_muluh_i64;
2720 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2721 goto do_mul2;
2722 case INDEX_op_muls2_i64:
2723 opc_new = INDEX_op_mul_i64;
2724 opc_new2 = INDEX_op_mulsh_i64;
2725 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2726 goto do_mul2;
2727 do_mul2:
2728 nb_iargs = 2;
2729 nb_oargs = 2;
2730 if (arg_temp(op->args[1])->state == TS_DEAD) {
2731 if (arg_temp(op->args[0])->state == TS_DEAD) {
2732 /* Both parts of the operation are dead. */
2733 goto do_remove;
2734 }
2735 /* The high part of the operation is dead; generate the low. */
2736 op->opc = opc = opc_new;
2737 op->args[1] = op->args[2];
2738 op->args[2] = op->args[3];
2739 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2740 /* The low part of the operation is dead; generate the high. */
2741 op->opc = opc = opc_new2;
2742 op->args[0] = op->args[1];
2743 op->args[1] = op->args[2];
2744 op->args[2] = op->args[3];
2745 } else {
2746 goto do_not_remove;
2747 }
2748 /* Mark the single-word operation live. */
2749 nb_oargs = 1;
2750 goto do_not_remove;
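            /*
             * Similarly (illustrative): mulu2_i32 lo, hi, a, b with a
             * dead high part becomes mul_i32 lo, a, b; with a dead low
             * part it becomes muluh_i32 hi, a, b, provided the target
             * implements the high-multiply opcode.
             */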
2751
2752 default:
2753 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2754 nb_iargs = def->nb_iargs;
2755 nb_oargs = def->nb_oargs;
2756
2757 /* Test if the operation can be removed because all
2758 its outputs are dead. We assume that nb_oargs == 0
2759 implies side effects */
2760 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2761 for (i = 0; i < nb_oargs; i++) {
2762 if (arg_temp(op->args[i])->state != TS_DEAD) {
2763 goto do_not_remove;
2764 }
2765 }
2766 goto do_remove;
2767 }
2768 goto do_not_remove;
2769
2770 do_remove:
2771 tcg_op_remove(s, op);
2772 break;
2773
2774 do_not_remove:
2775 for (i = 0; i < nb_oargs; i++) {
2776 ts = arg_temp(op->args[i]);
2777
2778 /* Remember the preference of the uses that followed. */
2779 op->output_pref[i] = *la_temp_pref(ts);
2780
2781 /* Output args are dead. */
2782 if (ts->state & TS_DEAD) {
2783 arg_life |= DEAD_ARG << i;
2784 }
2785 if (ts->state & TS_MEM) {
2786 arg_life |= SYNC_ARG << i;
2787 }
2788 ts->state = TS_DEAD;
2789 la_reset_pref(ts);
2790 }
2791
2792 /* If end of basic block, update. */
2793 if (def->flags & TCG_OPF_BB_EXIT) {
2794 la_func_end(s, nb_globals, nb_temps);
2795 } else if (def->flags & TCG_OPF_BB_END) {
2796 la_bb_end(s, nb_globals, nb_temps);
2797 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2798 la_global_sync(s, nb_globals);
2799 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2800 la_cross_call(s, nb_temps);
2801 }
2802 }
2803
2804 /* Record arguments that die in this opcode. */
2805 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2806 ts = arg_temp(op->args[i]);
2807 if (ts->state & TS_DEAD) {
2808 arg_life |= DEAD_ARG << i;
2809 }
2810 }
2811
2812 /* Input arguments are live for preceding opcodes. */
2813 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2814 ts = arg_temp(op->args[i]);
2815 if (ts->state & TS_DEAD) {
2816 /* For operands that were dead, initially allow
2817 all regs for the type. */
2818 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2819 ts->state &= ~TS_DEAD;
2820 }
2821 }
2822
2823 /* Incorporate constraints for this operand. */
2824 switch (opc) {
2825 case INDEX_op_mov_i32:
2826 case INDEX_op_mov_i64:
2827 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2828 have proper constraints. That said, special case
2829 moves to propagate preferences backward. */
2830 if (IS_DEAD_ARG(1)) {
2831 *la_temp_pref(arg_temp(op->args[0]))
2832 = *la_temp_pref(arg_temp(op->args[1]));
2833 }
2834 break;
2835
2836 default:
2837 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2838 const TCGArgConstraint *ct = &def->args_ct[i];
2839 TCGRegSet set, *pset;
2840
2841 ts = arg_temp(op->args[i]);
2842 pset = la_temp_pref(ts);
2843 set = *pset;
2844
2845 set &= ct->u.regs;
2846 if (ct->ct & TCG_CT_IALIAS) {
2847 set &= op->output_pref[ct->alias_index];
2848 }
2849 /* If the combination is not possible, restart. */
2850 if (set == 0) {
2851 set = ct->u.regs;
2852 }
2853 *pset = set;
2854 }
2855 break;
2856 }
2857 break;
2858 }
2859 op->life = arg_life;
2860 }
2861 }
2862
2863 /* Liveness analysis: Convert indirect regs to direct temporaries. */
static bool liveness_pass_2(TCGContext *s)
2865 {
2866 int nb_globals = s->nb_globals;
2867 int nb_temps, i;
2868 bool changes = false;
2869 TCGOp *op, *op_next;
2870
2871 /* Create a temporary for each indirect global. */
2872 for (i = 0; i < nb_globals; ++i) {
2873 TCGTemp *its = &s->temps[i];
2874 if (its->indirect_reg) {
2875 TCGTemp *dts = tcg_temp_alloc(s);
2876 dts->type = its->type;
2877 dts->base_type = its->base_type;
2878 its->state_ptr = dts;
2879 } else {
2880 its->state_ptr = NULL;
2881 }
2882 /* All globals begin dead. */
2883 its->state = TS_DEAD;
2884 }
2885 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2886 TCGTemp *its = &s->temps[i];
2887 its->state_ptr = NULL;
2888 its->state = TS_DEAD;
2889 }
2890
2891 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2892 TCGOpcode opc = op->opc;
2893 const TCGOpDef *def = &tcg_op_defs[opc];
2894 TCGLifeData arg_life = op->life;
2895 int nb_iargs, nb_oargs, call_flags;
2896 TCGTemp *arg_ts, *dir_ts;
2897
2898 if (opc == INDEX_op_call) {
2899 nb_oargs = TCGOP_CALLO(op);
2900 nb_iargs = TCGOP_CALLI(op);
2901 call_flags = op->args[nb_oargs + nb_iargs + 1];
2902 } else {
2903 nb_iargs = def->nb_iargs;
2904 nb_oargs = def->nb_oargs;
2905
2906 /* Set flags similar to how calls require. */
2907 if (def->flags & TCG_OPF_BB_END) {
2908 /* Like writing globals: save_globals */
2909 call_flags = 0;
2910 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2911 /* Like reading globals: sync_globals */
2912 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2913 } else {
2914 /* No effect on globals. */
2915 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2916 TCG_CALL_NO_WRITE_GLOBALS);
2917 }
2918 }
2919
2920 /* Make sure that input arguments are available. */
2921 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2922 arg_ts = arg_temp(op->args[i]);
2923 if (arg_ts) {
2924 dir_ts = arg_ts->state_ptr;
2925 if (dir_ts && arg_ts->state == TS_DEAD) {
2926 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2927 ? INDEX_op_ld_i32
2928 : INDEX_op_ld_i64);
2929 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2930
2931 lop->args[0] = temp_arg(dir_ts);
2932 lop->args[1] = temp_arg(arg_ts->mem_base);
2933 lop->args[2] = arg_ts->mem_offset;
2934
2935 /* Loaded, but synced with memory. */
2936 arg_ts->state = TS_MEM;
2937 }
2938 }
2939 }
2940
        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping the temp state up to date
           so that we reload when needed.  */
2944 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2945 arg_ts = arg_temp(op->args[i]);
2946 if (arg_ts) {
2947 dir_ts = arg_ts->state_ptr;
2948 if (dir_ts) {
2949 op->args[i] = temp_arg(dir_ts);
2950 changes = true;
2951 if (IS_DEAD_ARG(i)) {
2952 arg_ts->state = TS_DEAD;
2953 }
2954 }
2955 }
2956 }
2957
2958 /* Liveness analysis should ensure that the following are
2959 all correct, for call sites and basic block end points. */
2960 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2961 /* Nothing to do */
2962 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2963 for (i = 0; i < nb_globals; ++i) {
2964 /* Liveness should see that globals are synced back,
2965 that is, either TS_DEAD or TS_MEM. */
2966 arg_ts = &s->temps[i];
2967 tcg_debug_assert(arg_ts->state_ptr == 0
2968 || arg_ts->state != 0);
2969 }
2970 } else {
2971 for (i = 0; i < nb_globals; ++i) {
2972 /* Liveness should see that globals are saved back,
2973 that is, TS_DEAD, waiting to be reloaded. */
2974 arg_ts = &s->temps[i];
2975 tcg_debug_assert(arg_ts->state_ptr == 0
2976 || arg_ts->state == TS_DEAD);
2977 }
2978 }
2979
2980 /* Outputs become available. */
2981 for (i = 0; i < nb_oargs; i++) {
2982 arg_ts = arg_temp(op->args[i]);
2983 dir_ts = arg_ts->state_ptr;
2984 if (!dir_ts) {
2985 continue;
2986 }
2987 op->args[i] = temp_arg(dir_ts);
2988 changes = true;
2989
2990 /* The output is now live and modified. */
2991 arg_ts->state = 0;
2992
2993 /* Sync outputs upon their last write. */
2994 if (NEED_SYNC_ARG(i)) {
2995 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2996 ? INDEX_op_st_i32
2997 : INDEX_op_st_i64);
2998 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2999
3000 sop->args[0] = temp_arg(dir_ts);
3001 sop->args[1] = temp_arg(arg_ts->mem_base);
3002 sop->args[2] = arg_ts->mem_offset;
3003
3004 arg_ts->state = TS_MEM;
3005 }
3006 /* Drop outputs that are dead. */
3007 if (IS_DEAD_ARG(i)) {
3008 arg_ts->state = TS_DEAD;
3009 }
3010 }
3011 }
3012
3013 return changes;
3014 }
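
/*
 * Example (a sketch): for an indirect global G stored at
 * [mem_base + mem_offset], this pass rewrites
 *     add_i32 G, G, t
 * into
 *     ld_i32 D, base, off
 *     add_i32 D, D, t
 *     st_i32 D, base, off
 * where D is the direct temporary created above; the load and store
 * are elided when liveness shows G is already loaded or is not needed
 * in memory afterwards.
 */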
3015
3016 #ifdef CONFIG_DEBUG_TCG
static void dump_regs(TCGContext *s)
3018 {
3019 TCGTemp *ts;
3020 int i;
3021 char buf[64];
3022
3023 for(i = 0; i < s->nb_temps; i++) {
3024 ts = &s->temps[i];
3025 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3026 switch(ts->val_type) {
3027 case TEMP_VAL_REG:
3028 printf("%s", tcg_target_reg_names[ts->reg]);
3029 break;
3030 case TEMP_VAL_MEM:
3031 printf("%d(%s)", (int)ts->mem_offset,
3032 tcg_target_reg_names[ts->mem_base->reg]);
3033 break;
3034 case TEMP_VAL_CONST:
3035 printf("$0x%" TCG_PRIlx, ts->val);
3036 break;
3037 case TEMP_VAL_DEAD:
3038 printf("D");
3039 break;
3040 default:
3041 printf("???");
3042 break;
3043 }
3044 printf("\n");
3045 }
3046
3047 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3048 if (s->reg_to_temp[i] != NULL) {
3049 printf("%s: %s\n",
3050 tcg_target_reg_names[i],
3051 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3052 }
3053 }
3054 }
3055
static void check_regs(TCGContext *s)
3057 {
3058 int reg;
3059 int k;
3060 TCGTemp *ts;
3061 char buf[64];
3062
3063 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3064 ts = s->reg_to_temp[reg];
3065 if (ts != NULL) {
3066 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3067 printf("Inconsistency for register %s:\n",
3068 tcg_target_reg_names[reg]);
3069 goto fail;
3070 }
3071 }
3072 }
3073 for (k = 0; k < s->nb_temps; k++) {
3074 ts = &s->temps[k];
3075 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3076 && s->reg_to_temp[ts->reg] != ts) {
3077 printf("Inconsistency for temp %s:\n",
3078 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3079 fail:
3080 printf("reg state:\n");
3081 dump_regs(s);
3082 tcg_abort();
3083 }
3084 }
3085 }
3086 #endif
3087
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3089 {
3090 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3091 /* Sparc64 stack is accessed with offset of 2047 */
3092 s->current_frame_offset = (s->current_frame_offset +
3093 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3094 ~(sizeof(tcg_target_long) - 1);
3095 #endif
3096 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3097 s->frame_end) {
3098 tcg_abort();
3099 }
3100 ts->mem_offset = s->current_frame_offset;
3101 ts->mem_base = s->frame_temp;
3102 ts->mem_allocated = 1;
3103 s->current_frame_offset += sizeof(tcg_target_long);
3104 }
3105
3106 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3107
3108 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3109 mark it free; otherwise mark it dead. */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3111 {
3112 if (ts->fixed_reg) {
3113 return;
3114 }
3115 if (ts->val_type == TEMP_VAL_REG) {
3116 s->reg_to_temp[ts->reg] = NULL;
3117 }
3118 ts->val_type = (free_or_dead < 0
3119 || ts->temp_local
3120 || ts->temp_global
3121 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3122 }
3123
3124 /* Mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3126 {
3127 temp_free_or_dead(s, ts, 1);
3128 }
3129
/* Sync a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   If 'free_or_dead' is non-zero, subsequently release the temporary;
   if it is positive, the temp is dead; if it is negative, the temp
   is free. */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
3136 {
3137 if (ts->fixed_reg) {
3138 return;
3139 }
3140 if (!ts->mem_coherent) {
3141 if (!ts->mem_allocated) {
3142 temp_allocate_frame(s, ts);
3143 }
3144 switch (ts->val_type) {
3145 case TEMP_VAL_CONST:
3146 /* If we're going to free the temp immediately, then we won't
3147 require it later in a register, so attempt to store the
3148 constant to memory directly. */
3149 if (free_or_dead
3150 && tcg_out_sti(s, ts->type, ts->val,
3151 ts->mem_base->reg, ts->mem_offset)) {
3152 break;
3153 }
3154 temp_load(s, ts, tcg_target_available_regs[ts->type],
3155 allocated_regs, preferred_regs);
3156 /* fallthrough */
3157
3158 case TEMP_VAL_REG:
3159 tcg_out_st(s, ts->type, ts->reg,
3160 ts->mem_base->reg, ts->mem_offset);
3161 break;
3162
3163 case TEMP_VAL_MEM:
3164 break;
3165
3166 case TEMP_VAL_DEAD:
3167 default:
3168 tcg_abort();
3169 }
3170 ts->mem_coherent = 1;
3171 }
3172 if (free_or_dead) {
3173 temp_free_or_dead(s, ts, free_or_dead);
3174 }
3175 }
3176
3177 /* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3179 {
3180 TCGTemp *ts = s->reg_to_temp[reg];
3181 if (ts != NULL) {
3182 temp_sync(s, ts, allocated_regs, 0, -1);
3183 }
3184 }
3185
3186 /**
3187 * tcg_reg_alloc:
3188 * @required_regs: Set of registers in which we must allocate.
3189 * @allocated_regs: Set of registers which must be avoided.
3190 * @preferred_regs: Set of registers we should prefer.
3191 * @rev: True if we search the registers in "indirect" order.
3192 *
3193 * The allocated register must be in @required_regs & ~@allocated_regs,
3194 * but if we can put it in @preferred_regs we may save a move later.
3195 */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
                            TCGRegSet allocated_regs,
                            TCGRegSet preferred_regs, bool rev)
3199 {
3200 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3201 TCGRegSet reg_ct[2];
3202 const int *order;
3203
3204 reg_ct[1] = required_regs & ~allocated_regs;
3205 tcg_debug_assert(reg_ct[1] != 0);
3206 reg_ct[0] = reg_ct[1] & preferred_regs;
3207
3208 /* Skip the preferred_regs option if it cannot be satisfied,
3209 or if the preference made no difference. */
3210 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3211
3212 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3213
3214 /* Try free registers, preferences first. */
3215 for (j = f; j < 2; j++) {
3216 TCGRegSet set = reg_ct[j];
3217
3218 if (tcg_regset_single(set)) {
3219 /* One register in the set. */
3220 TCGReg reg = tcg_regset_first(set);
3221 if (s->reg_to_temp[reg] == NULL) {
3222 return reg;
3223 }
3224 } else {
3225 for (i = 0; i < n; i++) {
3226 TCGReg reg = order[i];
3227 if (s->reg_to_temp[reg] == NULL &&
3228 tcg_regset_test_reg(set, reg)) {
3229 return reg;
3230 }
3231 }
3232 }
3233 }
3234
3235 /* We must spill something. */
3236 for (j = f; j < 2; j++) {
3237 TCGRegSet set = reg_ct[j];
3238
3239 if (tcg_regset_single(set)) {
3240 /* One register in the set. */
3241 TCGReg reg = tcg_regset_first(set);
3242 tcg_reg_free(s, reg, allocated_regs);
3243 return reg;
3244 } else {
3245 for (i = 0; i < n; i++) {
3246 TCGReg reg = order[i];
3247 if (tcg_regset_test_reg(set, reg)) {
3248 tcg_reg_free(s, reg, allocated_regs);
3249 return reg;
3250 }
3251 }
3252 }
3253 }
3254
3255 tcg_abort();
3256 }
3257
3258 /* Make sure the temporary is in a register. If needed, allocate the register
3259 from DESIRED while avoiding ALLOCATED. */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3262 {
3263 TCGReg reg;
3264
3265 switch (ts->val_type) {
3266 case TEMP_VAL_REG:
3267 return;
3268 case TEMP_VAL_CONST:
3269 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3270 preferred_regs, ts->indirect_base);
3271 tcg_out_movi(s, ts->type, reg, ts->val);
3272 ts->mem_coherent = 0;
3273 break;
3274 case TEMP_VAL_MEM:
3275 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3276 preferred_regs, ts->indirect_base);
3277 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3278 ts->mem_coherent = 1;
3279 break;
3280 case TEMP_VAL_DEAD:
3281 default:
3282 tcg_abort();
3283 }
3284 ts->reg = reg;
3285 ts->val_type = TEMP_VAL_REG;
3286 s->reg_to_temp[reg] = ts;
3287 }
3288
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3292 {
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
3295 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3296 }
3297
/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3302 {
3303 int i, n;
3304
3305 for (i = 0, n = s->nb_globals; i < n; i++) {
3306 temp_save(s, &s->temps[i], allocated_regs);
3307 }
3308 }
3309
/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3314 {
3315 int i, n;
3316
3317 for (i = 0, n = s->nb_globals; i < n; i++) {
3318 TCGTemp *ts = &s->temps[i];
3319 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3320 || ts->fixed_reg
3321 || ts->mem_coherent);
3322 }
3323 }
3324
3325 /* at the end of a basic block, we assume all temporaries are dead and
3326 all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3328 {
3329 int i;
3330
3331 for (i = s->nb_globals; i < s->nb_temps; i++) {
3332 TCGTemp *ts = &s->temps[i];
3333 if (ts->temp_local) {
3334 temp_save(s, ts, allocated_regs);
3335 } else {
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
3338 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3339 }
3340 }
3341
3342 save_globals(s, allocated_regs);
3343 }
3344
3345 /*
3346 * Specialized code generation for INDEX_op_movi_*.
3347 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
3351 {
3352 /* ENV should not be modified. */
3353 tcg_debug_assert(!ots->fixed_reg);
3354
3355 /* The movi is not explicitly generated here. */
3356 if (ots->val_type == TEMP_VAL_REG) {
3357 s->reg_to_temp[ots->reg] = NULL;
3358 }
3359 ots->val_type = TEMP_VAL_CONST;
3360 ots->val = val;
3361 ots->mem_coherent = 0;
3362 if (NEED_SYNC_ARG(0)) {
3363 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3364 } else if (IS_DEAD_ARG(0)) {
3365 temp_dead(s, ots);
3366 }
3367 }
3368
static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3370 {
3371 TCGTemp *ots = arg_temp(op->args[0]);
3372 tcg_target_ulong val = op->args[1];
3373
3374 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3375 }
3376
3377 /*
3378 * Specialized code generation for INDEX_op_mov_*.
3379 */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3381 {
3382 const TCGLifeData arg_life = op->life;
3383 TCGRegSet allocated_regs, preferred_regs;
3384 TCGTemp *ts, *ots;
3385 TCGType otype, itype;
3386
3387 allocated_regs = s->reserved_regs;
3388 preferred_regs = op->output_pref[0];
3389 ots = arg_temp(op->args[0]);
3390 ts = arg_temp(op->args[1]);
3391
3392 /* ENV should not be modified. */
3393 tcg_debug_assert(!ots->fixed_reg);
3394
3395 /* Note that otype != itype for no-op truncation. */
3396 otype = ots->type;
3397 itype = ts->type;
3398
3399 if (ts->val_type == TEMP_VAL_CONST) {
3400 /* propagate constant or generate sti */
3401 tcg_target_ulong val = ts->val;
3402 if (IS_DEAD_ARG(1)) {
3403 temp_dead(s, ts);
3404 }
3405 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3406 return;
3407 }
3408
3409 /* If the source value is in memory we're going to be forced
3410 to have it in a register in order to perform the copy. Copy
3411 the SOURCE value into its own register first, that way we
3412 don't have to reload SOURCE the next time it is used. */
3413 if (ts->val_type == TEMP_VAL_MEM) {
3414 temp_load(s, ts, tcg_target_available_regs[itype],
3415 allocated_regs, preferred_regs);
3416 }
3417
3418 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3419 if (IS_DEAD_ARG(0)) {
3420 /* mov to a non-saved dead register makes no sense (even with
3421 liveness analysis disabled). */
3422 tcg_debug_assert(NEED_SYNC_ARG(0));
3423 if (!ots->mem_allocated) {
3424 temp_allocate_frame(s, ots);
3425 }
3426 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3427 if (IS_DEAD_ARG(1)) {
3428 temp_dead(s, ts);
3429 }
3430 temp_dead(s, ots);
3431 } else {
3432 if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3433 /* the mov can be suppressed */
3434 if (ots->val_type == TEMP_VAL_REG) {
3435 s->reg_to_temp[ots->reg] = NULL;
3436 }
3437 ots->reg = ts->reg;
3438 temp_dead(s, ts);
3439 } else {
3440 if (ots->val_type != TEMP_VAL_REG) {
3441 /* When allocating a new register, make sure to not spill the
3442 input one. */
3443 tcg_regset_set_reg(allocated_regs, ts->reg);
3444 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3445 allocated_regs, preferred_regs,
3446 ots->indirect_base);
3447 }
3448 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3449 /*
3450 * Cross register class move not supported.
3451 * Store the source register into the destination slot
3452 * and leave the destination temp as TEMP_VAL_MEM.
3453 */
3454 assert(!ots->fixed_reg);
                /* The store below targets the destination slot. */
                if (!ots->mem_allocated) {
                    temp_allocate_frame(s, ots);
                }
3458 tcg_out_st(s, ts->type, ts->reg,
3459 ots->mem_base->reg, ots->mem_offset);
3460 ots->mem_coherent = 1;
3461 temp_free_or_dead(s, ots, -1);
3462 return;
3463 }
3464 }
3465 ots->val_type = TEMP_VAL_REG;
3466 ots->mem_coherent = 0;
3467 s->reg_to_temp[ots->reg] = ots;
3468 if (NEED_SYNC_ARG(0)) {
3469 temp_sync(s, ots, allocated_regs, 0, 0);
3470 }
3471 }
3472 }
3473
3474 /*
3475 * Specialized code generation for INDEX_op_dup_vec.
3476 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3478 {
3479 const TCGLifeData arg_life = op->life;
3480 TCGRegSet dup_out_regs, dup_in_regs;
3481 TCGTemp *its, *ots;
3482 TCGType itype, vtype;
3483 intptr_t endian_fixup;
3484 unsigned vece;
3485 bool ok;
3486
3487 ots = arg_temp(op->args[0]);
3488 its = arg_temp(op->args[1]);
3489
3490 /* ENV should not be modified. */
3491 tcg_debug_assert(!ots->fixed_reg);
3492
3493 itype = its->type;
3494 vece = TCGOP_VECE(op);
3495 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3496
3497 if (its->val_type == TEMP_VAL_CONST) {
3498 /* Propagate constant via movi -> dupi. */
3499 tcg_target_ulong val = its->val;
3500 if (IS_DEAD_ARG(1)) {
3501 temp_dead(s, its);
3502 }
3503 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3504 return;
3505 }
3506
3507 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3508 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3509
3510 /* Allocate the output register now. */
3511 if (ots->val_type != TEMP_VAL_REG) {
3512 TCGRegSet allocated_regs = s->reserved_regs;
3513
3514 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3515 /* Make sure to not spill the input register. */
3516 tcg_regset_set_reg(allocated_regs, its->reg);
3517 }
3518 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3519 op->output_pref[0], ots->indirect_base);
3520 ots->val_type = TEMP_VAL_REG;
3521 ots->mem_coherent = 0;
3522 s->reg_to_temp[ots->reg] = ots;
3523 }
3524
3525 switch (its->val_type) {
3526 case TEMP_VAL_REG:
3527 /*
         * The dup constraints must be broad, covering all possible VECE.
3529 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3530 * to fail, indicating that extra moves are required for that case.
3531 */
3532 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3533 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3534 goto done;
3535 }
3536 /* Try again from memory or a vector input register. */
3537 }
3538 if (!its->mem_coherent) {
3539 /*
3540 * The input register is not synced, and so an extra store
3541 * would be required to use memory. Attempt an integer-vector
3542 * register move first. We do not have a TCGRegSet for this.
3543 */
3544 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3545 break;
3546 }
3547 /* Sync the temp back to its slot and load from there. */
3548 temp_sync(s, its, s->reserved_regs, 0, 0);
3549 }
3550 /* fall through */
3551
3552 case TEMP_VAL_MEM:
3553 #ifdef HOST_WORDS_BIGENDIAN
3554 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3555 endian_fixup -= 1 << vece;
3556 #else
3557 endian_fixup = 0;
3558 #endif
3559 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3560 its->mem_offset + endian_fixup)) {
3561 goto done;
3562 }
3563 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3564 break;
3565
3566 default:
3567 g_assert_not_reached();
3568 }
3569
3570 /* We now have a vector input register, so dup must succeed. */
3571 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3572 tcg_debug_assert(ok);
3573
3574 done:
3575 if (IS_DEAD_ARG(1)) {
3576 temp_dead(s, its);
3577 }
3578 if (NEED_SYNC_ARG(0)) {
3579 temp_sync(s, ots, s->reserved_regs, 0, 0);
3580 }
3581 if (IS_DEAD_ARG(0)) {
3582 temp_dead(s, ots);
3583 }
3584 }
3585
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3587 {
3588 const TCGLifeData arg_life = op->life;
3589 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3590 TCGRegSet i_allocated_regs;
3591 TCGRegSet o_allocated_regs;
3592 int i, k, nb_iargs, nb_oargs;
3593 TCGReg reg;
3594 TCGArg arg;
3595 const TCGArgConstraint *arg_ct;
3596 TCGTemp *ts;
3597 TCGArg new_args[TCG_MAX_OP_ARGS];
3598 int const_args[TCG_MAX_OP_ARGS];
3599
3600 nb_oargs = def->nb_oargs;
3601 nb_iargs = def->nb_iargs;
3602
3603 /* copy constants */
3604 memcpy(new_args + nb_oargs + nb_iargs,
3605 op->args + nb_oargs + nb_iargs,
3606 sizeof(TCGArg) * def->nb_cargs);
3607
3608 i_allocated_regs = s->reserved_regs;
3609 o_allocated_regs = s->reserved_regs;
3610
3611 /* satisfy input constraints */
3612 for (k = 0; k < nb_iargs; k++) {
3613 TCGRegSet i_preferred_regs, o_preferred_regs;
3614
3615 i = def->sorted_args[nb_oargs + k];
3616 arg = op->args[i];
3617 arg_ct = &def->args_ct[i];
3618 ts = arg_temp(arg);
3619
3620 if (ts->val_type == TEMP_VAL_CONST
3621 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3622 /* constant is OK for instruction */
3623 const_args[i] = 1;
3624 new_args[i] = ts->val;
3625 continue;
3626 }
3627
3628 i_preferred_regs = o_preferred_regs = 0;
3629 if (arg_ct->ct & TCG_CT_IALIAS) {
3630 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3631 if (ts->fixed_reg) {
3632 /* if fixed register, we must allocate a new register
3633 if the alias is not the same register */
3634 if (arg != op->args[arg_ct->alias_index]) {
3635 goto allocate_in_reg;
3636 }
3637 } else {
3638 /* if the input is aliased to an output and if it is
3639 not dead after the instruction, we must allocate
3640 a new register and move it */
3641 if (!IS_DEAD_ARG(i)) {
3642 goto allocate_in_reg;
3643 }
3644
3645 /* check if the current register has already been allocated
3646 for another input aliased to an output */
3647 if (ts->val_type == TEMP_VAL_REG) {
3648 int k2, i2;
3649 reg = ts->reg;
3650 for (k2 = 0 ; k2 < k ; k2++) {
3651 i2 = def->sorted_args[nb_oargs + k2];
3652 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3653 reg == new_args[i2]) {
3654 goto allocate_in_reg;
3655 }
3656 }
3657 }
3658 i_preferred_regs = o_preferred_regs;
3659 }
3660 }
3661
3662 temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3663 reg = ts->reg;
3664
3665 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3666 /* nothing to do : the constraint is satisfied */
3667 } else {
3668 allocate_in_reg:
3669 /* allocate a new register matching the constraint
3670 and move the temporary register into it */
3671 temp_load(s, ts, tcg_target_available_regs[ts->type],
3672 i_allocated_regs, 0);
3673 reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3674 o_preferred_regs, ts->indirect_base);
3675 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3676 /*
3677 * Cross register class move not supported. Sync the
3678 * temp back to its slot and load from there.
3679 */
3680 temp_sync(s, ts, i_allocated_regs, 0, 0);
3681 tcg_out_ld(s, ts->type, reg,
3682 ts->mem_base->reg, ts->mem_offset);
3683 }
3684 }
3685 new_args[i] = reg;
3686 const_args[i] = 0;
3687 tcg_regset_set_reg(i_allocated_regs, reg);
3688 }
3689
3690 /* mark dead temporaries and free the associated registers */
3691 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3692 if (IS_DEAD_ARG(i)) {
3693 temp_dead(s, arg_temp(op->args[i]));
3694 }
3695 }
3696
3697 if (def->flags & TCG_OPF_BB_END) {
3698 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3699 } else {
3700 if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list? */
3702 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3703 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3704 tcg_reg_free(s, i, i_allocated_regs);
3705 }
3706 }
3707 }
3708 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3709 /* sync globals if the op has side effects and might trigger
3710 an exception. */
3711 sync_globals(s, i_allocated_regs);
3712 }
3713
3714 /* satisfy the output constraints */
3715 for(k = 0; k < nb_oargs; k++) {
3716 i = def->sorted_args[k];
3717 arg = op->args[i];
3718 arg_ct = &def->args_ct[i];
3719 ts = arg_temp(arg);
3720
3721 /* ENV should not be modified. */
3722 tcg_debug_assert(!ts->fixed_reg);
3723
3724 if ((arg_ct->ct & TCG_CT_ALIAS)
3725 && !const_args[arg_ct->alias_index]) {
3726 reg = new_args[arg_ct->alias_index];
3727 } else if (arg_ct->ct & TCG_CT_NEWREG) {
3728 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3729 i_allocated_regs | o_allocated_regs,
3730 op->output_pref[k], ts->indirect_base);
3731 } else {
3732 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3733 op->output_pref[k], ts->indirect_base);
3734 }
3735 tcg_regset_set_reg(o_allocated_regs, reg);
3736 if (ts->val_type == TEMP_VAL_REG) {
3737 s->reg_to_temp[ts->reg] = NULL;
3738 }
3739 ts->val_type = TEMP_VAL_REG;
3740 ts->reg = reg;
3741 /*
3742 * Temp value is modified, so the value kept in memory is
3743 * potentially not the same.
3744 */
3745 ts->mem_coherent = 0;
3746 s->reg_to_temp[reg] = ts;
3747 new_args[i] = reg;
3748 }
3749 }
3750
3751 /* emit instruction */
3752 if (def->flags & TCG_OPF_VECTOR) {
3753 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3754 new_args, const_args);
3755 } else {
3756 tcg_out_op(s, op->opc, new_args, const_args);
3757 }
3758
3759 /* move the outputs in the correct register if needed */
3760 for (i = 0; i < nb_oargs; i++) {
3761 ts = arg_temp(op->args[i]);
3762
3763 /* ENV should not be modified. */
3764 tcg_debug_assert(!ts->fixed_reg);
3765
3766 if (NEED_SYNC_ARG(i)) {
3767 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3768 } else if (IS_DEAD_ARG(i)) {
3769 temp_dead(s, ts);
3770 }
3771 }
3772 }
3773
3774 #ifdef TCG_TARGET_STACK_GROWSUP
3775 #define STACK_DIR(x) (-(x))
3776 #else
3777 #define STACK_DIR(x) (x)
3778 #endif
3779
3780 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3781 {
3782 const int nb_oargs = TCGOP_CALLO(op);
3783 const int nb_iargs = TCGOP_CALLI(op);
3784 const TCGLifeData arg_life = op->life;
3785 int flags, nb_regs, i;
3786 TCGReg reg;
3787 TCGArg arg;
3788 TCGTemp *ts;
3789 intptr_t stack_offset;
3790 size_t call_stack_size;
3791 tcg_insn_unit *func_addr;
3792 int allocate_args;
3793 TCGRegSet allocated_regs;
3794
3795 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3796 flags = op->args[nb_oargs + nb_iargs + 1];
3797
3798 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3799 if (nb_regs > nb_iargs) {
3800 nb_regs = nb_iargs;
3801 }
3802
3803 /* assign stack slots first */
3804 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3805 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3806 ~(TCG_TARGET_STACK_ALIGN - 1);
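/*
 * The expression above is the usual round-up-to-alignment idiom:
 * e.g. with TCG_TARGET_STACK_ALIGN == 16, three stack words on a
 * 64-bit host (24 bytes) reserve 32 bytes.
 */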
3807 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3808 if (allocate_args) {
3809 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3810 preallocate call stack */
3811 tcg_abort();
3812 }
3813
3814 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3815 for (i = nb_regs; i < nb_iargs; i++) {
3816 arg = op->args[nb_oargs + i];
3817 #ifdef TCG_TARGET_STACK_GROWSUP
3818 stack_offset -= sizeof(tcg_target_long);
3819 #endif
3820 if (arg != TCG_CALL_DUMMY_ARG) {
3821 ts = arg_temp(arg);
3822 temp_load(s, ts, tcg_target_available_regs[ts->type],
3823 s->reserved_regs, 0);
3824 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3825 }
3826 #ifndef TCG_TARGET_STACK_GROWSUP
3827 stack_offset += sizeof(tcg_target_long);
3828 #endif
3829 }
3830
3831 /* assign input registers */
3832 allocated_regs = s->reserved_regs;
3833 for (i = 0; i < nb_regs; i++) {
3834 arg = op->args[nb_oargs + i];
3835 if (arg != TCG_CALL_DUMMY_ARG) {
3836 ts = arg_temp(arg);
3837 reg = tcg_target_call_iarg_regs[i];
3838
3839 if (ts->val_type == TEMP_VAL_REG) {
3840 if (ts->reg != reg) {
3841 tcg_reg_free(s, reg, allocated_regs);
3842 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3843 /*
3844 * Cross register class move not supported. Sync the
3845 * temp back to its slot and load from there.
3846 */
3847 temp_sync(s, ts, allocated_regs, 0, 0);
3848 tcg_out_ld(s, ts->type, reg,
3849 ts->mem_base->reg, ts->mem_offset);
3850 }
3851 }
3852 } else {
3853 TCGRegSet arg_set = 0;
3854
3855 tcg_reg_free(s, reg, allocated_regs);
3856 tcg_regset_set_reg(arg_set, reg);
3857 temp_load(s, ts, arg_set, allocated_regs, 0);
3858 }
3859
3860 tcg_regset_set_reg(allocated_regs, reg);
3861 }
3862 }
3863
3864 /* mark dead temporaries and free the associated registers */
3865 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3866 if (IS_DEAD_ARG(i)) {
3867 temp_dead(s, arg_temp(op->args[i]));
3868 }
3869 }
3870
3871 /* clobber call registers */
3872 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3873 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3874 tcg_reg_free(s, i, allocated_regs);
3875 }
3876 }
3877
3878 /* Save globals if they might be written by the helper, sync them if
3879 they might be read. */
3880 if (flags & TCG_CALL_NO_READ_GLOBALS) {
3881 /* Nothing to do */
3882 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3883 sync_globals(s, allocated_regs);
3884 } else {
3885 save_globals(s, allocated_regs);
3886 }
3887
3888 tcg_out_call(s, func_addr);
3889
3890 /* assign output registers and emit moves if needed */
3891 for (i = 0; i < nb_oargs; i++) {
3892 arg = op->args[i];
3893 ts = arg_temp(arg);
3894
3895 /* ENV should not be modified. */
3896 tcg_debug_assert(!ts->fixed_reg);
3897
3898 reg = tcg_target_call_oarg_regs[i];
3899 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3900 if (ts->val_type == TEMP_VAL_REG) {
3901 s->reg_to_temp[ts->reg] = NULL;
3902 }
3903 ts->val_type = TEMP_VAL_REG;
3904 ts->reg = reg;
3905 ts->mem_coherent = 0;
3906 s->reg_to_temp[reg] = ts;
3907 if (NEED_SYNC_ARG(i)) {
3908 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3909 } else if (IS_DEAD_ARG(i)) {
3910 temp_dead(s, ts);
3911 }
3912 }
3913 }
3914
3915 #ifdef CONFIG_PROFILER
3916
3917 /* avoid copy/paste errors */
3918 #define PROF_ADD(to, from, field) \
3919 do { \
3920 (to)->field += atomic_read(&((from)->field)); \
3921 } while (0)
3922
3923 #define PROF_MAX(to, from, field) \
3924 do { \
3925 typeof((from)->field) val__ = atomic_read(&((from)->field)); \
3926 if (val__ > (to)->field) { \
3927 (to)->field = val__; \
3928 } \
3929 } while (0)
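/*
 * For example, PROF_ADD(prof, orig, tb_count) expands to
 *     (prof)->tb_count += atomic_read(&((orig)->tb_count));
 * while PROF_MAX keeps the larger of the two values instead.
 */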
3930
3931 /* Pass in a zeroed @prof */
3932 static inline
3933 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3934 {
3935 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3936 unsigned int i;
3937
3938 for (i = 0; i < n_ctxs; i++) {
3939 TCGContext *s = atomic_read(&tcg_ctxs[i]);
3940 const TCGProfile *orig = &s->prof;
3941
3942 if (counters) {
3943 PROF_ADD(prof, orig, cpu_exec_time);
3944 PROF_ADD(prof, orig, tb_count1);
3945 PROF_ADD(prof, orig, tb_count);
3946 PROF_ADD(prof, orig, op_count);
3947 PROF_MAX(prof, orig, op_count_max);
3948 PROF_ADD(prof, orig, temp_count);
3949 PROF_MAX(prof, orig, temp_count_max);
3950 PROF_ADD(prof, orig, del_op_count);
3951 PROF_ADD(prof, orig, code_in_len);
3952 PROF_ADD(prof, orig, code_out_len);
3953 PROF_ADD(prof, orig, search_out_len);
3954 PROF_ADD(prof, orig, interm_time);
3955 PROF_ADD(prof, orig, code_time);
3956 PROF_ADD(prof, orig, la_time);
3957 PROF_ADD(prof, orig, opt_time);
3958 PROF_ADD(prof, orig, restore_count);
3959 PROF_ADD(prof, orig, restore_time);
3960 }
3961 if (table) {
3962 int i;
3963
3964 for (i = 0; i < NB_OPS; i++) {
3965 PROF_ADD(prof, orig, table_op_count[i]);
3966 }
3967 }
3968 }
3969 }
3970
3971 #undef PROF_ADD
3972 #undef PROF_MAX
3973
3974 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3975 {
3976 tcg_profile_snapshot(prof, true, false);
3977 }
3978
3979 static void tcg_profile_snapshot_table(TCGProfile *prof)
3980 {
3981 tcg_profile_snapshot(prof, false, true);
3982 }
3983
3984 void tcg_dump_op_count(void)
3985 {
3986 TCGProfile prof = {};
3987 int i;
3988
3989 tcg_profile_snapshot_table(&prof);
3990 for (i = 0; i < NB_OPS; i++) {
3991 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3992 prof.table_op_count[i]);
3993 }
3994 }
3995
3996 int64_t tcg_cpu_exec_time(void)
3997 {
3998 unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3999 unsigned int i;
4000 int64_t ret = 0;
4001
4002 for (i = 0; i < n_ctxs; i++) {
4003 const TCGContext *s = atomic_read(&tcg_ctxs[i]);
4004 const TCGProfile *prof = &s->prof;
4005
4006 ret += atomic_read(&prof->cpu_exec_time);
4007 }
4008 return ret;
4009 }
4010 #else
4011 void tcg_dump_op_count(void)
4012 {
4013 qemu_printf("[TCG profiler not compiled]\n");
4014 }
4015
4016 int64_t tcg_cpu_exec_time(void)
4017 {
4018 error_report("%s: TCG profiler not compiled", __func__);
4019 exit(EXIT_FAILURE);
4020 }
4021 #endif
4022
4023
4024 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4025 {
4026 #ifdef CONFIG_PROFILER
4027 TCGProfile *prof = &s->prof;
4028 #endif
4029 int i, num_insns;
4030 TCGOp *op;
4031
4032 #ifdef CONFIG_PROFILER
4033 {
4034 int n = 0;
4035
4036 QTAILQ_FOREACH(op, &s->ops, link) {
4037 n++;
4038 }
4039 atomic_set(&prof->op_count, prof->op_count + n);
4040 if (n > prof->op_count_max) {
4041 atomic_set(&prof->op_count_max, n);
4042 }
4043
4044 n = s->nb_temps;
4045 atomic_set(&prof->temp_count, prof->temp_count + n);
4046 if (n > prof->temp_count_max) {
4047 atomic_set(&prof->temp_count_max, n);
4048 }
4049 }
4050 #endif
4051
4052 #ifdef DEBUG_DISAS
4053 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4054 && qemu_log_in_addr_range(tb->pc))) {
4055 FILE *logfile = qemu_log_lock();
4056 qemu_log("OP:\n");
4057 tcg_dump_ops(s, false);
4058 qemu_log("\n");
4059 qemu_log_unlock(logfile);
4060 }
4061 #endif
4062
4063 #ifdef CONFIG_DEBUG_TCG
4064 /* Ensure all labels referenced have been emitted. */
4065 {
4066 TCGLabel *l;
4067 bool error = false;
4068
4069 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4070 if (unlikely(!l->present) && l->refs) {
4071 qemu_log_mask(CPU_LOG_TB_OP,
4072 "$L%d referenced but not present.\n", l->id);
4073 error = true;
4074 }
4075 }
4076 assert(!error);
4077 }
4078 #endif
4079
4080 #ifdef CONFIG_PROFILER
4081 atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4082 #endif
4083
4084 #ifdef USE_TCG_OPTIMIZATIONS
4085 tcg_optimize(s);
4086 #endif
4087
4088 #ifdef CONFIG_PROFILER
4089 atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4090 atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4091 #endif
4092
4093 reachable_code_pass(s);
4094 liveness_pass_1(s);
4095
4096 if (s->nb_indirects > 0) {
4097 #ifdef DEBUG_DISAS
4098 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4099 && qemu_log_in_addr_range(tb->pc))) {
4100 FILE *logfile = qemu_log_lock();
4101 qemu_log("OP before indirect lowering:\n");
4102 tcg_dump_ops(s, false);
4103 qemu_log("\n");
4104 qemu_log_unlock(logfile);
4105 }
4106 #endif
4107 /* Replace indirect temps with direct temps. */
4108 if (liveness_pass_2(s)) {
4109 /* If changes were made, re-run liveness. */
4110 liveness_pass_1(s);
4111 }
4112 }
4113
4114 #ifdef CONFIG_PROFILER
4115 atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4116 #endif
4117
4118 #ifdef DEBUG_DISAS
4119 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4120 && qemu_log_in_addr_range(tb->pc))) {
4121 FILE *logfile = qemu_log_lock();
4122 qemu_log("OP after optimization and liveness analysis:\n");
4123 tcg_dump_ops(s, true);
4124 qemu_log("\n");
4125 qemu_log_unlock(logfile);
4126 }
4127 #endif
4128
4129 tcg_reg_alloc_start(s);
4130
4131 s->code_buf = tb->tc.ptr;
4132 s->code_ptr = tb->tc.ptr;
4133
4134 #ifdef TCG_TARGET_NEED_LDST_LABELS
4135 QSIMPLEQ_INIT(&s->ldst_labels);
4136 #endif
4137 #ifdef TCG_TARGET_NEED_POOL_LABELS
4138 s->pool_labels = NULL;
4139 #endif
4140
4141 num_insns = -1;
4142 QTAILQ_FOREACH(op, &s->ops, link) {
4143 TCGOpcode opc = op->opc;
4144
4145 #ifdef CONFIG_PROFILER
4146 atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4147 #endif
4148
4149 switch (opc) {
4150 case INDEX_op_mov_i32:
4151 case INDEX_op_mov_i64:
4152 case INDEX_op_mov_vec:
4153 tcg_reg_alloc_mov(s, op);
4154 break;
4155 case INDEX_op_movi_i32:
4156 case INDEX_op_movi_i64:
4157 case INDEX_op_dupi_vec:
4158 tcg_reg_alloc_movi(s, op);
4159 break;
4160 case INDEX_op_dup_vec:
4161 tcg_reg_alloc_dup(s, op);
4162 break;
4163 case INDEX_op_insn_start:
4164 if (num_insns >= 0) {
4165 size_t off = tcg_current_code_size(s);
4166 s->gen_insn_end_off[num_insns] = off;
4167 /* Assert that we do not overflow our stored offset. */
4168 assert(s->gen_insn_end_off[num_insns] == off);
4169 }
4170 num_insns++;
4171 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4172 target_ulong a;
4173 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4174 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4175 #else
4176 a = op->args[i];
4177 #endif
4178 s->gen_insn_data[num_insns][i] = a;
4179 }
4180 break;
4181 case INDEX_op_discard:
4182 temp_dead(s, arg_temp(op->args[0]));
4183 break;
4184 case INDEX_op_set_label:
4185 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4186 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4187 break;
4188 case INDEX_op_call:
4189 tcg_reg_alloc_call(s, op);
4190 break;
4191 default:
4192 /* Sanity check that we've not introduced any unhandled opcodes. */
4193 tcg_debug_assert(tcg_op_supported(opc));
4194 /* Note: it would be faster to have specialized register
4195 allocator functions for the most common argument
4196 patterns. */
4197 tcg_reg_alloc_op(s, op);
4198 break;
4199 }
4200 #ifdef CONFIG_DEBUG_TCG
4201 check_regs(s);
4202 #endif
4203 /* Test for (pending) buffer overflow. The assumption is that any
4204 one operation beginning below the high water mark cannot overrun
4205 the buffer completely. Thus we can test for overflow after
4206 generating code without having to check during generation. */
4207 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4208 return -1;
4209 }
4210 /* Test for TB overflow, as seen by gen_insn_end_off. */
4211 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4212 return -2;
4213 }
4214 }
4215 tcg_debug_assert(num_insns >= 0);
4216 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4217
4218 /* Generate TB finalization at the end of block */
4219 #ifdef TCG_TARGET_NEED_LDST_LABELS
4220 i = tcg_out_ldst_finalize(s);
4221 if (i < 0) {
4222 return i;
4223 }
4224 #endif
4225 #ifdef TCG_TARGET_NEED_POOL_LABELS
4226 i = tcg_out_pool_finalize(s);
4227 if (i < 0) {
4228 return i;
4229 }
4230 #endif
4231 if (!tcg_resolve_relocs(s)) {
4232 return -2;
4233 }
4234
4235 /* flush instruction cache */
4236 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4237
4238 return tcg_current_code_size(s);
4239 }
4240
4241 #ifdef CONFIG_PROFILER
4242 void tcg_dump_info(void)
4243 {
4244 TCGProfile prof = {};
4245 const TCGProfile *s;
4246 int64_t tb_count;
4247 int64_t tb_div_count;
4248 int64_t tot;
4249
4250 tcg_profile_snapshot_counters(&prof);
4251 s = &prof;
4252 tb_count = s->tb_count;
4253 tb_div_count = tb_count ? tb_count : 1;
4254 tot = s->interm_time + s->code_time;
4255
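/*
 * The "2.4 GHz" in the report below is only an assumed reference
 * frequency for converting the raw tick count into seconds.
 */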
4256 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4257 tot, tot / 2.4e9);
4258 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4259 " %0.1f%%)\n",
4260 tb_count, s->tb_count1 - tb_count,
4261 (double)(s->tb_count1 - s->tb_count)
4262 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4263 qemu_printf("avg ops/TB %0.1f max=%d\n",
4264 (double)s->op_count / tb_div_count, s->op_count_max);
4265 qemu_printf("deleted ops/TB %0.2f\n",
4266 (double)s->del_op_count / tb_div_count);
4267 qemu_printf("avg temps/TB %0.2f max=%d\n",
4268 (double)s->temp_count / tb_div_count, s->temp_count_max);
4269 qemu_printf("avg host code/TB %0.1f\n",
4270 (double)s->code_out_len / tb_div_count);
4271 qemu_printf("avg search data/TB %0.1f\n",
4272 (double)s->search_out_len / tb_div_count);
4273
4274 qemu_printf("cycles/op %0.1f\n",
4275 s->op_count ? (double)tot / s->op_count : 0);
4276 qemu_printf("cycles/in byte %0.1f\n",
4277 s->code_in_len ? (double)tot / s->code_in_len : 0);
4278 qemu_printf("cycles/out byte %0.1f\n",
4279 s->code_out_len ? (double)tot / s->code_out_len : 0);
4280 qemu_printf("cycles/search byte %0.1f\n",
4281 s->search_out_len ? (double)tot / s->search_out_len : 0);
4282 if (tot == 0) {
4283 tot = 1;
4284 }
4285 qemu_printf(" gen_interm time %0.1f%%\n",
4286 (double)s->interm_time / tot * 100.0);
4287 qemu_printf(" gen_code time %0.1f%%\n",
4288 (double)s->code_time / tot * 100.0);
4289 qemu_printf("optim./code time %0.1f%%\n",
4290 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4291 * 100.0);
4292 qemu_printf("liveness/code time %0.1f%%\n",
4293 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4294 qemu_printf("cpu_restore count %" PRId64 "\n",
4295 s->restore_count);
4296 qemu_printf(" avg cycles %0.1f\n",
4297 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4298 }
4299 #else
4300 void tcg_dump_info(void)
4301 {
4302 qemu_printf("[TCG profiler not compiled]\n");
4303 }
4304 #endif
4305
4306 #ifdef ELF_HOST_MACHINE
4307 /* In order to use this feature, the backend needs to do three things:
4308
4309 (1) Define ELF_HOST_MACHINE to indicate both what value to
4310 put into the ELF image and to indicate support for the feature.
4311
4312 (2) Define tcg_register_jit. This should create a buffer containing
4313 the contents of a .debug_frame section that describes the post-
4314 prologue unwind info for the tcg machine.
4315
4316 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4317 */
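/*
 * An illustrative sketch (kept out of compilation) of the backend side of
 * that contract. The DebugFrame layout, the data_align value, and the
 * return_column below are hypothetical placeholders; each real
 * tcg-target.inc.c supplies host-specific CIE/FDE contents, including the
 * DW_CFA opcode bytes omitted here.
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];       /* DW_CFA_def_cfa: how to find the CFA */
    uint8_t fde_reg_ofs[4];       /* DW_CFA_offset: where regs were saved */
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4,  /* length after .len member */
    .h.cie.id = -1,                          /* marks this as a CIE */
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,                /* sleb128 -8 (hypothetical) */
    .h.cie.return_column = 16,               /* hypothetical RA column */

    /* Total FDE size does not include the .len member. */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    /* .fde_def_cfa and .fde_reg_ofs bytes omitted: host specific.
       func_start and func_len are patched by tcg_register_jit_int. */
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif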
4318
4319 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4320 typedef enum {
4321 JIT_NOACTION = 0,
4322 JIT_REGISTER_FN,
4323 JIT_UNREGISTER_FN
4324 } jit_actions_t;
4325
4326 struct jit_code_entry {
4327 struct jit_code_entry *next_entry;
4328 struct jit_code_entry *prev_entry;
4329 const void *symfile_addr;
4330 uint64_t symfile_size;
4331 };
4332
4333 struct jit_descriptor {
4334 uint32_t version;
4335 uint32_t action_flag;
4336 struct jit_code_entry *relevant_entry;
4337 struct jit_code_entry *first_entry;
4338 };
4339
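/*
 * Per the GDB JIT interface, the debugger plants a breakpoint on this
 * function; calling it after updating __jit_debug_descriptor is what
 * notifies an attached GDB. The noinline attribute and the empty asm
 * keep the call and the symbol from being optimized away.
 */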
4340 void __jit_debug_register_code(void) __attribute__((noinline));
4341 void __jit_debug_register_code(void)
4342 {
4343 asm("");
4344 }
4345
4346 /* Must statically initialize the version, because GDB may check
4347 the version before we can set it. */
4348 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4349
4350 /* End GDB interface. */
4351
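/*
 * Return the offset of @str within @strtab. This assumes @str is
 * present and loops forever otherwise; all callers pass literals that
 * appear in img->str below (e.g. ".text", which lives at offset 1).
 */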
4352 static int find_string(const char *strtab, const char *str)
4353 {
4354 const char *p = strtab + 1;
4355
4356 while (1) {
4357 if (strcmp(p, str) == 0) {
4358 return p - strtab;
4359 }
4360 p += strlen(p) + 1;
4361 }
4362 }
4363
4364 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4365 const void *debug_frame,
4366 size_t debug_frame_size)
4367 {
4368 struct __attribute__((packed)) DebugInfo {
4369 uint32_t len;
4370 uint16_t version;
4371 uint32_t abbrev;
4372 uint8_t ptr_size;
4373 uint8_t cu_die;
4374 uint16_t cu_lang;
4375 uintptr_t cu_low_pc;
4376 uintptr_t cu_high_pc;
4377 uint8_t fn_die;
4378 char fn_name[16];
4379 uintptr_t fn_low_pc;
4380 uintptr_t fn_high_pc;
4381 uint8_t cu_eoc;
4382 };
4383
4384 struct ElfImage {
4385 ElfW(Ehdr) ehdr;
4386 ElfW(Phdr) phdr;
4387 ElfW(Shdr) shdr[7];
4388 ElfW(Sym) sym[2];
4389 struct DebugInfo di;
4390 uint8_t da[24];
4391 char str[80];
4392 };
4393
4394 struct ElfImage *img;
4395
4396 static const struct ElfImage img_template = {
4397 .ehdr = {
4398 .e_ident[EI_MAG0] = ELFMAG0,
4399 .e_ident[EI_MAG1] = ELFMAG1,
4400 .e_ident[EI_MAG2] = ELFMAG2,
4401 .e_ident[EI_MAG3] = ELFMAG3,
4402 .e_ident[EI_CLASS] = ELF_CLASS,
4403 .e_ident[EI_DATA] = ELF_DATA,
4404 .e_ident[EI_VERSION] = EV_CURRENT,
4405 .e_type = ET_EXEC,
4406 .e_machine = ELF_HOST_MACHINE,
4407 .e_version = EV_CURRENT,
4408 .e_phoff = offsetof(struct ElfImage, phdr),
4409 .e_shoff = offsetof(struct ElfImage, shdr),
4410 .e_ehsize = sizeof(ElfW(Ehdr)),
4411 .e_phentsize = sizeof(ElfW(Phdr)),
4412 .e_phnum = 1,
4413 .e_shentsize = sizeof(ElfW(Shdr)),
4414 .e_shnum = ARRAY_SIZE(img->shdr),
4415 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4416 #ifdef ELF_HOST_FLAGS
4417 .e_flags = ELF_HOST_FLAGS,
4418 #endif
4419 #ifdef ELF_OSABI
4420 .e_ident[EI_OSABI] = ELF_OSABI,
4421 #endif
4422 },
4423 .phdr = {
4424 .p_type = PT_LOAD,
4425 .p_flags = PF_X,
4426 },
4427 .shdr = {
4428 [0] = { .sh_type = SHT_NULL },
4429 /* Trick: The contents of code_gen_buffer are not present in
4430 this fake ELF file; that got allocated elsewhere. Therefore
4431 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4432 will not look for contents. We can record any address. */
4433 [1] = { /* .text */
4434 .sh_type = SHT_NOBITS,
4435 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4436 },
4437 [2] = { /* .debug_info */
4438 .sh_type = SHT_PROGBITS,
4439 .sh_offset = offsetof(struct ElfImage, di),
4440 .sh_size = sizeof(struct DebugInfo),
4441 },
4442 [3] = { /* .debug_abbrev */
4443 .sh_type = SHT_PROGBITS,
4444 .sh_offset = offsetof(struct ElfImage, da),
4445 .sh_size = sizeof(img->da),
4446 },
4447 [4] = { /* .debug_frame */
4448 .sh_type = SHT_PROGBITS,
4449 .sh_offset = sizeof(struct ElfImage),
4450 },
4451 [5] = { /* .symtab */
4452 .sh_type = SHT_SYMTAB,
4453 .sh_offset = offsetof(struct ElfImage, sym),
4454 .sh_size = sizeof(img->sym),
4455 .sh_info = 1,
4456 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4457 .sh_entsize = sizeof(ElfW(Sym)),
4458 },
4459 [6] = { /* .strtab */
4460 .sh_type = SHT_STRTAB,
4461 .sh_offset = offsetof(struct ElfImage, str),
4462 .sh_size = sizeof(img->str),
4463 }
4464 },
4465 .sym = {
4466 [1] = { /* code_gen_buffer */
4467 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4468 .st_shndx = 1,
4469 }
4470 },
4471 .di = {
4472 .len = sizeof(struct DebugInfo) - 4,
4473 .version = 2,
4474 .ptr_size = sizeof(void *),
4475 .cu_die = 1,
4476 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4477 .fn_die = 2,
4478 .fn_name = "code_gen_buffer"
4479 },
4480 .da = {
4481 1, /* abbrev number (the cu) */
4482 0x11, 1, /* DW_TAG_compile_unit, has children */
4483 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4484 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4485 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4486 0, 0, /* end of abbrev */
4487 2, /* abbrev number (the fn) */
4488 0x2e, 0, /* DW_TAG_subprogram, no children */
4489 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4490 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4491 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4492 0, 0, /* end of abbrev */
4493 0 /* no more abbrev */
4494 },
4495 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4496 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4497 };
4498
4499 /* We only need a single jit entry; statically allocate it. */
4500 static struct jit_code_entry one_entry;
4501
4502 uintptr_t buf = (uintptr_t)buf_ptr;
4503 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4504 DebugFrameHeader *dfh;
4505
4506 img = g_malloc(img_size);
4507 *img = img_template;
4508
4509 img->phdr.p_vaddr = buf;
4510 img->phdr.p_paddr = buf;
4511 img->phdr.p_memsz = buf_size;
4512
4513 img->shdr[1].sh_name = find_string(img->str, ".text");
4514 img->shdr[1].sh_addr = buf;
4515 img->shdr[1].sh_size = buf_size;
4516
4517 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4518 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4519
4520 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4521 img->shdr[4].sh_size = debug_frame_size;
4522
4523 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4524 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4525
4526 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4527 img->sym[1].st_value = buf;
4528 img->sym[1].st_size = buf_size;
4529
4530 img->di.cu_low_pc = buf;
4531 img->di.cu_high_pc = buf + buf_size;
4532 img->di.fn_low_pc = buf;
4533 img->di.fn_high_pc = buf + buf_size;
4534
4535 dfh = (DebugFrameHeader *)(img + 1);
4536 memcpy(dfh, debug_frame, debug_frame_size);
4537 dfh->fde.func_start = buf;
4538 dfh->fde.func_len = buf_size;
4539
4540 #ifdef DEBUG_JIT
4541 /* Enable this block to debug creation of the ELF image file.
4542 The result can be inspected with readelf, objdump, or similar tools. */
4543 {
4544 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4545 if (f) {
4546 if (fwrite(img, img_size, 1, f) != 1) {
4547 /* Avoid an unused-result warning for fwrite; a failed write only affects this debug dump. */
4548 }
4549 fclose(f);
4550 }
4551 }
4552 #endif
4553
4554 one_entry.symfile_addr = img;
4555 one_entry.symfile_size = img_size;
4556
4557 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4558 __jit_debug_descriptor.relevant_entry = &one_entry;
4559 __jit_debug_descriptor.first_entry = &one_entry;
4560 __jit_debug_register_code();
4561 }
4562 #else
4563 /* No support for the feature. Provide the entry point expected by exec.c,
4564 and implement the internal function we declared earlier. */
4565
4566 static void tcg_register_jit_int(void *buf, size_t size,
4567 const void *debug_frame,
4568 size_t debug_frame_size)
4569 {
4570 }
4571
4572 void tcg_register_jit(void *buf, size_t buf_size)
4573 {
4574 }
4575 #endif /* ELF_HOST_MACHINE */
4576
4577 #if !TCG_TARGET_MAYBE_vec
4578 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4579 {
4580 g_assert_not_reached();
4581 }
4582 #endif
4583