xref: /qemu/tcg/tcg.c (revision 2e8f72ac)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
45 
46 #include "exec/exec-all.h"
47 
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
51 
52 #include "tcg/tcg-op.h"
53 
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS  ELFCLASS32
56 #else
57 # define ELF_CLASS  ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA   ELFDATA2MSB
61 #else
62 # define ELF_DATA   ELFDATA2LSB
63 #endif
64 
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75                         intptr_t value, intptr_t addend);
76 
/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF .debug_frame Common Information Entry header (shared CFI state). */
typedef struct {
    /* aligned so that the following uintptr_t fields stay naturally aligned */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];       /* NUL-terminated augmentation string */
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* DWARF Frame Description Entry header: one per described code range. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;        /* offset back to the governing CIE */
    uintptr_t func_start;       /* start address of the covered code */
    uintptr_t func_len;         /* length in bytes of the covered code */
} DebugFrameFDEHeader;

/* One CIE immediately followed by one FDE, as handed to the debugger. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
99 
100 static void tcg_register_jit_int(const void *buf, size_t size,
101                                  const void *debug_frame,
102                                  size_t debug_frame_size)
103     __attribute__((unused));
104 
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 static const char *target_parse_constraint(TCGArgConstraint *ct,
107                                            const char *ct_str, TCGType type);
108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
109                        intptr_t arg2);
110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
111 static void tcg_out_movi(TCGContext *s, TCGType type,
112                          TCGReg ret, tcg_target_long arg);
113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
114                        const int *const_args);
115 #if TCG_TARGET_MAYBE_vec
116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
117                             TCGReg dst, TCGReg src);
118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, TCGReg base, intptr_t offset);
120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
121                              TCGReg dst, int64_t arg);
122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
123                            unsigned vece, const TCGArg *args,
124                            const int *const_args);
125 #else
126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
127                                    TCGReg dst, TCGReg src)
128 {
129     g_assert_not_reached();
130 }
131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
132                                     TCGReg dst, TCGReg base, intptr_t offset)
133 {
134     g_assert_not_reached();
135 }
136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
137                                     TCGReg dst, int64_t arg)
138 {
139     g_assert_not_reached();
140 }
141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
142                                   unsigned vece, const TCGArg *args,
143                                   const int *const_args)
144 {
145     g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154                                   const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158 
/* Bytes of slack kept below each region's end; see tcg_region_assign(). */
#define TCG_HIGHWATER 1024

/* All registered translation contexts and their current count. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
/* Byte offset between the rw and rx views of code_gen_buffer (rx = rw + diff). */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Per-region search tree of TranslationBlocks, with its own lock. */
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
209 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte of instruction data to the generated-code stream. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *out = s->code_ptr;

    *out = v;
    s->code_ptr = out + 1;
}

/* Overwrite one previously emitted byte in place (relocation patching). */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
222 
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
225 {
226     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
227         *s->code_ptr++ = v;
228     } else {
229         tcg_insn_unit *p = s->code_ptr;
230         memcpy(p, &v, sizeof(v));
231         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
232     }
233 }
234 
235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
236                                                        uint16_t v)
237 {
238     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
239         *p = v;
240     } else {
241         memcpy(p, &v, sizeof(v));
242     }
243 }
244 #endif
245 
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
248 {
249     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
250         *s->code_ptr++ = v;
251     } else {
252         tcg_insn_unit *p = s->code_ptr;
253         memcpy(p, &v, sizeof(v));
254         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
255     }
256 }
257 
258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
259                                                        uint32_t v)
260 {
261     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
262         *p = v;
263     } else {
264         memcpy(p, &v, sizeof(v));
265     }
266 }
267 #endif
268 
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
271 {
272     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
273         *s->code_ptr++ = v;
274     } else {
275         tcg_insn_unit *p = s->code_ptr;
276         memcpy(p, &v, sizeof(v));
277         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
278     }
279 }
280 
281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
282                                                        uint64_t v)
283 {
284     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
285         *p = v;
286     } else {
287         memcpy(p, &v, sizeof(v));
288     }
289 }
290 #endif
291 
292 /* label relocation processing */
293 
294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
295                           TCGLabel *l, intptr_t addend)
296 {
297     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
298 
299     r->type = type;
300     r->ptr = code_ptr;
301     r->addend = addend;
302     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
303 }
304 
305 static void tcg_out_label(TCGContext *s, TCGLabel *l)
306 {
307     tcg_debug_assert(!l->has_value);
308     l->has_value = 1;
309     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
310 }
311 
312 TCGLabel *gen_new_label(void)
313 {
314     TCGContext *s = tcg_ctx;
315     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
316 
317     memset(l, 0, sizeof(TCGLabel));
318     l->id = s->nb_labels++;
319     QSIMPLEQ_INIT(&l->relocs);
320 
321     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
322 
323     return l;
324 }
325 
326 static bool tcg_resolve_relocs(TCGContext *s)
327 {
328     TCGLabel *l;
329 
330     QSIMPLEQ_FOREACH(l, &s->labels, next) {
331         TCGRelocation *r;
332         uintptr_t value = l->u.value;
333 
334         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
335             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
336                 return false;
337             }
338         }
339     }
340     return true;
341 }
342 
/* Record the current code-generation offset in tb_jmp_reset_offset[which]. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}
351 
352 #include "tcg-target.c.inc"
353 
354 /* compare a pointer @ptr and a tb_tc @s */
355 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
356 {
357     if (ptr >= s->ptr + s->size) {
358         return 1;
359     } else if (ptr < s->ptr) {
360         return -1;
361     }
362     return 0;
363 }
364 
365 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
366 {
367     const struct tb_tc *a = ap;
368     const struct tb_tc *b = bp;
369 
370     /*
371      * When both sizes are set, we know this isn't a lookup.
372      * This is the most likely case: every TB must be inserted; lookups
373      * are a lot less frequent.
374      */
375     if (likely(a->size && b->size)) {
376         if (a->ptr > b->ptr) {
377             return 1;
378         } else if (a->ptr < b->ptr) {
379             return -1;
380         }
381         /* a->ptr == b->ptr should happen only on deletions */
382         g_assert(a->size == b->size);
383         return 0;
384     }
385     /*
386      * All lookups have either .size field set to 0.
387      * From the glib sources we see that @ap is always the lookup key. However
388      * the docs provide no guarantee, so we just mark this case as likely.
389      */
390     if (likely(a->size == 0)) {
391         return ptr_cmp_tb_tc(a->ptr, b);
392     }
393     return ptr_cmp_tb_tc(b->ptr, a);
394 }
395 
396 static void tcg_region_trees_init(void)
397 {
398     size_t i;
399 
400     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
401     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
402     for (i = 0; i < region.n; i++) {
403         struct tcg_region_tree *rt = region_trees + i * tree_size;
404 
405         qemu_mutex_init(&rt->lock);
406         rt->tree = g_tree_new(tb_tc_cmp);
407     }
408 }
409 
410 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
411 {
412     void *p = tcg_splitwx_to_rw(cp);
413     size_t region_idx;
414 
415     if (p < region.start_aligned) {
416         region_idx = 0;
417     } else {
418         ptrdiff_t offset = p - region.start_aligned;
419 
420         if (offset > region.stride * (region.n - 1)) {
421             region_idx = region.n - 1;
422         } else {
423             region_idx = offset / region.stride;
424         }
425     }
426     return region_trees + region_idx * tree_size;
427 }
428 
429 void tcg_tb_insert(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_insert(rt->tree, &tb->tc, tb);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 void tcg_tb_remove(TranslationBlock *tb)
439 {
440     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
441 
442     qemu_mutex_lock(&rt->lock);
443     g_tree_remove(rt->tree, &tb->tc);
444     qemu_mutex_unlock(&rt->lock);
445 }
446 
447 /*
448  * Find the TB 'tb' such that
449  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
450  * Return NULL if not found.
451  */
452 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
453 {
454     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
455     TranslationBlock *tb;
456     struct tb_tc s = { .ptr = (void *)tc_ptr };
457 
458     qemu_mutex_lock(&rt->lock);
459     tb = g_tree_lookup(rt->tree, &s);
460     qemu_mutex_unlock(&rt->lock);
461     return tb;
462 }
463 
464 static void tcg_region_tree_lock_all(void)
465 {
466     size_t i;
467 
468     for (i = 0; i < region.n; i++) {
469         struct tcg_region_tree *rt = region_trees + i * tree_size;
470 
471         qemu_mutex_lock(&rt->lock);
472     }
473 }
474 
475 static void tcg_region_tree_unlock_all(void)
476 {
477     size_t i;
478 
479     for (i = 0; i < region.n; i++) {
480         struct tcg_region_tree *rt = region_trees + i * tree_size;
481 
482         qemu_mutex_unlock(&rt->lock);
483     }
484 }
485 
486 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
487 {
488     size_t i;
489 
490     tcg_region_tree_lock_all();
491     for (i = 0; i < region.n; i++) {
492         struct tcg_region_tree *rt = region_trees + i * tree_size;
493 
494         g_tree_foreach(rt->tree, func, user_data);
495     }
496     tcg_region_tree_unlock_all();
497 }
498 
499 size_t tcg_nb_tbs(void)
500 {
501     size_t nb_tbs = 0;
502     size_t i;
503 
504     tcg_region_tree_lock_all();
505     for (i = 0; i < region.n; i++) {
506         struct tcg_region_tree *rt = region_trees + i * tree_size;
507 
508         nb_tbs += g_tree_nnodes(rt->tree);
509     }
510     tcg_region_tree_unlock_all();
511     return nb_tbs;
512 }
513 
514 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
515 {
516     TranslationBlock *tb = v;
517 
518     tb_destroy(tb);
519     return FALSE;
520 }
521 
/* Empty every region tree, destroying all tracked TBs in the process. */
static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* destroy each TB before dropping the tree's nodes */
        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
537 
538 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
539 {
540     void *start, *end;
541 
542     start = region.start_aligned + curr_region * region.stride;
543     end = start + region.size;
544 
545     if (curr_region == 0) {
546         start = region.start;
547     }
548     if (curr_region == region.n - 1) {
549         end = region.end;
550     }
551 
552     *pstart = start;
553     *pend = end;
554 }
555 
/* Point context @s's code-generation window at region @curr_region. */
static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    /* leave TCG_HIGHWATER bytes of slack before the region counts as full */
    s->code_gen_highwater = end - TCG_HIGHWATER;
}
567 
568 static bool tcg_region_alloc__locked(TCGContext *s)
569 {
570     if (region.current == region.n) {
571         return true;
572     }
573     tcg_region_assign(s, region.current);
574     region.current++;
575     return false;
576 }
577 
/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        /* the previous region is now full; account its usable size */
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}
596 
/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    /* no prior region to account for, so a plain allocation suffices */
    return tcg_region_alloc__locked(s);
}
605 
/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    /* rewind the allocator's bookkeeping... */
    region.current = 0;
    region.agg_size_full = 0;

    /* ...and hand every registered context a fresh first region */
    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    /* drop all TBs tracked in the region trees as well */
    tcg_region_tree_reset_all();
}
626 
#ifdef CONFIG_USER_ONLY
/* User-mode supports a single region only; see tcg_region_init()'s comment. */
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    /* NOTE(review): this inner #if is always true here, since we are already
       in the #else branch of CONFIG_USER_ONLY above.  Kept as-is. */
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
668 
669 /*
670  * Initializes region partitioning.
671  *
672  * Called at init time from the parent thread (i.e. the one calling
673  * tcg_context_init), after the target's TCG globals have been set.
674  *
675  * Region partitioning works by splitting code_gen_buffer into separate regions,
676  * and then assigning regions to TCG threads so that the threads can translate
677  * code in parallel without synchronization.
678  *
679  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
680  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
681  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
682  * must have been parsed before calling this function, since it calls
683  * qemu_tcg_mttcg_enabled().
684  *
685  * In user-mode we use a single region.  Having multiple regions in user-mode
686  * is not supported, because the number of vCPU threads (recall that each thread
687  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
688  * OS, and usually this number is huge (tens of thousands is not uncommon).
689  * Thus, given this large bound on the number of vCPU threads and the fact
690  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
692  *
693  * However, this user-mode limitation is unlikely to be a significant problem
694  * in practice. Multi-threaded guests share most if not all of their translated
695  * code, which makes parallel code generation less appealing than in softmmu.
696  */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    /* each region's last page is its guard page, hence size < stride */
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            /* mirror the guard page in the executable (rx) mapping too */
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
762 
#ifdef CONFIG_DEBUG_TCG
/*
 * Convert a writable (rw) code pointer to its executable (rx) alias.
 * Debug build: additionally assert the pointer lies in code_gen_buffer.
 */
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

/*
 * Convert an executable (rx) code pointer back to its writable (rw) alias.
 * Debug build: assert the result lies in code_gen_buffer.
 */
void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
785 
/* Allocate per-context plugin state; a no-op without CONFIG_PLUGIN. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
794 
795 /*
796  * All TCG threads except the parent (i.e. the one that called tcg_context_init
797  * and registered the target's TCG globals) must register with this function
798  * before initiating translation.
799  *
800  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
801  * of tcg_region_init() for the reasoning behind this.
802  *
803  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
804  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
805  * is not used anymore for translation once this function is called.
806  *
807  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
809  */
#ifdef CONFIG_USER_ONLY
/* User-mode: all threads share the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
/* Softmmu: clone the initial context for this thread and claim a region. */
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* rebase the mem_base pointer onto this context's temps array */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    /* context 0 already got plugin state in tcg_context_init */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */
850 
851 /*
852  * Returns the size (in bytes) of all translated code (i.e. from all regions)
853  * currently in the cache.
854  * See also: tcg_code_capacity()
855  * Do not confuse with tcg_current_code_size(); that one applies to a single
856  * TCG context.
857  */
858 size_t tcg_code_size(void)
859 {
860     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
861     unsigned int i;
862     size_t total;
863 
864     qemu_mutex_lock(&region.lock);
865     total = region.agg_size_full;
866     for (i = 0; i < n_ctxs; i++) {
867         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
868         size_t size;
869 
870         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
871         g_assert(size <= s->code_gen_buffer_size);
872         total += size;
873     }
874     qemu_mutex_unlock(&region.lock);
875     return total;
876 }
877 
878 /*
879  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
880  * regions.
881  * See also: tcg_code_size()
882  */
883 size_t tcg_code_capacity(void)
884 {
885     size_t guard_size, capacity;
886 
887     /* no need for synchronization; these variables are set at init time */
888     guard_size = region.stride - region.size;
889     capacity = region.end + guard_size - region.start;
890     capacity -= region.n * (guard_size + TCG_HIGHWATER);
891     return capacity;
892 }
893 
894 size_t tcg_tb_phys_invalidate_count(void)
895 {
896     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
897     unsigned int i;
898     size_t total = 0;
899 
900     for (i = 0; i < n_ctxs; i++) {
901         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
902 
903         total += qatomic_read(&s->tb_phys_invalidate_count);
904     }
905     return total;
906 }
907 
908 /* pool based memory allocation */
909 void *tcg_malloc_internal(TCGContext *s, int size)
910 {
911     TCGPool *p;
912     int pool_size;
913 
914     if (size > TCG_POOL_CHUNK_SIZE) {
915         /* big malloc: insert a new pool (XXX: could optimize) */
916         p = g_malloc(sizeof(TCGPool) + size);
917         p->size = size;
918         p->next = s->pool_first_large;
919         s->pool_first_large = p;
920         return p->data;
921     } else {
922         p = s->pool_current;
923         if (!p) {
924             p = s->pool_first;
925             if (!p)
926                 goto new_pool;
927         } else {
928             if (!p->next) {
929             new_pool:
930                 pool_size = TCG_POOL_CHUNK_SIZE;
931                 p = g_malloc(sizeof(TCGPool) + pool_size);
932                 p->size = pool_size;
933                 p->next = NULL;
934                 if (s->pool_current)
935                     s->pool_current->next = p;
936                 else
937                     s->pool_first = p;
938             } else {
939                 p = p->next;
940             }
941         }
942     }
943     s->pool_current = p;
944     s->pool_cur = p->data + size;
945     s->pool_end = p->data + p->size;
946     return p->data;
947 }
948 
949 void tcg_pool_reset(TCGContext *s)
950 {
951     TCGPool *p, *t;
952     for (p = s->pool_first_large; p; p = t) {
953         t = p->next;
954         g_free(p);
955     }
956     s->pool_first_large = NULL;
957     s->pool_cur = s->pool_end = NULL;
958     s->pool_current = NULL;
959 }
960 
/* Descriptor for one helper function callable from generated code. */
typedef struct TCGHelperInfo {
    void *func;             /* host entry point of the helper */
    const char *name;       /* symbolic name, for diagnostics */
    unsigned flags;         /* call flags; semantics defined in tcg headers */
    unsigned sizemask;      /* per-argument size/sign info — see helper-tcg.h */
} TCGHelperInfo;
967 
968 #include "exec/helper-proto.h"
969 
970 static const TCGHelperInfo all_helpers[] = {
971 #include "exec/helper-tcg.h"
972 };
973 static GHashTable *helper_table;
974 
975 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
976 static void process_op_defs(TCGContext *s);
977 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
978                                             TCGReg reg, const char *name);
979 
/*
 * One-time initialization of a TCGContext: sizes the per-opcode
 * argument-constraint arrays, registers all helpers, initializes the
 * target backend, and creates the "env" fixed global (cpu_env).
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* One contiguous allocation, carved up per opcode just below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of call-saved registers at the front of the list. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1057 
1058 /*
1059  * Allocate TBs right before their corresponding translated code, making
1060  * sure that TBs and code are on different cache lines.
1061  */
1062 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1063 {
1064     uintptr_t align = qemu_icache_linesize;
1065     TranslationBlock *tb;
1066     void *next;
1067 
1068  retry:
1069     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1070     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1071 
1072     if (unlikely(next > s->code_gen_highwater)) {
1073         if (tcg_region_alloc(s)) {
1074             return NULL;
1075         }
1076         goto retry;
1077     }
1078     qatomic_set(&s->code_gen_ptr, next);
1079     s->data_gen_ptr = NULL;
1080     return tb;
1081 }
1082 
/*
 * Emit the target prologue at the start of code_gen_buffer, then shrink
 * the buffer so translated code is generated after it.  Also publishes
 * tcg_qemu_tb_exec and the JIT debug registration.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    /* Entry point is the read-execute alias of the write buffer. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to the instruction cache. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Disassemble the code portion, then dump the constant pool
               that follows it, one target word per line. */
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}
1178 
1179 void tcg_func_start(TCGContext *s)
1180 {
1181     tcg_pool_reset(s);
1182     s->nb_temps = s->nb_globals;
1183 
1184     /* No temps have been previously allocated for size or locality.  */
1185     memset(s->free_temps, 0, sizeof(s->free_temps));
1186 
1187     /* No constant temps have been previously allocated. */
1188     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1189         if (s->const_table[i]) {
1190             g_hash_table_remove_all(s->const_table[i]);
1191         }
1192     }
1193 
1194     s->nb_ops = 0;
1195     s->nb_labels = 0;
1196     s->current_frame_offset = s->frame_start;
1197 
1198 #ifdef CONFIG_DEBUG_TCG
1199     s->goto_tb_issue_mask = 0;
1200 #endif
1201 
1202     QTAILQ_INIT(&s->ops);
1203     QTAILQ_INIT(&s->free_ops);
1204     QSIMPLEQ_INIT(&s->labels);
1205 }
1206 
1207 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1208 {
1209     int n = s->nb_temps++;
1210     tcg_debug_assert(n < TCG_MAX_TEMPS);
1211     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1212 }
1213 
1214 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1215 {
1216     TCGTemp *ts;
1217 
1218     tcg_debug_assert(s->nb_globals == s->nb_temps);
1219     s->nb_globals++;
1220     ts = tcg_temp_alloc(s);
1221     ts->kind = TEMP_GLOBAL;
1222 
1223     return ts;
1224 }
1225 
1226 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1227                                             TCGReg reg, const char *name)
1228 {
1229     TCGTemp *ts;
1230 
1231     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1232         tcg_abort();
1233     }
1234 
1235     ts = tcg_global_alloc(s);
1236     ts->base_type = type;
1237     ts->type = type;
1238     ts->kind = TEMP_FIXED;
1239     ts->reg = reg;
1240     ts->name = name;
1241     tcg_regset_set_reg(s->reserved_regs, reg);
1242 
1243     return ts;
1244 }
1245 
1246 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1247 {
1248     s->frame_start = start;
1249     s->frame_end = start + size;
1250     s->frame_temp
1251         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1252 }
1253 
/*
 * Create a global temp backed by memory at BASE + OFFSET.
 * If BASE is itself a (memory-resident) global, the new temp is marked
 * indirect.  On 32-bit hosts a 64-bit global is split into two adjacent
 * I32 halves named "<name>_0"/"<name>_1", with the half order chosen by
 * host endianness.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        /* Base lives in a reserved host register; direct access. */
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A split 64-bit global counts as two indirect accesses. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        /* Low part ("_0"); on big-endian hosts it sits at offset + 4. */
        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* High part ("_1") must be the adjacent temp slot. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1315 
/*
 * Allocate a temp of the given type, reusing a previously freed one if
 * available.  Local temps (preserved across basic blocks) and normal
 * temps use separate free lists, indexed by type + locality.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    /* Free-list index: locals are offset by TCG_TYPE_COUNT; must match
       the key computed in tcg_temp_free_internal(). */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 32-bit host: represent an I64 as two adjacent I32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
1361 
/* Allocate a new vector temp of the given vector type.  In debug builds,
   assert that the host backend actually supports that vector width. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    /* Vector temps are never "local" (not preserved across BBs). */
    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}
1385 
1386 /* Create a new temp of the same type as an existing temp.  */
1387 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1388 {
1389     TCGTemp *t = tcgv_vec_temp(match);
1390 
1391     tcg_debug_assert(t->temp_allocated != 0);
1392 
1393     t = tcg_temp_new_internal(t->base_type, 0);
1394     return temp_tcgv_vec(t);
1395 }
1396 
/* Return a temp to the free list so a later allocation of the same
   type/locality can reuse it.  Constants are silently ignored. */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    /* Only normal and local temps may be freed, never globals/fixed. */
    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    /* Free-list key; must mirror the one in tcg_temp_new_internal(). */
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}
1422 
/*
 * Return the (interned) constant temp for VAL of the given type,
 * creating it on first use.  Constants are deduplicated per type via a
 * hash table keyed on the 64-bit value; note the table stores &ts->val
 * as key, so ts->val must hold the full value used for lookup.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type intern table. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 32-bit host: the constant occupies two adjacent I32 temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        /* Key points into the temp itself, which is never freed. */
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
1470 
1471 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1472 {
1473     val = dup_const(vece, val);
1474     return temp_tcgv_vec(tcg_constant_internal(type, val));
1475 }
1476 
1477 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1478 {
1479     TCGTemp *t = tcgv_vec_temp(match);
1480 
1481     tcg_debug_assert(t->temp_allocated != 0);
1482     return tcg_constant_vec(t->base_type, vece, val);
1483 }
1484 
1485 TCGv_i32 tcg_const_i32(int32_t val)
1486 {
1487     TCGv_i32 t0;
1488     t0 = tcg_temp_new_i32();
1489     tcg_gen_movi_i32(t0, val);
1490     return t0;
1491 }
1492 
1493 TCGv_i64 tcg_const_i64(int64_t val)
1494 {
1495     TCGv_i64 t0;
1496     t0 = tcg_temp_new_i64();
1497     tcg_gen_movi_i64(t0, val);
1498     return t0;
1499 }
1500 
1501 TCGv_i32 tcg_const_local_i32(int32_t val)
1502 {
1503     TCGv_i32 t0;
1504     t0 = tcg_temp_local_new_i32();
1505     tcg_gen_movi_i32(t0, val);
1506     return t0;
1507 }
1508 
1509 TCGv_i64 tcg_const_local_i64(int64_t val)
1510 {
1511     TCGv_i64 t0;
1512     t0 = tcg_temp_local_new_i64();
1513     tcg_gen_movi_i64(t0, val);
1514     return t0;
1515 }
1516 
1517 #if defined(CONFIG_DEBUG_TCG)
1518 void tcg_clear_temp_count(void)
1519 {
1520     TCGContext *s = tcg_ctx;
1521     s->temps_in_use = 0;
1522 }
1523 
1524 int tcg_check_temp_count(void)
1525 {
1526     TCGContext *s = tcg_ctx;
1527     if (s->temps_in_use) {
1528         /* Clear the count so that we don't give another
1529          * warning immediately next time around.
1530          */
1531         s->temps_in_use = 0;
1532         return 1;
1533     }
1534     return 0;
1535 }
1536 #endif
1537 
1538 /* Return true if OP may appear in the opcode stream.
1539    Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Opcodes every backend must implement. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Mandatory 32-bit integer operations. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated per-backend by TCG_TARGET_HAS_*. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares only exist on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Mandatory 64-bit ops -- but only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops, gated per-backend by TCG_TARGET_HAS_*. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: require general vector support, plus per-op gates. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes are assumed supported by their backend. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1828 
1829 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1830    and endian swap. Maybe it would be better to do the alignment
1831    and endian swap in tcg_reg_alloc_call(). */
1832 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1833 {
1834     int i, real_args, nb_rets, pi;
1835     unsigned sizemask, flags;
1836     TCGHelperInfo *info;
1837     TCGOp *op;
1838 
1839     info = g_hash_table_lookup(helper_table, (gpointer)func);
1840     flags = info->flags;
1841     sizemask = info->sizemask;
1842 
1843 #ifdef CONFIG_PLUGIN
1844     /* detect non-plugin helpers */
1845     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1846         tcg_ctx->plugin_insn->calls_helpers = true;
1847     }
1848 #endif
1849 
1850 #if defined(__sparc__) && !defined(__arch64__) \
1851     && !defined(CONFIG_TCG_INTERPRETER)
1852     /* We have 64-bit values in one register, but need to pass as two
1853        separate parameters.  Split them.  */
1854     int orig_sizemask = sizemask;
1855     int orig_nargs = nargs;
1856     TCGv_i64 retl, reth;
1857     TCGTemp *split_args[MAX_OPC_PARAM];
1858 
1859     retl = NULL;
1860     reth = NULL;
1861     if (sizemask != 0) {
1862         for (i = real_args = 0; i < nargs; ++i) {
1863             int is_64bit = sizemask & (1 << (i+1)*2);
1864             if (is_64bit) {
1865                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1866                 TCGv_i32 h = tcg_temp_new_i32();
1867                 TCGv_i32 l = tcg_temp_new_i32();
1868                 tcg_gen_extr_i64_i32(l, h, orig);
1869                 split_args[real_args++] = tcgv_i32_temp(h);
1870                 split_args[real_args++] = tcgv_i32_temp(l);
1871             } else {
1872                 split_args[real_args++] = args[i];
1873             }
1874         }
1875         nargs = real_args;
1876         args = split_args;
1877         sizemask = 0;
1878     }
1879 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1880     for (i = 0; i < nargs; ++i) {
1881         int is_64bit = sizemask & (1 << (i+1)*2);
1882         int is_signed = sizemask & (2 << (i+1)*2);
1883         if (!is_64bit) {
1884             TCGv_i64 temp = tcg_temp_new_i64();
1885             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1886             if (is_signed) {
1887                 tcg_gen_ext32s_i64(temp, orig);
1888             } else {
1889                 tcg_gen_ext32u_i64(temp, orig);
1890             }
1891             args[i] = tcgv_i64_temp(temp);
1892         }
1893     }
1894 #endif /* TCG_TARGET_EXTEND_ARGS */
1895 
1896     op = tcg_emit_op(INDEX_op_call);
1897 
1898     pi = 0;
1899     if (ret != NULL) {
1900 #if defined(__sparc__) && !defined(__arch64__) \
1901     && !defined(CONFIG_TCG_INTERPRETER)
1902         if (orig_sizemask & 1) {
1903             /* The 32-bit ABI is going to return the 64-bit value in
1904                the %o0/%o1 register pair.  Prepare for this by using
1905                two return temporaries, and reassemble below.  */
1906             retl = tcg_temp_new_i64();
1907             reth = tcg_temp_new_i64();
1908             op->args[pi++] = tcgv_i64_arg(reth);
1909             op->args[pi++] = tcgv_i64_arg(retl);
1910             nb_rets = 2;
1911         } else {
1912             op->args[pi++] = temp_arg(ret);
1913             nb_rets = 1;
1914         }
1915 #else
1916         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1917 #ifdef HOST_WORDS_BIGENDIAN
1918             op->args[pi++] = temp_arg(ret + 1);
1919             op->args[pi++] = temp_arg(ret);
1920 #else
1921             op->args[pi++] = temp_arg(ret);
1922             op->args[pi++] = temp_arg(ret + 1);
1923 #endif
1924             nb_rets = 2;
1925         } else {
1926             op->args[pi++] = temp_arg(ret);
1927             nb_rets = 1;
1928         }
1929 #endif
1930     } else {
1931         nb_rets = 0;
1932     }
1933     TCGOP_CALLO(op) = nb_rets;
1934 
1935     real_args = 0;
1936     for (i = 0; i < nargs; i++) {
1937         int is_64bit = sizemask & (1 << (i+1)*2);
1938         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1939 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1940             /* some targets want aligned 64 bit args */
1941             if (real_args & 1) {
1942                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1943                 real_args++;
1944             }
1945 #endif
1946            /* If stack grows up, then we will be placing successive
1947               arguments at lower addresses, which means we need to
1948               reverse the order compared to how we would normally
1949               treat either big or little-endian.  For those arguments
1950               that will wind up in registers, this still works for
1951               HPPA (the only current STACK_GROWSUP target) since the
1952               argument registers are *also* allocated in decreasing
1953               order.  If another such target is added, this logic may
1954               have to get more complicated to differentiate between
1955               stack arguments and register arguments.  */
1956 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1957             op->args[pi++] = temp_arg(args[i] + 1);
1958             op->args[pi++] = temp_arg(args[i]);
1959 #else
1960             op->args[pi++] = temp_arg(args[i]);
1961             op->args[pi++] = temp_arg(args[i] + 1);
1962 #endif
1963             real_args += 2;
1964             continue;
1965         }
1966 
1967         op->args[pi++] = temp_arg(args[i]);
1968         real_args++;
1969     }
1970     op->args[pi++] = (uintptr_t)func;
1971     op->args[pi++] = flags;
1972     TCGOP_CALLI(op) = real_args;
1973 
1974     /* Make sure the fields didn't overflow.  */
1975     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1976     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1977 
1978 #if defined(__sparc__) && !defined(__arch64__) \
1979     && !defined(CONFIG_TCG_INTERPRETER)
1980     /* Free all of the parts we allocated above.  */
1981     for (i = real_args = 0; i < orig_nargs; ++i) {
1982         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1983         if (is_64bit) {
1984             tcg_temp_free_internal(args[real_args++]);
1985             tcg_temp_free_internal(args[real_args++]);
1986         } else {
1987             real_args++;
1988         }
1989     }
1990     if (orig_sizemask & 1) {
1991         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1992            Note that describing these as TCGv_i64 eliminates an unnecessary
1993            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1994         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1995         tcg_temp_free_i64(retl);
1996         tcg_temp_free_i64(reth);
1997     }
1998 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1999     for (i = 0; i < nargs; ++i) {
2000         int is_64bit = sizemask & (1 << (i+1)*2);
2001         if (!is_64bit) {
2002             tcg_temp_free_internal(args[i]);
2003         }
2004     }
2005 #endif /* TCG_TARGET_EXTEND_ARGS */
2006 }
2007 
2008 static void tcg_reg_alloc_start(TCGContext *s)
2009 {
2010     int i, n;
2011 
2012     for (i = 0, n = s->nb_temps; i < n; i++) {
2013         TCGTemp *ts = &s->temps[i];
2014         TCGTempVal val = TEMP_VAL_MEM;
2015 
2016         switch (ts->kind) {
2017         case TEMP_CONST:
2018             val = TEMP_VAL_CONST;
2019             break;
2020         case TEMP_FIXED:
2021             val = TEMP_VAL_REG;
2022             break;
2023         case TEMP_GLOBAL:
2024             break;
2025         case TEMP_NORMAL:
2026             val = TEMP_VAL_DEAD;
2027             /* fall through */
2028         case TEMP_LOCAL:
2029             ts->mem_allocated = 0;
2030             break;
2031         default:
2032             g_assert_not_reached();
2033         }
2034         ts->val_type = val;
2035     }
2036 
2037     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2038 }
2039 
2040 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2041                                  TCGTemp *ts)
2042 {
2043     int idx = temp_idx(ts);
2044 
2045     switch (ts->kind) {
2046     case TEMP_FIXED:
2047     case TEMP_GLOBAL:
2048         pstrcpy(buf, buf_size, ts->name);
2049         break;
2050     case TEMP_LOCAL:
2051         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2052         break;
2053     case TEMP_NORMAL:
2054         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2055         break;
2056     case TEMP_CONST:
2057         switch (ts->type) {
2058         case TCG_TYPE_I32:
2059             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2060             break;
2061 #if TCG_TARGET_REG_BITS > 32
2062         case TCG_TYPE_I64:
2063             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2064             break;
2065 #endif
2066         case TCG_TYPE_V64:
2067         case TCG_TYPE_V128:
2068         case TCG_TYPE_V256:
2069             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2070                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2071             break;
2072         default:
2073             g_assert_not_reached();
2074         }
2075         break;
2076     }
2077     return buf;
2078 }
2079 
2080 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2081                              int buf_size, TCGArg arg)
2082 {
2083     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2084 }
2085 
2086 /* Find helper name.  */
2087 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2088 {
2089     const char *ret = NULL;
2090     if (helper_table) {
2091         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2092         if (info) {
2093             ret = info->name;
2094         }
2095     }
2096     return ret;
2097 }
2098 
/* Printable names for TCGCond values, indexed by condition code;
   used when dumping ops with a condition constant argument. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2114 
/* Printable names for MemOp size/sign/byte-order combinations,
   indexed by (op & (MO_BSWAP | MO_SSIZE)); used when dumping
   qemu_ld/qemu_st ops. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
2130 
/* Printable prefixes for MemOp alignment bits, indexed by
   (op & MO_AMASK) >> MO_ASHIFT.  The "default" alignment prints as the
   empty string, so only the non-default case carries a marker; which
   case is the default depends on TARGET_ALIGNED_ONLY. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2146 
2147 static inline bool tcg_regset_single(TCGRegSet d)
2148 {
2149     return (d & (d - 1)) == 0;
2150 }
2151 
2152 static inline TCGReg tcg_regset_first(TCGRegSet d)
2153 {
2154     if (TCG_TARGET_NB_REGS <= 32) {
2155         return ctz32(d);
2156     } else {
2157         return ctz64(d);
2158     }
2159 }
2160 
2161 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2162 {
2163     char buf[128];
2164     TCGOp *op;
2165 
2166     QTAILQ_FOREACH(op, &s->ops, link) {
2167         int i, k, nb_oargs, nb_iargs, nb_cargs;
2168         const TCGOpDef *def;
2169         TCGOpcode c;
2170         int col = 0;
2171 
2172         c = op->opc;
2173         def = &tcg_op_defs[c];
2174 
2175         if (c == INDEX_op_insn_start) {
2176             nb_oargs = 0;
2177             col += qemu_log("\n ----");
2178 
2179             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2180                 target_ulong a;
2181 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2182                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2183 #else
2184                 a = op->args[i];
2185 #endif
2186                 col += qemu_log(" " TARGET_FMT_lx, a);
2187             }
2188         } else if (c == INDEX_op_call) {
2189             /* variable number of arguments */
2190             nb_oargs = TCGOP_CALLO(op);
2191             nb_iargs = TCGOP_CALLI(op);
2192             nb_cargs = def->nb_cargs;
2193 
2194             /* function name, flags, out args */
2195             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2196                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2197                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2198             for (i = 0; i < nb_oargs; i++) {
2199                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2200                                                        op->args[i]));
2201             }
2202             for (i = 0; i < nb_iargs; i++) {
2203                 TCGArg arg = op->args[nb_oargs + i];
2204                 const char *t = "<dummy>";
2205                 if (arg != TCG_CALL_DUMMY_ARG) {
2206                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2207                 }
2208                 col += qemu_log(",%s", t);
2209             }
2210         } else {
2211             col += qemu_log(" %s ", def->name);
2212 
2213             nb_oargs = def->nb_oargs;
2214             nb_iargs = def->nb_iargs;
2215             nb_cargs = def->nb_cargs;
2216 
2217             if (def->flags & TCG_OPF_VECTOR) {
2218                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2219                                 8 << TCGOP_VECE(op));
2220             }
2221 
2222             k = 0;
2223             for (i = 0; i < nb_oargs; i++) {
2224                 if (k != 0) {
2225                     col += qemu_log(",");
2226                 }
2227                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2228                                                       op->args[k++]));
2229             }
2230             for (i = 0; i < nb_iargs; i++) {
2231                 if (k != 0) {
2232                     col += qemu_log(",");
2233                 }
2234                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2235                                                       op->args[k++]));
2236             }
2237             switch (c) {
2238             case INDEX_op_brcond_i32:
2239             case INDEX_op_setcond_i32:
2240             case INDEX_op_movcond_i32:
2241             case INDEX_op_brcond2_i32:
2242             case INDEX_op_setcond2_i32:
2243             case INDEX_op_brcond_i64:
2244             case INDEX_op_setcond_i64:
2245             case INDEX_op_movcond_i64:
2246             case INDEX_op_cmp_vec:
2247             case INDEX_op_cmpsel_vec:
2248                 if (op->args[k] < ARRAY_SIZE(cond_name)
2249                     && cond_name[op->args[k]]) {
2250                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2251                 } else {
2252                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2253                 }
2254                 i = 1;
2255                 break;
2256             case INDEX_op_qemu_ld_i32:
2257             case INDEX_op_qemu_st_i32:
2258             case INDEX_op_qemu_st8_i32:
2259             case INDEX_op_qemu_ld_i64:
2260             case INDEX_op_qemu_st_i64:
2261                 {
2262                     TCGMemOpIdx oi = op->args[k++];
2263                     MemOp op = get_memop(oi);
2264                     unsigned ix = get_mmuidx(oi);
2265 
2266                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2267                         col += qemu_log(",$0x%x,%u", op, ix);
2268                     } else {
2269                         const char *s_al, *s_op;
2270                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2271                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2272                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2273                     }
2274                     i = 1;
2275                 }
2276                 break;
2277             default:
2278                 i = 0;
2279                 break;
2280             }
2281             switch (c) {
2282             case INDEX_op_set_label:
2283             case INDEX_op_br:
2284             case INDEX_op_brcond_i32:
2285             case INDEX_op_brcond_i64:
2286             case INDEX_op_brcond2_i32:
2287                 col += qemu_log("%s$L%d", k ? "," : "",
2288                                 arg_label(op->args[k])->id);
2289                 i++, k++;
2290                 break;
2291             default:
2292                 break;
2293             }
2294             for (; i < nb_cargs; i++, k++) {
2295                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2296             }
2297         }
2298 
2299         if (have_prefs || op->life) {
2300 
2301             QemuLogFile *logfile;
2302 
2303             rcu_read_lock();
2304             logfile = qatomic_rcu_read(&qemu_logfile);
2305             if (logfile) {
2306                 for (; col < 40; ++col) {
2307                     putc(' ', logfile->fd);
2308                 }
2309             }
2310             rcu_read_unlock();
2311         }
2312 
2313         if (op->life) {
2314             unsigned life = op->life;
2315 
2316             if (life & (SYNC_ARG * 3)) {
2317                 qemu_log("  sync:");
2318                 for (i = 0; i < 2; ++i) {
2319                     if (life & (SYNC_ARG << i)) {
2320                         qemu_log(" %d", i);
2321                     }
2322                 }
2323             }
2324             life /= DEAD_ARG;
2325             if (life) {
2326                 qemu_log("  dead:");
2327                 for (i = 0; life; ++i, life >>= 1) {
2328                     if (life & 1) {
2329                         qemu_log(" %d", i);
2330                     }
2331                 }
2332             }
2333         }
2334 
2335         if (have_prefs) {
2336             for (i = 0; i < nb_oargs; ++i) {
2337                 TCGRegSet set = op->output_pref[i];
2338 
2339                 if (i == 0) {
2340                     qemu_log("  pref=");
2341                 } else {
2342                     qemu_log(",");
2343                 }
2344                 if (set == 0) {
2345                     qemu_log("none");
2346                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2347                     qemu_log("all");
2348 #ifdef CONFIG_DEBUG_TCG
2349                 } else if (tcg_regset_single(set)) {
2350                     TCGReg reg = tcg_regset_first(set);
2351                     qemu_log("%s", tcg_target_reg_names[reg]);
2352 #endif
2353                 } else if (TCG_TARGET_NB_REGS <= 32) {
2354                     qemu_log("%#x", (uint32_t)set);
2355                 } else {
2356                     qemu_log("%#" PRIx64, (uint64_t)set);
2357                 }
2358             }
2359         }
2360 
2361         qemu_log("\n");
2362     }
2363 }
2364 
2365 /* we give more priority to constraints with less registers */
2366 static int get_constraint_priority(const TCGOpDef *def, int k)
2367 {
2368     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2369     int n;
2370 
2371     if (arg_ct->oalias) {
2372         /* an alias is equivalent to a single register */
2373         n = 1;
2374     } else {
2375         n = ctpop64(arg_ct->regs);
2376     }
2377     return TCG_TARGET_NB_REGS - n + 1;
2378 }
2379 
2380 /* sort from highest priority to lowest */
2381 static void sort_constraints(TCGOpDef *def, int start, int n)
2382 {
2383     int i, j;
2384     TCGArgConstraint *a = def->args_ct;
2385 
2386     for (i = 0; i < n; i++) {
2387         a[start + i].sort_index = start + i;
2388     }
2389     if (n <= 1) {
2390         return;
2391     }
2392     for (i = 0; i < n - 1; i++) {
2393         for (j = i + 1; j < n; j++) {
2394             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2395             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2396             if (p1 < p2) {
2397                 int tmp = a[start + i].sort_index;
2398                 a[start + i].sort_index = a[start + j].sort_index;
2399                 a[start + j].sort_index = tmp;
2400             }
2401         }
2402     }
2403 }
2404 
/*
 * Parse the backend's per-opcode constraint strings into the args_ct
 * array of each TCGOpDef, then sort outputs and inputs by constraint
 * priority.  Called once at TCG startup.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        TCGType type;
        int i, nb_args;

        /* Opcodes the backend does not implement have no constraints. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        tdefs = tcg_target_op_def(op);
        /* Missing TCGTargetOpDef entry. */
        tcg_debug_assert(tdefs != NULL);

        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* Each constraint string is a sequence of single-character
               (or backend-parsed) constraints applied to argument i. */
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* A digit makes input i an alias of output
                           'oarg'; it must be the first (only) letter
                           in the string. */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Output must not overlap any input register. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Argument may be an immediate constant. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;
                default:
                    /* Backend-specific constraint letter. */
                    ct_str = target_parse_constraint(&def->args_ct[i],
                                                     ct_str, type);
                    /* Typo in TCGTargetOpDef constraint. */
                    tcg_debug_assert(ct_str != NULL);
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2477 
2478 void tcg_op_remove(TCGContext *s, TCGOp *op)
2479 {
2480     TCGLabel *label;
2481 
2482     switch (op->opc) {
2483     case INDEX_op_br:
2484         label = arg_label(op->args[0]);
2485         label->refs--;
2486         break;
2487     case INDEX_op_brcond_i32:
2488     case INDEX_op_brcond_i64:
2489         label = arg_label(op->args[3]);
2490         label->refs--;
2491         break;
2492     case INDEX_op_brcond2_i32:
2493         label = arg_label(op->args[5]);
2494         label->refs--;
2495         break;
2496     default:
2497         break;
2498     }
2499 
2500     QTAILQ_REMOVE(&s->ops, op, link);
2501     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2502     s->nb_ops--;
2503 
2504 #ifdef CONFIG_PROFILER
2505     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2506 #endif
2507 }
2508 
2509 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2510 {
2511     TCGContext *s = tcg_ctx;
2512     TCGOp *op;
2513 
2514     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2515         op = tcg_malloc(sizeof(TCGOp));
2516     } else {
2517         op = QTAILQ_FIRST(&s->free_ops);
2518         QTAILQ_REMOVE(&s->free_ops, op, link);
2519     }
2520     memset(op, 0, offsetof(TCGOp, link));
2521     op->opc = opc;
2522     s->nb_ops++;
2523 
2524     return op;
2525 }
2526 
2527 TCGOp *tcg_emit_op(TCGOpcode opc)
2528 {
2529     TCGOp *op = tcg_op_alloc(opc);
2530     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2531     return op;
2532 }
2533 
2534 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2535 {
2536     TCGOp *new_op = tcg_op_alloc(opc);
2537     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2538     return new_op;
2539 }
2540 
2541 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2542 {
2543     TCGOp *new_op = tcg_op_alloc(opc);
2544     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2545     return new_op;
2546 }
2547 
/* Reachable analysis : remove unreachable code.  */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;   /* true while scanning past an unconditional exit */

    /* Forward scan; SAFE variant because ops may be removed in-loop. */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;
        int call_flags;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
            if (call_flags & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2622 
/* Per-temp liveness state bits used by the liveness passes. */
#define TS_DEAD  1   /* value is dead: not needed by any later op */
#define TS_MEM   2   /* value must be synced back to its memory slot */

/* Test the per-argument bits recorded in arg_life by liveness_pass_1. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2628 
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* During the liveness pass, state_ptr points at this temp's
       TCGRegSet preference word (allocated in liveness_pass_1). */
    return ts->state_ptr;
}
2634 
2635 /* For liveness_pass_1, reset the preferences for a given temp to the
2636  * maximal regset for its type.
2637  */
2638 static inline void la_reset_pref(TCGTemp *ts)
2639 {
2640     *la_temp_pref(ts)
2641         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2642 }
2643 
2644 /* liveness analysis: end of function: all temps are dead, and globals
2645    should be in memory. */
2646 static void la_func_end(TCGContext *s, int ng, int nt)
2647 {
2648     int i;
2649 
2650     for (i = 0; i < ng; ++i) {
2651         s->temps[i].state = TS_DEAD | TS_MEM;
2652         la_reset_pref(&s->temps[i]);
2653     }
2654     for (i = ng; i < nt; ++i) {
2655         s->temps[i].state = TS_DEAD;
2656         la_reset_pref(&s->temps[i]);
2657     }
2658 }
2659 
2660 /* liveness analysis: end of basic block: all temps are dead, globals
2661    and local temps should be in memory. */
2662 static void la_bb_end(TCGContext *s, int ng, int nt)
2663 {
2664     int i;
2665 
2666     for (i = 0; i < nt; ++i) {
2667         TCGTemp *ts = &s->temps[i];
2668         int state;
2669 
2670         switch (ts->kind) {
2671         case TEMP_FIXED:
2672         case TEMP_GLOBAL:
2673         case TEMP_LOCAL:
2674             state = TS_DEAD | TS_MEM;
2675             break;
2676         case TEMP_NORMAL:
2677         case TEMP_CONST:
2678             state = TS_DEAD;
2679             break;
2680         default:
2681             g_assert_not_reached();
2682         }
2683         ts->state = state;
2684         la_reset_pref(ts);
2685     }
2686 }
2687 
2688 /* liveness analysis: sync globals back to memory.  */
2689 static void la_global_sync(TCGContext *s, int ng)
2690 {
2691     int i;
2692 
2693     for (i = 0; i < ng; ++i) {
2694         int state = s->temps[i].state;
2695         s->temps[i].state = state | TS_MEM;
2696         if (state == TS_DEAD) {
2697             /* If the global was previously dead, reset prefs.  */
2698             la_reset_pref(&s->temps[i]);
2699         }
2700     }
2701 }
2702 
2703 /*
2704  * liveness analysis: conditional branch: all temps are dead,
2705  * globals and local temps should be synced.
2706  */
2707 static void la_bb_sync(TCGContext *s, int ng, int nt)
2708 {
2709     la_global_sync(s, ng);
2710 
2711     for (int i = ng; i < nt; ++i) {
2712         TCGTemp *ts = &s->temps[i];
2713         int state;
2714 
2715         switch (ts->kind) {
2716         case TEMP_LOCAL:
2717             state = ts->state;
2718             ts->state = state | TS_MEM;
2719             if (state != TS_DEAD) {
2720                 continue;
2721             }
2722             break;
2723         case TEMP_NORMAL:
2724             s->temps[i].state = TS_DEAD;
2725             break;
2726         case TEMP_CONST:
2727             continue;
2728         default:
2729             g_assert_not_reached();
2730         }
2731         la_reset_pref(&s->temps[i]);
2732     }
2733 }
2734 
2735 /* liveness analysis: sync globals back to memory and kill.  */
2736 static void la_global_kill(TCGContext *s, int ng)
2737 {
2738     int i;
2739 
2740     for (i = 0; i < ng; i++) {
2741         s->temps[i].state = TS_DEAD | TS_MEM;
2742         la_reset_pref(&s->temps[i]);
2743     }
2744 }
2745 
2746 /* liveness analysis: note live globals crossing calls.  */
2747 static void la_cross_call(TCGContext *s, int nt)
2748 {
2749     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2750     int i;
2751 
2752     for (i = 0; i < nt; i++) {
2753         TCGTemp *ts = &s->temps[i];
2754         if (!(ts->state & TS_DEAD)) {
2755             TCGRegSet *pset = la_temp_pref(ts);
2756             TCGRegSet set = *pset;
2757 
2758             set &= mask;
2759             /* If the combination is not possible, restart.  */
2760             if (set == 0) {
2761                 set = tcg_target_available_regs[ts->type] & mask;
2762             }
2763             *pset = set;
2764         }
2765     }
2766 }
2767 
2768 /* Liveness analysis : update the opc_arg_life array to tell if a
2769    given input arguments is dead. Instructions updating dead
2770    temporaries are removed. */
2771 static void liveness_pass_1(TCGContext *s)
2772 {
2773     int nb_globals = s->nb_globals;
2774     int nb_temps = s->nb_temps;
2775     TCGOp *op, *op_prev;
2776     TCGRegSet *prefs;
2777     int i;
2778 
2779     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2780     for (i = 0; i < nb_temps; ++i) {
2781         s->temps[i].state_ptr = prefs + i;
2782     }
2783 
2784     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2785     la_func_end(s, nb_globals, nb_temps);
2786 
2787     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2788         int nb_iargs, nb_oargs;
2789         TCGOpcode opc_new, opc_new2;
2790         bool have_opc_new2;
2791         TCGLifeData arg_life = 0;
2792         TCGTemp *ts;
2793         TCGOpcode opc = op->opc;
2794         const TCGOpDef *def = &tcg_op_defs[opc];
2795 
2796         switch (opc) {
2797         case INDEX_op_call:
2798             {
2799                 int call_flags;
2800                 int nb_call_regs;
2801 
2802                 nb_oargs = TCGOP_CALLO(op);
2803                 nb_iargs = TCGOP_CALLI(op);
2804                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2805 
2806                 /* pure functions can be removed if their result is unused */
2807                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2808                     for (i = 0; i < nb_oargs; i++) {
2809                         ts = arg_temp(op->args[i]);
2810                         if (ts->state != TS_DEAD) {
2811                             goto do_not_remove_call;
2812                         }
2813                     }
2814                     goto do_remove;
2815                 }
2816             do_not_remove_call:
2817 
2818                 /* Output args are dead.  */
2819                 for (i = 0; i < nb_oargs; i++) {
2820                     ts = arg_temp(op->args[i]);
2821                     if (ts->state & TS_DEAD) {
2822                         arg_life |= DEAD_ARG << i;
2823                     }
2824                     if (ts->state & TS_MEM) {
2825                         arg_life |= SYNC_ARG << i;
2826                     }
2827                     ts->state = TS_DEAD;
2828                     la_reset_pref(ts);
2829 
2830                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2831                     op->output_pref[i] = 0;
2832                 }
2833 
2834                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2835                                     TCG_CALL_NO_READ_GLOBALS))) {
2836                     la_global_kill(s, nb_globals);
2837                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2838                     la_global_sync(s, nb_globals);
2839                 }
2840 
2841                 /* Record arguments that die in this helper.  */
2842                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2843                     ts = arg_temp(op->args[i]);
2844                     if (ts && ts->state & TS_DEAD) {
2845                         arg_life |= DEAD_ARG << i;
2846                     }
2847                 }
2848 
2849                 /* For all live registers, remove call-clobbered prefs.  */
2850                 la_cross_call(s, nb_temps);
2851 
2852                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2853 
2854                 /* Input arguments are live for preceding opcodes.  */
2855                 for (i = 0; i < nb_iargs; i++) {
2856                     ts = arg_temp(op->args[i + nb_oargs]);
2857                     if (ts && ts->state & TS_DEAD) {
2858                         /* For those arguments that die, and will be allocated
2859                          * in registers, clear the register set for that arg,
2860                          * to be filled in below.  For args that will be on
2861                          * the stack, reset to any available reg.
2862                          */
2863                         *la_temp_pref(ts)
2864                             = (i < nb_call_regs ? 0 :
2865                                tcg_target_available_regs[ts->type]);
2866                         ts->state &= ~TS_DEAD;
2867                     }
2868                 }
2869 
2870                 /* For each input argument, add its input register to prefs.
2871                    If a temp is used once, this produces a single set bit.  */
2872                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2873                     ts = arg_temp(op->args[i + nb_oargs]);
2874                     if (ts) {
2875                         tcg_regset_set_reg(*la_temp_pref(ts),
2876                                            tcg_target_call_iarg_regs[i]);
2877                     }
2878                 }
2879             }
2880             break;
2881         case INDEX_op_insn_start:
2882             break;
2883         case INDEX_op_discard:
2884             /* mark the temporary as dead */
2885             ts = arg_temp(op->args[0]);
2886             ts->state = TS_DEAD;
2887             la_reset_pref(ts);
2888             break;
2889 
2890         case INDEX_op_add2_i32:
2891             opc_new = INDEX_op_add_i32;
2892             goto do_addsub2;
2893         case INDEX_op_sub2_i32:
2894             opc_new = INDEX_op_sub_i32;
2895             goto do_addsub2;
2896         case INDEX_op_add2_i64:
2897             opc_new = INDEX_op_add_i64;
2898             goto do_addsub2;
2899         case INDEX_op_sub2_i64:
2900             opc_new = INDEX_op_sub_i64;
2901         do_addsub2:
2902             nb_iargs = 4;
2903             nb_oargs = 2;
2904             /* Test if the high part of the operation is dead, but not
2905                the low part.  The result can be optimized to a simple
2906                add or sub.  This happens often for x86_64 guest when the
2907                cpu mode is set to 32 bit.  */
2908             if (arg_temp(op->args[1])->state == TS_DEAD) {
2909                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2910                     goto do_remove;
2911                 }
2912                 /* Replace the opcode and adjust the args in place,
2913                    leaving 3 unused args at the end.  */
2914                 op->opc = opc = opc_new;
2915                 op->args[1] = op->args[2];
2916                 op->args[2] = op->args[4];
2917                 /* Fall through and mark the single-word operation live.  */
2918                 nb_iargs = 2;
2919                 nb_oargs = 1;
2920             }
2921             goto do_not_remove;
2922 
2923         case INDEX_op_mulu2_i32:
2924             opc_new = INDEX_op_mul_i32;
2925             opc_new2 = INDEX_op_muluh_i32;
2926             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2927             goto do_mul2;
2928         case INDEX_op_muls2_i32:
2929             opc_new = INDEX_op_mul_i32;
2930             opc_new2 = INDEX_op_mulsh_i32;
2931             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2932             goto do_mul2;
2933         case INDEX_op_mulu2_i64:
2934             opc_new = INDEX_op_mul_i64;
2935             opc_new2 = INDEX_op_muluh_i64;
2936             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2937             goto do_mul2;
2938         case INDEX_op_muls2_i64:
2939             opc_new = INDEX_op_mul_i64;
2940             opc_new2 = INDEX_op_mulsh_i64;
2941             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2942             goto do_mul2;
2943         do_mul2:
2944             nb_iargs = 2;
2945             nb_oargs = 2;
2946             if (arg_temp(op->args[1])->state == TS_DEAD) {
2947                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2948                     /* Both parts of the operation are dead.  */
2949                     goto do_remove;
2950                 }
2951                 /* The high part of the operation is dead; generate the low. */
2952                 op->opc = opc = opc_new;
2953                 op->args[1] = op->args[2];
2954                 op->args[2] = op->args[3];
2955             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2956                 /* The low part of the operation is dead; generate the high. */
2957                 op->opc = opc = opc_new2;
2958                 op->args[0] = op->args[1];
2959                 op->args[1] = op->args[2];
2960                 op->args[2] = op->args[3];
2961             } else {
2962                 goto do_not_remove;
2963             }
2964             /* Mark the single-word operation live.  */
2965             nb_oargs = 1;
2966             goto do_not_remove;
2967 
2968         default:
2969             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2970             nb_iargs = def->nb_iargs;
2971             nb_oargs = def->nb_oargs;
2972 
2973             /* Test if the operation can be removed because all
2974                its outputs are dead. We assume that nb_oargs == 0
2975                implies side effects */
2976             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2977                 for (i = 0; i < nb_oargs; i++) {
2978                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2979                         goto do_not_remove;
2980                     }
2981                 }
2982                 goto do_remove;
2983             }
2984             goto do_not_remove;
2985 
2986         do_remove:
2987             tcg_op_remove(s, op);
2988             break;
2989 
2990         do_not_remove:
2991             for (i = 0; i < nb_oargs; i++) {
2992                 ts = arg_temp(op->args[i]);
2993 
2994                 /* Remember the preference of the uses that followed.  */
2995                 op->output_pref[i] = *la_temp_pref(ts);
2996 
2997                 /* Output args are dead.  */
2998                 if (ts->state & TS_DEAD) {
2999                     arg_life |= DEAD_ARG << i;
3000                 }
3001                 if (ts->state & TS_MEM) {
3002                     arg_life |= SYNC_ARG << i;
3003                 }
3004                 ts->state = TS_DEAD;
3005                 la_reset_pref(ts);
3006             }
3007 
3008             /* If end of basic block, update.  */
3009             if (def->flags & TCG_OPF_BB_EXIT) {
3010                 la_func_end(s, nb_globals, nb_temps);
3011             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3012                 la_bb_sync(s, nb_globals, nb_temps);
3013             } else if (def->flags & TCG_OPF_BB_END) {
3014                 la_bb_end(s, nb_globals, nb_temps);
3015             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3016                 la_global_sync(s, nb_globals);
3017                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3018                     la_cross_call(s, nb_temps);
3019                 }
3020             }
3021 
3022             /* Record arguments that die in this opcode.  */
3023             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3024                 ts = arg_temp(op->args[i]);
3025                 if (ts->state & TS_DEAD) {
3026                     arg_life |= DEAD_ARG << i;
3027                 }
3028             }
3029 
3030             /* Input arguments are live for preceding opcodes.  */
3031             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3032                 ts = arg_temp(op->args[i]);
3033                 if (ts->state & TS_DEAD) {
3034                     /* For operands that were dead, initially allow
3035                        all regs for the type.  */
3036                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3037                     ts->state &= ~TS_DEAD;
3038                 }
3039             }
3040 
3041             /* Incorporate constraints for this operand.  */
3042             switch (opc) {
3043             case INDEX_op_mov_i32:
3044             case INDEX_op_mov_i64:
3045                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3046                    have proper constraints.  That said, special case
3047                    moves to propagate preferences backward.  */
3048                 if (IS_DEAD_ARG(1)) {
3049                     *la_temp_pref(arg_temp(op->args[0]))
3050                         = *la_temp_pref(arg_temp(op->args[1]));
3051                 }
3052                 break;
3053 
3054             default:
3055                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3056                     const TCGArgConstraint *ct = &def->args_ct[i];
3057                     TCGRegSet set, *pset;
3058 
3059                     ts = arg_temp(op->args[i]);
3060                     pset = la_temp_pref(ts);
3061                     set = *pset;
3062 
3063                     set &= ct->regs;
3064                     if (ct->ialias) {
3065                         set &= op->output_pref[ct->alias_index];
3066                     }
3067                     /* If the combination is not possible, restart.  */
3068                     if (set == 0) {
3069                         set = ct->regs;
3070                     }
3071                     *pset = set;
3072                 }
3073                 break;
3074             }
3075             break;
3076         }
3077         op->life = arg_life;
3078     }
3079 }
3080 
/* Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * For each global that lives behind an indirection (indirect_reg), a
 * shadow "direct" temporary is created.  Ops are then rewritten to use
 * the direct temp, with explicit ld ops inserted before a use when the
 * value is not yet resident, and st ops inserted after a write when the
 * value must be synced back.  Per-temp state tracks this:
 *   TS_DEAD - direct temp does not hold the value (reload needed)
 *   TS_MEM  - direct temp holds the value, memory is up to date
 *   0       - direct temp holds the value, memory is stale
 * Returns true if any op was changed, so the caller can re-run
 * liveness over the modified stream.
 */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* Forward walk; SAFE because ld/st ops are inserted around OP.  */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.
           Insert a load from the canonical slot for any indirect
           global whose direct temp is currently dead.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            /* Special case for mov: a mov whose destination dies at
               this op but needs syncing can be replaced entirely by a
               store of the source.  */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead destination: store the mov source
                           directly and drop the mov itself.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3270 
3271 #ifdef CONFIG_DEBUG_TCG
3272 static void dump_regs(TCGContext *s)
3273 {
3274     TCGTemp *ts;
3275     int i;
3276     char buf[64];
3277 
3278     for(i = 0; i < s->nb_temps; i++) {
3279         ts = &s->temps[i];
3280         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3281         switch(ts->val_type) {
3282         case TEMP_VAL_REG:
3283             printf("%s", tcg_target_reg_names[ts->reg]);
3284             break;
3285         case TEMP_VAL_MEM:
3286             printf("%d(%s)", (int)ts->mem_offset,
3287                    tcg_target_reg_names[ts->mem_base->reg]);
3288             break;
3289         case TEMP_VAL_CONST:
3290             printf("$0x%" PRIx64, ts->val);
3291             break;
3292         case TEMP_VAL_DEAD:
3293             printf("D");
3294             break;
3295         default:
3296             printf("???");
3297             break;
3298         }
3299         printf("\n");
3300     }
3301 
3302     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3303         if (s->reg_to_temp[i] != NULL) {
3304             printf("%s: %s\n",
3305                    tcg_target_reg_names[i],
3306                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3307         }
3308     }
3309 }
3310 
/* Debug helper: verify that the register<->temp maps are mutually
   consistent, aborting with a register dump on any mismatch.  */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    /* Every register claimed in reg_to_temp[] must be backed by a temp
       that agrees it currently lives in that register.  */
    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                /* NOTE: jumps into the error path of the loop below;
                   legal C, shared so both checks dump and abort.  */
                goto fail;
            }
        }
    }
    /* Conversely, every non-fixed temp claiming a register must be the
       one recorded in reg_to_temp[] for that register.  */
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG
            && ts->kind != TEMP_FIXED
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
3342 #endif
3343 
3344 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3345 {
3346 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3347     /* Sparc64 stack is accessed with offset of 2047 */
3348     s->current_frame_offset = (s->current_frame_offset +
3349                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3350         ~(sizeof(tcg_target_long) - 1);
3351 #endif
3352     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3353         s->frame_end) {
3354         tcg_abort();
3355     }
3356     ts->mem_offset = s->current_frame_offset;
3357     ts->mem_base = s->frame_temp;
3358     ts->mem_allocated = 1;
3359     s->current_frame_offset += sizeof(tcg_target_long);
3360 }
3361 
3362 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3363 
3364 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3365    mark it free; otherwise mark it dead.  */
3366 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3367 {
3368     TCGTempVal new_type;
3369 
3370     switch (ts->kind) {
3371     case TEMP_FIXED:
3372         return;
3373     case TEMP_GLOBAL:
3374     case TEMP_LOCAL:
3375         new_type = TEMP_VAL_MEM;
3376         break;
3377     case TEMP_NORMAL:
3378         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3379         break;
3380     case TEMP_CONST:
3381         new_type = TEMP_VAL_CONST;
3382         break;
3383     default:
3384         g_assert_not_reached();
3385     }
3386     if (ts->val_type == TEMP_VAL_REG) {
3387         s->reg_to_temp[ts->reg] = NULL;
3388     }
3389     ts->val_type = new_type;
3390 }
3391 
/* Mark a temporary as dead: its value is discarded (not spilled), and
   any register it held is released.  Thin wrapper around
   temp_free_or_dead with a positive (dead) disposition.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
3397 
/* Sync a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  If
   'free_or_dead' is non-zero, subsequently release the temporary; if
   it is positive, the temp is dead; if it is negative, the temp is
   free.  'preferred_regs' guides any register allocation performed.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; coherent temps already
       match their memory slot.  */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant into a register and
               fall through to the register store below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Nothing to write back.  */
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3441 
3442 /* free register 'reg' by spilling the corresponding temporary if necessary */
3443 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3444 {
3445     TCGTemp *ts = s->reg_to_temp[reg];
3446     if (ts != NULL) {
3447         temp_sync(s, ts, allocated_regs, 0, -1);
3448     }
3449 }
3450 
3451 /**
3452  * tcg_reg_alloc:
3453  * @required_regs: Set of registers in which we must allocate.
3454  * @allocated_regs: Set of registers which must be avoided.
3455  * @preferred_regs: Set of registers we should prefer.
3456  * @rev: True if we search the registers in "indirect" order.
3457  *
3458  * The allocated register must be in @required_regs & ~@allocated_regs,
3459  * but if we can put it in @preferred_regs we may save a move later.
3460  */
3461 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3462                             TCGRegSet allocated_regs,
3463                             TCGRegSet preferred_regs, bool rev)
3464 {
3465     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3466     TCGRegSet reg_ct[2];
3467     const int *order;
3468 
3469     reg_ct[1] = required_regs & ~allocated_regs;
3470     tcg_debug_assert(reg_ct[1] != 0);
3471     reg_ct[0] = reg_ct[1] & preferred_regs;
3472 
3473     /* Skip the preferred_regs option if it cannot be satisfied,
3474        or if the preference made no difference.  */
3475     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3476 
3477     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3478 
3479     /* Try free registers, preferences first.  */
3480     for (j = f; j < 2; j++) {
3481         TCGRegSet set = reg_ct[j];
3482 
3483         if (tcg_regset_single(set)) {
3484             /* One register in the set.  */
3485             TCGReg reg = tcg_regset_first(set);
3486             if (s->reg_to_temp[reg] == NULL) {
3487                 return reg;
3488             }
3489         } else {
3490             for (i = 0; i < n; i++) {
3491                 TCGReg reg = order[i];
3492                 if (s->reg_to_temp[reg] == NULL &&
3493                     tcg_regset_test_reg(set, reg)) {
3494                     return reg;
3495                 }
3496             }
3497         }
3498     }
3499 
3500     /* We must spill something.  */
3501     for (j = f; j < 2; j++) {
3502         TCGRegSet set = reg_ct[j];
3503 
3504         if (tcg_regset_single(set)) {
3505             /* One register in the set.  */
3506             TCGReg reg = tcg_regset_first(set);
3507             tcg_reg_free(s, reg, allocated_regs);
3508             return reg;
3509         } else {
3510             for (i = 0; i < n; i++) {
3511                 TCGReg reg = order[i];
3512                 if (tcg_regset_test_reg(set, reg)) {
3513                     tcg_reg_free(s, reg, allocated_regs);
3514                     return reg;
3515                 }
3516             }
3517         }
3518     }
3519 
3520     tcg_abort();
3521 }
3522 
3523 /* Make sure the temporary is in a register.  If needed, allocate the register
3524    from DESIRED while avoiding ALLOCATED.  */
3525 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3526                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3527 {
3528     TCGReg reg;
3529 
3530     switch (ts->val_type) {
3531     case TEMP_VAL_REG:
3532         return;
3533     case TEMP_VAL_CONST:
3534         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3535                             preferred_regs, ts->indirect_base);
3536         if (ts->type <= TCG_TYPE_I64) {
3537             tcg_out_movi(s, ts->type, reg, ts->val);
3538         } else {
3539             uint64_t val = ts->val;
3540             MemOp vece = MO_64;
3541 
3542             /*
3543              * Find the minimal vector element that matches the constant.
3544              * The targets will, in general, have to do this search anyway,
3545              * do this generically.
3546              */
3547             if (val == dup_const(MO_8, val)) {
3548                 vece = MO_8;
3549             } else if (val == dup_const(MO_16, val)) {
3550                 vece = MO_16;
3551             } else if (val == dup_const(MO_32, val)) {
3552                 vece = MO_32;
3553             }
3554 
3555             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3556         }
3557         ts->mem_coherent = 0;
3558         break;
3559     case TEMP_VAL_MEM:
3560         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3561                             preferred_regs, ts->indirect_base);
3562         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3563         ts->mem_coherent = 1;
3564         break;
3565     case TEMP_VAL_DEAD:
3566     default:
3567         tcg_abort();
3568     }
3569     ts->reg = reg;
3570     ts->val_type = TEMP_VAL_REG;
3571     s->reg_to_temp[reg] = ts;
3572 }
3573 
/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.
   NOTE: both parameters are currently unused because liveness has
   already performed the writeback; only the invariant is checked.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
3582 
3583 /* save globals to their canonical location and assume they can be
3584    modified be the following code. 'allocated_regs' is used in case a
3585    temporary registers needs to be allocated to store a constant. */
3586 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3587 {
3588     int i, n;
3589 
3590     for (i = 0, n = s->nb_globals; i < n; i++) {
3591         temp_save(s, &s->temps[i], allocated_regs);
3592     }
3593 }
3594 
3595 /* sync globals to their canonical location and assume they can be
3596    read by the following code. 'allocated_regs' is used in case a
3597    temporary registers needs to be allocated to store a constant. */
3598 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3599 {
3600     int i, n;
3601 
3602     for (i = 0, n = s->nb_globals; i < n; i++) {
3603         TCGTemp *ts = &s->temps[i];
3604         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3605                          || ts->kind == TEMP_FIXED
3606                          || ts->mem_coherent);
3607     }
3608 }
3609 
3610 /* at the end of a basic block, we assume all temporaries are dead and
3611    all globals are stored at their canonical location. */
3612 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3613 {
3614     int i;
3615 
3616     for (i = s->nb_globals; i < s->nb_temps; i++) {
3617         TCGTemp *ts = &s->temps[i];
3618 
3619         switch (ts->kind) {
3620         case TEMP_LOCAL:
3621             temp_save(s, ts, allocated_regs);
3622             break;
3623         case TEMP_NORMAL:
3624             /* The liveness analysis already ensures that temps are dead.
3625                Keep an tcg_debug_assert for safety. */
3626             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3627             break;
3628         case TEMP_CONST:
3629             /* Similarly, we should have freed any allocated register. */
3630             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3631             break;
3632         default:
3633             g_assert_not_reached();
3634         }
3635     }
3636 
3637     save_globals(s, allocated_regs);
3638 }
3639 
3640 /*
3641  * At a conditional branch, we assume all temporaries are dead and
3642  * all globals and local temps are synced to their location.
3643  */
3644 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3645 {
3646     sync_globals(s, allocated_regs);
3647 
3648     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3649         TCGTemp *ts = &s->temps[i];
3650         /*
3651          * The liveness analysis already ensures that temps are dead.
3652          * Keep tcg_debug_asserts for safety.
3653          */
3654         switch (ts->kind) {
3655         case TEMP_LOCAL:
3656             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3657             break;
3658         case TEMP_NORMAL:
3659             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3660             break;
3661         case TEMP_CONST:
3662             break;
3663         default:
3664             g_assert_not_reached();
3665         }
3666     }
3667 }
3668 
/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 * Records @val as the constant value of @ots; no host code is emitted
 * here unless the output must be synced to memory (temp_sync below
 * takes care of materializing the constant in that case).
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here.  */
    if (ots->val_type == TEMP_VAL_REG) {
        /* Release the register previously holding the output.  */
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        /* Output must also exist in memory; sync may emit code.  */
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3692 
3693 /*
3694  * Specialized code generation for INDEX_op_mov_*.
3695  */
3696 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3697 {
3698     const TCGLifeData arg_life = op->life;
3699     TCGRegSet allocated_regs, preferred_regs;
3700     TCGTemp *ts, *ots;
3701     TCGType otype, itype;
3702 
3703     allocated_regs = s->reserved_regs;
3704     preferred_regs = op->output_pref[0];
3705     ots = arg_temp(op->args[0]);
3706     ts = arg_temp(op->args[1]);
3707 
3708     /* ENV should not be modified.  */
3709     tcg_debug_assert(!temp_readonly(ots));
3710 
3711     /* Note that otype != itype for no-op truncation.  */
3712     otype = ots->type;
3713     itype = ts->type;
3714 
3715     if (ts->val_type == TEMP_VAL_CONST) {
3716         /* propagate constant or generate sti */
3717         tcg_target_ulong val = ts->val;
3718         if (IS_DEAD_ARG(1)) {
3719             temp_dead(s, ts);
3720         }
3721         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3722         return;
3723     }
3724 
3725     /* If the source value is in memory we're going to be forced
3726        to have it in a register in order to perform the copy.  Copy
3727        the SOURCE value into its own register first, that way we
3728        don't have to reload SOURCE the next time it is used. */
3729     if (ts->val_type == TEMP_VAL_MEM) {
3730         temp_load(s, ts, tcg_target_available_regs[itype],
3731                   allocated_regs, preferred_regs);
3732     }
3733 
3734     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3735     if (IS_DEAD_ARG(0)) {
3736         /* mov to a non-saved dead register makes no sense (even with
3737            liveness analysis disabled). */
3738         tcg_debug_assert(NEED_SYNC_ARG(0));
3739         if (!ots->mem_allocated) {
3740             temp_allocate_frame(s, ots);
3741         }
3742         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3743         if (IS_DEAD_ARG(1)) {
3744             temp_dead(s, ts);
3745         }
3746         temp_dead(s, ots);
3747     } else {
3748         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3749             /* the mov can be suppressed */
3750             if (ots->val_type == TEMP_VAL_REG) {
3751                 s->reg_to_temp[ots->reg] = NULL;
3752             }
3753             ots->reg = ts->reg;
3754             temp_dead(s, ts);
3755         } else {
3756             if (ots->val_type != TEMP_VAL_REG) {
3757                 /* When allocating a new register, make sure to not spill the
3758                    input one. */
3759                 tcg_regset_set_reg(allocated_regs, ts->reg);
3760                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3761                                          allocated_regs, preferred_regs,
3762                                          ots->indirect_base);
3763             }
3764             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3765                 /*
3766                  * Cross register class move not supported.
3767                  * Store the source register into the destination slot
3768                  * and leave the destination temp as TEMP_VAL_MEM.
3769                  */
3770                 assert(!temp_readonly(ots));
3771                 if (!ts->mem_allocated) {
3772                     temp_allocate_frame(s, ots);
3773                 }
3774                 tcg_out_st(s, ts->type, ts->reg,
3775                            ots->mem_base->reg, ots->mem_offset);
3776                 ots->mem_coherent = 1;
3777                 temp_free_or_dead(s, ots, -1);
3778                 return;
3779             }
3780         }
3781         ots->val_type = TEMP_VAL_REG;
3782         ots->mem_coherent = 0;
3783         s->reg_to_temp[ots->reg] = ots;
3784         if (NEED_SYNC_ARG(0)) {
3785             temp_sync(s, ots, allocated_regs, 0, 0);
3786         }
3787     }
3788 }
3789 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 * Broadcast the scalar input op->args[1] into every element of the
 * vector output op->args[0], trying in order: constant propagation,
 * register-to-vector dup, dup-from-memory, plain load + in-place dup.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;
    }

    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
#ifdef HOST_WORDS_BIGENDIAN
        /* Point at the low-order element within the big-endian slot.  */
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* No direct dup-from-memory; load into the output register.  */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3901 
/*
 * Generic register allocation and code emission for a single TCGOp.
 * Satisfies the opcode's input constraints (loading temps into suitable
 * registers or passing constants through), handles call-clobber and
 * side-effect flags, satisfies the output constraints, and finally
 * emits the host instruction via tcg_out_op()/tcg_out_vec_op().
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Visit inputs in constraint-sorted order.  */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            /* This input is aliased to an output; prefer its register.  */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];

            /*
             * If the input is readonly, then it cannot also be an
             * output and aliased to itself.  If the input is not
             * dead after the instruction, we must allocate a new
             * register and move it.
             */
            if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                goto allocate_in_reg;
            }

            /*
             * Check if the current register has already been allocated
             * for another input aliased to an output.
             */
            if (ts->val_type == TEMP_VAL_REG) {
                reg = ts->reg;
                for (int k2 = 0; k2 < k; k2++) {
                    int i2 = def->args_ct[nb_oargs + k2].sort_index;
                    if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                        goto allocate_in_reg;
                    }
                }
            }
            i_preferred_regs = o_preferred_regs;
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
 allocate_in_reg:
            /*
             * Allocate a new register matching the constraint
             * and move the temporary register into it.
             */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Output shares the register of its aliased input.  */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* Output must not overlap any input register.  */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4086 
/*
 * Specialized allocation for INDEX_op_dup2_vec: broadcast a 64-bit
 * value, supplied as two 32-bit halves, into a vector register.
 * Returns false if no direct expansion was found and the caller must
 * fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);   /* low 32 bits (see deposit64 below) */
    itsh = arg_temp(op->args[2]);   /* high 32 bits */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the value.  */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
        /* Both halves must be coherent with memory before loading.  */
        if (!itsl->mem_coherent) {
            temp_sync(s, itsl, s->reserved_regs, 0, 0);
        }
        if (!itsh->mem_coherent) {
            temp_sync(s, itsh, s->reserved_regs, 0, 0);
        }
#ifdef HOST_WORDS_BIGENDIAN
        TCGTemp *its = itsh;
#else
        TCGTemp *its = itsl;
#endif
        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4178 
/* Negate stack-slot offsets on hosts whose stack grows upward.  */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
4184 
/*
 * Register allocation and code emission for a helper-call op.
 * Marshals arguments into stack slots and the target's integer
 * argument registers, frees call-clobbered registers, saves/syncs
 * globals per the call flags, emits the call, and binds the return
 * registers to the output temps.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The function pointer and call flags follow the arguments.  */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    /* Arguments beyond the register count are stored to the stack.  */
    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever currently occupies the ABI register.  */
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                TCGRegSet arg_set = 0;

                /* Load the temp directly into the required ABI register.  */
                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        /* Bind the ABI return register to the output temp.  */
        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4319 
4320 #ifdef CONFIG_PROFILER
4321 
/* avoid copy/paste errors */

/* Atomically read (from)->field and accumulate it into (to)->field.  */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Atomically read (from)->field and keep the maximum in (to)->field.  */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4335 
4336 /* Pass in a zero'ed @prof */
4337 static inline
4338 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4339 {
4340     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4341     unsigned int i;
4342 
4343     for (i = 0; i < n_ctxs; i++) {
4344         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4345         const TCGProfile *orig = &s->prof;
4346 
4347         if (counters) {
4348             PROF_ADD(prof, orig, cpu_exec_time);
4349             PROF_ADD(prof, orig, tb_count1);
4350             PROF_ADD(prof, orig, tb_count);
4351             PROF_ADD(prof, orig, op_count);
4352             PROF_MAX(prof, orig, op_count_max);
4353             PROF_ADD(prof, orig, temp_count);
4354             PROF_MAX(prof, orig, temp_count_max);
4355             PROF_ADD(prof, orig, del_op_count);
4356             PROF_ADD(prof, orig, code_in_len);
4357             PROF_ADD(prof, orig, code_out_len);
4358             PROF_ADD(prof, orig, search_out_len);
4359             PROF_ADD(prof, orig, interm_time);
4360             PROF_ADD(prof, orig, code_time);
4361             PROF_ADD(prof, orig, la_time);
4362             PROF_ADD(prof, orig, opt_time);
4363             PROF_ADD(prof, orig, restore_count);
4364             PROF_ADD(prof, orig, restore_time);
4365         }
4366         if (table) {
4367             int i;
4368 
4369             for (i = 0; i < NB_OPS; i++) {
4370                 PROF_ADD(prof, orig, table_op_count[i]);
4371             }
4372         }
4373     }
4374 }
4375 
4376 #undef PROF_ADD
4377 #undef PROF_MAX
4378 
/* Snapshot only the scalar counters, not the per-opcode table.  */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4383 
/* Snapshot only the per-opcode table, not the scalar counters.  */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4388 
4389 void tcg_dump_op_count(void)
4390 {
4391     TCGProfile prof = {};
4392     int i;
4393 
4394     tcg_profile_snapshot_table(&prof);
4395     for (i = 0; i < NB_OPS; i++) {
4396         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4397                     prof.table_op_count[i]);
4398     }
4399 }
4400 
4401 int64_t tcg_cpu_exec_time(void)
4402 {
4403     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4404     unsigned int i;
4405     int64_t ret = 0;
4406 
4407     for (i = 0; i < n_ctxs; i++) {
4408         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4409         const TCGProfile *prof = &s->prof;
4410 
4411         ret += qatomic_read(&prof->cpu_exec_time);
4412     }
4413     return ret;
4414 }
4415 #else
/* Stub used when CONFIG_PROFILER is not defined.  */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4420 
/* Stub used when CONFIG_PROFILER is not defined; exits with an error,
   since there is no meaningful value to return.  */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4426 #endif
4427 
4428 
/*
 * Generate host machine code for @tb from the TCG ops accumulated
 * in @s->ops: optimize, run liveness analysis, then allocate
 * registers and emit code one op at a time.
 *
 * Returns the number of bytes of host code emitted.  Negative values
 * report overflow and ask the caller to restart code generation:
 *   -1  the code buffer high-water mark was crossed;
 *   -2  the TB grew past the 16-bit gen_insn_end_off limit, or a
 *       relocation could not be resolved;
 *   other negative values may be propagated from the target's
 *   ldst/pool finalization hooks.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Accumulate op/temp totals and their per-TB maxima.  */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op stream before any optimization, if requested.  */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    /* opt_time accumulates as -start ... +end around the optimizer.  */
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Main loop: allocate registers and emit host code, op by op.  */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record where the previous guest insn's code ended, then
               capture this insn's start data words.  */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each target_ulong arrived split across two op args;
                   reassemble it.  */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start op must have been seen.  */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4654 
4655 #ifdef CONFIG_PROFILER
4656 void tcg_dump_info(void)
4657 {
4658     TCGProfile prof = {};
4659     const TCGProfile *s;
4660     int64_t tb_count;
4661     int64_t tb_div_count;
4662     int64_t tot;
4663 
4664     tcg_profile_snapshot_counters(&prof);
4665     s = &prof;
4666     tb_count = s->tb_count;
4667     tb_div_count = tb_count ? tb_count : 1;
4668     tot = s->interm_time + s->code_time;
4669 
4670     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4671                 tot, tot / 2.4e9);
4672     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4673                 " %0.1f%%)\n",
4674                 tb_count, s->tb_count1 - tb_count,
4675                 (double)(s->tb_count1 - s->tb_count)
4676                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4677     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4678                 (double)s->op_count / tb_div_count, s->op_count_max);
4679     qemu_printf("deleted ops/TB      %0.2f\n",
4680                 (double)s->del_op_count / tb_div_count);
4681     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4682                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4683     qemu_printf("avg host code/TB    %0.1f\n",
4684                 (double)s->code_out_len / tb_div_count);
4685     qemu_printf("avg search data/TB  %0.1f\n",
4686                 (double)s->search_out_len / tb_div_count);
4687 
4688     qemu_printf("cycles/op           %0.1f\n",
4689                 s->op_count ? (double)tot / s->op_count : 0);
4690     qemu_printf("cycles/in byte      %0.1f\n",
4691                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4692     qemu_printf("cycles/out byte     %0.1f\n",
4693                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4694     qemu_printf("cycles/search byte     %0.1f\n",
4695                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4696     if (tot == 0) {
4697         tot = 1;
4698     }
4699     qemu_printf("  gen_interm time   %0.1f%%\n",
4700                 (double)s->interm_time / tot * 100.0);
4701     qemu_printf("  gen_code time     %0.1f%%\n",
4702                 (double)s->code_time / tot * 100.0);
4703     qemu_printf("optim./code time    %0.1f%%\n",
4704                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4705                 * 100.0);
4706     qemu_printf("liveness/code time  %0.1f%%\n",
4707                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4708     qemu_printf("cpu_restore count   %" PRId64 "\n",
4709                 s->restore_count);
4710     qemu_printf("  avg cycles        %0.1f\n",
4711                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4712 }
4713 #else
void tcg_dump_info(void)
{
    /* Stub used when CONFIG_PROFILER is disabled.  */
    qemu_printf("[TCG profiler not compiled]\n");
}
4718 #endif
4719 
4720 #ifdef ELF_HOST_MACHINE
4721 /* In order to use this feature, the backend needs to do three things:
4722 
4723    (1) Define ELF_HOST_MACHINE to indicate both what value to
4724        put into the ELF image and to indicate support for the feature.
4725 
4726    (2) Define tcg_register_jit.  This should create a buffer containing
4727        the contents of a .debug_frame section that describes the post-
4728        prologue unwind info for the tcg machine.
4729 
4730    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4731 */
4732 
4733 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,   /* no pending change to the entry list */
    JIT_REGISTER_FN,    /* relevant_entry has just been added (see below) */
    JIT_UNREGISTER_FN   /* not used here; defined by the GDB JIT interface */
} jit_actions_t;
4739 
/* One registered symbol file; layout dictated by the GDB JIT interface. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;  /* doubly-linked list of entries */
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;           /* in-memory ELF image */
    uint64_t symfile_size;              /* size of that image, in bytes */
};
4746 
/* The root object GDB looks up by name; layout dictated by GDB docs. */
struct jit_descriptor {
    uint32_t version;                       /* interface version, must be 1 */
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
4753 
/* Per the GDB JIT interface, the debugger places a breakpoint on this
   function; calling it notifies GDB of a descriptor change.  noinline
   plus the empty asm keep the compiler from eliding the call.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4759 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  (version = 1, no pending
   action, no entries yet.)  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4763 
4764 /* End GDB interface.  */
4765 
/*
 * Return the offset of @str within the NUL-separated string table
 * @strtab, or 0 if it is not present.  A strtab starts with an empty
 * string, so offset 0 is the ELF convention for "no name" and doubles
 * as a safe "not found" result.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    /* Skip the leading NUL, then walk one entry at a time.  An empty
       entry marks the end of the table: the original code kept going
       and would read past the table if @str was absent.  */
    for (p = strtab + 1; *p != '\0'; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
    return 0;
}
4777 
4778 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4779                                  const void *debug_frame,
4780                                  size_t debug_frame_size)
4781 {
4782     struct __attribute__((packed)) DebugInfo {
4783         uint32_t  len;
4784         uint16_t  version;
4785         uint32_t  abbrev;
4786         uint8_t   ptr_size;
4787         uint8_t   cu_die;
4788         uint16_t  cu_lang;
4789         uintptr_t cu_low_pc;
4790         uintptr_t cu_high_pc;
4791         uint8_t   fn_die;
4792         char      fn_name[16];
4793         uintptr_t fn_low_pc;
4794         uintptr_t fn_high_pc;
4795         uint8_t   cu_eoc;
4796     };
4797 
4798     struct ElfImage {
4799         ElfW(Ehdr) ehdr;
4800         ElfW(Phdr) phdr;
4801         ElfW(Shdr) shdr[7];
4802         ElfW(Sym)  sym[2];
4803         struct DebugInfo di;
4804         uint8_t    da[24];
4805         char       str[80];
4806     };
4807 
4808     struct ElfImage *img;
4809 
4810     static const struct ElfImage img_template = {
4811         .ehdr = {
4812             .e_ident[EI_MAG0] = ELFMAG0,
4813             .e_ident[EI_MAG1] = ELFMAG1,
4814             .e_ident[EI_MAG2] = ELFMAG2,
4815             .e_ident[EI_MAG3] = ELFMAG3,
4816             .e_ident[EI_CLASS] = ELF_CLASS,
4817             .e_ident[EI_DATA] = ELF_DATA,
4818             .e_ident[EI_VERSION] = EV_CURRENT,
4819             .e_type = ET_EXEC,
4820             .e_machine = ELF_HOST_MACHINE,
4821             .e_version = EV_CURRENT,
4822             .e_phoff = offsetof(struct ElfImage, phdr),
4823             .e_shoff = offsetof(struct ElfImage, shdr),
4824             .e_ehsize = sizeof(ElfW(Shdr)),
4825             .e_phentsize = sizeof(ElfW(Phdr)),
4826             .e_phnum = 1,
4827             .e_shentsize = sizeof(ElfW(Shdr)),
4828             .e_shnum = ARRAY_SIZE(img->shdr),
4829             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4830 #ifdef ELF_HOST_FLAGS
4831             .e_flags = ELF_HOST_FLAGS,
4832 #endif
4833 #ifdef ELF_OSABI
4834             .e_ident[EI_OSABI] = ELF_OSABI,
4835 #endif
4836         },
4837         .phdr = {
4838             .p_type = PT_LOAD,
4839             .p_flags = PF_X,
4840         },
4841         .shdr = {
4842             [0] = { .sh_type = SHT_NULL },
4843             /* Trick: The contents of code_gen_buffer are not present in
4844                this fake ELF file; that got allocated elsewhere.  Therefore
4845                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4846                will not look for contents.  We can record any address.  */
4847             [1] = { /* .text */
4848                 .sh_type = SHT_NOBITS,
4849                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4850             },
4851             [2] = { /* .debug_info */
4852                 .sh_type = SHT_PROGBITS,
4853                 .sh_offset = offsetof(struct ElfImage, di),
4854                 .sh_size = sizeof(struct DebugInfo),
4855             },
4856             [3] = { /* .debug_abbrev */
4857                 .sh_type = SHT_PROGBITS,
4858                 .sh_offset = offsetof(struct ElfImage, da),
4859                 .sh_size = sizeof(img->da),
4860             },
4861             [4] = { /* .debug_frame */
4862                 .sh_type = SHT_PROGBITS,
4863                 .sh_offset = sizeof(struct ElfImage),
4864             },
4865             [5] = { /* .symtab */
4866                 .sh_type = SHT_SYMTAB,
4867                 .sh_offset = offsetof(struct ElfImage, sym),
4868                 .sh_size = sizeof(img->sym),
4869                 .sh_info = 1,
4870                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4871                 .sh_entsize = sizeof(ElfW(Sym)),
4872             },
4873             [6] = { /* .strtab */
4874                 .sh_type = SHT_STRTAB,
4875                 .sh_offset = offsetof(struct ElfImage, str),
4876                 .sh_size = sizeof(img->str),
4877             }
4878         },
4879         .sym = {
4880             [1] = { /* code_gen_buffer */
4881                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4882                 .st_shndx = 1,
4883             }
4884         },
4885         .di = {
4886             .len = sizeof(struct DebugInfo) - 4,
4887             .version = 2,
4888             .ptr_size = sizeof(void *),
4889             .cu_die = 1,
4890             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4891             .fn_die = 2,
4892             .fn_name = "code_gen_buffer"
4893         },
4894         .da = {
4895             1,          /* abbrev number (the cu) */
4896             0x11, 1,    /* DW_TAG_compile_unit, has children */
4897             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4898             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4899             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4900             0, 0,       /* end of abbrev */
4901             2,          /* abbrev number (the fn) */
4902             0x2e, 0,    /* DW_TAG_subprogram, no children */
4903             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4904             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4905             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4906             0, 0,       /* end of abbrev */
4907             0           /* no more abbrev */
4908         },
4909         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4910                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4911     };
4912 
4913     /* We only need a single jit entry; statically allocate it.  */
4914     static struct jit_code_entry one_entry;
4915 
4916     uintptr_t buf = (uintptr_t)buf_ptr;
4917     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4918     DebugFrameHeader *dfh;
4919 
4920     img = g_malloc(img_size);
4921     *img = img_template;
4922 
4923     img->phdr.p_vaddr = buf;
4924     img->phdr.p_paddr = buf;
4925     img->phdr.p_memsz = buf_size;
4926 
4927     img->shdr[1].sh_name = find_string(img->str, ".text");
4928     img->shdr[1].sh_addr = buf;
4929     img->shdr[1].sh_size = buf_size;
4930 
4931     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4932     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4933 
4934     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4935     img->shdr[4].sh_size = debug_frame_size;
4936 
4937     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4938     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4939 
4940     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4941     img->sym[1].st_value = buf;
4942     img->sym[1].st_size = buf_size;
4943 
4944     img->di.cu_low_pc = buf;
4945     img->di.cu_high_pc = buf + buf_size;
4946     img->di.fn_low_pc = buf;
4947     img->di.fn_high_pc = buf + buf_size;
4948 
4949     dfh = (DebugFrameHeader *)(img + 1);
4950     memcpy(dfh, debug_frame, debug_frame_size);
4951     dfh->fde.func_start = buf;
4952     dfh->fde.func_len = buf_size;
4953 
4954 #ifdef DEBUG_JIT
4955     /* Enable this block to be able to debug the ELF image file creation.
4956        One can use readelf, objdump, or other inspection utilities.  */
4957     {
4958         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4959         if (f) {
4960             if (fwrite(img, img_size, 1, f) != img_size) {
4961                 /* Avoid stupid unused return value warning for fwrite.  */
4962             }
4963             fclose(f);
4964         }
4965     }
4966 #endif
4967 
4968     one_entry.symfile_addr = img;
4969     one_entry.symfile_size = img_size;
4970 
4971     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4972     __jit_debug_descriptor.relevant_entry = &one_entry;
4973     __jit_debug_descriptor.first_entry = &one_entry;
4974     __jit_debug_register_code();
4975 }
4976 #else
4977 /* No support for the feature.  Provide the entry point expected by exec.c,
4978    and implement the internal function we declared earlier.  */
4979 
/* No-op: ELF_HOST_MACHINE is not defined for this host, so there is
   no ELF image to build; all arguments are intentionally unused.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
4985 
/* No-op: GDB JIT registration is unsupported on this host.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
4989 #endif /* ELF_HOST_MACHINE */
4990 
4991 #if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    /* The backend advertises no vector support, so no vector op
       should ever reach expansion.  */
    g_assert_not_reached();
}
4996 #endif
4997