xref: /qemu/accel/tcg/translate-all.c (revision 75ac231c)
1 /*
2  *  Host code generation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46 
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/timer.h"
53 #include "qemu/main-loop.h"
54 #include "qemu/cacheinfo.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal.h"
65 
66 /* make various TB consistency checks */
67 
68 /**
69  * struct page_entry - page descriptor entry
70  * @pd:     pointer to the &struct PageDesc of the page this entry represents
71  * @index:  page index of the page
72  * @locked: whether the page is locked
73  *
74  * This struct helps us keep track of the locked state of a page, without
75  * bloating &struct PageDesc.
76  *
77  * A page lock protects accesses to all fields of &struct PageDesc.
78  *
79  * See also: &struct page_collection.
80  */
81 struct page_entry {
82     PageDesc *pd;
83     tb_page_addr_t index;
84     bool locked;
85 };
86 
87 /**
88  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
89  * @tree:   Binary search tree (BST) of the pages, with key == page index
90  * @max:    Pointer to the page in @tree with the highest page index
91  *
92  * To avoid deadlock we lock pages in ascending order of page index.
93  * When operating on a set of pages, we need to keep track of them so that
94  * we can lock them in order and also unlock them later. For this we collect
95  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
96  * @tree implementation we use does not provide an O(1) operation to obtain the
97  * highest-ranked element, we use @max to keep track of the inserted page
98  * with the highest index. This is valuable because if a page is not in
99  * the tree and its index is higher than @max's, then we can lock it
100  * without breaking the locking order rule.
101  *
102  * Note on naming: 'struct page_set' would be shorter, but we already have a few
103  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
104  *
105  * See also: page_collection_lock().
106  */
107 struct page_collection {
108     GTree *tree;
109     struct page_entry *max;
110 };
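/*
 * Illustrative example of the @max rule above: if the collection already
 * holds pages 3 and 7 (so @max is the entry for page 7) and we now need
 * page 9, we can lock it immediately; needing page 5 instead forces a
 * trylock and, if that is busy, a full restart in ascending order.
 */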
111 
112 /*
113  * In system mode we want L1_MAP to be based on ram offsets,
114  * while in user mode we want it to be based on virtual addresses.
115  *
116  * TODO: For user mode, see the caveat re host vs guest virtual
117  * address spaces near GUEST_ADDR_MAX.
118  */
119 #if !defined(CONFIG_USER_ONLY)
120 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
121 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
122 #else
123 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
124 #endif
125 #else
126 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
127 #endif
128 
129 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
130 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
131                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
132                   * BITS_PER_BYTE);
133 
134 /*
135  * L1 Mapping properties
136  */
137 int v_l1_size;
138 int v_l1_shift;
139 int v_l2_levels;
140 
141 void *l1_map[V_L1_MAX_SIZE];
142 
143 TBContext tb_ctx;
144 
145 static void page_table_config_init(void)
146 {
147     uint32_t v_l1_bits;
148 
149     assert(TARGET_PAGE_BITS);
150     /* The bits remaining after N lower levels of page tables.  */
151     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
152     if (v_l1_bits < V_L1_MIN_BITS) {
153         v_l1_bits += V_L2_BITS;
154     }
155 
156     v_l1_size = 1 << v_l1_bits;
157     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
158     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
159 
160     assert(v_l1_bits <= V_L1_MAX_BITS);
161     assert(v_l1_shift % V_L2_BITS == 0);
162     assert(v_l2_levels >= 0);
163 }
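/*
 * Worked example (illustrative only; the concrete values of V_L2_BITS and
 * V_L1_MIN_BITS are assumptions about internal.h, taken here as 10 and 4):
 * with L1_MAP_ADDR_SPACE_BITS == 48 and TARGET_PAGE_BITS == 12 we get
 * (48 - 12) % 10 == 6 remaining bits, which is >= V_L1_MIN_BITS, so
 * v_l1_size == 64, v_l1_shift == 48 - 12 - 6 == 30, and
 * v_l2_levels == 30 / 10 - 1 == 2: the 36-bit page index splits 6/10/10/10
 * across the L1 table, two intermediate levels, and the PageDesc level.
 */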
164 
165 /* Encode VAL as a signed leb128 sequence at P.
166    Return P incremented past the encoded value.  */
167 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
168 {
169     int more, byte;
170 
171     do {
172         byte = val & 0x7f;
173         val >>= 7;
174         more = !((val == 0 && (byte & 0x40) == 0)
175                  || (val == -1 && (byte & 0x40) != 0));
176         if (more) {
177             byte |= 0x80;
178         }
179         *p++ = byte;
180     } while (more);
181 
182     return p;
183 }
184 
185 /* Decode a signed leb128 sequence at *PP; increment *PP past the
186    decoded value.  Return the decoded value.  */
187 static target_long decode_sleb128(const uint8_t **pp)
188 {
189     const uint8_t *p = *pp;
190     target_long val = 0;
191     int byte, shift = 0;
192 
193     do {
194         byte = *p++;
195         val |= (target_ulong)(byte & 0x7f) << shift;
196         shift += 7;
197     } while (byte & 0x80);
198     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
199         val |= -(target_ulong)1 << shift;
200     }
201 
202     *pp = p;
203     return val;
204 }
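/*
 * Illustrative example of the encoding above: the delta 200 is emitted as
 * the two bytes 0xc8 0x01 (0xc8 == (200 & 0x7f) | 0x80 for continuation),
 * while the delta -3 fits in the single byte 0x7d because its sign bit
 * (0x40) is already set; decode_sleb128() sign-extends it back to -3.
 */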
205 
206 /* Encode the data collected about the instructions while compiling TB.
207    Place the data at BLOCK, and return the number of bytes consumed.
208 
209    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
210    which come from the target's insn_start data, followed by a uintptr_t
211    which comes from the host pc of the end of the code implementing the insn.
212 
213    Each line of the table is encoded as sleb128 deltas from the previous
214    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
215    That is, the first column is seeded with the guest pc, the last column
216    with the host pc, and the middle columns with zeros.  */
217 
218 static int encode_search(TranslationBlock *tb, uint8_t *block)
219 {
220     uint8_t *highwater = tcg_ctx->code_gen_highwater;
221     uint8_t *p = block;
222     int i, j, n;
223 
224     for (i = 0, n = tb->icount; i < n; ++i) {
225         target_ulong prev;
226 
227         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
228             if (i == 0) {
229                 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
230             } else {
231                 prev = tcg_ctx->gen_insn_data[i - 1][j];
232             }
233             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
234         }
235         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
236         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
237 
238         /* Test for (pending) buffer overflow.  The assumption is that any
239            one row beginning below the high water mark cannot overrun
240            the buffer completely.  Thus we can test for overflow after
241            encoding a row without having to check during encoding.  */
242         if (unlikely(p > highwater)) {
243             return -1;
244         }
245     }
246 
247     return p - block;
248 }
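/*
 * Illustrative layout (assuming TARGET_INSN_START_WORDS == 1 and
 * !TARGET_TB_PCREL): for a TB with two guest insns at pc and pc + 4, the
 * stream produced above is sleb(0), sleb(end_off[0]), sleb(4),
 * sleb(end_off[1] - end_off[0]), i.e. each row stores only deltas from the
 * seeded or previous row.
 */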
249 
250 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
251                                    uint64_t *data)
252 {
253     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
254     const uint8_t *p = tb->tc.ptr + tb->tc.size;
255     int i, j, num_insns = tb->icount;
256 
257     host_pc -= GETPC_ADJ;
258 
259     if (host_pc < iter_pc) {
260         return -1;
261     }
262 
263     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
264     if (!TARGET_TB_PCREL) {
265         data[0] = tb_pc(tb);
266     }
267 
268     /*
269      * Reconstruct the stored insn data while looking for the point
270      * at which the end of the insn exceeds host_pc.
271      */
272     for (i = 0; i < num_insns; ++i) {
273         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
274             data[j] += decode_sleb128(&p);
275         }
276         iter_pc += decode_sleb128(&p);
277         if (iter_pc > host_pc) {
278             return num_insns - i;
279         }
280     }
281     return -1;
282 }
283 
284 /*
285  * The cpu state corresponding to 'host_pc' is restored in
286  * preparation for exiting the TB.
287  */
288 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
289                                uintptr_t host_pc)
290 {
291     uint64_t data[TARGET_INSN_START_WORDS];
292 #ifdef CONFIG_PROFILER
293     TCGProfile *prof = &tcg_ctx->prof;
294     int64_t ti = profile_getclock();
295 #endif
296     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
297 
298     if (insns_left < 0) {
299         return;
300     }
301 
302     if (tb_cflags(tb) & CF_USE_ICOUNT) {
303         assert(icount_enabled());
304         /*
305          * Reset the cycle counter to the start of the block and
306          * shift it to the number of actually executed instructions.
307          */
308         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
309     }
310 
311     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
312 
313 #ifdef CONFIG_PROFILER
314     qatomic_set(&prof->restore_time,
315                 prof->restore_time + profile_getclock() - ti);
316     qatomic_set(&prof->restore_count, prof->restore_count + 1);
317 #endif
318 }
319 
320 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
321 {
322     /*
323      * The host_pc has to be in the rx region of the code buffer.
324      * If it is not we will not be able to resolve it here.
325      * The two cases where host_pc will not be correct are:
326      *
327      *  - fault during translation (instruction fetch)
328      *  - fault from helper (not using GETPC() macro)
329      *
330      * Either way we need to return early as we can't resolve it here.
331      */
332     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
333         TranslationBlock *tb = tcg_tb_lookup(host_pc);
334         if (tb) {
335             cpu_restore_state_from_tb(cpu, tb, host_pc);
336             return true;
337         }
338     }
339     return false;
340 }
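/*
 * Typical usage sketch (not a contract defined here): a TCG helper that
 * detects a fault passes its return address, obtained with GETPC(), as
 * @host_pc so that the guest state is rewound to the faulting instruction
 * before the exception is delivered.
 */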
341 
342 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
343 {
344     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
345         TranslationBlock *tb = tcg_tb_lookup(host_pc);
346         if (tb) {
347             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
348         }
349     }
350     return false;
351 }
352 
353 void page_init(void)
354 {
355     page_size_init();
356     page_table_config_init();
357 
358 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
359     {
360 #ifdef HAVE_KINFO_GETVMMAP
361         struct kinfo_vmentry *freep;
362         int i, cnt;
363 
364         freep = kinfo_getvmmap(getpid(), &cnt);
365         if (freep) {
366             mmap_lock();
367             for (i = 0; i < cnt; i++) {
368                 unsigned long startaddr, endaddr;
369 
370                 startaddr = freep[i].kve_start;
371                 endaddr = freep[i].kve_end;
372                 if (h2g_valid(startaddr)) {
373                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
374 
375                     if (h2g_valid(endaddr)) {
376                         endaddr = h2g(endaddr);
377                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
378                     } else {
379 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
380                         endaddr = ~0ul;
381                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
382 #endif
383                     }
384                 }
385             }
386             free(freep);
387             mmap_unlock();
388         }
389 #else
390         FILE *f;
391 
392         last_brk = (unsigned long)sbrk(0);
393 
394         f = fopen("/compat/linux/proc/self/maps", "r");
395         if (f) {
396             mmap_lock();
397 
398             do {
399                 unsigned long startaddr, endaddr;
400                 int n;
401 
402                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
403 
404                 if (n == 2 && h2g_valid(startaddr)) {
405                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
406 
407                     if (h2g_valid(endaddr)) {
408                         endaddr = h2g(endaddr);
409                     } else {
410                         endaddr = ~0ul;
411                     }
412                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
413                 }
414             } while (!feof(f));
415 
416             fclose(f);
417             mmap_unlock();
418         }
419 #endif
420     }
421 #endif
422 }
423 
424 PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
425 {
426     PageDesc *pd;
427     void **lp;
428     int i;
429 
430     /* Level 1.  Always allocated.  */
431     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
432 
433     /* Level 2..N-1.  */
434     for (i = v_l2_levels; i > 0; i--) {
435         void **p = qatomic_rcu_read(lp);
436 
437         if (p == NULL) {
438             void *existing;
439 
440             if (!alloc) {
441                 return NULL;
442             }
443             p = g_new0(void *, V_L2_SIZE);
444             existing = qatomic_cmpxchg(lp, NULL, p);
445             if (unlikely(existing)) {
446                 g_free(p);
447                 p = existing;
448             }
449         }
450 
451         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
452     }
453 
454     pd = qatomic_rcu_read(lp);
455     if (pd == NULL) {
456         void *existing;
457 
458         if (!alloc) {
459             return NULL;
460         }
461         pd = g_new0(PageDesc, V_L2_SIZE);
462 #ifndef CONFIG_USER_ONLY
463         {
464             int i;
465 
466             for (i = 0; i < V_L2_SIZE; i++) {
467                 qemu_spin_init(&pd[i].lock);
468             }
469         }
470 #endif
471         existing = qatomic_cmpxchg(lp, NULL, pd);
472         if (unlikely(existing)) {
473 #ifndef CONFIG_USER_ONLY
474             {
475                 int i;
476 
477                 for (i = 0; i < V_L2_SIZE; i++) {
478                     qemu_spin_destroy(&pd[i].lock);
479                 }
480             }
481 #endif
482             g_free(pd);
483             pd = existing;
484         }
485     }
486 
487     return pd + (index & (V_L2_SIZE - 1));
488 }
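/*
 * Illustrative walk, reusing the example configuration above (assumed
 * V_L2_BITS == 10): with v_l1_shift == 30 and v_l2_levels == 2, bits
 * [35:30] of @index select the l1_map slot, bits [29:20] and [19:10]
 * select the two intermediate tables, and bits [9:0] index the final
 * PageDesc array returned here.
 */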
489 
490 /* In user-mode page locks aren't used; mmap_lock is enough */
491 #ifdef CONFIG_USER_ONLY
492 struct page_collection *
493 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
494 {
495     return NULL;
496 }
497 
498 void page_collection_unlock(struct page_collection *set)
499 { }
500 #else /* !CONFIG_USER_ONLY */
501 
502 #ifdef CONFIG_DEBUG_TCG
503 
504 static __thread GHashTable *ht_pages_locked_debug;
505 
506 static void ht_pages_locked_debug_init(void)
507 {
508     if (ht_pages_locked_debug) {
509         return;
510     }
511     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
512 }
513 
514 static bool page_is_locked(const PageDesc *pd)
515 {
516     PageDesc *found;
517 
518     ht_pages_locked_debug_init();
519     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
520     return !!found;
521 }
522 
523 static void page_lock__debug(PageDesc *pd)
524 {
525     ht_pages_locked_debug_init();
526     g_assert(!page_is_locked(pd));
527     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
528 }
529 
530 static void page_unlock__debug(const PageDesc *pd)
531 {
532     bool removed;
533 
534     ht_pages_locked_debug_init();
535     g_assert(page_is_locked(pd));
536     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
537     g_assert(removed);
538 }
539 
540 void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
541 {
542     if (unlikely(!page_is_locked(pd))) {
543         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
544                      pd, file, line);
545         abort();
546     }
547 }
548 
549 void assert_no_pages_locked(void)
550 {
551     ht_pages_locked_debug_init();
552     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
553 }
554 
555 #else /* !CONFIG_DEBUG_TCG */
556 
557 static inline void page_lock__debug(const PageDesc *pd) { }
558 static inline void page_unlock__debug(const PageDesc *pd) { }
559 
560 #endif /* CONFIG_DEBUG_TCG */
561 
562 void page_lock(PageDesc *pd)
563 {
564     page_lock__debug(pd);
565     qemu_spin_lock(&pd->lock);
566 }
567 
568 void page_unlock(PageDesc *pd)
569 {
570     qemu_spin_unlock(&pd->lock);
571     page_unlock__debug(pd);
572 }
573 
574 static inline struct page_entry *
575 page_entry_new(PageDesc *pd, tb_page_addr_t index)
576 {
577     struct page_entry *pe = g_malloc(sizeof(*pe));
578 
579     pe->index = index;
580     pe->pd = pd;
581     pe->locked = false;
582     return pe;
583 }
584 
585 static void page_entry_destroy(gpointer p)
586 {
587     struct page_entry *pe = p;
588 
589     g_assert(pe->locked);
590     page_unlock(pe->pd);
591     g_free(pe);
592 }
593 
594 /* returns false on success */
595 static bool page_entry_trylock(struct page_entry *pe)
596 {
597     bool busy;
598 
599     busy = qemu_spin_trylock(&pe->pd->lock);
600     if (!busy) {
601         g_assert(!pe->locked);
602         pe->locked = true;
603         page_lock__debug(pe->pd);
604     }
605     return busy;
606 }
607 
608 static void do_page_entry_lock(struct page_entry *pe)
609 {
610     page_lock(pe->pd);
611     g_assert(!pe->locked);
612     pe->locked = true;
613 }
614 
615 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
616 {
617     struct page_entry *pe = value;
618 
619     do_page_entry_lock(pe);
620     return FALSE;
621 }
622 
623 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
624 {
625     struct page_entry *pe = value;
626 
627     if (pe->locked) {
628         pe->locked = false;
629         page_unlock(pe->pd);
630     }
631     return FALSE;
632 }
633 
634 /*
635  * Add the page of @addr to @set and try to lock it.
636  * Returns true ("busy") if the page could not be locked; false otherwise.
637  */
638 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
639 {
640     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
641     struct page_entry *pe;
642     PageDesc *pd;
643 
644     pe = g_tree_lookup(set->tree, &index);
645     if (pe) {
646         return false;
647     }
648 
649     pd = page_find(index);
650     if (pd == NULL) {
651         return false;
652     }
653 
654     pe = page_entry_new(pd, index);
655     g_tree_insert(set->tree, &pe->index, pe);
656 
657     /*
658      * If this is either (1) the first insertion or (2) a page whose index
659      * is higher than any other so far, just lock the page and move on.
660      */
661     if (set->max == NULL || pe->index > set->max->index) {
662         set->max = pe;
663         do_page_entry_lock(pe);
664         return false;
665     }
666     /*
667      * Try to acquire the lock out of order; if it is busy, report that so
668      * the caller can drop all locks and reacquire them in ascending order.
669      */
670     return page_entry_trylock(pe);
671 }
672 
673 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
674 {
675     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
676     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
677 
678     if (a == b) {
679         return 0;
680     } else if (a < b) {
681         return -1;
682     }
683     return 1;
684 }
685 
686 /*
687  * Lock a range of pages ([@start,@end[) as well as the pages of all
688  * intersecting TBs.
689  * Locking order: acquire locks in ascending order of page index.
690  */
691 struct page_collection *
692 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
693 {
694     struct page_collection *set = g_malloc(sizeof(*set));
695     tb_page_addr_t index;
696     PageDesc *pd;
697 
698     start >>= TARGET_PAGE_BITS;
699     end   >>= TARGET_PAGE_BITS;
700     g_assert(start <= end);
701 
702     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
703                                 page_entry_destroy);
704     set->max = NULL;
705     assert_no_pages_locked();
706 
707  retry:
708     g_tree_foreach(set->tree, page_entry_lock, NULL);
709 
710     for (index = start; index <= end; index++) {
711         TranslationBlock *tb;
712         int n;
713 
714         pd = page_find(index);
715         if (pd == NULL) {
716             continue;
717         }
718         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
719             g_tree_foreach(set->tree, page_entry_unlock, NULL);
720             goto retry;
721         }
722         assert_page_locked(pd);
723         PAGE_FOR_EACH_TB(pd, tb, n) {
724             if (page_trylock_add(set, tb_page_addr0(tb)) ||
725                 (tb_page_addr1(tb) != -1 &&
726                  page_trylock_add(set, tb_page_addr1(tb)))) {
727                 /* drop all locks, and reacquire in order */
728                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
729                 goto retry;
730             }
731         }
732     }
733     return set;
734 }
735 
736 void page_collection_unlock(struct page_collection *set)
737 {
738     /* entries are unlocked and freed via page_entry_destroy */
739     g_tree_destroy(set->tree);
740     g_free(set);
741 }
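/*
 * Usage sketch (illustrative): a writer that must invalidate TBs over a
 * physical range first calls page_collection_lock(start, end), performs the
 * invalidation while all affected PageDescs are held, and then calls
 * page_collection_unlock(), which unlocks and frees the entries via
 * page_entry_destroy().
 */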
742 
743 #endif /* !CONFIG_USER_ONLY */
744 
745 /* Called with mmap_lock held for user mode emulation.  */
746 TranslationBlock *tb_gen_code(CPUState *cpu,
747                               target_ulong pc, target_ulong cs_base,
748                               uint32_t flags, int cflags)
749 {
750     CPUArchState *env = cpu->env_ptr;
751     TranslationBlock *tb, *existing_tb;
752     tb_page_addr_t phys_pc;
753     tcg_insn_unit *gen_code_buf;
754     int gen_code_size, search_size, max_insns;
755 #ifdef CONFIG_PROFILER
756     TCGProfile *prof = &tcg_ctx->prof;
757     int64_t ti;
758 #endif
759     void *host_pc;
760 
761     assert_memory_lock();
762     qemu_thread_jit_write();
763 
764     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
765 
766     if (phys_pc == -1) {
767         /* Generate a one-shot TB with 1 insn in it */
768         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
769     }
770 
771     max_insns = cflags & CF_COUNT_MASK;
772     if (max_insns == 0) {
773         max_insns = TCG_MAX_INSNS;
774     }
775     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
776 
777  buffer_overflow:
778     tb = tcg_tb_alloc(tcg_ctx);
779     if (unlikely(!tb)) {
780         /* flush must be done */
781         tb_flush(cpu);
782         mmap_unlock();
783         /* Make the execution loop process the flush as soon as possible.  */
784         cpu->exception_index = EXCP_INTERRUPT;
785         cpu_loop_exit(cpu);
786     }
787 
788     gen_code_buf = tcg_ctx->code_gen_ptr;
789     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
790 #if !TARGET_TB_PCREL
791     tb->pc = pc;
792 #endif
793     tb->cs_base = cs_base;
794     tb->flags = flags;
795     tb->cflags = cflags;
796     tb->trace_vcpu_dstate = *cpu->trace_dstate;
797     tb_set_page_addr0(tb, phys_pc);
798     tb_set_page_addr1(tb, -1);
799     tcg_ctx->tb_cflags = cflags;
800  tb_overflow:
801 
802 #ifdef CONFIG_PROFILER
803     /* includes aborted translations because of exceptions */
804     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
805     ti = profile_getclock();
806 #endif
807 
808     gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
809     if (unlikely(gen_code_size != 0)) {
810         goto error_return;
811     }
812 
813     tcg_func_start(tcg_ctx);
814 
815     tcg_ctx->cpu = env_cpu(env);
816     gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
817     assert(tb->size != 0);
818     tcg_ctx->cpu = NULL;
819     max_insns = tb->icount;
820 
821     trace_translate_block(tb, pc, tb->tc.ptr);
822 
823     /* generate machine code */
824     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
825     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
826     tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
827     if (TCG_TARGET_HAS_direct_jump) {
828         tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
829         tcg_ctx->tb_jmp_target_addr = NULL;
830     } else {
831         tcg_ctx->tb_jmp_insn_offset = NULL;
832         tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
833     }
834 
835 #ifdef CONFIG_PROFILER
836     qatomic_set(&prof->tb_count, prof->tb_count + 1);
837     qatomic_set(&prof->interm_time,
838                 prof->interm_time + profile_getclock() - ti);
839     ti = profile_getclock();
840 #endif
841 
842     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
843     if (unlikely(gen_code_size < 0)) {
844  error_return:
845         switch (gen_code_size) {
846         case -1:
847             /*
848              * Overflow of code_gen_buffer, or the current slice of it.
849              *
850              * TODO: We don't need to re-do gen_intermediate_code, nor
851              * should we re-do the tcg optimization currently hidden
852              * inside tcg_gen_code.  All that should be required is to
853              * flush the TBs, allocate a new TB, re-initialize it per
854              * above, and re-do the actual code generation.
855              */
856             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
857                           "Restarting code generation for "
858                           "code_gen_buffer overflow\n");
859             goto buffer_overflow;
860 
861         case -2:
862             /*
863              * The code generated for the TranslationBlock is too large.
864              * The maximum size allowed by the unwind info is 64k.
865              * There may be stricter constraints from relocations
866              * in the tcg backend.
867              *
868              * Try again with half as many insns as we attempted this time.
869              * If a single insn overflows, there's a bug somewhere...
870              */
871             assert(max_insns > 1);
872             max_insns /= 2;
873             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
874                           "Restarting code generation with "
875                           "smaller translation block (max %d insns)\n",
876                           max_insns);
877             goto tb_overflow;
878 
879         default:
880             g_assert_not_reached();
881         }
882     }
883     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
884     if (unlikely(search_size < 0)) {
885         goto buffer_overflow;
886     }
887     tb->tc.size = gen_code_size;
888 
889 #ifdef CONFIG_PROFILER
890     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
891     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
892     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
893     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
894 #endif
895 
896 #ifdef DEBUG_DISAS
897     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
898         qemu_log_in_addr_range(pc)) {
899         FILE *logfile = qemu_log_trylock();
900         if (logfile) {
901             int code_size, data_size;
902             const tcg_target_ulong *rx_data_gen_ptr;
903             size_t chunk_start;
904             int insn = 0;
905 
906             if (tcg_ctx->data_gen_ptr) {
907                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
908                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
909                 data_size = gen_code_size - code_size;
910             } else {
911                 rx_data_gen_ptr = 0;
912                 code_size = gen_code_size;
913                 data_size = 0;
914             }
915 
916             /* Dump header and the first instruction */
917             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
918             fprintf(logfile,
919                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
920                     tcg_ctx->gen_insn_data[insn][0]);
921             chunk_start = tcg_ctx->gen_insn_end_off[insn];
922             disas(logfile, tb->tc.ptr, chunk_start);
923 
924             /*
925              * Dump each instruction chunk, wrapping up empty chunks into
926              * the next instruction. The whole array is offset so the
927              * first entry is the beginning of the 2nd instruction.
928              */
929             while (insn < tb->icount) {
930                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
931                 if (chunk_end > chunk_start) {
932                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
933                             tcg_ctx->gen_insn_data[insn][0]);
934                     disas(logfile, tb->tc.ptr + chunk_start,
935                           chunk_end - chunk_start);
936                     chunk_start = chunk_end;
937                 }
938                 insn++;
939             }
940 
941             if (chunk_start < code_size) {
942                 fprintf(logfile, "  -- tb slow paths + alignment\n");
943                 disas(logfile, tb->tc.ptr + chunk_start,
944                       code_size - chunk_start);
945             }
946 
947             /* Finally dump any data we may have after the block */
948             if (data_size) {
949                 int i;
950                 fprintf(logfile, "  data: [size=%d]\n", data_size);
951                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
952                     if (sizeof(tcg_target_ulong) == 8) {
953                         fprintf(logfile,
954                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
955                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
956                     } else if (sizeof(tcg_target_ulong) == 4) {
957                         fprintf(logfile,
958                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
959                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
960                     } else {
961                         qemu_build_not_reached();
962                     }
963                 }
964             }
965             fprintf(logfile, "\n");
966             qemu_log_unlock(logfile);
967         }
968     }
969 #endif
970 
971     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
972         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
973                  CODE_GEN_ALIGN));
974 
975     /* init jump list */
976     qemu_spin_init(&tb->jmp_lock);
977     tb->jmp_list_head = (uintptr_t)NULL;
978     tb->jmp_list_next[0] = (uintptr_t)NULL;
979     tb->jmp_list_next[1] = (uintptr_t)NULL;
980     tb->jmp_dest[0] = (uintptr_t)NULL;
981     tb->jmp_dest[1] = (uintptr_t)NULL;
982 
983     /* init original jump addresses which have been set during tcg_gen_code() */
984     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
985         tb_reset_jump(tb, 0);
986     }
987     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
988         tb_reset_jump(tb, 1);
989     }
990 
991     /*
992      * If the TB is not associated with a physical RAM page then it must be
993      * a temporary one-insn TB, and we have nothing left to do. Return early
994      * before attempting to link to other TBs or add to the lookup table.
995      */
996     if (tb_page_addr0(tb) == -1) {
997         return tb;
998     }
999 
1000     /*
1001      * Insert TB into the corresponding region tree before publishing it
1002      * through QHT. Otherwise an unwind happening inside the TB might
1003      * fail to look the TB up via its host PC.
1004      */
1005     tcg_tb_insert(tb);
1006 
1007     /*
1008      * No explicit memory barrier is required -- tb_link_page() makes the
1009      * TB visible in a consistent state.
1010      */
1011     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
1012     /* if the TB already exists, discard what we just translated */
1013     if (unlikely(existing_tb != tb)) {
1014         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
1015 
1016         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
1017         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
1018         tcg_tb_remove(tb);
1019         return existing_tb;
1020     }
1021     return tb;
1022 }
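/*
 * Caller sketch (illustrative, not exhaustive): the cpu-exec loop invokes
 * tb_gen_code() when tb_lookup() misses, passing the values from
 * cpu_get_tb_cpu_state() plus curr_cflags(); either the freshly generated
 * TB or the already-existing one returned here is then executed.
 */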
1023 
1024 /* user-mode: call with mmap_lock held */
1025 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
1026 {
1027     TranslationBlock *tb;
1028 
1029     assert_memory_lock();
1030 
1031     tb = tcg_tb_lookup(retaddr);
1032     if (tb) {
1033         /* We can use retranslation to find the PC.  */
1034         cpu_restore_state_from_tb(cpu, tb, retaddr);
1035         tb_phys_invalidate(tb, -1);
1036     } else {
1037         /* The exception probably happened in a helper.  The CPU state should
1038            have been saved before calling it. Fetch the PC from there.  */
1039         CPUArchState *env = cpu->env_ptr;
1040         target_ulong pc, cs_base;
1041         tb_page_addr_t addr;
1042         uint32_t flags;
1043 
1044         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
1045         addr = get_page_addr_code(env, pc);
1046         if (addr != -1) {
1047             tb_invalidate_phys_range(addr, addr + 1);
1048         }
1049     }
1050 }
1051 
1052 #ifndef CONFIG_USER_ONLY
1053 /*
1054  * In deterministic execution mode, instructions doing device I/Os
1055  * must be at the end of the TB.
1056  *
1057  * Called by softmmu_template.h, with iothread mutex not held.
1058  */
1059 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
1060 {
1061     TranslationBlock *tb;
1062     CPUClass *cc;
1063     uint32_t n;
1064 
1065     tb = tcg_tb_lookup(retaddr);
1066     if (!tb) {
1067         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
1068                   (void *)retaddr);
1069     }
1070     cpu_restore_state_from_tb(cpu, tb, retaddr);
1071 
1072     /*
1073      * Some guests must re-execute the branch when re-executing a delay
1074      * slot instruction.  When this is the case, adjust icount and N
1075      * to account for the re-execution of the branch.
1076      */
1077     n = 1;
1078     cc = CPU_GET_CLASS(cpu);
1079     if (cc->tcg_ops->io_recompile_replay_branch &&
1080         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
1081         cpu_neg(cpu)->icount_decr.u16.low++;
1082         n = 2;
1083     }
1084 
1085     /*
1086      * Exit the loop and potentially generate a new TB executing just
1087      * the I/O insns. We also limit instrumentation to memory
1088      * operations only (which execute after completion) so we don't
1089      * double-instrument the instruction.
1090      */
1091     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
1092 
1093     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
1094         target_ulong pc = log_pc(cpu, tb);
1095         if (qemu_log_in_addr_range(pc)) {
1096             qemu_log("cpu_io_recompile: rewound execution of TB to "
1097                      TARGET_FMT_lx "\n", pc);
1098         }
1099     }
1100 
1101     cpu_loop_exit_noexc(cpu);
1102 }
1103 
1104 static void print_qht_statistics(struct qht_stats hst, GString *buf)
1105 {
1106     uint32_t hgram_opts;
1107     size_t hgram_bins;
1108     char *hgram;
1109 
1110     if (!hst.head_buckets) {
1111         return;
1112     }
1113     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
1114                            "(%0.2f%% head buckets used)\n",
1115                            hst.used_head_buckets, hst.head_buckets,
1116                            (double)hst.used_head_buckets /
1117                            hst.head_buckets * 100);
1118 
1119     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
1120     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
1121     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
1122         hgram_opts |= QDIST_PR_NODECIMAL;
1123     }
1124     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
1125     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
1126                            "Histogram: %s\n",
1127                            qdist_avg(&hst.occupancy) * 100, hgram);
1128     g_free(hgram);
1129 
1130     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
1131     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
1132     if (hgram_bins > 10) {
1133         hgram_bins = 10;
1134     } else {
1135         hgram_bins = 0;
1136         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
1137     }
1138     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
1139     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
1140                            "Histogram: %s\n",
1141                            qdist_avg(&hst.chain), hgram);
1142     g_free(hgram);
1143 }
1144 
1145 struct tb_tree_stats {
1146     size_t nb_tbs;
1147     size_t host_size;
1148     size_t target_size;
1149     size_t max_target_size;
1150     size_t direct_jmp_count;
1151     size_t direct_jmp2_count;
1152     size_t cross_page;
1153 };
1154 
1155 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
1156 {
1157     const TranslationBlock *tb = value;
1158     struct tb_tree_stats *tst = data;
1159 
1160     tst->nb_tbs++;
1161     tst->host_size += tb->tc.size;
1162     tst->target_size += tb->size;
1163     if (tb->size > tst->max_target_size) {
1164         tst->max_target_size = tb->size;
1165     }
1166     if (tb_page_addr1(tb) != -1) {
1167         tst->cross_page++;
1168     }
1169     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
1170         tst->direct_jmp_count++;
1171         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
1172             tst->direct_jmp2_count++;
1173         }
1174     }
1175     return false;
1176 }
1177 
1178 void dump_exec_info(GString *buf)
1179 {
1180     struct tb_tree_stats tst = {};
1181     struct qht_stats hst;
1182     size_t nb_tbs, flush_full, flush_part, flush_elide;
1183 
1184     tcg_tb_foreach(tb_tree_stats_iter, &tst);
1185     nb_tbs = tst.nb_tbs;
1186     /* XXX: avoid using doubles ? */
1187     g_string_append_printf(buf, "Translation buffer state:\n");
1188     /*
1189      * Report total code size including the padding and TB structs;
1190      * otherwise users might think "-accel tcg,tb-size" is not honoured.
1191      * For avg host size we use the precise numbers from tb_tree_stats though.
1192      */
1193     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
1194                            tcg_code_size(), tcg_code_capacity());
1195     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
1196     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
1197                            nb_tbs ? tst.target_size / nb_tbs : 0,
1198                            tst.max_target_size);
1199     g_string_append_printf(buf, "TB avg host size    %zu bytes "
1200                            "(expansion ratio: %0.1f)\n",
1201                            nb_tbs ? tst.host_size / nb_tbs : 0,
1202                            tst.target_size ?
1203                            (double)tst.host_size / tst.target_size : 0);
1204     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
1205                            tst.cross_page,
1206                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
1207     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
1208                            "(2 jumps=%zu %zu%%)\n",
1209                            tst.direct_jmp_count,
1210                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
1211                            tst.direct_jmp2_count,
1212                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
1213 
1214     qht_statistics_init(&tb_ctx.htable, &hst);
1215     print_qht_statistics(hst, buf);
1216     qht_statistics_destroy(&hst);
1217 
1218     g_string_append_printf(buf, "\nStatistics:\n");
1219     g_string_append_printf(buf, "TB flush count      %u\n",
1220                            qatomic_read(&tb_ctx.tb_flush_count));
1221     g_string_append_printf(buf, "TB invalidate count %u\n",
1222                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
1223 
1224     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
1225     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
1226     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
1227     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
1228     tcg_dump_info(buf);
1229 }
1230 
1231 #else /* CONFIG_USER_ONLY */
1232 
1233 void cpu_interrupt(CPUState *cpu, int mask)
1234 {
1235     g_assert(qemu_mutex_iothread_locked());
1236     cpu->interrupt_request |= mask;
1237     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
1238 }
1239 
1240 /*
1241  * Walks guest process memory "regions" one by one
1242  * and calls callback function 'fn' for each region.
1243  */
1244 struct walk_memory_regions_data {
1245     walk_memory_regions_fn fn;
1246     void *priv;
1247     target_ulong start;
1248     int prot;
1249 };
1250 
1251 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1252                                    target_ulong end, int new_prot)
1253 {
1254     if (data->start != -1u) {
1255         int rc = data->fn(data->priv, data->start, end, data->prot);
1256         if (rc != 0) {
1257             return rc;
1258         }
1259     }
1260 
1261     data->start = (new_prot ? end : -1u);
1262     data->prot = new_prot;
1263 
1264     return 0;
1265 }
1266 
1267 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1268                                  target_ulong base, int level, void **lp)
1269 {
1270     target_ulong pa;
1271     int i, rc;
1272 
1273     if (*lp == NULL) {
1274         return walk_memory_regions_end(data, base, 0);
1275     }
1276 
1277     if (level == 0) {
1278         PageDesc *pd = *lp;
1279 
1280         for (i = 0; i < V_L2_SIZE; ++i) {
1281             int prot = pd[i].flags;
1282 
1283             pa = base | (i << TARGET_PAGE_BITS);
1284             if (prot != data->prot) {
1285                 rc = walk_memory_regions_end(data, pa, prot);
1286                 if (rc != 0) {
1287                     return rc;
1288                 }
1289             }
1290         }
1291     } else {
1292         void **pp = *lp;
1293 
1294         for (i = 0; i < V_L2_SIZE; ++i) {
1295             pa = base | ((target_ulong)i <<
1296                 (TARGET_PAGE_BITS + V_L2_BITS * level));
1297             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1298             if (rc != 0) {
1299                 return rc;
1300             }
1301         }
1302     }
1303 
1304     return 0;
1305 }
1306 
1307 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1308 {
1309     struct walk_memory_regions_data data;
1310     uintptr_t i, l1_sz = v_l1_size;
1311 
1312     data.fn = fn;
1313     data.priv = priv;
1314     data.start = -1u;
1315     data.prot = 0;
1316 
1317     for (i = 0; i < l1_sz; i++) {
1318         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
1319         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
1320         if (rc != 0) {
1321             return rc;
1322         }
1323     }
1324 
1325     return walk_memory_regions_end(&data, 0, 0);
1326 }
1327 
1328 static int dump_region(void *priv, target_ulong start,
1329     target_ulong end, unsigned long prot)
1330 {
1331     FILE *f = (FILE *)priv;
1332 
1333     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
1334         " "TARGET_FMT_lx" %c%c%c\n",
1335         start, end, end - start,
1336         ((prot & PAGE_READ) ? 'r' : '-'),
1337         ((prot & PAGE_WRITE) ? 'w' : '-'),
1338         ((prot & PAGE_EXEC) ? 'x' : '-'));
1339 
1340     return 0;
1341 }
1342 
1343 /* dump memory mappings */
1344 void page_dump(FILE *f)
1345 {
1346     const int length = sizeof(target_ulong) * 2;
1347     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
1348             length, "start", length, "end", length, "size", "prot");
1349     walk_memory_regions(f, dump_region);
1350 }
1351 
1352 int page_get_flags(target_ulong address)
1353 {
1354     PageDesc *p;
1355 
1356     p = page_find(address >> TARGET_PAGE_BITS);
1357     if (!p) {
1358         return 0;
1359     }
1360     return p->flags;
1361 }
1362 
1363 /*
1364  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
1365  * By default, they are not kept.
1366  */
1367 #ifndef PAGE_TARGET_STICKY
1368 #define PAGE_TARGET_STICKY  0
1369 #endif
1370 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
1371 
1372 /* Modify the flags of a page and invalidate the code if necessary.
1373    The flag PAGE_WRITE_ORG is set automatically depending
1374    on PAGE_WRITE.  The mmap_lock should already be held.  */
1375 void page_set_flags(target_ulong start, target_ulong end, int flags)
1376 {
1377     target_ulong addr, len;
1378     bool reset, inval_tb = false;
1379 
1380     /* This function should never be called with addresses outside the
1381        guest address space.  If this assert fires, it probably indicates
1382        a missing call to h2g_valid.  */
1383     assert(end - 1 <= GUEST_ADDR_MAX);
1384     assert(start < end);
1385     /* Only set PAGE_ANON with new mappings. */
1386     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
1387     assert_memory_lock();
1388 
1389     start = start & TARGET_PAGE_MASK;
1390     end = TARGET_PAGE_ALIGN(end);
1391 
1392     if (flags & PAGE_WRITE) {
1393         flags |= PAGE_WRITE_ORG;
1394     }
1395     reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
1396     if (reset) {
1397         page_reset_target_data(start, end);
1398     }
1399     flags &= ~PAGE_RESET;
1400 
1401     for (addr = start, len = end - start;
1402          len != 0;
1403          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1404         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
1405 
1406         /*
1407          * If the page was executable, but is reset, or is no longer
1408          * executable, or has become writable, then invalidate any code.
1409          */
1410         if ((p->flags & PAGE_EXEC)
1411             && (reset ||
1412                 !(flags & PAGE_EXEC) ||
1413                 (flags & ~p->flags & PAGE_WRITE))) {
1414             inval_tb = true;
1415         }
1416         /* Using mprotect on a page does not change sticky bits. */
1417         p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
1418     }
1419 
1420     if (inval_tb) {
1421         tb_invalidate_phys_range(start, end);
1422     }
1423 }
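/*
 * Usage sketch (illustrative): the user-mode mmap/mprotect emulation is the
 * main caller, e.g. page_set_flags(start, start + len, prot | PAGE_VALID)
 * for a changed mapping, with PAGE_RESET added for a brand-new one; the
 * invalidation above then discards TBs from pages that became writable or
 * lost PAGE_EXEC.
 */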
1424 
1425 int page_check_range(target_ulong start, target_ulong len, int flags)
1426 {
1427     PageDesc *p;
1428     target_ulong end;
1429     target_ulong addr;
1430 
1431     /* This function should never be called with addresses outside the
1432        guest address space.  If this assert fires, it probably indicates
1433        a missing call to h2g_valid.  */
1434     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
1435         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
1436     }
1437 
1438     if (len == 0) {
1439         return 0;
1440     }
1441     if (start + len - 1 < start) {
1442         /* We've wrapped around.  */
1443         return -1;
1444     }
1445 
1446     /* must do this before we lose bits in the next step */
1447     end = TARGET_PAGE_ALIGN(start + len);
1448     start = start & TARGET_PAGE_MASK;
1449 
1450     for (addr = start, len = end - start;
1451          len != 0;
1452          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
1453         p = page_find(addr >> TARGET_PAGE_BITS);
1454         if (!p) {
1455             return -1;
1456         }
1457         if (!(p->flags & PAGE_VALID)) {
1458             return -1;
1459         }
1460 
1461         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
1462             return -1;
1463         }
1464         if (flags & PAGE_WRITE) {
1465             if (!(p->flags & PAGE_WRITE_ORG)) {
1466                 return -1;
1467             }
1468             /* unprotect the page if it was put read-only because it
1469                contains translated code */
1470             if (!(p->flags & PAGE_WRITE)) {
1471                 if (!page_unprotect(addr, 0)) {
1472                     return -1;
1473                 }
1474             }
1475         }
1476     }
1477     return 0;
1478 }
1479 
1480 void page_protect(tb_page_addr_t page_addr)
1481 {
1482     target_ulong addr;
1483     PageDesc *p;
1484     int prot;
1485 
1486     p = page_find(page_addr >> TARGET_PAGE_BITS);
1487     if (p && (p->flags & PAGE_WRITE)) {
1488         /*
1489          * Force the host page to be non-writable (writes will have a page
1490          * fault + mprotect overhead).
1491          */
1492         page_addr &= qemu_host_page_mask;
1493         prot = 0;
1494         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1495              addr += TARGET_PAGE_SIZE) {
1496 
1497             p = page_find(addr >> TARGET_PAGE_BITS);
1498             if (!p) {
1499                 continue;
1500             }
1501             prot |= p->flags;
1502             p->flags &= ~PAGE_WRITE;
1503         }
1504         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
1505                  (prot & PAGE_BITS) & ~PAGE_WRITE);
1506     }
1507 }
1508 
1509 /* called from signal handler: invalidate the code and unprotect the
1510  * page. Return 0 if the fault was not handled, 1 if it was handled,
1511  * and 2 if it was handled but the caller must cause the TB to be
1512  * immediately exited. (We can only return 2 if the 'pc' argument is
1513  * non-zero.)
1514  */
1515 int page_unprotect(target_ulong address, uintptr_t pc)
1516 {
1517     unsigned int prot;
1518     bool current_tb_invalidated;
1519     PageDesc *p;
1520     target_ulong host_start, host_end, addr;
1521 
1522     /* Technically this isn't safe inside a signal handler.  However we
1523        know this only ever happens in a synchronous SEGV handler, so in
1524        practice it seems to be ok.  */
1525     mmap_lock();
1526 
1527     p = page_find(address >> TARGET_PAGE_BITS);
1528     if (!p) {
1529         mmap_unlock();
1530         return 0;
1531     }
1532 
1533     /* if the page was really writable, then we change its
1534        protection back to writable */
1535     if (p->flags & PAGE_WRITE_ORG) {
1536         current_tb_invalidated = false;
1537         if (p->flags & PAGE_WRITE) {
1538             /* If the page is actually marked WRITE then assume this is because
1539              * this thread raced with another one which got here first and
1540              * set the page to PAGE_WRITE and did the TB invalidate for us.
1541              */
1542 #ifdef TARGET_HAS_PRECISE_SMC
1543             TranslationBlock *current_tb = tcg_tb_lookup(pc);
1544             if (current_tb) {
1545                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
1546             }
1547 #endif
1548         } else {
1549             host_start = address & qemu_host_page_mask;
1550             host_end = host_start + qemu_host_page_size;
1551 
1552             prot = 0;
1553             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
1554                 p = page_find(addr >> TARGET_PAGE_BITS);
1555                 p->flags |= PAGE_WRITE;
1556                 prot |= p->flags;
1557 
1558                 /* and since the content will be modified, we must invalidate
1559                    the corresponding translated code. */
1560                 current_tb_invalidated |=
1561                     tb_invalidate_phys_page_unwind(addr, pc);
1562             }
1563             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
1564                      prot & PAGE_BITS);
1565         }
1566         mmap_unlock();
1567         /* If current TB was invalidated return to main loop */
1568         return current_tb_invalidated ? 2 : 1;
1569     }
1570     mmap_unlock();
1571     return 0;
1572 }
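/*
 * Note (informational sketch): this is reached from the host SIGSEGV path
 * of the user-mode emulator when the guest writes to a page that was
 * write-protected because it contains translated code; the return value 2
 * tells that caller to force an immediate exit from the current TB.
 */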
1573 #endif /* CONFIG_USER_ONLY */
1574 
1575 /*
1576  * Called by generic code at e.g. cpu reset after cpu creation,
1577  * therefore we must be prepared for the jump cache not to be allocated yet.
1578  */
1579 void tcg_flush_jmp_cache(CPUState *cpu)
1580 {
1581     CPUJumpCache *jc = cpu->tb_jmp_cache;
1582 
1583     /* During early initialization, the cache may not yet be allocated. */
1584     if (unlikely(jc == NULL)) {
1585         return;
1586     }
1587 
1588     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
1589         qatomic_set(&jc->array[i].tb, NULL);
1590     }
1591 }
1592 
1593 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
1594 void tcg_flush_softmmu_tlb(CPUState *cs)
1595 {
1596 #ifdef CONFIG_SOFTMMU
1597     tlb_flush(cs);
1598 #endif
1599 }
1600