xref: /qemu/accel/tcg/tb-maint.c (revision 5f6d4f79)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "exec/cputlb.h"
24 #include "exec/log.h"
25 #include "exec/exec-all.h"
26 #include "exec/tb-flush.h"
27 #include "exec/translate-all.h"
28 #include "sysemu/tcg.h"
29 #include "tcg/tcg.h"
30 #include "tb-hash.h"
31 #include "tb-context.h"
32 #include "internal.h"
33 
34 
/*
 * List iterators for lists of tagged pointers in TranslationBlock.
 *
 * Each link is an integer holding a TranslationBlock pointer with a slot
 * index stored in bit 0.  On every step @n receives that bit and @tb the
 * pointer with the bit cleared; the following link is then read from
 * tb->field[n].  Iteration stops when the untagged pointer is NULL.
 * Note that @head is expanded twice and that @tb and @n are assigned to.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Walk the list rooted at (head_tb)->jmp_list_head via jmp_list_next[]. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
43 
44 static bool tb_cmp(const void *ap, const void *bp)
45 {
46     const TranslationBlock *a = ap;
47     const TranslationBlock *b = bp;
48 
49     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
50             a->cs_base == b->cs_base &&
51             a->flags == b->flags &&
52             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
53             tb_page_addr0(a) == tb_page_addr0(b) &&
54             tb_page_addr1(a) == tb_page_addr1(b));
55 }
56 
57 void tb_htable_init(void)
58 {
59     unsigned int mode = QHT_MODE_AUTO_RESIZE;
60 
61     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
62 }
63 
64 typedef struct PageDesc PageDesc;
65 
66 #ifdef CONFIG_USER_ONLY
67 
/*
 * In user-mode page locks aren't used; mmap_lock is enough.
 * assert_page_locked() therefore only checks, through tcg_debug_assert(),
 * that have_mmap_lock() holds; its @pd argument is not evaluated.
 */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())

/* There are no per-page locks to take in user-mode, so this is a no-op. */
static inline void tb_lock_pages(const TranslationBlock *tb) { }
74 
/*
 * For user-only, since we are protecting all of memory with a single lock,
 * and because the two pages of a TranslationBlock are always contiguous,
 * use a single data structure to record all TranslationBlocks.
 */
static IntervalTreeRoot tb_root;

/*
 * Drop every recorded TB at once by zeroing the interval tree root.
 * The nodes themselves (embedded in each TranslationBlock as ->itree) are
 * not touched here.
 */
static void tb_remove_all(void)
{
    assert_memory_lock();
    memset(&tb_root, 0, sizeof(tb_root));
}
87 
88 /* Call with mmap_lock held. */
89 static void tb_record(TranslationBlock *tb)
90 {
91     vaddr addr;
92     int flags;
93 
94     assert_memory_lock();
95     tb->itree.last = tb->itree.start + tb->size - 1;
96 
97     /* translator_loop() must have made all TB pages non-writable */
98     addr = tb_page_addr0(tb);
99     flags = page_get_flags(addr);
100     assert(!(flags & PAGE_WRITE));
101 
102     addr = tb_page_addr1(tb);
103     if (addr != -1) {
104         flags = page_get_flags(addr);
105         assert(!(flags & PAGE_WRITE));
106     }
107 
108     interval_tree_insert(&tb->itree, &tb_root);
109 }
110 
/*
 * Remove @tb's interval node from tb_root.
 * Call with mmap_lock held.
 */
static void tb_remove(TranslationBlock *tb)
{
    assert_memory_lock();
    interval_tree_remove(&tb->itree, &tb_root);
}
117 
/*
 * TODO: For now, still shared with translate-all.c for system mode.
 *
 * User-mode flavour: iterate over the TBs in tb_root that intersect
 * [start, last].  @pagedesc is not used by this expansion.  The successor
 * N is looked up before the loop body runs for T -- presumably so that the
 * body may remove T from the tree (verify against interval-tree semantics).
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
    for (T = foreach_tb_first(start, last),             \
         N = foreach_tb_next(T, start, last);           \
         T != NULL;                                     \
         T = N, N = foreach_tb_next(N, start, last))

/* In user-mode the iteration cursor is simply the next TranslationBlock. */
typedef TranslationBlock *PageForEachNext;
126 
127 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
128                                         tb_page_addr_t last)
129 {
130     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
131     return n ? container_of(n, TranslationBlock, itree) : NULL;
132 }
133 
134 static PageForEachNext foreach_tb_next(PageForEachNext tb,
135                                        tb_page_addr_t start,
136                                        tb_page_addr_t last)
137 {
138     IntervalTreeNode *n;
139 
140     if (tb) {
141         n = interval_tree_iter_next(&tb->itree, start, last);
142         if (n) {
143             return container_of(n, TranslationBlock, itree);
144         }
145     }
146     return NULL;
147 }
148 
149 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets.
 * The number of address bits covered is capped at HOST_LONG_BITS.
 */
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/*
 * L1 Mapping properties
 * All three are computed once by page_table_config_init().
 */
/* Number of l1_map entries in use (1 << number of level-1 index bits). */
static int v_l1_size;
/* Right shift applied to a page index to obtain its l1_map slot. */
static int v_l1_shift;
/* Number of intermediate tables page_find_alloc() walks below level 1. */
static int v_l2_levels;

/*
 * The bottom level has pointers to PageDesc, and is indexed by
 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 *
 * NOTE(review): as used by page_table_config_init(), these bounds limit
 * the number of index bits of the level-1 table (l1_map) -- confirm the
 * wording above.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

/* Level-1 table, sized for the largest permitted number of index bits. */
static void *l1_map[V_L1_MAX_SIZE];
179 
/* Per-page descriptor stored in the leaves of the l1_map radix tree. */
struct PageDesc {
    /* taken and released through page_lock()/page_trylock()/page_unlock() */
    QemuSpin lock;
    /*
     * list of TBs intersecting this ram page; a tagged pointer whose bit 0
     * selects which page_next[] slot of the TB continues the list
     */
    uintptr_t first_tb;
};
185 
186 void page_table_config_init(void)
187 {
188     uint32_t v_l1_bits;
189 
190     assert(TARGET_PAGE_BITS);
191     /* The bits remaining after N lower levels of page tables.  */
192     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
193     if (v_l1_bits < V_L1_MIN_BITS) {
194         v_l1_bits += V_L2_BITS;
195     }
196 
197     v_l1_size = 1 << v_l1_bits;
198     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
199     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
200 
201     assert(v_l1_bits <= V_L1_MAX_BITS);
202     assert(v_l1_shift % V_L2_BITS == 0);
203     assert(v_l2_levels >= 0);
204 }
205 
/*
 * Look up the PageDesc for page number @index in the l1_map radix tree.
 *
 * When @alloc is false, return NULL as soon as a missing table is met.
 * When @alloc is true, missing intermediate tables and the final PageDesc
 * array are allocated zero-filled and published with qatomic_cmpxchg();
 * if another thread installed a table first, the fresh allocation is
 * released and the table that is already in place is used instead.
 */
static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
    PageDesc *pd;
    void **lp;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (int i = v_l2_levels; i > 0; i--) {
        void **p = qatomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            existing = qatomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                /* Lost the race: drop our table and use the winner's. */
                g_free(p);
                p = existing;
            }
        }

        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    pd = qatomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }

        /* Initialise every lock before the array is published below. */
        pd = g_new0(PageDesc, V_L2_SIZE);
        for (int i = 0; i < V_L2_SIZE; i++) {
            qemu_spin_init(&pd[i].lock);
        }

        existing = qatomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
            /* Lost the race: tear down our array and use the winner's. */
            for (int i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_destroy(&pd[i].lock);
            }
            g_free(pd);
            pd = existing;
        }
    }

    /* The low V_L2_BITS of the index select the entry within the leaf. */
    return pd + (index & (V_L2_SIZE - 1));
}
260 
/*
 * Non-allocating lookup: return the PageDesc for page number @index, or
 * NULL if no descriptor has been allocated for it yet.
 */
static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, false);
}
265 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page; also used as the key under which the
 *          entry is inserted into a &struct page_collection tree
 * @locked: whether the page is locked, i.e. whether @pd's lock is currently
 *          held on behalf of this entry
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};
284 
/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index;
 *          NULL while the collection is empty
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    QTree *tree;
    struct page_entry *max;
};
309 
/*
 * System-mode flavour: iterate over the tagged TB list hanging off
 * (pagedesc)->first_tb, following tb->page_next[n].  The @start and @last
 * arguments are not used by this expansion; the cursor @n is the tag bit
 * of the current link.
 */
typedef int PageForEachNext;
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
313 
314 #ifdef CONFIG_DEBUG_TCG
315 
316 static __thread GHashTable *ht_pages_locked_debug;
317 
318 static void ht_pages_locked_debug_init(void)
319 {
320     if (ht_pages_locked_debug) {
321         return;
322     }
323     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
324 }
325 
326 static bool page_is_locked(const PageDesc *pd)
327 {
328     PageDesc *found;
329 
330     ht_pages_locked_debug_init();
331     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
332     return !!found;
333 }
334 
/*
 * Debug bookkeeping for taking @pd's lock: assert that this thread has not
 * already recorded @pd as locked, then add it to ht_pages_locked_debug
 * (the descriptor pointer serves as both key and value).
 */
static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}
341 
/*
 * Debug bookkeeping for releasing @pd's lock: assert that this thread has
 * @pd recorded as locked and drop it from ht_pages_locked_debug.
 */
static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    /* Keep the removal outside of g_assert() so it is always executed. */
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}
351 
/*
 * Abort with an error report naming @file and @line if the calling thread
 * does not have @pd recorded as locked.  Use through assert_page_locked(),
 * which supplies the call site.
 */
static void do_assert_page_locked(const PageDesc *pd,
                                  const char *file, int line)
{
    if (unlikely(!page_is_locked(pd))) {
        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                     pd, file, line);
        abort();
    }
}
#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
362 
/*
 * Assert that the calling thread's table of locked pages is empty, i.e.
 * that this thread currently has no page recorded as locked.
 */
void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}
368 
369 #else /* !CONFIG_DEBUG_TCG */
370 
/* Without CONFIG_DEBUG_TCG the lock-tracking hooks are empty. */
static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }
static inline void assert_page_locked(const PageDesc *pd) { }
374 
375 #endif /* CONFIG_DEBUG_TCG */
376 
/*
 * Acquire @pd's spinlock.  The debug hook runs first, before the lock is
 * actually taken.
 */
static void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}
382 
383 /* Like qemu_spin_trylock, returns false on success */
384 static bool page_trylock(PageDesc *pd)
385 {
386     bool busy = qemu_spin_trylock(&pd->lock);
387     if (!busy) {
388         page_lock__debug(pd);
389     }
390     return busy;
391 }
392 
/* Release @pd's spinlock, then run the debug bookkeeping hook. */
static void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}
398 
399 void tb_lock_page0(tb_page_addr_t paddr)
400 {
401     page_lock(page_find_alloc(paddr >> TARGET_PAGE_BITS, true));
402 }
403 
404 void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
405 {
406     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
407     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
408     PageDesc *pd0, *pd1;
409 
410     if (pindex0 == pindex1) {
411         /* Identical pages, and the first page is already locked. */
412         return;
413     }
414 
415     pd1 = page_find_alloc(pindex1, true);
416     if (pindex0 < pindex1) {
417         /* Correct locking order, we may block. */
418         page_lock(pd1);
419         return;
420     }
421 
422     /* Incorrect locking order, we cannot block lest we deadlock. */
423     if (!page_trylock(pd1)) {
424         return;
425     }
426 
427     /*
428      * Drop the lock on page0 and get both page locks in the right order.
429      * Restart translation via longjmp.
430      */
431     pd0 = page_find_alloc(pindex0, false);
432     page_unlock(pd0);
433     page_lock(pd1);
434     page_lock(pd0);
435     siglongjmp(tcg_ctx->jmp_trans, -3);
436 }
437 
438 void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
439 {
440     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
441     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
442 
443     if (pindex0 != pindex1) {
444         page_unlock(page_find_alloc(pindex1, false));
445     }
446 }
447 
448 static void tb_lock_pages(TranslationBlock *tb)
449 {
450     tb_page_addr_t paddr0 = tb_page_addr0(tb);
451     tb_page_addr_t paddr1 = tb_page_addr1(tb);
452     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
453     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
454 
455     if (unlikely(paddr0 == -1)) {
456         return;
457     }
458     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
459         if (pindex0 < pindex1) {
460             page_lock(page_find_alloc(pindex0, true));
461             page_lock(page_find_alloc(pindex1, true));
462             return;
463         }
464         page_lock(page_find_alloc(pindex1, true));
465     }
466     page_lock(page_find_alloc(pindex0, true));
467 }
468 
469 void tb_unlock_pages(TranslationBlock *tb)
470 {
471     tb_page_addr_t paddr0 = tb_page_addr0(tb);
472     tb_page_addr_t paddr1 = tb_page_addr1(tb);
473     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
474     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
475 
476     if (unlikely(paddr0 == -1)) {
477         return;
478     }
479     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
480         page_unlock(page_find_alloc(pindex1, false));
481     }
482     page_unlock(page_find_alloc(pindex0, false));
483 }
484 
485 static inline struct page_entry *
486 page_entry_new(PageDesc *pd, tb_page_addr_t index)
487 {
488     struct page_entry *pe = g_malloc(sizeof(*pe));
489 
490     pe->index = index;
491     pe->pd = pd;
492     pe->locked = false;
493     return pe;
494 }
495 
/*
 * Value destructor installed on the page_collection tree.  The entry must
 * still be locked: its page is unlocked here and the entry is freed.
 */
static void page_entry_destroy(gpointer p)
{
    struct page_entry *pe = p;

    g_assert(pe->locked);
    page_unlock(pe->pd);
    g_free(pe);
}
504 
505 /* returns false on success */
506 static bool page_entry_trylock(struct page_entry *pe)
507 {
508     bool busy = page_trylock(pe->pd);
509     if (!busy) {
510         g_assert(!pe->locked);
511         pe->locked = true;
512     }
513     return busy;
514 }
515 
/*
 * Lock the page behind @pe, blocking if necessary, and mark the entry as
 * locked.  The entry must not already be marked locked.
 */
static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}
522 
/*
 * q_tree_foreach() callback: lock the entry passed as @value.
 * @key and @data are unused.  Always returns FALSE -- presumably meaning
 * "continue the traversal", as with GLib's GTraverseFunc (confirm against
 * the QTree documentation).
 */
static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    do_page_entry_lock(pe);
    return FALSE;
}
530 
531 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
532 {
533     struct page_entry *pe = value;
534 
535     if (pe->locked) {
536         pe->locked = false;
537         page_unlock(pe->pd);
538     }
539     return FALSE;
540 }
541 
542 /*
543  * Trylock a page, and if successful, add the page to a collection.
544  * Returns true ("busy") if the page could not be locked; false otherwise.
545  */
546 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
547 {
548     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
549     struct page_entry *pe;
550     PageDesc *pd;
551 
552     pe = q_tree_lookup(set->tree, &index);
553     if (pe) {
554         return false;
555     }
556 
557     pd = page_find(index);
558     if (pd == NULL) {
559         return false;
560     }
561 
562     pe = page_entry_new(pd, index);
563     q_tree_insert(set->tree, &pe->index, pe);
564 
565     /*
566      * If this is either (1) the first insertion or (2) a page whose index
567      * is higher than any other so far, just lock the page and move on.
568      */
569     if (set->max == NULL || pe->index > set->max->index) {
570         set->max = pe;
571         do_page_entry_lock(pe);
572         return false;
573     }
574     /*
575      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
576      * locks in order.
577      */
578     return page_entry_trylock(pe);
579 }
580 
581 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
582 {
583     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
584     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
585 
586     if (a == b) {
587         return 0;
588     } else if (a < b) {
589         return -1;
590     }
591     return 1;
592 }
593 
594 /*
595  * Lock a range of pages ([@start,@last]) as well as the pages of all
596  * intersecting TBs.
597  * Locking order: acquire locks in ascending order of page index.
598  */
599 static struct page_collection *page_collection_lock(tb_page_addr_t start,
600                                                     tb_page_addr_t last)
601 {
602     struct page_collection *set = g_malloc(sizeof(*set));
603     tb_page_addr_t index;
604     PageDesc *pd;
605 
606     start >>= TARGET_PAGE_BITS;
607     last >>= TARGET_PAGE_BITS;
608     g_assert(start <= last);
609 
610     set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
611                                 page_entry_destroy);
612     set->max = NULL;
613     assert_no_pages_locked();
614 
615  retry:
616     q_tree_foreach(set->tree, page_entry_lock, NULL);
617 
618     for (index = start; index <= last; index++) {
619         TranslationBlock *tb;
620         PageForEachNext n;
621 
622         pd = page_find(index);
623         if (pd == NULL) {
624             continue;
625         }
626         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
627             q_tree_foreach(set->tree, page_entry_unlock, NULL);
628             goto retry;
629         }
630         assert_page_locked(pd);
631         PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
632             if (page_trylock_add(set, tb_page_addr0(tb)) ||
633                 (tb_page_addr1(tb) != -1 &&
634                  page_trylock_add(set, tb_page_addr1(tb)))) {
635                 /* drop all locks, and reacquire in order */
636                 q_tree_foreach(set->tree, page_entry_unlock, NULL);
637                 goto retry;
638             }
639         }
640     }
641     return set;
642 }
643 
/*
 * Release a collection obtained from page_collection_lock(): destroying
 * the tree unlocks and frees each entry, then the collection itself is
 * freed.
 */
static void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    q_tree_destroy(set->tree);
    g_free(set);
}
650 
651 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
652 static void tb_remove_all_1(int level, void **lp)
653 {
654     int i;
655 
656     if (*lp == NULL) {
657         return;
658     }
659     if (level == 0) {
660         PageDesc *pd = *lp;
661 
662         for (i = 0; i < V_L2_SIZE; ++i) {
663             page_lock(&pd[i]);
664             pd[i].first_tb = (uintptr_t)NULL;
665             page_unlock(&pd[i]);
666         }
667     } else {
668         void **pp = *lp;
669 
670         for (i = 0; i < V_L2_SIZE; ++i) {
671             tb_remove_all_1(level - 1, pp + i);
672         }
673     }
674 }
675 
676 static void tb_remove_all(void)
677 {
678     int i, l1_sz = v_l1_size;
679 
680     for (i = 0; i < l1_sz; i++) {
681         tb_remove_all_1(v_l2_levels, l1_map + i);
682     }
683 }
684 
685 /*
686  * Add the tb in the target page and protect it if necessary.
687  * Called with @p->lock held.
688  */
689 static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
690 {
691     bool page_already_protected;
692 
693     assert_page_locked(p);
694 
695     tb->page_next[n] = p->first_tb;
696     page_already_protected = p->first_tb != 0;
697     p->first_tb = (uintptr_t)tb | n;
698 
699     /*
700      * If some code is already present, then the pages are already
701      * protected. So we handle the case where only the first TB is
702      * allocated in a physical page.
703      */
704     if (!page_already_protected) {
705         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
706     }
707 }
708 
709 static void tb_record(TranslationBlock *tb)
710 {
711     tb_page_addr_t paddr0 = tb_page_addr0(tb);
712     tb_page_addr_t paddr1 = tb_page_addr1(tb);
713     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
714     tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS;
715 
716     assert(paddr0 != -1);
717     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
718         tb_page_add(page_find_alloc(pindex1, false), tb, 1);
719     }
720     tb_page_add(page_find_alloc(pindex0, false), tb, 0);
721 }
722 
/*
 * Unlink @tb from the TB list of page @pd.
 * Must be called with @pd locked.  @tb is expected to be on the list:
 * reaching the end of the list without finding it is treated as a bug.
 */
static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    uintptr_t *pprev;
    PageForEachNext n1;

    assert_page_locked(pd);
    /* pprev tracks the link that points at the current list element. */
    pprev = &pd->first_tb;
    PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
        if (tb1 == tb) {
            *pprev = tb1->page_next[n1];
            return;
        }
        pprev = &tb1->page_next[n1];
    }
    g_assert_not_reached();
}
740 
741 static void tb_remove(TranslationBlock *tb)
742 {
743     tb_page_addr_t paddr0 = tb_page_addr0(tb);
744     tb_page_addr_t paddr1 = tb_page_addr1(tb);
745     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
746     tb_page_addr_t pindex1 = paddr0 >> TARGET_PAGE_BITS;
747 
748     assert(paddr0 != -1);
749     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
750         tb_page_remove(page_find_alloc(pindex1, false), tb);
751     }
752     tb_page_remove(page_find_alloc(pindex0, false), tb);
753 }
754 #endif /* CONFIG_USER_ONLY */
755 
756 /* flush all the translation blocks */
757 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
758 {
759     bool did_flush = false;
760 
761     mmap_lock();
762     /* If it is already been done on request of another CPU, just retry. */
763     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
764         goto done;
765     }
766     did_flush = true;
767 
768     CPU_FOREACH(cpu) {
769         tcg_flush_jmp_cache(cpu);
770     }
771 
772     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
773     tb_remove_all();
774 
775     tcg_region_reset_all();
776     /* XXX: flush processor icache at this point if cache flush is expensive */
777     qatomic_inc(&tb_ctx.tb_flush_count);
778 
779 done:
780     mmap_unlock();
781     if (did_flush) {
782         qemu_plugin_flush_cb();
783     }
784 }
785 
786 void tb_flush(CPUState *cpu)
787 {
788     if (tcg_enabled()) {
789         unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
790 
791         if (cpu_in_serial_context(cpu)) {
792             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
793         } else {
794             async_safe_run_on_cpu(cpu, do_tb_flush,
795                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
796         }
797     }
798 }
799 
/*
 * remove @orig from its @n_orig-th jump list
 *
 * That is: if outgoing jump @n_orig of @orig is chained to a destination
 * TB, unlink @orig from that destination's list of incoming jumps.  The
 * low bit of orig->jmp_dest[n_orig] is left set afterwards.
 */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
    uintptr_t ptr, ptr_locked;
    TranslationBlock *dest;
    TranslationBlock *tb;
    uintptr_t *pprev;
    int n;

    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
    dest = (TranslationBlock *)(ptr & ~1);
    if (dest == NULL) {
        /* This jump slot was not chained to anything. */
        return;
    }

    qemu_spin_lock(&dest->jmp_lock);
    /*
     * While acquiring the lock, the jump might have been removed if the
     * destination TB was invalidated; check again.
     */
    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
    if (ptr_locked != ptr) {
        qemu_spin_unlock(&dest->jmp_lock);
        /*
         * The only possibility is that the jump was unlinked via
         * tb_jump_unlink(dest). Seeing here another destination would be a bug,
         * because we set the LSB above.
         */
        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
        return;
    }
    /*
     * We first acquired the lock, and since the destination pointer matches,
     * we know for sure that @orig is in the jmp list.
     */
    pprev = &dest->jmp_list_head;
    TB_FOR_EACH_JMP(dest, tb, n) {
        if (tb == orig && n == n_orig) {
            *pprev = tb->jmp_list_next[n];
            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
            qemu_spin_unlock(&dest->jmp_lock);
            return;
        }
        pprev = &tb->jmp_list_next[n];
    }
    g_assert_not_reached();
}
848 
/*
 * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
 * The jump target is set to tb->tc.ptr + tb->jmp_reset_offset[n].
 */
void tb_reset_jump(TranslationBlock *tb, int n)
{
    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
    tb_set_jmp_target(tb, n, addr);
}
857 
/*
 * remove any jumps to the TB
 *
 * Under dest->jmp_lock, every TB on @dest's incoming-jump list has the
 * corresponding jump reset and its jmp_dest[] pointer cleared; the list
 * head of @dest is then emptied.
 */
static inline void tb_jmp_unlink(TranslationBlock *dest)
{
    TranslationBlock *tb;
    int n;

    qemu_spin_lock(&dest->jmp_lock);

    TB_FOR_EACH_JMP(dest, tb, n) {
        tb_reset_jump(tb, n);
        /* AND with 1: clears the pointer bits but keeps the LSB as it was. */
        qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
        /* No need to clear the list entry; setting the dest ptr is enough */
    }
    dest->jmp_list_head = (uintptr_t)NULL;

    qemu_spin_unlock(&dest->jmp_lock);
}
875 
876 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
877 {
878     CPUState *cpu;
879 
880     if (tb_cflags(tb) & CF_PCREL) {
881         /* A TB may be at any virtual address */
882         CPU_FOREACH(cpu) {
883             tcg_flush_jmp_cache(cpu);
884         }
885     } else {
886         uint32_t h = tb_jmp_cache_hash_func(tb->pc);
887 
888         CPU_FOREACH(cpu) {
889             CPUJumpCache *jc = cpu->tb_jmp_cache;
890 
891             if (qatomic_read(&jc->array[h].tb) == tb) {
892                 qatomic_set(&jc->array[h].tb, NULL);
893             }
894         }
895     }
896 }
897 
/*
 * Invalidate @tb: mark it CF_INVALID, remove it from the TB hash table,
 * optionally from its page list(s), from the per-CPU jump caches, and
 * undo all jump chaining from and to it.
 *
 * In user-mode, call with mmap_lock held.
 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
 * locks held.
 */
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
{
    uint32_t h;
    tb_page_addr_t phys_pc;
    /* Sampled before CF_INVALID is set; used for the hash below. */
    uint32_t orig_cflags = tb_cflags(tb);

    assert_memory_lock();

    /* make sure no further incoming jumps will be chained to this TB */
    qemu_spin_lock(&tb->jmp_lock);
    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
    qemu_spin_unlock(&tb->jmp_lock);

    /* remove the TB from the hash list */
    phys_pc = tb_page_addr0(tb);
    h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
                     tb->flags, tb->cs_base, orig_cflags);
    if (!qht_remove(&tb_ctx.htable, tb, h)) {
        /*
         * Removal failed -- presumably the TB was not (or no longer) in
         * the table; the remaining steps are skipped.
         */
        return;
    }

    /* remove the TB from the page list */
    if (rm_from_page_list) {
        tb_remove(tb);
    }

    /* remove the TB from the per-CPU jump caches */
    tb_jmp_cache_inval_tb(tb);

    /* suppress this TB from the two jump lists */
    tb_remove_from_jmp_list(tb, 0);
    tb_remove_from_jmp_list(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb_jmp_unlink(tb);

    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
                tb_ctx.tb_phys_invalidate_count + 1);
}
942 
/*
 * Invalidate @tb, including removal from its page list(s), bracketed by
 * qemu_thread_jit_write() / qemu_thread_jit_execute().
 * NOTE(review): the name suggests the required locks are already held by
 * the caller -- confirm against the call sites.
 */
static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
    qemu_thread_jit_write();
    do_tb_phys_invalidate(tb, true);
    qemu_thread_jit_execute();
}
949 
950 /*
951  * Invalidate one TB.
952  * Called with mmap_lock held in user-mode.
953  */
954 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
955 {
956     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
957         tb_lock_pages(tb);
958         do_tb_phys_invalidate(tb, true);
959         tb_unlock_pages(tb);
960     } else {
961         do_tb_phys_invalidate(tb, false);
962     }
963 }
964 
965 /*
966  * Add a new TB and link it to the physical page tables.
967  * Called with mmap_lock held for user-mode emulation.
968  *
969  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
970  * Note that in !user-mode, another thread might have already added a TB
971  * for the same block of guest code that @tb corresponds to. In that case,
972  * the caller should discard the original @tb, and use instead the returned TB.
973  */
974 TranslationBlock *tb_link_page(TranslationBlock *tb)
975 {
976     void *existing_tb = NULL;
977     uint32_t h;
978 
979     assert_memory_lock();
980     tcg_debug_assert(!(tb->cflags & CF_INVALID));
981 
982     tb_record(tb);
983 
984     /* add in the hash table */
985     h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
986                      tb->flags, tb->cs_base, tb->cflags);
987     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
988 
989     /* remove TB from the page(s) if we couldn't insert it */
990     if (unlikely(existing_tb)) {
991         tb_remove(tb);
992         tb_unlock_pages(tb);
993         return existing_tb;
994     }
995 
996     tb_unlock_pages(tb);
997     return tb;
998 }
999 
1000 #ifdef CONFIG_USER_ONLY
/*
 * Invalidate all TBs which intersect with the target address range.
 * The range is inclusive: [@start, @last].
 * Called with mmap_lock held for user-mode emulation.
 * NOTE: this function must not be called while a TB is running.
 */
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
{
    TranslationBlock *tb;
    PageForEachNext n;

    assert_memory_lock();

    /* The iterator fetches the successor before each TB is invalidated. */
    PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
        tb_phys_invalidate__locked(tb);
    }
}
1017 
1018 /*
1019  * Invalidate all TBs which intersect with the target address page @addr.
1020  * Called with mmap_lock held for user-mode emulation
1021  * NOTE: this function must not be called while a TB is running.
1022  */
1023 void tb_invalidate_phys_page(tb_page_addr_t addr)
1024 {
1025     tb_page_addr_t start, last;
1026 
1027     start = addr & TARGET_PAGE_MASK;
1028     last = addr | ~TARGET_PAGE_MASK;
1029     tb_invalidate_phys_range(start, last);
1030 }
1031 
1032 /*
1033  * Called with mmap_lock held. If pc is not 0 then it indicates the
1034  * host PC of the faulting store instruction that caused this invalidate.
1035  * Returns true if the caller needs to abort execution of the current
1036  * TB (because it was modified by this store and the guest CPU has
1037  * precise-SMC semantics).
1038  */
1039 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1040 {
1041     TranslationBlock *current_tb;
1042     bool current_tb_modified;
1043     TranslationBlock *tb;
1044     PageForEachNext n;
1045     tb_page_addr_t last;
1046 
1047     /*
1048      * Without precise smc semantics, or when outside of a TB,
1049      * we can skip to invalidate.
1050      */
1051 #ifndef TARGET_HAS_PRECISE_SMC
1052     pc = 0;
1053 #endif
1054     if (!pc) {
1055         tb_invalidate_phys_page(addr);
1056         return false;
1057     }
1058 
1059     assert_memory_lock();
1060     current_tb = tcg_tb_lookup(pc);
1061 
1062     last = addr | ~TARGET_PAGE_MASK;
1063     addr &= TARGET_PAGE_MASK;
1064     current_tb_modified = false;
1065 
1066     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1067         if (current_tb == tb &&
1068             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1069             /*
1070              * If we are modifying the current TB, we must stop its
1071              * execution. We could be more precise by checking that
1072              * the modification is after the current PC, but it would
1073              * require a specialized function to partially restore
1074              * the CPU state.
1075              */
1076             current_tb_modified = true;
1077             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1078         }
1079         tb_phys_invalidate__locked(tb);
1080     }
1081 
1082     if (current_tb_modified) {
1083         /* Force execution of one insn next time.  */
1084         CPUState *cpu = current_cpu;
1085         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1086         return true;
1087     }
1088     return false;
1089 }
1090 #else
1091 /*
1092  * @p must be non-NULL.
1093  * Call with all @pages locked.
1094  */
1095 static void
1096 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1097                                       PageDesc *p, tb_page_addr_t start,
1098                                       tb_page_addr_t last,
1099                                       uintptr_t retaddr)
1100 {
1101     TranslationBlock *tb;
1102     PageForEachNext n;
1103 #ifdef TARGET_HAS_PRECISE_SMC
1104     bool current_tb_modified = false;
1105     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1106 #endif /* TARGET_HAS_PRECISE_SMC */
1107 
1108     /* Range may not cross a page. */
1109     tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1110 
1111     /*
1112      * We remove all the TBs in the range [start, last].
1113      * XXX: see if in some cases it could be faster to invalidate all the code
1114      */
1115     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1116         tb_page_addr_t tb_start, tb_last;
1117 
1118         /* NOTE: this is subtle as a TB may span two physical pages */
1119         tb_start = tb_page_addr0(tb);
1120         tb_last = tb_start + tb->size - 1;
1121         if (n == 0) {
1122             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1123         } else {
1124             tb_start = tb_page_addr1(tb);
1125             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1126         }
1127         if (!(tb_last < start || tb_start > last)) {
1128 #ifdef TARGET_HAS_PRECISE_SMC
1129             if (current_tb == tb &&
1130                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1131                 /*
1132                  * If we are modifying the current TB, we must stop
1133                  * its execution. We could be more precise by checking
1134                  * that the modification is after the current PC, but it
1135                  * would require a specialized function to partially
1136                  * restore the CPU state.
1137                  */
1138                 current_tb_modified = true;
1139                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1140             }
1141 #endif /* TARGET_HAS_PRECISE_SMC */
1142             tb_phys_invalidate__locked(tb);
1143         }
1144     }
1145 
1146     /* if no code remaining, no need to continue to use slow writes */
1147     if (!p->first_tb) {
1148         tlb_unprotect_code(start);
1149     }
1150 
1151 #ifdef TARGET_HAS_PRECISE_SMC
1152     if (current_tb_modified) {
1153         page_collection_unlock(pages);
1154         /* Force execution of one insn next time.  */
1155         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1156         mmap_unlock();
1157         cpu_loop_exit_noexc(current_cpu);
1158     }
1159 #endif
1160 }
1161 
1162 /*
1163  * Invalidate all TBs which intersect with the target physical
1164  * address page @addr.
1165  */
1166 void tb_invalidate_phys_page(tb_page_addr_t addr)
1167 {
1168     struct page_collection *pages;
1169     tb_page_addr_t start, last;
1170     PageDesc *p;
1171 
1172     p = page_find(addr >> TARGET_PAGE_BITS);
1173     if (p == NULL) {
1174         return;
1175     }
1176 
1177     start = addr & TARGET_PAGE_MASK;
1178     last = addr | ~TARGET_PAGE_MASK;
1179     pages = page_collection_lock(start, last);
1180     tb_invalidate_phys_page_range__locked(pages, p, start, last, 0);
1181     page_collection_unlock(pages);
1182 }
1183 
1184 /*
1185  * Invalidate all TBs which intersect with the target physical address range
1186  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1187  * 'is_cpu_write_access' should be true if called from a real cpu write
1188  * access: the virtual CPU will exit the current TB if code is modified inside
1189  * this TB.
1190  */
1191 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1192 {
1193     struct page_collection *pages;
1194     tb_page_addr_t index, index_last;
1195 
1196     pages = page_collection_lock(start, last);
1197 
1198     index_last = last >> TARGET_PAGE_BITS;
1199     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1200         PageDesc *pd = page_find(index);
1201         tb_page_addr_t page_start, page_last;
1202 
1203         if (pd == NULL) {
1204             continue;
1205         }
1206         assert_page_locked(pd);
1207         page_start = index << TARGET_PAGE_BITS;
1208         page_last = page_start | ~TARGET_PAGE_MASK;
1209         page_last = MIN(page_last, last);
1210         tb_invalidate_phys_page_range__locked(pages, pd,
1211                                               page_start, page_last, 0);
1212     }
1213     page_collection_unlock(pages);
1214 }
1215 
1216 /*
1217  * Call with all @pages in the range [@start, @start + len[ locked.
1218  */
1219 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1220                                                  tb_page_addr_t start,
1221                                                  unsigned len, uintptr_t ra)
1222 {
1223     PageDesc *p;
1224 
1225     p = page_find(start >> TARGET_PAGE_BITS);
1226     if (!p) {
1227         return;
1228     }
1229 
1230     assert_page_locked(p);
1231     tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1232 }
1233 
1234 /*
1235  * len must be <= 8 and start must be a multiple of len.
1236  * Called via softmmu_template.h when code areas are written to with
1237  * iothread mutex not held.
1238  */
1239 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1240                                    unsigned size,
1241                                    uintptr_t retaddr)
1242 {
1243     struct page_collection *pages;
1244 
1245     pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1246     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1247     page_collection_unlock(pages);
1248 }
1249 
1250 #endif /* CONFIG_USER_ONLY */
1251