xref: /qemu/accel/tcg/tb-maint.c (revision 74781c08)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "exec/cputlb.h"
24 #include "exec/log.h"
25 #include "exec/exec-all.h"
26 #include "exec/page-protection.h"
27 #include "exec/tb-flush.h"
28 #include "exec/translate-all.h"
29 #include "sysemu/tcg.h"
30 #include "tcg/tcg.h"
31 #include "tb-hash.h"
32 #include "tb-context.h"
33 #include "internal-common.h"
34 #include "internal-target.h"
35 
36 
/*
 * Iterators over lists of tagged pointers stored in TranslationBlock.
 *
 * Every link is a uintptr_t holding a TranslationBlock pointer whose least
 * significant bit selects which slot of @field continues the chain.
 * On each iteration @tb is the untagged pointer and @n is the tag; the walk
 * stops when the untagged pointer is NULL.  @head is evaluated twice.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1,                                                \
         tb = (TranslationBlock *)((head) & ~1);                        \
         tb != NULL;                                                    \
         tb = (TranslationBlock *)tb->field[n],                         \
         n = (uintptr_t)tb & 1,                                         \
         tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Visit every (tb, n) pair linked on @head_tb's jmp_list_head. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
45 
tb_cmp(const void * ap,const void * bp)46 static bool tb_cmp(const void *ap, const void *bp)
47 {
48     const TranslationBlock *a = ap;
49     const TranslationBlock *b = bp;
50 
51     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
52             a->cs_base == b->cs_base &&
53             a->flags == b->flags &&
54             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
55             tb_page_addr0(a) == tb_page_addr0(b) &&
56             tb_page_addr1(a) == tb_page_addr1(b));
57 }
58 
tb_htable_init(void)59 void tb_htable_init(void)
60 {
61     unsigned int mode = QHT_MODE_AUTO_RESIZE;
62 
63     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
64 }
65 
66 typedef struct PageDesc PageDesc;
67 
68 #ifdef CONFIG_USER_ONLY
69 
70 /*
71  * In user-mode page locks aren't used; mmap_lock is enough.
72  */
73 #define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
74 
tb_lock_pages(const TranslationBlock * tb)75 static inline void tb_lock_pages(const TranslationBlock *tb) { }
76 
77 /*
78  * For user-only, since we are protecting all of memory with a single lock,
79  * and because the two pages of a TranslationBlock are always contiguous,
80  * use a single data structure to record all TranslationBlocks.
81  */
82 static IntervalTreeRoot tb_root;
83 
tb_remove_all(void)84 static void tb_remove_all(void)
85 {
86     assert_memory_lock();
87     memset(&tb_root, 0, sizeof(tb_root));
88 }
89 
90 /* Call with mmap_lock held. */
tb_record(TranslationBlock * tb)91 static void tb_record(TranslationBlock *tb)
92 {
93     vaddr addr;
94     int flags;
95 
96     assert_memory_lock();
97     tb->itree.last = tb->itree.start + tb->size - 1;
98 
99     /* translator_loop() must have made all TB pages non-writable */
100     addr = tb_page_addr0(tb);
101     flags = page_get_flags(addr);
102     assert(!(flags & PAGE_WRITE));
103 
104     addr = tb_page_addr1(tb);
105     if (addr != -1) {
106         flags = page_get_flags(addr);
107         assert(!(flags & PAGE_WRITE));
108     }
109 
110     interval_tree_insert(&tb->itree, &tb_root);
111 }
112 
113 /* Call with mmap_lock held. */
tb_remove(TranslationBlock * tb)114 static void tb_remove(TranslationBlock *tb)
115 {
116     assert_memory_lock();
117     interval_tree_remove(&tb->itree, &tb_root);
118 }
119 
120 /* TODO: For now, still shared with translate-all.c for system mode. */
121 #define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
122     for (T = foreach_tb_first(start, last),             \
123          N = foreach_tb_next(T, start, last);           \
124          T != NULL;                                     \
125          T = N, N = foreach_tb_next(N, start, last))
126 
127 typedef TranslationBlock *PageForEachNext;
128 
foreach_tb_first(tb_page_addr_t start,tb_page_addr_t last)129 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
130                                         tb_page_addr_t last)
131 {
132     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
133     return n ? container_of(n, TranslationBlock, itree) : NULL;
134 }
135 
/*
 * Return the TB following @tb in the [@start, @last] iteration, or NULL
 * when @tb is NULL or the iteration is exhausted.
 */
static PageForEachNext foreach_tb_next(PageForEachNext tb,
                                       tb_page_addr_t start,
                                       tb_page_addr_t last)
{
    IntervalTreeNode *node;

    if (tb == NULL) {
        return NULL;
    }

    node = interval_tree_iter_next(&tb->itree, start, last);
    return node ? container_of(node, TranslationBlock, itree) : NULL;
}
150 
151 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets.
 * The indexed address space is capped at HOST_LONG_BITS bits.
 */
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif

/* Index bits and entry count of each L2 (and L3, etc) page table.  */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/*
 * L1 Mapping properties, assigned by page_table_config_init().
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/*
 * The L1 table is indexed by anything from V_L1_MIN_BITS to V_L1_MAX_BITS
 * bits, depending on target page size; l1_map is sized for the maximum.
 * The bottom level of the tree has pointers to PageDesc.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

static void *l1_map[V_L1_MAX_SIZE];
181 
182 struct PageDesc {
183     QemuSpin lock;
184     /* list of TBs intersecting this ram page */
185     uintptr_t first_tb;
186 };
187 
page_table_config_init(void)188 void page_table_config_init(void)
189 {
190     uint32_t v_l1_bits;
191 
192     assert(TARGET_PAGE_BITS);
193     /* The bits remaining after N lower levels of page tables.  */
194     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
195     if (v_l1_bits < V_L1_MIN_BITS) {
196         v_l1_bits += V_L2_BITS;
197     }
198 
199     v_l1_size = 1 << v_l1_bits;
200     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
201     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
202 
203     assert(v_l1_bits <= V_L1_MAX_BITS);
204     assert(v_l1_shift % V_L2_BITS == 0);
205     assert(v_l2_levels >= 0);
206 }
207 
page_find_alloc(tb_page_addr_t index,bool alloc)208 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
209 {
210     PageDesc *pd;
211     void **lp;
212 
213     /* Level 1.  Always allocated.  */
214     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
215 
216     /* Level 2..N-1.  */
217     for (int i = v_l2_levels; i > 0; i--) {
218         void **p = qatomic_rcu_read(lp);
219 
220         if (p == NULL) {
221             void *existing;
222 
223             if (!alloc) {
224                 return NULL;
225             }
226             p = g_new0(void *, V_L2_SIZE);
227             existing = qatomic_cmpxchg(lp, NULL, p);
228             if (unlikely(existing)) {
229                 g_free(p);
230                 p = existing;
231             }
232         }
233 
234         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
235     }
236 
237     pd = qatomic_rcu_read(lp);
238     if (pd == NULL) {
239         void *existing;
240 
241         if (!alloc) {
242             return NULL;
243         }
244 
245         pd = g_new0(PageDesc, V_L2_SIZE);
246         for (int i = 0; i < V_L2_SIZE; i++) {
247             qemu_spin_init(&pd[i].lock);
248         }
249 
250         existing = qatomic_cmpxchg(lp, NULL, pd);
251         if (unlikely(existing)) {
252             for (int i = 0; i < V_L2_SIZE; i++) {
253                 qemu_spin_destroy(&pd[i].lock);
254             }
255             g_free(pd);
256             pd = existing;
257         }
258     }
259 
260     return pd + (index & (V_L2_SIZE - 1));
261 }
262 
page_find(tb_page_addr_t index)263 static inline PageDesc *page_find(tb_page_addr_t index)
264 {
265     return page_find_alloc(index, false);
266 }
267 
268 /**
269  * struct page_entry - page descriptor entry
270  * @pd:     pointer to the &struct PageDesc of the page this entry represents
271  * @index:  page index of the page
272  * @locked: whether the page is locked
273  *
274  * This struct helps us keep track of the locked state of a page, without
275  * bloating &struct PageDesc.
276  *
277  * A page lock protects accesses to all fields of &struct PageDesc.
278  *
279  * See also: &struct page_collection.
280  */
281 struct page_entry {
282     PageDesc *pd;
283     tb_page_addr_t index;
284     bool locked;
285 };
286 
287 /**
288  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
289  * @tree:   Binary search tree (BST) of the pages, with key == page index
290  * @max:    Pointer to the page in @tree with the highest page index
291  *
292  * To avoid deadlock we lock pages in ascending order of page index.
293  * When operating on a set of pages, we need to keep track of them so that
294  * we can lock them in order and also unlock them later. For this we collect
295  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
296  * @tree implementation we use does not provide an O(1) operation to obtain the
297  * highest-ranked element, we use @max to keep track of the inserted page
298  * with the highest index. This is valuable because if a page is not in
299  * the tree and its index is higher than @max's, then we can lock it
300  * without breaking the locking order rule.
301  *
302  * Note on naming: 'struct page_set' would be shorter, but we already have a few
303  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
304  *
305  * See also: page_collection_lock().
306  */
307 struct page_collection {
308     QTree *tree;
309     struct page_entry *max;
310 };
311 
/* System-mode iterator state: the tag bit of the current list link. */
typedef int PageForEachNext;

/*
 * System-mode flavour: walk the tagged-pointer list headed by
 * (pagedesc)->first_tb.  @start and @last are ignored.
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n)                  \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
315 
316 #ifdef CONFIG_DEBUG_TCG
317 
318 static __thread GHashTable *ht_pages_locked_debug;
319 
ht_pages_locked_debug_init(void)320 static void ht_pages_locked_debug_init(void)
321 {
322     if (ht_pages_locked_debug) {
323         return;
324     }
325     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
326 }
327 
page_is_locked(const PageDesc * pd)328 static bool page_is_locked(const PageDesc *pd)
329 {
330     PageDesc *found;
331 
332     ht_pages_locked_debug_init();
333     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
334     return !!found;
335 }
336 
/*
 * Debug only: note that this thread now holds @pd.
 * Asserts that @pd was not already recorded as locked.
 */
static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();

    g_assert(!page_is_locked(pd));
    /* The descriptor is both key and value; only membership matters. */
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}
343 
page_unlock__debug(const PageDesc * pd)344 static void page_unlock__debug(const PageDesc *pd)
345 {
346     bool removed;
347 
348     ht_pages_locked_debug_init();
349     g_assert(page_is_locked(pd));
350     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
351     g_assert(removed);
352 }
353 
/*
 * Debug only: abort, after reporting @file and @line, when @pd is not
 * recorded as locked by the calling thread.
 */
static void do_assert_page_locked(const PageDesc *pd,
                                  const char *file, int line)
{
    if (!unlikely(!page_is_locked(pd))) {
        return;
    }

    error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                 pd, file, line);
    abort();
}
/* Call-site wrapper that supplies the caller's file and line. */
#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
364 
assert_no_pages_locked(void)365 void assert_no_pages_locked(void)
366 {
367     ht_pages_locked_debug_init();
368     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
369 }
370 
371 #else /* !CONFIG_DEBUG_TCG */
372 
/* Without CONFIG_DEBUG_TCG the lock-tracking hooks compile to nothing. */
static inline void page_lock__debug(const PageDesc *pd)
{
}

static inline void page_unlock__debug(const PageDesc *pd)
{
}

static inline void assert_page_locked(const PageDesc *pd)
{
}
376 
377 #endif /* CONFIG_DEBUG_TCG */
378 
/*
 * Acquire @pd's spinlock.  The debug bookkeeping runs first, so a lock
 * already recorded for this thread is caught before spinning on it.
 */
static void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);

    qemu_spin_lock(&pd->lock);
}
384 
385 /* Like qemu_spin_trylock, returns false on success */
page_trylock(PageDesc * pd)386 static bool page_trylock(PageDesc *pd)
387 {
388     bool busy = qemu_spin_trylock(&pd->lock);
389     if (!busy) {
390         page_lock__debug(pd);
391     }
392     return busy;
393 }
394 
/* Release @pd's spinlock, then drop it from the debug bookkeeping. */
static void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);

    page_unlock__debug(pd);
}
400 
/*
 * Lock the page containing @paddr, allocating its PageDesc if it does not
 * exist yet.
 */
void tb_lock_page0(tb_page_addr_t paddr)
{
    PageDesc *pd = page_find_alloc(paddr >> TARGET_PAGE_BITS, true);

    page_lock(pd);
}
405 
/*
 * Lock the page of @paddr1 while the page of @paddr0 is already locked.
 *
 * Returns normally once both pages are locked.  When the second page has
 * the lower index and is busy, both locks are re-taken in ascending order
 * and control leaves via siglongjmp(tcg_ctx->jmp_trans, -3) instead of
 * returning.
 */
void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
{
    const tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    const tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;
    PageDesc *first, *second;

    if (idx0 == idx1) {
        /* Identical pages, and the first page is already locked. */
        return;
    }

    second = page_find_alloc(idx1, true);
    if (idx0 < idx1) {
        /* Correct locking order, we may block. */
        page_lock(second);
        return;
    }

    /* Incorrect locking order, we cannot block lest we deadlock. */
    if (page_trylock(second)) {
        /*
         * Busy.  Drop the lock on page0 and get both page locks in the
         * right order.  Restart translation via longjmp.
         */
        first = page_find_alloc(idx0, false);
        page_unlock(first);
        page_lock(second);
        page_lock(first);
        siglongjmp(tcg_ctx->jmp_trans, -3);
    }
}
439 
/*
 * Unlock the page of @paddr1, unless it is the same page as @paddr0 (in
 * which case no separate lock was taken for it).
 */
void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
{
    const tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    const tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;

    if (idx0 == idx1) {
        return;
    }
    page_unlock(page_find_alloc(idx1, false));
}
449 
/*
 * Lock the page(s) @tb occupies, lowest page index first.
 * Does nothing when @tb has no first page (address -1).  Missing PageDescs
 * are allocated.
 */
static void tb_lock_pages(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;
    bool two_pages;

    if (unlikely(paddr0 == -1)) {
        return;
    }

    two_pages = unlikely(paddr1 != -1) && idx0 != idx1;

    /* Second page sorts lower: it must be taken before the first one. */
    if (two_pages && idx1 < idx0) {
        page_lock(page_find_alloc(idx1, true));
    }

    page_lock(page_find_alloc(idx0, true));

    /* Second page sorts higher: take it after the first one. */
    if (two_pages && idx0 < idx1) {
        page_lock(page_find_alloc(idx1, true));
    }
}
470 
/*
 * Unlock the page(s) @tb occupies: the second page first (when it exists
 * and differs from the first), then the first page.
 * Does nothing when @tb has no first page (address -1).
 */
void tb_unlock_pages(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;
    bool two_pages;

    if (unlikely(paddr0 == -1)) {
        return;
    }

    two_pages = unlikely(paddr1 != -1) && idx0 != idx1;
    if (two_pages) {
        page_unlock(page_find_alloc(idx1, false));
    }
    page_unlock(page_find_alloc(idx0, false));
}
486 
487 static inline struct page_entry *
page_entry_new(PageDesc * pd,tb_page_addr_t index)488 page_entry_new(PageDesc *pd, tb_page_addr_t index)
489 {
490     struct page_entry *pe = g_malloc(sizeof(*pe));
491 
492     pe->index = index;
493     pe->pd = pd;
494     pe->locked = false;
495     return pe;
496 }
497 
/*
 * Value destructor for the page collection tree: unlock the page and free
 * the entry.  The entry is asserted to be locked.
 */
static void page_entry_destroy(gpointer p)
{
    struct page_entry *entry = p;

    g_assert(entry->locked);

    page_unlock(entry->pd);
    g_free(entry);
}
506 
507 /* returns false on success */
page_entry_trylock(struct page_entry * pe)508 static bool page_entry_trylock(struct page_entry *pe)
509 {
510     bool busy = page_trylock(pe->pd);
511     if (!busy) {
512         g_assert(!pe->locked);
513         pe->locked = true;
514     }
515     return busy;
516 }
517 
do_page_entry_lock(struct page_entry * pe)518 static void do_page_entry_lock(struct page_entry *pe)
519 {
520     page_lock(pe->pd);
521     g_assert(!pe->locked);
522     pe->locked = true;
523 }
524 
page_entry_lock(gpointer key,gpointer value,gpointer data)525 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
526 {
527     struct page_entry *pe = value;
528 
529     do_page_entry_lock(pe);
530     return FALSE;
531 }
532 
page_entry_unlock(gpointer key,gpointer value,gpointer data)533 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
534 {
535     struct page_entry *pe = value;
536 
537     if (pe->locked) {
538         pe->locked = false;
539         page_unlock(pe->pd);
540     }
541     return FALSE;
542 }
543 
544 /*
545  * Trylock a page, and if successful, add the page to a collection.
546  * Returns true ("busy") if the page could not be locked; false otherwise.
547  */
page_trylock_add(struct page_collection * set,tb_page_addr_t addr)548 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
549 {
550     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
551     struct page_entry *pe;
552     PageDesc *pd;
553 
554     pe = q_tree_lookup(set->tree, &index);
555     if (pe) {
556         return false;
557     }
558 
559     pd = page_find(index);
560     if (pd == NULL) {
561         return false;
562     }
563 
564     pe = page_entry_new(pd, index);
565     q_tree_insert(set->tree, &pe->index, pe);
566 
567     /*
568      * If this is either (1) the first insertion or (2) a page whose index
569      * is higher than any other so far, just lock the page and move on.
570      */
571     if (set->max == NULL || pe->index > set->max->index) {
572         set->max = pe;
573         do_page_entry_lock(pe);
574         return false;
575     }
576     /*
577      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
578      * locks in order.
579      */
580     return page_entry_trylock(pe);
581 }
582 
tb_page_addr_cmp(gconstpointer ap,gconstpointer bp,gpointer udata)583 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
584 {
585     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
586     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
587 
588     if (a == b) {
589         return 0;
590     } else if (a < b) {
591         return -1;
592     }
593     return 1;
594 }
595 
596 /*
597  * Lock a range of pages ([@start,@last]) as well as the pages of all
598  * intersecting TBs.
599  * Locking order: acquire locks in ascending order of page index.
600  */
page_collection_lock(tb_page_addr_t start,tb_page_addr_t last)601 static struct page_collection *page_collection_lock(tb_page_addr_t start,
602                                                     tb_page_addr_t last)
603 {
604     struct page_collection *set = g_malloc(sizeof(*set));
605     tb_page_addr_t index;
606     PageDesc *pd;
607 
608     start >>= TARGET_PAGE_BITS;
609     last >>= TARGET_PAGE_BITS;
610     g_assert(start <= last);
611 
612     set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
613                                 page_entry_destroy);
614     set->max = NULL;
615     assert_no_pages_locked();
616 
617  retry:
618     q_tree_foreach(set->tree, page_entry_lock, NULL);
619 
620     for (index = start; index <= last; index++) {
621         TranslationBlock *tb;
622         PageForEachNext n;
623 
624         pd = page_find(index);
625         if (pd == NULL) {
626             continue;
627         }
628         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
629             q_tree_foreach(set->tree, page_entry_unlock, NULL);
630             goto retry;
631         }
632         assert_page_locked(pd);
633         PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
634             if (page_trylock_add(set, tb_page_addr0(tb)) ||
635                 (tb_page_addr1(tb) != -1 &&
636                  page_trylock_add(set, tb_page_addr1(tb)))) {
637                 /* drop all locks, and reacquire in order */
638                 q_tree_foreach(set->tree, page_entry_unlock, NULL);
639                 goto retry;
640             }
641         }
642     }
643     return set;
644 }
645 
page_collection_unlock(struct page_collection * set)646 static void page_collection_unlock(struct page_collection *set)
647 {
648     /* entries are unlocked and freed via page_entry_destroy */
649     q_tree_destroy(set->tree);
650     g_free(set);
651 }
652 
653 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
tb_remove_all_1(int level,void ** lp)654 static void tb_remove_all_1(int level, void **lp)
655 {
656     int i;
657 
658     if (*lp == NULL) {
659         return;
660     }
661     if (level == 0) {
662         PageDesc *pd = *lp;
663 
664         for (i = 0; i < V_L2_SIZE; ++i) {
665             page_lock(&pd[i]);
666             pd[i].first_tb = (uintptr_t)NULL;
667             page_unlock(&pd[i]);
668         }
669     } else {
670         void **pp = *lp;
671 
672         for (i = 0; i < V_L2_SIZE; ++i) {
673             tb_remove_all_1(level - 1, pp + i);
674         }
675     }
676 }
677 
tb_remove_all(void)678 static void tb_remove_all(void)
679 {
680     int i, l1_sz = v_l1_size;
681 
682     for (i = 0; i < l1_sz; i++) {
683         tb_remove_all_1(v_l2_levels, l1_map + i);
684     }
685 }
686 
687 /*
688  * Add the tb in the target page and protect it if necessary.
689  * Called with @p->lock held.
690  */
tb_page_add(PageDesc * p,TranslationBlock * tb,unsigned int n)691 static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
692 {
693     bool page_already_protected;
694 
695     assert_page_locked(p);
696 
697     tb->page_next[n] = p->first_tb;
698     page_already_protected = p->first_tb != 0;
699     p->first_tb = (uintptr_t)tb | n;
700 
701     /*
702      * If some code is already present, then the pages are already
703      * protected. So we handle the case where only the first TB is
704      * allocated in a physical page.
705      */
706     if (!page_already_protected) {
707         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
708     }
709 }
710 
/*
 * Link @tb into the TB list of every page it occupies (system mode).
 *
 * The second page is linked first, and only when it exists and differs
 * from the first page.  tb_page_add() asserts that each page is locked, so
 * the caller must already hold the locks of the pages involved.  The page
 * descriptors are looked up without allocating.
 */
static void tb_record(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
    /*
     * Must be derived from paddr1.  Deriving it from paddr0 makes
     * pindex0 == pindex1 always, so the second page is never recorded.
     */
    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;

    assert(paddr0 != -1);
    if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
        tb_page_add(page_find_alloc(pindex1, false), tb, 1);
    }
    tb_page_add(page_find_alloc(pindex0, false), tb, 0);
}
724 
/*
 * Unlink @tb from the TB list of page @pd.
 * @pd must be locked, and @tb must be on the list: reaching the end of the
 * list without finding it is treated as unreachable.
 */
static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
{
    TranslationBlock *cur;
    PageForEachNext slot;
    uintptr_t *link;

    assert_page_locked(pd);

    /* @link always points at the list word that refers to @cur. */
    link = &pd->first_tb;
    PAGE_FOR_EACH_TB(unused, unused, pd, cur, slot) {
        if (cur == tb) {
            *link = cur->page_next[slot];
            return;
        }
        link = &cur->page_next[slot];
    }
    g_assert_not_reached();
}
742 
/*
 * Unlink @tb from the TB list of every page it occupies (system mode).
 *
 * Mirrors tb_record(): the second page is handled first, and only when it
 * exists and differs from the first page.  tb_page_remove() asserts that
 * each page is locked, so the caller must already hold the page locks.
 */
static void tb_remove(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
    /*
     * Must be derived from paddr1.  Deriving it from paddr0 makes
     * pindex0 == pindex1 always, so the TB is never removed from its
     * second page.
     */
    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;

    assert(paddr0 != -1);
    if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
        tb_page_remove(page_find_alloc(pindex1, false), tb);
    }
    tb_page_remove(page_find_alloc(pindex0, false), tb);
}
756 #endif /* CONFIG_USER_ONLY */
757 
758 /* flush all the translation blocks */
do_tb_flush(CPUState * cpu,run_on_cpu_data tb_flush_count)759 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
760 {
761     bool did_flush = false;
762 
763     mmap_lock();
764     /* If it is already been done on request of another CPU, just retry. */
765     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
766         goto done;
767     }
768     did_flush = true;
769 
770     CPU_FOREACH(cpu) {
771         tcg_flush_jmp_cache(cpu);
772     }
773 
774     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
775     tb_remove_all();
776 
777     tcg_region_reset_all();
778     /* XXX: flush processor icache at this point if cache flush is expensive */
779     qatomic_inc(&tb_ctx.tb_flush_count);
780 
781 done:
782     mmap_unlock();
783     if (did_flush) {
784         qemu_plugin_flush_cb();
785     }
786 }
787 
/*
 * Request a flush of all translation blocks.  Does nothing unless TCG is
 * enabled.  The flush runs immediately when @cpu is in a serial context,
 * otherwise it is queued with async_safe_run_on_cpu().
 */
void tb_flush(CPUState *cpu)
{
    unsigned flush_count;

    if (!tcg_enabled()) {
        return;
    }

    /* Sampled now so that do_tb_flush() can detect an intervening flush. */
    flush_count = qatomic_read(&tb_ctx.tb_flush_count);

    if (cpu_in_serial_context(cpu)) {
        do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(flush_count));
        return;
    }
    async_safe_run_on_cpu(cpu, do_tb_flush, RUN_ON_CPU_HOST_INT(flush_count));
}
801 
802 /* remove @orig from its @n_orig-th jump list */
tb_remove_from_jmp_list(TranslationBlock * orig,int n_orig)803 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
804 {
805     uintptr_t ptr, ptr_locked;
806     TranslationBlock *dest;
807     TranslationBlock *tb;
808     uintptr_t *pprev;
809     int n;
810 
811     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
812     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
813     dest = (TranslationBlock *)(ptr & ~1);
814     if (dest == NULL) {
815         return;
816     }
817 
818     qemu_spin_lock(&dest->jmp_lock);
819     /*
820      * While acquiring the lock, the jump might have been removed if the
821      * destination TB was invalidated; check again.
822      */
823     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
824     if (ptr_locked != ptr) {
825         qemu_spin_unlock(&dest->jmp_lock);
826         /*
827          * The only possibility is that the jump was unlinked via
828          * tb_jump_unlink(dest). Seeing here another destination would be a bug,
829          * because we set the LSB above.
830          */
831         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
832         return;
833     }
834     /*
835      * We first acquired the lock, and since the destination pointer matches,
836      * we know for sure that @orig is in the jmp list.
837      */
838     pprev = &dest->jmp_list_head;
839     TB_FOR_EACH_JMP(dest, tb, n) {
840         if (tb == orig && n == n_orig) {
841             *pprev = tb->jmp_list_next[n];
842             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
843             qemu_spin_unlock(&dest->jmp_lock);
844             return;
845         }
846         pprev = &tb->jmp_list_next[n];
847     }
848     g_assert_not_reached();
849 }
850 
851 /*
852  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
853  */
tb_reset_jump(TranslationBlock * tb,int n)854 void tb_reset_jump(TranslationBlock *tb, int n)
855 {
856     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
857     tb_set_jmp_target(tb, n, addr);
858 }
859 
860 /* remove any jumps to the TB */
tb_jmp_unlink(TranslationBlock * dest)861 static inline void tb_jmp_unlink(TranslationBlock *dest)
862 {
863     TranslationBlock *tb;
864     int n;
865 
866     qemu_spin_lock(&dest->jmp_lock);
867 
868     TB_FOR_EACH_JMP(dest, tb, n) {
869         tb_reset_jump(tb, n);
870         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
871         /* No need to clear the list entry; setting the dest ptr is enough */
872     }
873     dest->jmp_list_head = (uintptr_t)NULL;
874 
875     qemu_spin_unlock(&dest->jmp_lock);
876 }
877 
/*
 * Make sure no CPU's jump cache still refers to @tb.
 * For a CF_PCREL TB every CPU's whole jump cache is flushed; otherwise only
 * the slot selected by hashing tb->pc is cleared, and only if it holds @tb.
 */
static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
{
    CPUState *cpu;
    uint32_t slot;

    if (tb_cflags(tb) & CF_PCREL) {
        /* A TB may be at any virtual address */
        CPU_FOREACH(cpu) {
            tcg_flush_jmp_cache(cpu);
        }
        return;
    }

    slot = tb_jmp_cache_hash_func(tb->pc);
    CPU_FOREACH(cpu) {
        CPUJumpCache *cache = cpu->tb_jmp_cache;

        if (qatomic_read(&cache->array[slot].tb) == tb) {
            qatomic_set(&cache->array[slot].tb, NULL);
        }
    }
}
899 
900 /*
901  * In user-mode, call with mmap_lock held.
902  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
903  * locks held.
904  */
do_tb_phys_invalidate(TranslationBlock * tb,bool rm_from_page_list)905 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
906 {
907     uint32_t h;
908     tb_page_addr_t phys_pc;
909     uint32_t orig_cflags = tb_cflags(tb);
910 
911     assert_memory_lock();
912 
913     /* make sure no further incoming jumps will be chained to this TB */
914     qemu_spin_lock(&tb->jmp_lock);
915     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
916     qemu_spin_unlock(&tb->jmp_lock);
917 
918     /* remove the TB from the hash list */
919     phys_pc = tb_page_addr0(tb);
920     h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
921                      tb->flags, tb->cs_base, orig_cflags);
922     if (!qht_remove(&tb_ctx.htable, tb, h)) {
923         return;
924     }
925 
926     /* remove the TB from the page list */
927     if (rm_from_page_list) {
928         tb_remove(tb);
929     }
930 
931     /* remove the TB from the hash list */
932     tb_jmp_cache_inval_tb(tb);
933 
934     /* suppress this TB from the two jump lists */
935     tb_remove_from_jmp_list(tb, 0);
936     tb_remove_from_jmp_list(tb, 1);
937 
938     /* suppress any remaining jumps to this TB */
939     tb_jmp_unlink(tb);
940 
941     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
942                 tb_ctx.tb_phys_invalidate_count + 1);
943 }
944 
/*
 * Invalidate @tb, including its page-list entries, bracketed by
 * qemu_thread_jit_write() / qemu_thread_jit_execute().
 * The page-list removal requires the TB's page locks in !user-mode.
 */
static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
    qemu_thread_jit_write();

    do_tb_phys_invalidate(tb, true);

    qemu_thread_jit_execute();
}
951 
952 /*
953  * Invalidate one TB.
954  * Called with mmap_lock held in user-mode.
955  */
tb_phys_invalidate(TranslationBlock * tb,tb_page_addr_t page_addr)956 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
957 {
958     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
959         tb_lock_pages(tb);
960         do_tb_phys_invalidate(tb, true);
961         tb_unlock_pages(tb);
962     } else {
963         do_tb_phys_invalidate(tb, false);
964     }
965 }
966 
967 /*
968  * Add a new TB and link it to the physical page tables.
969  * Called with mmap_lock held for user-mode emulation.
970  *
971  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
972  * Note that in !user-mode, another thread might have already added a TB
973  * for the same block of guest code that @tb corresponds to. In that case,
974  * the caller should discard the original @tb, and use instead the returned TB.
975  */
tb_link_page(TranslationBlock * tb)976 TranslationBlock *tb_link_page(TranslationBlock *tb)
977 {
978     void *existing_tb = NULL;
979     uint32_t h;
980 
981     assert_memory_lock();
982     tcg_debug_assert(!(tb->cflags & CF_INVALID));
983 
984     tb_record(tb);
985 
986     /* add in the hash table */
987     h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
988                      tb->flags, tb->cs_base, tb->cflags);
989     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
990 
991     /* remove TB from the page(s) if we couldn't insert it */
992     if (unlikely(existing_tb)) {
993         tb_remove(tb);
994         tb_unlock_pages(tb);
995         return existing_tb;
996     }
997 
998     tb_unlock_pages(tb);
999     return tb;
1000 }
1001 
1002 #ifdef CONFIG_USER_ONLY
1003 /*
1004  * Invalidate all TBs which intersect with the target address range.
1005  * Called with mmap_lock held for user-mode emulation.
1006  * NOTE: this function must not be called while a TB is running.
1007  */
tb_invalidate_phys_range(tb_page_addr_t start,tb_page_addr_t last)1008 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1009 {
1010     TranslationBlock *tb;
1011     PageForEachNext n;
1012 
1013     assert_memory_lock();
1014 
1015     PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1016         tb_phys_invalidate__locked(tb);
1017     }
1018 }
1019 
1020 /*
1021  * Invalidate all TBs which intersect with the target address page @addr.
1022  * Called with mmap_lock held for user-mode emulation
1023  * NOTE: this function must not be called while a TB is running.
1024  */
tb_invalidate_phys_page(tb_page_addr_t addr)1025 static void tb_invalidate_phys_page(tb_page_addr_t addr)
1026 {
1027     tb_page_addr_t start, last;
1028 
1029     start = addr & TARGET_PAGE_MASK;
1030     last = addr | ~TARGET_PAGE_MASK;
1031     tb_invalidate_phys_range(start, last);
1032 }
1033 
1034 /*
1035  * Called with mmap_lock held. If pc is not 0 then it indicates the
1036  * host PC of the faulting store instruction that caused this invalidate.
1037  * Returns true if the caller needs to abort execution of the current
1038  * TB (because it was modified by this store and the guest CPU has
1039  * precise-SMC semantics).
1040  */
tb_invalidate_phys_page_unwind(tb_page_addr_t addr,uintptr_t pc)1041 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1042 {
1043     TranslationBlock *current_tb;
1044     bool current_tb_modified;
1045     TranslationBlock *tb;
1046     PageForEachNext n;
1047     tb_page_addr_t last;
1048 
1049     /*
1050      * Without precise smc semantics, or when outside of a TB,
1051      * we can skip to invalidate.
1052      */
1053 #ifndef TARGET_HAS_PRECISE_SMC
1054     pc = 0;
1055 #endif
1056     if (!pc) {
1057         tb_invalidate_phys_page(addr);
1058         return false;
1059     }
1060 
1061     assert_memory_lock();
1062     current_tb = tcg_tb_lookup(pc);
1063 
1064     last = addr | ~TARGET_PAGE_MASK;
1065     addr &= TARGET_PAGE_MASK;
1066     current_tb_modified = false;
1067 
1068     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1069         if (current_tb == tb &&
1070             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1071             /*
1072              * If we are modifying the current TB, we must stop its
1073              * execution. We could be more precise by checking that
1074              * the modification is after the current PC, but it would
1075              * require a specialized function to partially restore
1076              * the CPU state.
1077              */
1078             current_tb_modified = true;
1079             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1080         }
1081         tb_phys_invalidate__locked(tb);
1082     }
1083 
1084     if (current_tb_modified) {
1085         /* Force execution of one insn next time.  */
1086         CPUState *cpu = current_cpu;
1087         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1088         return true;
1089     }
1090     return false;
1091 }
1092 #else
1093 /*
1094  * @p must be non-NULL.
1095  * Call with all @pages locked.
1096  */
1097 static void
tb_invalidate_phys_page_range__locked(struct page_collection * pages,PageDesc * p,tb_page_addr_t start,tb_page_addr_t last,uintptr_t retaddr)1098 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1099                                       PageDesc *p, tb_page_addr_t start,
1100                                       tb_page_addr_t last,
1101                                       uintptr_t retaddr)
1102 {
1103     TranslationBlock *tb;
1104     PageForEachNext n;
1105 #ifdef TARGET_HAS_PRECISE_SMC
1106     bool current_tb_modified = false;
1107     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1108 #endif /* TARGET_HAS_PRECISE_SMC */
1109 
1110     /* Range may not cross a page. */
1111     tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1112 
1113     /*
1114      * We remove all the TBs in the range [start, last].
1115      * XXX: see if in some cases it could be faster to invalidate all the code
1116      */
1117     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1118         tb_page_addr_t tb_start, tb_last;
1119 
1120         /* NOTE: this is subtle as a TB may span two physical pages */
1121         tb_start = tb_page_addr0(tb);
1122         tb_last = tb_start + tb->size - 1;
1123         if (n == 0) {
1124             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1125         } else {
1126             tb_start = tb_page_addr1(tb);
1127             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1128         }
1129         if (!(tb_last < start || tb_start > last)) {
1130 #ifdef TARGET_HAS_PRECISE_SMC
1131             if (current_tb == tb &&
1132                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1133                 /*
1134                  * If we are modifying the current TB, we must stop
1135                  * its execution. We could be more precise by checking
1136                  * that the modification is after the current PC, but it
1137                  * would require a specialized function to partially
1138                  * restore the CPU state.
1139                  */
1140                 current_tb_modified = true;
1141                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1142             }
1143 #endif /* TARGET_HAS_PRECISE_SMC */
1144             tb_phys_invalidate__locked(tb);
1145         }
1146     }
1147 
1148     /* if no code remaining, no need to continue to use slow writes */
1149     if (!p->first_tb) {
1150         tlb_unprotect_code(start);
1151     }
1152 
1153 #ifdef TARGET_HAS_PRECISE_SMC
1154     if (current_tb_modified) {
1155         page_collection_unlock(pages);
1156         /* Force execution of one insn next time.  */
1157         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1158         mmap_unlock();
1159         cpu_loop_exit_noexc(current_cpu);
1160     }
1161 #endif
1162 }
1163 
1164 /*
1165  * Invalidate all TBs which intersect with the target physical address range
1166  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1167  * 'is_cpu_write_access' should be true if called from a real cpu write
1168  * access: the virtual CPU will exit the current TB if code is modified inside
1169  * this TB.
1170  */
tb_invalidate_phys_range(tb_page_addr_t start,tb_page_addr_t last)1171 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1172 {
1173     struct page_collection *pages;
1174     tb_page_addr_t index, index_last;
1175 
1176     pages = page_collection_lock(start, last);
1177 
1178     index_last = last >> TARGET_PAGE_BITS;
1179     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1180         PageDesc *pd = page_find(index);
1181         tb_page_addr_t page_start, page_last;
1182 
1183         if (pd == NULL) {
1184             continue;
1185         }
1186         assert_page_locked(pd);
1187         page_start = index << TARGET_PAGE_BITS;
1188         page_last = page_start | ~TARGET_PAGE_MASK;
1189         page_last = MIN(page_last, last);
1190         tb_invalidate_phys_page_range__locked(pages, pd,
1191                                               page_start, page_last, 0);
1192     }
1193     page_collection_unlock(pages);
1194 }
1195 
1196 /*
1197  * Call with all @pages in the range [@start, @start + len[ locked.
1198  */
tb_invalidate_phys_page_fast__locked(struct page_collection * pages,tb_page_addr_t start,unsigned len,uintptr_t ra)1199 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1200                                                  tb_page_addr_t start,
1201                                                  unsigned len, uintptr_t ra)
1202 {
1203     PageDesc *p;
1204 
1205     p = page_find(start >> TARGET_PAGE_BITS);
1206     if (!p) {
1207         return;
1208     }
1209 
1210     assert_page_locked(p);
1211     tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1212 }
1213 
1214 /*
1215  * len must be <= 8 and start must be a multiple of len.
1216  * Called via softmmu_template.h when code areas are written to with
1217  * iothread mutex not held.
1218  */
tb_invalidate_phys_range_fast(ram_addr_t ram_addr,unsigned size,uintptr_t retaddr)1219 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1220                                    unsigned size,
1221                                    uintptr_t retaddr)
1222 {
1223     struct page_collection *pages;
1224 
1225     pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1226     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1227     page_collection_unlock(pages);
1228 }
1229 
1230 #endif /* CONFIG_USER_ONLY */
1231