xref: /qemu/accel/tcg/user-exec.c (revision ce32a9e9)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "hw/core/tcg-cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg.h"
24 #include "qemu/bitops.h"
25 #include "qemu/rcu.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translate-all.h"
28 #include "exec/helper-proto.h"
29 #include "qemu/atomic128.h"
30 #include "trace/trace-root.h"
31 #include "tcg/tcg-ldst.h"
32 #include "internal-common.h"
33 #include "internal-target.h"
34 
/*
 * Per-thread marker consulted by adjust_signal_pc() when a host fault
 * is taken:
 *   0     - the fault came from a memory operation in generated code;
 *   1     - the fault came from a host read done for translation;
 *   other - the host return address of the helper doing the access.
 */
__thread uintptr_t helper_retaddr;
36 
37 //#define DEBUG_SIGNAL
38 
39 /*
40  * Adjust the pc to pass to cpu_restore_state; return the memop type.
41  */
42 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
43 {
44     switch (helper_retaddr) {
45     default:
46         /*
47          * Fault during host memory operation within a helper function.
48          * The helper's host return address, saved here, gives us a
49          * pointer into the generated code that will unwind to the
50          * correct guest pc.
51          */
52         *pc = helper_retaddr;
53         break;
54 
55     case 0:
56         /*
57          * Fault during host memory operation within generated code.
58          * (Or, a unrelated bug within qemu, but we can't tell from here).
59          *
60          * We take the host pc from the signal frame.  However, we cannot
61          * use that value directly.  Within cpu_restore_state_from_tb, we
62          * assume PC comes from GETPC(), as used by the helper functions,
63          * so we adjust the address by -GETPC_ADJ to form an address that
64          * is within the call insn, so that the address does not accidentally
65          * match the beginning of the next guest insn.  However, when the
66          * pc comes from the signal frame it points to the actual faulting
67          * host memory insn and not the return from a call insn.
68          *
69          * Therefore, adjust to compensate for what will be done later
70          * by cpu_restore_state_from_tb.
71          */
72         *pc += GETPC_ADJ;
73         break;
74 
75     case 1:
76         /*
77          * Fault during host read for translation, or loosely, "execution".
78          *
79          * The guest pc is already pointing to the start of the TB for which
80          * code is being generated.  If the guest translator manages the
81          * page crossings correctly, this is exactly the correct address
82          * (and if the translator doesn't handle page boundaries correctly
83          * there's little we can do about that here).  Therefore, do not
84          * trigger the unwinder.
85          */
86         *pc = 0;
87         return MMU_INST_FETCH;
88     }
89 
90     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
91 }
92 
93 /**
94  * handle_sigsegv_accerr_write:
95  * @cpu: the cpu context
96  * @old_set: the sigset_t from the signal ucontext_t
97  * @host_pc: the host pc, adjusted for the signal
98  * @guest_addr: the guest address of the fault
99  *
100  * Return true if the write fault has been handled, and should be re-tried.
101  *
102  * Note that it is important that we don't call page_unprotect() unless
103  * this is really a "write to nonwritable page" fault, because
104  * page_unprotect() assumes that if it is called for an access to
105  * a page that's writable this means we had two threads racing and
106  * another thread got there first and already made the page writable;
107  * so we will retry the access. If we were to call page_unprotect()
108  * for some other kind of fault that should really be passed to the
109  * guest, we'd end up in an infinite loop of retrying the faulting access.
110  */
111 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
112                                  uintptr_t host_pc, abi_ptr guest_addr)
113 {
114     switch (page_unprotect(guest_addr, host_pc)) {
115     case 0:
116         /*
117          * Fault not caused by a page marked unwritable to protect
118          * cached translations, must be the guest binary's problem.
119          */
120         return false;
121     case 1:
122         /*
123          * Fault caused by protection of cached translation; TBs
124          * invalidated, so resume execution.
125          */
126         return true;
127     case 2:
128         /*
129          * Fault caused by protection of cached translation, and the
130          * currently executing TB was modified and must be exited immediately.
131          */
132         sigprocmask(SIG_SETMASK, old_set, NULL);
133         cpu_loop_exit_noexc(cpu);
134         /* NORETURN */
135     default:
136         g_assert_not_reached();
137     }
138 }
139 
/*
 * One range of guest addresses, [itree.start, itree.last], whose pages
 * all carry the same set of PAGE_* flags.
 */
typedef struct PageFlagsNode {
    /* Used by g_free_rcu() when the node is released. */
    struct rcu_head rcu;
    /* Address range of the node; links it into pageflags_root. */
    IntervalTreeNode itree;
    /* PAGE_* bits that apply to the whole range. */
    int flags;
} PageFlagsNode;

/* Interval tree holding every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;
147 
148 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
149 {
150     IntervalTreeNode *n;
151 
152     n = interval_tree_iter_first(&pageflags_root, start, last);
153     return n ? container_of(n, PageFlagsNode, itree) : NULL;
154 }
155 
156 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
157                                      target_ulong last)
158 {
159     IntervalTreeNode *n;
160 
161     n = interval_tree_iter_next(&p->itree, start, last);
162     return n ? container_of(n, PageFlagsNode, itree) : NULL;
163 }
164 
165 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
166 {
167     IntervalTreeNode *n;
168     int rc = 0;
169 
170     mmap_lock();
171     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
172          n != NULL;
173          n = interval_tree_iter_next(n, 0, -1)) {
174         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
175 
176         rc = fn(priv, n->start, n->last + 1, p->flags);
177         if (rc != 0) {
178             break;
179         }
180     }
181     mmap_unlock();
182 
183     return rc;
184 }
185 
186 static int dump_region(void *priv, target_ulong start,
187                        target_ulong end, unsigned long prot)
188 {
189     FILE *f = (FILE *)priv;
190 
191     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
192             start, end, end - start,
193             ((prot & PAGE_READ) ? 'r' : '-'),
194             ((prot & PAGE_WRITE) ? 'w' : '-'),
195             ((prot & PAGE_EXEC) ? 'x' : '-'));
196     return 0;
197 }
198 
199 /* dump memory mappings */
200 void page_dump(FILE *f)
201 {
202     const int length = sizeof(target_ulong) * 2;
203 
204     fprintf(f, "%-*s %-*s %-*s %s\n",
205             length, "start", length, "end", length, "size", "prot");
206     walk_memory_regions(f, dump_region);
207 }
208 
209 int page_get_flags(target_ulong address)
210 {
211     PageFlagsNode *p = pageflags_find(address, address);
212 
213     /*
214      * See util/interval-tree.c re lockless lookups: no false positives but
215      * there are false negatives.  If we find nothing, retry with the mmap
216      * lock acquired.
217      */
218     if (p) {
219         return p->flags;
220     }
221     if (have_mmap_lock()) {
222         return 0;
223     }
224 
225     mmap_lock();
226     p = pageflags_find(address, address);
227     mmap_unlock();
228     return p ? p->flags : 0;
229 }
230 
231 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
232 static void pageflags_create(target_ulong start, target_ulong last, int flags)
233 {
234     PageFlagsNode *p = g_new(PageFlagsNode, 1);
235 
236     p->itree.start = start;
237     p->itree.last = last;
238     p->flags = flags;
239     interval_tree_insert(&p->itree, &pageflags_root);
240 }
241 
242 /* A subroutine of page_set_flags: remove everything in [start,last]. */
243 static bool pageflags_unset(target_ulong start, target_ulong last)
244 {
245     bool inval_tb = false;
246 
247     while (true) {
248         PageFlagsNode *p = pageflags_find(start, last);
249         target_ulong p_last;
250 
251         if (!p) {
252             break;
253         }
254 
255         if (p->flags & PAGE_EXEC) {
256             inval_tb = true;
257         }
258 
259         interval_tree_remove(&p->itree, &pageflags_root);
260         p_last = p->itree.last;
261 
262         if (p->itree.start < start) {
263             /* Truncate the node from the end, or split out the middle. */
264             p->itree.last = start - 1;
265             interval_tree_insert(&p->itree, &pageflags_root);
266             if (last < p_last) {
267                 pageflags_create(last + 1, p_last, p->flags);
268                 break;
269             }
270         } else if (p_last <= last) {
271             /* Range completely covers node -- remove it. */
272             g_free_rcu(p, rcu);
273         } else {
274             /* Truncate the node from the start. */
275             p->itree.start = last + 1;
276             interval_tree_insert(&p->itree, &pageflags_root);
277             break;
278         }
279     }
280 
281     return inval_tb;
282 }
283 
284 /*
285  * A subroutine of page_set_flags: nothing overlaps [start,last],
286  * but check adjacent mappings and maybe merge into a single range.
287  */
288 static void pageflags_create_merge(target_ulong start, target_ulong last,
289                                    int flags)
290 {
291     PageFlagsNode *next = NULL, *prev = NULL;
292 
293     if (start > 0) {
294         prev = pageflags_find(start - 1, start - 1);
295         if (prev) {
296             if (prev->flags == flags) {
297                 interval_tree_remove(&prev->itree, &pageflags_root);
298             } else {
299                 prev = NULL;
300             }
301         }
302     }
303     if (last + 1 != 0) {
304         next = pageflags_find(last + 1, last + 1);
305         if (next) {
306             if (next->flags == flags) {
307                 interval_tree_remove(&next->itree, &pageflags_root);
308             } else {
309                 next = NULL;
310             }
311         }
312     }
313 
314     if (prev) {
315         if (next) {
316             prev->itree.last = next->itree.last;
317             g_free_rcu(next, rcu);
318         } else {
319             prev->itree.last = last;
320         }
321         interval_tree_insert(&prev->itree, &pageflags_root);
322     } else if (next) {
323         next->itree.start = start;
324         interval_tree_insert(&next->itree, &pageflags_root);
325     } else {
326         pageflags_create(start, last, flags);
327     }
328 }
329 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
/*
 * Flag bits that page_set_flags() leaves in place on an existing range
 * unless the caller passes PAGE_RESET.
 */
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
338 
339 /* A subroutine of page_set_flags: add flags to [start,last]. */
340 static bool pageflags_set_clear(target_ulong start, target_ulong last,
341                                 int set_flags, int clear_flags)
342 {
343     PageFlagsNode *p;
344     target_ulong p_start, p_last;
345     int p_flags, merge_flags;
346     bool inval_tb = false;
347 
348  restart:
349     p = pageflags_find(start, last);
350     if (!p) {
351         if (set_flags) {
352             pageflags_create_merge(start, last, set_flags);
353         }
354         goto done;
355     }
356 
357     p_start = p->itree.start;
358     p_last = p->itree.last;
359     p_flags = p->flags;
360     /* Using mprotect on a page does not change sticky bits. */
361     merge_flags = (p_flags & ~clear_flags) | set_flags;
362 
363     /*
364      * Need to flush if an overlapping executable region
365      * removes exec, or adds write.
366      */
367     if ((p_flags & PAGE_EXEC)
368         && (!(merge_flags & PAGE_EXEC)
369             || (merge_flags & ~p_flags & PAGE_WRITE))) {
370         inval_tb = true;
371     }
372 
373     /*
374      * If there is an exact range match, update and return without
375      * attempting to merge with adjacent regions.
376      */
377     if (start == p_start && last == p_last) {
378         if (merge_flags) {
379             p->flags = merge_flags;
380         } else {
381             interval_tree_remove(&p->itree, &pageflags_root);
382             g_free_rcu(p, rcu);
383         }
384         goto done;
385     }
386 
387     /*
388      * If sticky bits affect the original mapping, then we must be more
389      * careful about the existing intervals and the separate flags.
390      */
391     if (set_flags != merge_flags) {
392         if (p_start < start) {
393             interval_tree_remove(&p->itree, &pageflags_root);
394             p->itree.last = start - 1;
395             interval_tree_insert(&p->itree, &pageflags_root);
396 
397             if (last < p_last) {
398                 if (merge_flags) {
399                     pageflags_create(start, last, merge_flags);
400                 }
401                 pageflags_create(last + 1, p_last, p_flags);
402             } else {
403                 if (merge_flags) {
404                     pageflags_create(start, p_last, merge_flags);
405                 }
406                 if (p_last < last) {
407                     start = p_last + 1;
408                     goto restart;
409                 }
410             }
411         } else {
412             if (start < p_start && set_flags) {
413                 pageflags_create(start, p_start - 1, set_flags);
414             }
415             if (last < p_last) {
416                 interval_tree_remove(&p->itree, &pageflags_root);
417                 p->itree.start = last + 1;
418                 interval_tree_insert(&p->itree, &pageflags_root);
419                 if (merge_flags) {
420                     pageflags_create(start, last, merge_flags);
421                 }
422             } else {
423                 if (merge_flags) {
424                     p->flags = merge_flags;
425                 } else {
426                     interval_tree_remove(&p->itree, &pageflags_root);
427                     g_free_rcu(p, rcu);
428                 }
429                 if (p_last < last) {
430                     start = p_last + 1;
431                     goto restart;
432                 }
433             }
434         }
435         goto done;
436     }
437 
438     /* If flags are not changing for this range, incorporate it. */
439     if (set_flags == p_flags) {
440         if (start < p_start) {
441             interval_tree_remove(&p->itree, &pageflags_root);
442             p->itree.start = start;
443             interval_tree_insert(&p->itree, &pageflags_root);
444         }
445         if (p_last < last) {
446             start = p_last + 1;
447             goto restart;
448         }
449         goto done;
450     }
451 
452     /* Maybe split out head and/or tail ranges with the original flags. */
453     interval_tree_remove(&p->itree, &pageflags_root);
454     if (p_start < start) {
455         p->itree.last = start - 1;
456         interval_tree_insert(&p->itree, &pageflags_root);
457 
458         if (p_last < last) {
459             goto restart;
460         }
461         if (last < p_last) {
462             pageflags_create(last + 1, p_last, p_flags);
463         }
464     } else if (last < p_last) {
465         p->itree.start = last + 1;
466         interval_tree_insert(&p->itree, &pageflags_root);
467     } else {
468         g_free_rcu(p, rcu);
469         goto restart;
470     }
471     if (set_flags) {
472         pageflags_create(start, last, set_flags);
473     }
474 
475  done:
476     return inval_tb;
477 }
478 
479 /*
480  * Modify the flags of a page and invalidate the code if necessary.
481  * The flag PAGE_WRITE_ORG is positioned automatically depending
482  * on PAGE_WRITE.  The mmap_lock should already be held.
483  */
484 void page_set_flags(target_ulong start, target_ulong last, int flags)
485 {
486     bool reset = false;
487     bool inval_tb = false;
488 
489     /* This function should never be called with addresses outside the
490        guest address space.  If this assert fires, it probably indicates
491        a missing call to h2g_valid.  */
492     assert(start <= last);
493     assert(last <= GUEST_ADDR_MAX);
494     /* Only set PAGE_ANON with new mappings. */
495     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
496     assert_memory_lock();
497 
498     start &= TARGET_PAGE_MASK;
499     last |= ~TARGET_PAGE_MASK;
500 
501     if (!(flags & PAGE_VALID)) {
502         flags = 0;
503     } else {
504         reset = flags & PAGE_RESET;
505         flags &= ~PAGE_RESET;
506         if (flags & PAGE_WRITE) {
507             flags |= PAGE_WRITE_ORG;
508         }
509     }
510 
511     if (!flags || reset) {
512         page_reset_target_data(start, last);
513         inval_tb |= pageflags_unset(start, last);
514     }
515     if (flags) {
516         inval_tb |= pageflags_set_clear(start, last, flags,
517                                         ~(reset ? 0 : PAGE_STICKY));
518     }
519     if (inval_tb) {
520         tb_invalidate_phys_range(start, last);
521     }
522 }
523 
524 bool page_check_range(target_ulong start, target_ulong len, int flags)
525 {
526     target_ulong last;
527     int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
528     bool ret;
529 
530     if (len == 0) {
531         return true;  /* trivial length */
532     }
533 
534     last = start + len - 1;
535     if (last < start) {
536         return false; /* wrap around */
537     }
538 
539     locked = have_mmap_lock();
540     while (true) {
541         PageFlagsNode *p = pageflags_find(start, last);
542         int missing;
543 
544         if (!p) {
545             if (!locked) {
546                 /*
547                  * Lockless lookups have false negatives.
548                  * Retry with the lock held.
549                  */
550                 mmap_lock();
551                 locked = -1;
552                 p = pageflags_find(start, last);
553             }
554             if (!p) {
555                 ret = false; /* entire region invalid */
556                 break;
557             }
558         }
559         if (start < p->itree.start) {
560             ret = false; /* initial bytes invalid */
561             break;
562         }
563 
564         missing = flags & ~p->flags;
565         if (missing & ~PAGE_WRITE) {
566             ret = false; /* page doesn't match */
567             break;
568         }
569         if (missing & PAGE_WRITE) {
570             if (!(p->flags & PAGE_WRITE_ORG)) {
571                 ret = false; /* page not writable */
572                 break;
573             }
574             /* Asking about writable, but has been protected: undo. */
575             if (!page_unprotect(start, 0)) {
576                 ret = false;
577                 break;
578             }
579             /* TODO: page_unprotect should take a range, not a single page. */
580             if (last - start < TARGET_PAGE_SIZE) {
581                 ret = true; /* ok */
582                 break;
583             }
584             start += TARGET_PAGE_SIZE;
585             continue;
586         }
587 
588         if (last <= p->itree.last) {
589             ret = true; /* ok */
590             break;
591         }
592         start = p->itree.last + 1;
593     }
594 
595     /* Release the lock if acquired locally. */
596     if (locked < 0) {
597         mmap_unlock();
598     }
599     return ret;
600 }
601 
602 bool page_check_range_empty(target_ulong start, target_ulong last)
603 {
604     assert(last >= start);
605     assert_memory_lock();
606     return pageflags_find(start, last) == NULL;
607 }
608 
609 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
610                                    target_ulong len, target_ulong align)
611 {
612     target_ulong len_m1, align_m1;
613 
614     assert(min <= max);
615     assert(max <= GUEST_ADDR_MAX);
616     assert(len != 0);
617     assert(is_power_of_2(align));
618     assert_memory_lock();
619 
620     len_m1 = len - 1;
621     align_m1 = align - 1;
622 
623     /* Iteratively narrow the search region. */
624     while (1) {
625         PageFlagsNode *p;
626 
627         /* Align min and double-check there's enough space remaining. */
628         min = (min + align_m1) & ~align_m1;
629         if (min > max) {
630             return -1;
631         }
632         if (len_m1 > max - min) {
633             return -1;
634         }
635 
636         p = pageflags_find(min, min + len_m1);
637         if (p == NULL) {
638             /* Found! */
639             return min;
640         }
641         if (max <= p->itree.last) {
642             /* Existing allocation fills the remainder of the search region. */
643             return -1;
644         }
645         /* Skip across existing allocation. */
646         min = p->itree.last + 1;
647     }
648 }
649 
650 void page_protect(tb_page_addr_t address)
651 {
652     PageFlagsNode *p;
653     target_ulong start, last;
654     int prot;
655 
656     assert_memory_lock();
657 
658     if (qemu_host_page_size <= TARGET_PAGE_SIZE) {
659         start = address & TARGET_PAGE_MASK;
660         last = start + TARGET_PAGE_SIZE - 1;
661     } else {
662         start = address & qemu_host_page_mask;
663         last = start + qemu_host_page_size - 1;
664     }
665 
666     p = pageflags_find(start, last);
667     if (!p) {
668         return;
669     }
670     prot = p->flags;
671 
672     if (unlikely(p->itree.last < last)) {
673         /* More than one protection region covers the one host page. */
674         assert(TARGET_PAGE_SIZE < qemu_host_page_size);
675         while ((p = pageflags_next(p, start, last)) != NULL) {
676             prot |= p->flags;
677         }
678     }
679 
680     if (prot & PAGE_WRITE) {
681         pageflags_set_clear(start, last, 0, PAGE_WRITE);
682         mprotect(g2h_untagged(start), qemu_host_page_size,
683                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
684     }
685 }
686 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @address: guest address whose page should become writable again
 * @pc: host pc of the faulting access, passed on to
 *      tb_invalidate_phys_page_unwind(); page_check_range() passes 0
 */
int page_unprotect(target_ulong address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
#ifdef TARGET_HAS_PRECISE_SMC
        /* Report whether the TB containing pc has been marked invalid. */
        TranslationBlock *current_tb = tcg_tb_lookup(pc);
        if (current_tb) {
            current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
        }
#endif
    } else {
        target_ulong start, len, i;
        int prot;

        if (qemu_host_page_size <= TARGET_PAGE_SIZE) {
            /* A single target page is affected. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            /*
             * Read the flags before pageflags_set_clear(), which may
             * modify or replace the node p points to.
             */
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
        } else {
            /*
             * The host page spans several target pages: visit each of
             * them and accumulate the union of their flags in prot.
             */
            start = address & qemu_host_page_mask;
            len = qemu_host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                target_ulong addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
        }
        /* Guest-executable memory is mapped on the host as readable. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_BITS);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
772 
773 static int probe_access_internal(CPUArchState *env, vaddr addr,
774                                  int fault_size, MMUAccessType access_type,
775                                  bool nonfault, uintptr_t ra)
776 {
777     int acc_flag;
778     bool maperr;
779 
780     switch (access_type) {
781     case MMU_DATA_STORE:
782         acc_flag = PAGE_WRITE_ORG;
783         break;
784     case MMU_DATA_LOAD:
785         acc_flag = PAGE_READ;
786         break;
787     case MMU_INST_FETCH:
788         acc_flag = PAGE_EXEC;
789         break;
790     default:
791         g_assert_not_reached();
792     }
793 
794     if (guest_addr_valid_untagged(addr)) {
795         int page_flags = page_get_flags(addr);
796         if (page_flags & acc_flag) {
797             if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE)
798                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
799                 return TLB_MMIO;
800             }
801             return 0; /* success */
802         }
803         maperr = !(page_flags & PAGE_VALID);
804     } else {
805         maperr = true;
806     }
807 
808     if (nonfault) {
809         return TLB_INVALID_MASK;
810     }
811 
812     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
813 }
814 
815 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
816                        MMUAccessType access_type, int mmu_idx,
817                        bool nonfault, void **phost, uintptr_t ra)
818 {
819     int flags;
820 
821     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
822     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
823     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
824     return flags;
825 }
826 
827 void *probe_access(CPUArchState *env, vaddr addr, int size,
828                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
829 {
830     int flags;
831 
832     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
833     flags = probe_access_internal(env, addr, size, access_type, false, ra);
834     g_assert((flags & ~TLB_MMIO) == 0);
835 
836     return size ? g2h(env_cpu(env), addr) : NULL;
837 }
838 
839 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
840                                         void **hostp)
841 {
842     int flags;
843 
844     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
845     g_assert(flags == 0);
846 
847     if (hostp) {
848         *hostp = g2h_untagged(addr);
849     }
850     return addr;
851 }
852 
853 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 (about
 * 31%).  Therefore, allocate memory for 64 pages at a time for
 * overhead < 1%.
 */
#define TPD_PAGES  64
/* Mask giving the start of the TPD_PAGES-page region of an address. */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)

/* Per-page target data for one region of TPD_PAGES consecutive pages. */
typedef struct TargetPageDataNode {
    /* Used by g_free_rcu() when the node is released. */
    struct rcu_head rcu;
    /* Address range of the region; links the node into targetdata_root. */
    IntervalTreeNode itree;
    /* One slot of TARGET_PAGE_DATA_SIZE bytes for each page. */
    char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned));
} TargetPageDataNode;

/* Interval tree holding every TargetPageDataNode. */
static IntervalTreeRoot targetdata_root;
869 
870 void page_reset_target_data(target_ulong start, target_ulong last)
871 {
872     IntervalTreeNode *n, *next;
873 
874     assert_memory_lock();
875 
876     start &= TARGET_PAGE_MASK;
877     last |= ~TARGET_PAGE_MASK;
878 
879     for (n = interval_tree_iter_first(&targetdata_root, start, last),
880          next = n ? interval_tree_iter_next(n, start, last) : NULL;
881          n != NULL;
882          n = next,
883          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
884         target_ulong n_start, n_last, p_ofs, p_len;
885         TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
886 
887         if (n->start >= start && n->last <= last) {
888             interval_tree_remove(n, &targetdata_root);
889             g_free_rcu(t, rcu);
890             continue;
891         }
892 
893         if (n->start < start) {
894             n_start = start;
895             p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
896         } else {
897             n_start = n->start;
898             p_ofs = 0;
899         }
900         n_last = MIN(last, n->last);
901         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
902 
903         memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE);
904     }
905 }
906 
907 void *page_get_target_data(target_ulong address)
908 {
909     IntervalTreeNode *n;
910     TargetPageDataNode *t;
911     target_ulong page, region;
912 
913     page = address & TARGET_PAGE_MASK;
914     region = address & TBD_MASK;
915 
916     n = interval_tree_iter_first(&targetdata_root, page, page);
917     if (!n) {
918         /*
919          * See util/interval-tree.c re lockless lookups: no false positives
920          * but there are false negatives.  If we find nothing, retry with
921          * the mmap lock acquired.  We also need the lock for the
922          * allocation + insert.
923          */
924         mmap_lock();
925         n = interval_tree_iter_first(&targetdata_root, page, page);
926         if (!n) {
927             t = g_new0(TargetPageDataNode, 1);
928             n = &t->itree;
929             n->start = region;
930             n->last = region | ~TBD_MASK;
931             interval_tree_insert(n, &targetdata_root);
932         }
933         mmap_unlock();
934     }
935 
936     t = container_of(n, TargetPageDataNode, itree);
937     return t->data[(page - region) >> TARGET_PAGE_BITS];
938 }
939 #else
940 void page_reset_target_data(target_ulong start, target_ulong last) { }
941 #endif /* TARGET_PAGE_DATA_SIZE */
942 
943 /* The system-mode versions of these helpers are in cputlb.c.  */
944 
945 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
946                             MemOp mop, uintptr_t ra, MMUAccessType type)
947 {
948     int a_bits = get_alignment_bits(mop);
949     void *ret;
950 
951     /* Enforce guest required alignment.  */
952     if (unlikely(addr & ((1 << a_bits) - 1))) {
953         cpu_loop_exit_sigbus(cpu, addr, type, ra);
954     }
955 
956     ret = g2h(cpu, addr);
957     set_helper_retaddr(ra);
958     return ret;
959 }
960 
961 #include "ldst_atomicity.c.inc"
962 
963 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
964                           uintptr_t ra, MMUAccessType access_type)
965 {
966     void *haddr;
967     uint8_t ret;
968 
969     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
970     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
971     ret = ldub_p(haddr);
972     clear_helper_retaddr();
973     return ret;
974 }
975 
976 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
977                            uintptr_t ra, MMUAccessType access_type)
978 {
979     void *haddr;
980     uint16_t ret;
981     MemOp mop = get_memop(oi);
982 
983     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
984     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
985     ret = load_atom_2(cpu, ra, haddr, mop);
986     clear_helper_retaddr();
987 
988     if (mop & MO_BSWAP) {
989         ret = bswap16(ret);
990     }
991     return ret;
992 }
993 
994 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
995                            uintptr_t ra, MMUAccessType access_type)
996 {
997     void *haddr;
998     uint32_t ret;
999     MemOp mop = get_memop(oi);
1000 
1001     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1002     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1003     ret = load_atom_4(cpu, ra, haddr, mop);
1004     clear_helper_retaddr();
1005 
1006     if (mop & MO_BSWAP) {
1007         ret = bswap32(ret);
1008     }
1009     return ret;
1010 }
1011 
1012 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1013                            uintptr_t ra, MMUAccessType access_type)
1014 {
1015     void *haddr;
1016     uint64_t ret;
1017     MemOp mop = get_memop(oi);
1018 
1019     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1020     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1021     ret = load_atom_8(cpu, ra, haddr, mop);
1022     clear_helper_retaddr();
1023 
1024     if (mop & MO_BSWAP) {
1025         ret = bswap64(ret);
1026     }
1027     return ret;
1028 }
1029 
1030 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1031                           MemOpIdx oi, uintptr_t ra)
1032 {
1033     void *haddr;
1034     Int128 ret;
1035     MemOp mop = get_memop(oi);
1036 
1037     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1038     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1039     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1040     ret = load_atom_16(cpu, ra, haddr, mop);
1041     clear_helper_retaddr();
1042 
1043     if (mop & MO_BSWAP) {
1044         ret = bswap128(ret);
1045     }
1046     return ret;
1047 }
1048 
1049 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1050                        MemOpIdx oi, uintptr_t ra)
1051 {
1052     void *haddr;
1053 
1054     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1055     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1056     stb_p(haddr, val);
1057     clear_helper_retaddr();
1058 }
1059 
1060 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1061                        MemOpIdx oi, uintptr_t ra)
1062 {
1063     void *haddr;
1064     MemOp mop = get_memop(oi);
1065 
1066     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1067     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1068 
1069     if (mop & MO_BSWAP) {
1070         val = bswap16(val);
1071     }
1072     store_atom_2(cpu, ra, haddr, mop, val);
1073     clear_helper_retaddr();
1074 }
1075 
1076 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1077                        MemOpIdx oi, uintptr_t ra)
1078 {
1079     void *haddr;
1080     MemOp mop = get_memop(oi);
1081 
1082     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1083     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1084 
1085     if (mop & MO_BSWAP) {
1086         val = bswap32(val);
1087     }
1088     store_atom_4(cpu, ra, haddr, mop, val);
1089     clear_helper_retaddr();
1090 }
1091 
1092 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1093                        MemOpIdx oi, uintptr_t ra)
1094 {
1095     void *haddr;
1096     MemOp mop = get_memop(oi);
1097 
1098     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1099     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1100 
1101     if (mop & MO_BSWAP) {
1102         val = bswap64(val);
1103     }
1104     store_atom_8(cpu, ra, haddr, mop, val);
1105     clear_helper_retaddr();
1106 }
1107 
1108 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1109                         MemOpIdx oi, uintptr_t ra)
1110 {
1111     void *haddr;
1112     MemOpIdx mop = get_memop(oi);
1113 
1114     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1115     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1116 
1117     if (mop & MO_BSWAP) {
1118         val = bswap128(val);
1119     }
1120     store_atom_16(cpu, ra, haddr, mop, val);
1121     clear_helper_retaddr();
1122 }
1123 
1124 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
1125 {
1126     uint32_t ret;
1127 
1128     set_helper_retaddr(1);
1129     ret = ldub_p(g2h_untagged(ptr));
1130     clear_helper_retaddr();
1131     return ret;
1132 }
1133 
1134 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
1135 {
1136     uint32_t ret;
1137 
1138     set_helper_retaddr(1);
1139     ret = lduw_p(g2h_untagged(ptr));
1140     clear_helper_retaddr();
1141     return ret;
1142 }
1143 
1144 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
1145 {
1146     uint32_t ret;
1147 
1148     set_helper_retaddr(1);
1149     ret = ldl_p(g2h_untagged(ptr));
1150     clear_helper_retaddr();
1151     return ret;
1152 }
1153 
1154 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
1155 {
1156     uint64_t ret;
1157 
1158     set_helper_retaddr(1);
1159     ret = ldq_p(g2h_untagged(ptr));
1160     clear_helper_retaddr();
1161     return ret;
1162 }
1163 
1164 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
1165                          MemOpIdx oi, uintptr_t ra)
1166 {
1167     void *haddr;
1168     uint8_t ret;
1169 
1170     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1171     ret = ldub_p(haddr);
1172     clear_helper_retaddr();
1173     return ret;
1174 }
1175 
1176 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
1177                           MemOpIdx oi, uintptr_t ra)
1178 {
1179     void *haddr;
1180     uint16_t ret;
1181 
1182     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1183     ret = lduw_p(haddr);
1184     clear_helper_retaddr();
1185     if (get_memop(oi) & MO_BSWAP) {
1186         ret = bswap16(ret);
1187     }
1188     return ret;
1189 }
1190 
1191 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
1192                           MemOpIdx oi, uintptr_t ra)
1193 {
1194     void *haddr;
1195     uint32_t ret;
1196 
1197     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1198     ret = ldl_p(haddr);
1199     clear_helper_retaddr();
1200     if (get_memop(oi) & MO_BSWAP) {
1201         ret = bswap32(ret);
1202     }
1203     return ret;
1204 }
1205 
1206 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
1207                           MemOpIdx oi, uintptr_t ra)
1208 {
1209     void *haddr;
1210     uint64_t ret;
1211 
1212     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
1213     ret = ldq_p(haddr);
1214     clear_helper_retaddr();
1215     if (get_memop(oi) & MO_BSWAP) {
1216         ret = bswap64(ret);
1217     }
1218     return ret;
1219 }
1220 
1221 #include "ldst_common.c.inc"
1222 
1223 /*
1224  * Do not allow unaligned operations to proceed.  Return the host address.
1225  */
1226 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1227                                int size, uintptr_t retaddr)
1228 {
1229     MemOp mop = get_memop(oi);
1230     int a_bits = get_alignment_bits(mop);
1231     void *ret;
1232 
1233     /* Enforce guest required alignment.  */
1234     if (unlikely(addr & ((1 << a_bits) - 1))) {
1235         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1236     }
1237 
1238     /* Enforce qemu required alignment.  */
1239     if (unlikely(addr & (size - 1))) {
1240         cpu_loop_exit_atomic(cpu, retaddr);
1241     }
1242 
1243     ret = g2h(cpu, addr);
1244     set_helper_retaddr(retaddr);
1245     return ret;
1246 }
1247 
1248 #include "atomic_common.c.inc"
1249 
/*
 * First set of functions passes in OI and RETADDR.
 * This makes them callable from other helpers.
 */

/*
 * ATOMIC_NAME(X) pastes together the identifier
 * cpu_atomic_<X><SUFFIX><END>_mmu.  SUFFIX and END are not defined in
 * this part of the file -- presumably atomic_template.h provides them
 * for each DATA_SIZE; confirm there.
 */
#define ATOMIC_NAME(X) \
    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
/*
 * Cleanup hook for the template: calls clear_helper_retaddr(), the
 * counterpart of the set_helper_retaddr() done in atomic_mmu_lookup().
 */
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)

/*
 * Include the template once per access size.  DATA_SIZE is defined anew
 * before every inclusion without an intervening #undef here, so the
 * template is presumably responsible for undefining it -- confirm in
 * atomic_template.h.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* The 8-byte variants are only built when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/*
 * The 16-byte variants are only built when CONFIG_ATOMIC128 is defined
 * or HAVE_CMPXCHG128 evaluates to non-zero.
 */
#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
1277