xref: /qemu/accel/tcg/cputlb.c (revision 5aa62804)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #include "trace-root.h"
38 #include "trace/mem.h"
39 #ifdef CONFIG_PLUGIN
40 #include "qemu/plugin-memory.h"
41 #endif
42 
/*
 * Debug switches.  Define DEBUG_TLB to turn on tlb_debug() output (sent to
 * stderr); additionally define DEBUG_TLB_LOG to route that output to the
 * CPU_LOG_MMU log target instead.
 */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
#else
# define DEBUG_TLB_GATE 0
#endif

/* Logging to CPU_LOG_MMU requires both DEBUG_TLB and DEBUG_TLB_LOG. */
#if defined(DEBUG_TLB) && defined(DEBUG_TLB_LOG)
# define DEBUG_TLB_LOG_GATE 1
#else
# define DEBUG_TLB_LOG_GATE 0
#endif

/*
 * printf-style debug output, prefixed with the calling function's name.
 * The gates are compile-time constants tested with a plain "if", so both
 * branches are always parsed regardless of the debug configuration.
 */
#define tlb_debug(fmt, ...)                                           \
    do {                                                              \
        if (DEBUG_TLB_LOG_GATE) {                                     \
            qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__,          \
                          ## __VA_ARGS__);                            \
        } else if (DEBUG_TLB_GATE) {                                  \
            fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__);    \
        }                                                             \
    } while (0)
67 
/*
 * When DEBUG_TLB is enabled, assert that @cpu is either not yet created
 * or is the CPU of the calling thread.  Expands to a no-op check otherwise.
 */
#define assert_cpu_is_self(cpu)                                       \
    do {                                                              \
        if (DEBUG_TLB_GATE) {                                         \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));       \
        }                                                             \
    } while (0)
73 
74 /* run_on_cpu_data.target_ptr should always be big enough for a
75  * target_ulong even on 32 bit builds */
76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
77 
78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
79  */
80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
82 
83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
84 {
85     return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
86 }
87 
88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
89 {
90     return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
91 }
92 
93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
94                              size_t max_entries)
95 {
96     desc->window_begin_ns = ns;
97     desc->window_max_entries = max_entries;
98 }
99 
100 /**
101  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
102  * @desc: The CPUTLBDesc portion of the TLB
103  * @fast: The CPUTLBDescFast portion of the same TLB
104  *
105  * Called with tlb_lock_held.
106  *
107  * We have two main constraints when resizing a TLB: (1) we only resize it
108  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
109  * the array or unnecessarily flushing it), which means we do not control how
110  * frequently the resizing can occur; (2) we don't have access to the guest's
111  * future scheduling decisions, and therefore have to decide the magnitude of
112  * the resize based on past observations.
113  *
114  * In general, a memory-hungry process can benefit greatly from an appropriately
115  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
116  * we just have to make the TLB as large as possible; while an oversized TLB
117  * results in minimal TLB miss rates, it also takes longer to be flushed
118  * (flushes can be _very_ frequent), and the reduced locality can also hurt
119  * performance.
120  *
121  * To achieve near-optimal performance for all kinds of workloads, we:
122  *
123  * 1. Aggressively increase the size of the TLB when the use rate of the
124  * TLB being flushed is high, since it is likely that in the near future this
125  * memory-hungry process will execute again, and its memory hungriness will
126  * probably be similar.
127  *
128  * 2. Slowly reduce the size of the TLB as the use rate declines over a
129  * reasonably large time window. The rationale is that if in such a time window
130  * we have not observed a high TLB use rate, it is likely that we won't observe
131  * it in the near future. In that case, once a time window expires we downsize
132  * the TLB to match the maximum use rate observed in the window.
133  *
134  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
135  * since in that range performance is likely near-optimal. Recall that the TLB
136  * is direct mapped, so we want the use rate to be low (or at least not too
137  * high), since otherwise we are likely to have a significant amount of
138  * conflict misses.
139  */
140 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
141                                   int64_t now)
142 {
143     size_t old_size = tlb_n_entries(fast);
144     size_t rate;
145     size_t new_size = old_size;
146     int64_t window_len_ms = 100;
147     int64_t window_len_ns = window_len_ms * 1000 * 1000;
148     bool window_expired = now > desc->window_begin_ns + window_len_ns;
149 
150     if (desc->n_used_entries > desc->window_max_entries) {
151         desc->window_max_entries = desc->n_used_entries;
152     }
153     rate = desc->window_max_entries * 100 / old_size;
154 
155     if (rate > 70) {
156         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
157     } else if (rate < 30 && window_expired) {
158         size_t ceil = pow2ceil(desc->window_max_entries);
159         size_t expected_rate = desc->window_max_entries * 100 / ceil;
160 
161         /*
162          * Avoid undersizing when the max number of entries seen is just below
163          * a pow2. For instance, if max_entries == 1025, the expected use rate
164          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
165          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
166          * later. Thus, make sure that the expected use rate remains below 70%.
167          * (and since we double the size, that means the lowest rate we'd
168          * expect to get is 35%, which is still in the 30-70% range where
169          * we consider that the size is appropriate.)
170          */
171         if (expected_rate > 70) {
172             ceil *= 2;
173         }
174         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
175     }
176 
177     if (new_size == old_size) {
178         if (window_expired) {
179             tlb_window_reset(desc, now, desc->n_used_entries);
180         }
181         return;
182     }
183 
184     g_free(fast->table);
185     g_free(desc->iotlb);
186 
187     tlb_window_reset(desc, now, 0);
188     /* desc->n_used_entries is cleared by the caller */
189     fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
190     fast->table = g_try_new(CPUTLBEntry, new_size);
191     desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
192 
193     /*
194      * If the allocations fail, try smaller sizes. We just freed some
195      * memory, so going back to half of new_size has a good chance of working.
196      * Increased memory pressure elsewhere in the system might cause the
197      * allocations to fail though, so we progressively reduce the allocation
198      * size, aborting if we cannot even allocate the smallest TLB we support.
199      */
200     while (fast->table == NULL || desc->iotlb == NULL) {
201         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
202             error_report("%s: %s", __func__, strerror(errno));
203             abort();
204         }
205         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
206         fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
207 
208         g_free(fast->table);
209         g_free(desc->iotlb);
210         fast->table = g_try_new(CPUTLBEntry, new_size);
211         desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
212     }
213 }
214 
215 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
216 {
217     desc->n_used_entries = 0;
218     desc->large_page_addr = -1;
219     desc->large_page_mask = -1;
220     desc->vindex = 0;
221     memset(fast->table, -1, sizeof_tlb(fast));
222     memset(desc->vtable, -1, sizeof(desc->vtable));
223 }
224 
225 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
226                                         int64_t now)
227 {
228     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
229     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
230 
231     tlb_mmu_resize_locked(desc, fast, now);
232     tlb_mmu_flush_locked(desc, fast);
233 }
234 
235 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
236 {
237     size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
238 
239     tlb_window_reset(desc, now, 0);
240     desc->n_used_entries = 0;
241     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
242     fast->table = g_new(CPUTLBEntry, n_entries);
243     desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
244     tlb_mmu_flush_locked(desc, fast);
245 }
246 
247 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
248 {
249     env_tlb(env)->d[mmu_idx].n_used_entries++;
250 }
251 
252 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
253 {
254     env_tlb(env)->d[mmu_idx].n_used_entries--;
255 }
256 
257 void tlb_init(CPUState *cpu)
258 {
259     CPUArchState *env = cpu->env_ptr;
260     int64_t now = get_clock_realtime();
261     int i;
262 
263     qemu_spin_init(&env_tlb(env)->c.lock);
264 
265     /* All tlbs are initialized flushed. */
266     env_tlb(env)->c.dirty = 0;
267 
268     for (i = 0; i < NB_MMU_MODES; i++) {
269         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
270     }
271 }
272 
273 /* flush_all_helper: run fn across all cpus
274  *
275  * If the wait flag is set then the src cpu's helper will be queued as
276  * "safe" work and the loop exited creating a synchronisation point
277  * where all queued work will be finished before execution starts
278  * again.
279  */
280 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
281                              run_on_cpu_data d)
282 {
283     CPUState *cpu;
284 
285     CPU_FOREACH(cpu) {
286         if (cpu != src) {
287             async_run_on_cpu(cpu, fn, d);
288         }
289     }
290 }
291 
292 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
293 {
294     CPUState *cpu;
295     size_t full = 0, part = 0, elide = 0;
296 
297     CPU_FOREACH(cpu) {
298         CPUArchState *env = cpu->env_ptr;
299 
300         full += atomic_read(&env_tlb(env)->c.full_flush_count);
301         part += atomic_read(&env_tlb(env)->c.part_flush_count);
302         elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
303     }
304     *pfull = full;
305     *ppart = part;
306     *pelide = elide;
307 }
308 
309 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
310 {
311     CPUArchState *env = cpu->env_ptr;
312     uint16_t asked = data.host_int;
313     uint16_t all_dirty, work, to_clean;
314     int64_t now = get_clock_realtime();
315 
316     assert_cpu_is_self(cpu);
317 
318     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
319 
320     qemu_spin_lock(&env_tlb(env)->c.lock);
321 
322     all_dirty = env_tlb(env)->c.dirty;
323     to_clean = asked & all_dirty;
324     all_dirty &= ~to_clean;
325     env_tlb(env)->c.dirty = all_dirty;
326 
327     for (work = to_clean; work != 0; work &= work - 1) {
328         int mmu_idx = ctz32(work);
329         tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
330     }
331 
332     qemu_spin_unlock(&env_tlb(env)->c.lock);
333 
334     cpu_tb_jmp_cache_clear(cpu);
335 
336     if (to_clean == ALL_MMUIDX_BITS) {
337         atomic_set(&env_tlb(env)->c.full_flush_count,
338                    env_tlb(env)->c.full_flush_count + 1);
339     } else {
340         atomic_set(&env_tlb(env)->c.part_flush_count,
341                    env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
342         if (to_clean != asked) {
343             atomic_set(&env_tlb(env)->c.elide_flush_count,
344                        env_tlb(env)->c.elide_flush_count +
345                        ctpop16(asked & ~to_clean));
346         }
347     }
348 }
349 
350 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
351 {
352     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
353 
354     if (cpu->created && !qemu_cpu_is_self(cpu)) {
355         async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
356                          RUN_ON_CPU_HOST_INT(idxmap));
357     } else {
358         tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
359     }
360 }
361 
362 void tlb_flush(CPUState *cpu)
363 {
364     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
365 }
366 
367 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
368 {
369     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
370 
371     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
372 
373     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
374     fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
375 }
376 
377 void tlb_flush_all_cpus(CPUState *src_cpu)
378 {
379     tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
380 }
381 
382 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
383 {
384     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
385 
386     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
387 
388     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
389     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
390 }
391 
392 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
393 {
394     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
395 }
396 
397 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
398                                         target_ulong page)
399 {
400     return tlb_hit_page(tlb_entry->addr_read, page) ||
401            tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
402            tlb_hit_page(tlb_entry->addr_code, page);
403 }
404 
405 /**
406  * tlb_entry_is_empty - return true if the entry is not in use
407  * @te: pointer to CPUTLBEntry
408  */
409 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
410 {
411     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
412 }
413 
414 /* Called with tlb_c.lock held */
415 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
416                                           target_ulong page)
417 {
418     if (tlb_hit_page_anyprot(tlb_entry, page)) {
419         memset(tlb_entry, -1, sizeof(*tlb_entry));
420         return true;
421     }
422     return false;
423 }
424 
425 /* Called with tlb_c.lock held */
426 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
427                                               target_ulong page)
428 {
429     CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
430     int k;
431 
432     assert_cpu_is_self(env_cpu(env));
433     for (k = 0; k < CPU_VTLB_SIZE; k++) {
434         if (tlb_flush_entry_locked(&d->vtable[k], page)) {
435             tlb_n_used_entries_dec(env, mmu_idx);
436         }
437     }
438 }
439 
440 static void tlb_flush_page_locked(CPUArchState *env, int midx,
441                                   target_ulong page)
442 {
443     target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
444     target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
445 
446     /* Check if we need to flush due to large pages.  */
447     if ((page & lp_mask) == lp_addr) {
448         tlb_debug("forcing full flush midx %d ("
449                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
450                   midx, lp_addr, lp_mask);
451         tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
452     } else {
453         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
454             tlb_n_used_entries_dec(env, midx);
455         }
456         tlb_flush_vtlb_page_locked(env, midx, page);
457     }
458 }
459 
460 /**
461  * tlb_flush_page_by_mmuidx_async_0:
462  * @cpu: cpu on which to flush
463  * @addr: page of virtual address to flush
464  * @idxmap: set of mmu_idx to flush
465  *
466  * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
467  * at @addr from the tlbs indicated by @idxmap from @cpu.
468  */
469 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
470                                              target_ulong addr,
471                                              uint16_t idxmap)
472 {
473     CPUArchState *env = cpu->env_ptr;
474     int mmu_idx;
475 
476     assert_cpu_is_self(cpu);
477 
478     tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
479 
480     qemu_spin_lock(&env_tlb(env)->c.lock);
481     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
482         if ((idxmap >> mmu_idx) & 1) {
483             tlb_flush_page_locked(env, mmu_idx, addr);
484         }
485     }
486     qemu_spin_unlock(&env_tlb(env)->c.lock);
487 
488     tb_flush_jmp_cache(cpu, addr);
489 }
490 
491 /**
492  * tlb_flush_page_by_mmuidx_async_1:
493  * @cpu: cpu on which to flush
494  * @data: encoded addr + idxmap
495  *
496  * Helper for tlb_flush_page_by_mmuidx and friends, called through
497  * async_run_on_cpu.  The idxmap parameter is encoded in the page
498  * offset of the target_ptr field.  This limits the set of mmu_idx
499  * that can be passed via this method.
500  */
501 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
502                                              run_on_cpu_data data)
503 {
504     target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
505     target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
506     uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
507 
508     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
509 }
510 
511 typedef struct {
512     target_ulong addr;
513     uint16_t idxmap;
514 } TLBFlushPageByMMUIdxData;
515 
516 /**
517  * tlb_flush_page_by_mmuidx_async_2:
518  * @cpu: cpu on which to flush
519  * @data: allocated addr + idxmap
520  *
521  * Helper for tlb_flush_page_by_mmuidx and friends, called through
522  * async_run_on_cpu.  The addr+idxmap parameters are stored in a
523  * TLBFlushPageByMMUIdxData structure that has been allocated
524  * specifically for this helper.  Free the structure when done.
525  */
526 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
527                                              run_on_cpu_data data)
528 {
529     TLBFlushPageByMMUIdxData *d = data.host_ptr;
530 
531     tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
532     g_free(d);
533 }
534 
535 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
536 {
537     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
538 
539     /* This should already be page aligned */
540     addr &= TARGET_PAGE_MASK;
541 
542     if (qemu_cpu_is_self(cpu)) {
543         tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
544     } else if (idxmap < TARGET_PAGE_SIZE) {
545         /*
546          * Most targets have only a few mmu_idx.  In the case where
547          * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
548          * allocating memory for this operation.
549          */
550         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
551                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
552     } else {
553         TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
554 
555         /* Otherwise allocate a structure, freed by the worker.  */
556         d->addr = addr;
557         d->idxmap = idxmap;
558         async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
559                          RUN_ON_CPU_HOST_PTR(d));
560     }
561 }
562 
563 void tlb_flush_page(CPUState *cpu, target_ulong addr)
564 {
565     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
566 }
567 
568 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
569                                        uint16_t idxmap)
570 {
571     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
572 
573     /* This should already be page aligned */
574     addr &= TARGET_PAGE_MASK;
575 
576     /*
577      * Allocate memory to hold addr+idxmap only when needed.
578      * See tlb_flush_page_by_mmuidx for details.
579      */
580     if (idxmap < TARGET_PAGE_SIZE) {
581         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
582                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
583     } else {
584         CPUState *dst_cpu;
585 
586         /* Allocate a separate data block for each destination cpu.  */
587         CPU_FOREACH(dst_cpu) {
588             if (dst_cpu != src_cpu) {
589                 TLBFlushPageByMMUIdxData *d
590                     = g_new(TLBFlushPageByMMUIdxData, 1);
591 
592                 d->addr = addr;
593                 d->idxmap = idxmap;
594                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
595                                  RUN_ON_CPU_HOST_PTR(d));
596             }
597         }
598     }
599 
600     tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
601 }
602 
603 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
604 {
605     tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
606 }
607 
608 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
609                                               target_ulong addr,
610                                               uint16_t idxmap)
611 {
612     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
613 
614     /* This should already be page aligned */
615     addr &= TARGET_PAGE_MASK;
616 
617     /*
618      * Allocate memory to hold addr+idxmap only when needed.
619      * See tlb_flush_page_by_mmuidx for details.
620      */
621     if (idxmap < TARGET_PAGE_SIZE) {
622         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
623                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
624         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
625                               RUN_ON_CPU_TARGET_PTR(addr | idxmap));
626     } else {
627         CPUState *dst_cpu;
628         TLBFlushPageByMMUIdxData *d;
629 
630         /* Allocate a separate data block for each destination cpu.  */
631         CPU_FOREACH(dst_cpu) {
632             if (dst_cpu != src_cpu) {
633                 d = g_new(TLBFlushPageByMMUIdxData, 1);
634                 d->addr = addr;
635                 d->idxmap = idxmap;
636                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
637                                  RUN_ON_CPU_HOST_PTR(d));
638             }
639         }
640 
641         d = g_new(TLBFlushPageByMMUIdxData, 1);
642         d->addr = addr;
643         d->idxmap = idxmap;
644         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
645                               RUN_ON_CPU_HOST_PTR(d));
646     }
647 }
648 
649 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
650 {
651     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
652 }
653 
654 /* update the TLBs so that writes to code in the virtual page 'addr'
655    can be detected */
656 void tlb_protect_code(ram_addr_t ram_addr)
657 {
658     cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
659                                              DIRTY_MEMORY_CODE);
660 }
661 
662 /* update the TLB so that writes in physical page 'phys_addr' are no longer
663    tested for self modifying code */
664 void tlb_unprotect_code(ram_addr_t ram_addr)
665 {
666     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
667 }
668 
669 
670 /*
671  * Dirty write flag handling
672  *
673  * When the TCG code writes to a location it looks up the address in
674  * the TLB and uses that data to compute the final address. If any of
675  * the lower bits of the address are set then the slow path is forced.
676  * There are a number of reasons to do this but for normal RAM the
677  * most usual is detecting writes to code regions which may invalidate
678  * generated code.
679  *
680  * Other vCPUs might be reading their TLBs during guest execution, so we update
681  * te->addr_write with atomic_set. We don't need to worry about this for
682  * oversized guests as MTTCG is disabled for them.
683  *
684  * Called with tlb_c.lock held.
685  */
686 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
687                                          uintptr_t start, uintptr_t length)
688 {
689     uintptr_t addr = tlb_entry->addr_write;
690 
691     if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
692                  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
693         addr &= TARGET_PAGE_MASK;
694         addr += tlb_entry->addend;
695         if ((addr - start) < length) {
696 #if TCG_OVERSIZED_GUEST
697             tlb_entry->addr_write |= TLB_NOTDIRTY;
698 #else
699             atomic_set(&tlb_entry->addr_write,
700                        tlb_entry->addr_write | TLB_NOTDIRTY);
701 #endif
702         }
703     }
704 }
705 
706 /*
707  * Called with tlb_c.lock held.
708  * Called only from the vCPU context, i.e. the TLB's owner thread.
709  */
710 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
711 {
712     *d = *s;
713 }
714 
715 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
716  * the target vCPU).
717  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
718  * thing actually updated is the target TLB entry ->addr_write flags.
719  */
720 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
721 {
722     CPUArchState *env;
723 
724     int mmu_idx;
725 
726     env = cpu->env_ptr;
727     qemu_spin_lock(&env_tlb(env)->c.lock);
728     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
729         unsigned int i;
730         unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
731 
732         for (i = 0; i < n; i++) {
733             tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
734                                          start1, length);
735         }
736 
737         for (i = 0; i < CPU_VTLB_SIZE; i++) {
738             tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
739                                          start1, length);
740         }
741     }
742     qemu_spin_unlock(&env_tlb(env)->c.lock);
743 }
744 
745 /* Called with tlb_c.lock held */
746 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
747                                          target_ulong vaddr)
748 {
749     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
750         tlb_entry->addr_write = vaddr;
751     }
752 }
753 
754 /* update the TLB corresponding to virtual page vaddr
755    so that it is no longer dirty */
756 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
757 {
758     CPUArchState *env = cpu->env_ptr;
759     int mmu_idx;
760 
761     assert_cpu_is_self(cpu);
762 
763     vaddr &= TARGET_PAGE_MASK;
764     qemu_spin_lock(&env_tlb(env)->c.lock);
765     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
766         tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
767     }
768 
769     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
770         int k;
771         for (k = 0; k < CPU_VTLB_SIZE; k++) {
772             tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
773         }
774     }
775     qemu_spin_unlock(&env_tlb(env)->c.lock);
776 }
777 
778 /* Our TLB does not support large pages, so remember the area covered by
779    large pages and trigger a full TLB flush if these are invalidated.  */
780 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
781                                target_ulong vaddr, target_ulong size)
782 {
783     target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
784     target_ulong lp_mask = ~(size - 1);
785 
786     if (lp_addr == (target_ulong)-1) {
787         /* No previous large page.  */
788         lp_addr = vaddr;
789     } else {
790         /* Extend the existing region to include the new page.
791            This is a compromise between unnecessary flushes and
792            the cost of maintaining a full variable size TLB.  */
793         lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
794         while (((lp_addr ^ vaddr) & lp_mask) != 0) {
795             lp_mask <<= 1;
796         }
797     }
798     env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
799     env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
800 }
801 
802 /* Add a new TLB entry. At most one entry for a given virtual address
803  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
804  * supplied size is only used by tlb_flush_page.
805  *
806  * Called from TCG-generated code, which is under an RCU read-side
807  * critical section.
808  */
809 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
810                              hwaddr paddr, MemTxAttrs attrs, int prot,
811                              int mmu_idx, target_ulong size)
812 {
813     CPUArchState *env = cpu->env_ptr;
814     CPUTLB *tlb = env_tlb(env);
815     CPUTLBDesc *desc = &tlb->d[mmu_idx];
816     MemoryRegionSection *section;
817     unsigned int index;
818     target_ulong address;
819     target_ulong write_address;
820     uintptr_t addend;
821     CPUTLBEntry *te, tn;
822     hwaddr iotlb, xlat, sz, paddr_page;
823     target_ulong vaddr_page;
824     int asidx = cpu_asidx_from_attrs(cpu, attrs);
825     int wp_flags;
826     bool is_ram, is_romd;
827 
828     assert_cpu_is_self(cpu);
829 
830     if (size <= TARGET_PAGE_SIZE) {
831         sz = TARGET_PAGE_SIZE;
832     } else {
833         tlb_add_large_page(env, mmu_idx, vaddr, size);
834         sz = size;
835     }
836     vaddr_page = vaddr & TARGET_PAGE_MASK;
837     paddr_page = paddr & TARGET_PAGE_MASK;
838 
839     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
840                                                 &xlat, &sz, attrs, &prot);
841     assert(sz >= TARGET_PAGE_SIZE);
842 
843     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
844               " prot=%x idx=%d\n",
845               vaddr, paddr, prot, mmu_idx);
846 
847     address = vaddr_page;
848     if (size < TARGET_PAGE_SIZE) {
849         /* Repeat the MMU check and TLB fill on every access.  */
850         address |= TLB_INVALID_MASK;
851     }
852     if (attrs.byte_swap) {
853         address |= TLB_BSWAP;
854     }
855 
856     is_ram = memory_region_is_ram(section->mr);
857     is_romd = memory_region_is_romd(section->mr);
858 
859     if (is_ram || is_romd) {
860         /* RAM and ROMD both have associated host memory. */
861         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
862     } else {
863         /* I/O does not; force the host address to NULL. */
864         addend = 0;
865     }
866 
867     write_address = address;
868     if (is_ram) {
869         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
870         /*
871          * Computing is_clean is expensive; avoid all that unless
872          * the page is actually writable.
873          */
874         if (prot & PAGE_WRITE) {
875             if (section->readonly) {
876                 write_address |= TLB_DISCARD_WRITE;
877             } else if (cpu_physical_memory_is_clean(iotlb)) {
878                 write_address |= TLB_NOTDIRTY;
879             }
880         }
881     } else {
882         /* I/O or ROMD */
883         iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
884         /*
885          * Writes to romd devices must go through MMIO to enable write.
886          * Reads to romd devices go through the ram_ptr found above,
887          * but of course reads to I/O must go through MMIO.
888          */
889         write_address |= TLB_MMIO;
890         if (!is_romd) {
891             address = write_address;
892         }
893     }
894 
895     wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
896                                               TARGET_PAGE_SIZE);
897 
898     index = tlb_index(env, mmu_idx, vaddr_page);
899     te = tlb_entry(env, mmu_idx, vaddr_page);
900 
901     /*
902      * Hold the TLB lock for the rest of the function. We could acquire/release
903      * the lock several times in the function, but it is faster to amortize the
904      * acquisition cost by acquiring it just once. Note that this leads to
905      * a longer critical section, but this is not a concern since the TLB lock
906      * is unlikely to be contended.
907      */
908     qemu_spin_lock(&tlb->c.lock);
909 
910     /* Note that the tlb is no longer clean.  */
911     tlb->c.dirty |= 1 << mmu_idx;
912 
913     /* Make sure there's no cached translation for the new page.  */
914     tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
915 
916     /*
917      * Only evict the old entry to the victim tlb if it's for a
918      * different page; otherwise just overwrite the stale data.
919      */
920     if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
921         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
922         CPUTLBEntry *tv = &desc->vtable[vidx];
923 
924         /* Evict the old entry into the victim tlb.  */
925         copy_tlb_helper_locked(tv, te);
926         desc->viotlb[vidx] = desc->iotlb[index];
927         tlb_n_used_entries_dec(env, mmu_idx);
928     }
929 
930     /* refill the tlb */
931     /*
932      * At this point iotlb contains a physical section number in the lower
933      * TARGET_PAGE_BITS, and either
934      *  + the ram_addr_t of the page base of the target RAM (RAM)
935      *  + the offset within section->mr of the page base (I/O, ROMD)
936      * We subtract the vaddr_page (which is page aligned and thus won't
937      * disturb the low bits) to give an offset which can be added to the
938      * (non-page-aligned) vaddr of the eventual memory access to get
939      * the MemoryRegion offset for the access. Note that the vaddr we
940      * subtract here is that of the page base, and not the same as the
941      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
942      */
943     desc->iotlb[index].addr = iotlb - vaddr_page;
944     desc->iotlb[index].attrs = attrs;
945 
946     /* Now calculate the new entry */
947     tn.addend = addend - vaddr_page;
948     if (prot & PAGE_READ) {
949         tn.addr_read = address;
950         if (wp_flags & BP_MEM_READ) {
951             tn.addr_read |= TLB_WATCHPOINT;
952         }
953     } else {
954         tn.addr_read = -1;
955     }
956 
957     if (prot & PAGE_EXEC) {
958         tn.addr_code = address;
959     } else {
960         tn.addr_code = -1;
961     }
962 
963     tn.addr_write = -1;
964     if (prot & PAGE_WRITE) {
965         tn.addr_write = write_address;
966         if (prot & PAGE_WRITE_INV) {
967             tn.addr_write |= TLB_INVALID_MASK;
968         }
969         if (wp_flags & BP_MEM_WRITE) {
970             tn.addr_write |= TLB_WATCHPOINT;
971         }
972     }
973 
974     copy_tlb_helper_locked(te, &tn);
975     tlb_n_used_entries_inc(env, mmu_idx);
976     qemu_spin_unlock(&tlb->c.lock);
977 }
978 
979 /* Add a new TLB entry, but without specifying the memory
980  * transaction attributes to be used.
981  */
982 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
983                   hwaddr paddr, int prot,
984                   int mmu_idx, target_ulong size)
985 {
986     tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
987                             prot, mmu_idx, size);
988 }
989 
990 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
991 {
992     ram_addr_t ram_addr;
993 
994     ram_addr = qemu_ram_addr_from_host(ptr);
995     if (ram_addr == RAM_ADDR_INVALID) {
996         error_report("Bad ram pointer %p", ptr);
997         abort();
998     }
999     return ram_addr;
1000 }
1001 
1002 /*
1003  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1004  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1005  * be discarded and looked up again (e.g. via tlb_entry()).
1006  */
1007 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1008                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1009 {
1010     CPUClass *cc = CPU_GET_CLASS(cpu);
1011     bool ok;
1012 
1013     /*
1014      * This is not a probe, so only valid return is success; failure
1015      * should result in exception + longjmp to the cpu loop.
1016      */
1017     ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
1018     assert(ok);
1019 }
1020 
1021 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1022                          int mmu_idx, target_ulong addr, uintptr_t retaddr,
1023                          MMUAccessType access_type, MemOp op)
1024 {
1025     CPUState *cpu = env_cpu(env);
1026     hwaddr mr_offset;
1027     MemoryRegionSection *section;
1028     MemoryRegion *mr;
1029     uint64_t val;
1030     bool locked = false;
1031     MemTxResult r;
1032 
1033     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1034     mr = section->mr;
1035     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1036     cpu->mem_io_pc = retaddr;
1037     if (!cpu->can_do_io) {
1038         cpu_io_recompile(cpu, retaddr);
1039     }
1040 
1041     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1042         qemu_mutex_lock_iothread();
1043         locked = true;
1044     }
1045     r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1046     if (r != MEMTX_OK) {
1047         hwaddr physaddr = mr_offset +
1048             section->offset_within_address_space -
1049             section->offset_within_region;
1050 
1051         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1052                                mmu_idx, iotlbentry->attrs, r, retaddr);
1053     }
1054     if (locked) {
1055         qemu_mutex_unlock_iothread();
1056     }
1057 
1058     return val;
1059 }
1060 
1061 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1062                       int mmu_idx, uint64_t val, target_ulong addr,
1063                       uintptr_t retaddr, MemOp op)
1064 {
1065     CPUState *cpu = env_cpu(env);
1066     hwaddr mr_offset;
1067     MemoryRegionSection *section;
1068     MemoryRegion *mr;
1069     bool locked = false;
1070     MemTxResult r;
1071 
1072     section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1073     mr = section->mr;
1074     mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1075     if (!cpu->can_do_io) {
1076         cpu_io_recompile(cpu, retaddr);
1077     }
1078     cpu->mem_io_pc = retaddr;
1079 
1080     if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1081         qemu_mutex_lock_iothread();
1082         locked = true;
1083     }
1084     r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1085     if (r != MEMTX_OK) {
1086         hwaddr physaddr = mr_offset +
1087             section->offset_within_address_space -
1088             section->offset_within_region;
1089 
1090         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1091                                MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1092                                retaddr);
1093     }
1094     if (locked) {
1095         qemu_mutex_unlock_iothread();
1096     }
1097 }
1098 
1099 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1100 {
1101 #if TCG_OVERSIZED_GUEST
1102     return *(target_ulong *)((uintptr_t)entry + ofs);
1103 #else
1104     /* ofs might correspond to .addr_write, so use atomic_read */
1105     return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
1106 #endif
1107 }
1108 
1109 /* Return true if ADDR is present in the victim tlb, and has been copied
1110    back to the main tlb.  */
1111 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1112                            size_t elt_ofs, target_ulong page)
1113 {
1114     size_t vidx;
1115 
1116     assert_cpu_is_self(env_cpu(env));
1117     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1118         CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1119         target_ulong cmp;
1120 
1121         /* elt_ofs might correspond to .addr_write, so use atomic_read */
1122 #if TCG_OVERSIZED_GUEST
1123         cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1124 #else
1125         cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1126 #endif
1127 
1128         if (cmp == page) {
1129             /* Found entry in victim tlb, swap tlb and iotlb.  */
1130             CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1131 
1132             qemu_spin_lock(&env_tlb(env)->c.lock);
1133             copy_tlb_helper_locked(&tmptlb, tlb);
1134             copy_tlb_helper_locked(tlb, vtlb);
1135             copy_tlb_helper_locked(vtlb, &tmptlb);
1136             qemu_spin_unlock(&env_tlb(env)->c.lock);
1137 
1138             CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1139             CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1140             tmpio = *io; *io = *vio; *vio = tmpio;
1141             return true;
1142         }
1143     }
1144     return false;
1145 }
1146 
/*
 * Shorthand for victim_tlb_hit().  The expansion picks up `env`,
 * `mmu_idx` and `index` from the scope it is used in; TY names the
 * CPUTLBEntry comparator field and ADDR is masked down to its page.
 */
#define VICTIM_TLB_HIT(TY, ADDR)                                  \
    victim_tlb_hit(env, mmu_idx, index,                           \
                   offsetof(CPUTLBEntry, TY),                     \
                   (ADDR) & TARGET_PAGE_MASK)
1151 
1152 /*
1153  * Return a ram_addr_t for the virtual address for execution.
1154  *
1155  * Return -1 if we can't translate and execute from an entire page
1156  * of RAM.  This will force us to execute by loading and translating
1157  * one insn at a time, without caching.
1158  *
1159  * NOTE: This function will trigger an exception if the page is
1160  * not executable.
1161  */
1162 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1163                                         void **hostp)
1164 {
1165     uintptr_t mmu_idx = cpu_mmu_index(env, true);
1166     uintptr_t index = tlb_index(env, mmu_idx, addr);
1167     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1168     void *p;
1169 
1170     if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1171         if (!VICTIM_TLB_HIT(addr_code, addr)) {
1172             tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1173             index = tlb_index(env, mmu_idx, addr);
1174             entry = tlb_entry(env, mmu_idx, addr);
1175 
1176             if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1177                 /*
1178                  * The MMU protection covers a smaller range than a target
1179                  * page, so we must redo the MMU check for every insn.
1180                  */
1181                 return -1;
1182             }
1183         }
1184         assert(tlb_hit(entry->addr_code, addr));
1185     }
1186 
1187     if (unlikely(entry->addr_code & TLB_MMIO)) {
1188         /* The region is not backed by RAM.  */
1189         if (hostp) {
1190             *hostp = NULL;
1191         }
1192         return -1;
1193     }
1194 
1195     p = (void *)((uintptr_t)addr + entry->addend);
1196     if (hostp) {
1197         *hostp = p;
1198     }
1199     return qemu_ram_addr_from_host_nofail(p);
1200 }
1201 
1202 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1203 {
1204     return get_page_addr_code_hostp(env, addr, NULL);
1205 }
1206 
1207 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1208                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1209 {
1210     ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1211 
1212     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1213 
1214     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1215         struct page_collection *pages
1216             = page_collection_lock(ram_addr, ram_addr + size);
1217         tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1218         page_collection_unlock(pages);
1219     }
1220 
1221     /*
1222      * Set both VGA and migration bits for simplicity and to remove
1223      * the notdirty callback faster.
1224      */
1225     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1226 
1227     /* We remove the notdirty callback only if the code has been flushed. */
1228     if (!cpu_physical_memory_is_clean(ram_addr)) {
1229         trace_memory_notdirty_set_dirty(mem_vaddr);
1230         tlb_set_dirty(cpu, mem_vaddr);
1231     }
1232 }
1233 
1234 static int probe_access_internal(CPUArchState *env, target_ulong addr,
1235                                  int fault_size, MMUAccessType access_type,
1236                                  int mmu_idx, bool nonfault,
1237                                  void **phost, uintptr_t retaddr)
1238 {
1239     uintptr_t index = tlb_index(env, mmu_idx, addr);
1240     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1241     target_ulong tlb_addr, page_addr;
1242     size_t elt_ofs;
1243     int flags;
1244 
1245     switch (access_type) {
1246     case MMU_DATA_LOAD:
1247         elt_ofs = offsetof(CPUTLBEntry, addr_read);
1248         break;
1249     case MMU_DATA_STORE:
1250         elt_ofs = offsetof(CPUTLBEntry, addr_write);
1251         break;
1252     case MMU_INST_FETCH:
1253         elt_ofs = offsetof(CPUTLBEntry, addr_code);
1254         break;
1255     default:
1256         g_assert_not_reached();
1257     }
1258     tlb_addr = tlb_read_ofs(entry, elt_ofs);
1259 
1260     page_addr = addr & TARGET_PAGE_MASK;
1261     if (!tlb_hit_page(tlb_addr, page_addr)) {
1262         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
1263             CPUState *cs = env_cpu(env);
1264             CPUClass *cc = CPU_GET_CLASS(cs);
1265 
1266             if (!cc->tlb_fill(cs, addr, fault_size, access_type,
1267                               mmu_idx, nonfault, retaddr)) {
1268                 /* Non-faulting page table read failed.  */
1269                 *phost = NULL;
1270                 return TLB_INVALID_MASK;
1271             }
1272 
1273             /* TLB resize via tlb_fill may have moved the entry.  */
1274             entry = tlb_entry(env, mmu_idx, addr);
1275         }
1276         tlb_addr = tlb_read_ofs(entry, elt_ofs);
1277     }
1278     flags = tlb_addr & TLB_FLAGS_MASK;
1279 
1280     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
1281     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1282         *phost = NULL;
1283         return TLB_MMIO;
1284     }
1285 
1286     /* Everything else is RAM. */
1287     *phost = (void *)((uintptr_t)addr + entry->addend);
1288     return flags;
1289 }
1290 
1291 int probe_access_flags(CPUArchState *env, target_ulong addr,
1292                        MMUAccessType access_type, int mmu_idx,
1293                        bool nonfault, void **phost, uintptr_t retaddr)
1294 {
1295     int flags;
1296 
1297     flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
1298                                   nonfault, phost, retaddr);
1299 
1300     /* Handle clean RAM pages.  */
1301     if (unlikely(flags & TLB_NOTDIRTY)) {
1302         uintptr_t index = tlb_index(env, mmu_idx, addr);
1303         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1304 
1305         notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1306         flags &= ~TLB_NOTDIRTY;
1307     }
1308 
1309     return flags;
1310 }
1311 
1312 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1313                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1314 {
1315     void *host;
1316     int flags;
1317 
1318     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1319 
1320     flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
1321                                   false, &host, retaddr);
1322 
1323     /* Per the interface, size == 0 merely faults the access. */
1324     if (size == 0) {
1325         return NULL;
1326     }
1327 
1328     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1329         uintptr_t index = tlb_index(env, mmu_idx, addr);
1330         CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1331 
1332         /* Handle watchpoints.  */
1333         if (flags & TLB_WATCHPOINT) {
1334             int wp_access = (access_type == MMU_DATA_STORE
1335                              ? BP_MEM_WRITE : BP_MEM_READ);
1336             cpu_check_watchpoint(env_cpu(env), addr, size,
1337                                  iotlbentry->attrs, wp_access, retaddr);
1338         }
1339 
1340         /* Handle clean RAM pages.  */
1341         if (flags & TLB_NOTDIRTY) {
1342             notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1343         }
1344     }
1345 
1346     return host;
1347 }
1348 
1349 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1350                         MMUAccessType access_type, int mmu_idx)
1351 {
1352     void *host;
1353     int flags;
1354 
1355     flags = probe_access_internal(env, addr, 0, access_type,
1356                                   mmu_idx, true, &host, 0);
1357 
1358     /* No combination of flags are expected by the caller. */
1359     return flags ? NULL : host;
1360 }
1361 
1362 #ifdef CONFIG_PLUGIN
1363 /*
1364  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1365  * This should be a hot path as we will have just looked this path up
1366  * in the softmmu lookup code (or helper). We don't handle re-fills or
1367  * checking the victim table. This is purely informational.
1368  *
1369  * This should never fail as the memory access being instrumented
1370  * should have just filled the TLB.
1371  */
1372 
1373 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1374                        bool is_store, struct qemu_plugin_hwaddr *data)
1375 {
1376     CPUArchState *env = cpu->env_ptr;
1377     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1378     uintptr_t index = tlb_index(env, mmu_idx, addr);
1379     target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1380 
1381     if (likely(tlb_hit(tlb_addr, addr))) {
1382         /* We must have an iotlb entry for MMIO */
1383         if (tlb_addr & TLB_MMIO) {
1384             CPUIOTLBEntry *iotlbentry;
1385             iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1386             data->is_io = true;
1387             data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1388             data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1389         } else {
1390             data->is_io = false;
1391             data->v.ram.hostaddr = addr + tlbe->addend;
1392         }
1393         return true;
1394     }
1395     return false;
1396 }
1397 
1398 #endif
1399 
1400 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
1401  * operations, or io operations to proceed.  Return the host address.  */
1402 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1403                                TCGMemOpIdx oi, uintptr_t retaddr)
1404 {
1405     size_t mmu_idx = get_mmuidx(oi);
1406     uintptr_t index = tlb_index(env, mmu_idx, addr);
1407     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1408     target_ulong tlb_addr = tlb_addr_write(tlbe);
1409     MemOp mop = get_memop(oi);
1410     int a_bits = get_alignment_bits(mop);
1411     int s_bits = mop & MO_SIZE;
1412     void *hostaddr;
1413 
1414     /* Adjust the given return address.  */
1415     retaddr -= GETPC_ADJ;
1416 
1417     /* Enforce guest required alignment.  */
1418     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1419         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1420         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1421                              mmu_idx, retaddr);
1422     }
1423 
1424     /* Enforce qemu required alignment.  */
1425     if (unlikely(addr & ((1 << s_bits) - 1))) {
1426         /* We get here if guest alignment was not requested,
1427            or was not enforced by cpu_unaligned_access above.
1428            We might widen the access and emulate, but for now
1429            mark an exception and exit the cpu loop.  */
1430         goto stop_the_world;
1431     }
1432 
1433     /* Check TLB entry and enforce page permissions.  */
1434     if (!tlb_hit(tlb_addr, addr)) {
1435         if (!VICTIM_TLB_HIT(addr_write, addr)) {
1436             tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1437                      mmu_idx, retaddr);
1438             index = tlb_index(env, mmu_idx, addr);
1439             tlbe = tlb_entry(env, mmu_idx, addr);
1440         }
1441         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1442     }
1443 
1444     /* Notice an IO access or a needs-MMU-lookup access */
1445     if (unlikely(tlb_addr & TLB_MMIO)) {
1446         /* There's really nothing that can be done to
1447            support this apart from stop-the-world.  */
1448         goto stop_the_world;
1449     }
1450 
1451     /* Let the guest notice RMW on a write-only page.  */
1452     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1453         tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1454                  mmu_idx, retaddr);
1455         /* Since we don't support reads and writes to different addresses,
1456            and we do have the proper page loaded for write, this shouldn't
1457            ever return.  But just in case, handle via stop-the-world.  */
1458         goto stop_the_world;
1459     }
1460 
1461     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1462 
1463     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1464         notdirty_write(env_cpu(env), addr, 1 << s_bits,
1465                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1466     }
1467 
1468     return hostaddr;
1469 
1470  stop_the_world:
1471     cpu_loop_exit_atomic(env_cpu(env), retaddr);
1472 }
1473 
1474 /*
1475  * Load Helpers
1476  *
1477  * We support two different access types. SOFTMMU_CODE_ACCESS is
1478  * specifically for reading instructions from system memory. It is
1479  * called by the translation loop and in some helpers where the code
1480  * is disassembled. It shouldn't be called directly by guest code.
1481  */
1482 
/*
 * Signature of the load functions that load_helper() receives as its
 * full_load argument and calls back for the two halves of an unaligned
 * access.
 */
typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr);
1485 
1486 static inline uint64_t QEMU_ALWAYS_INLINE
1487 load_memop(const void *haddr, MemOp op)
1488 {
1489     switch (op) {
1490     case MO_UB:
1491         return ldub_p(haddr);
1492     case MO_BEUW:
1493         return lduw_be_p(haddr);
1494     case MO_LEUW:
1495         return lduw_le_p(haddr);
1496     case MO_BEUL:
1497         return (uint32_t)ldl_be_p(haddr);
1498     case MO_LEUL:
1499         return (uint32_t)ldl_le_p(haddr);
1500     case MO_BEQ:
1501         return ldq_be_p(haddr);
1502     case MO_LEQ:
1503         return ldq_le_p(haddr);
1504     default:
1505         qemu_build_not_reached();
1506     }
1507 }
1508 
1509 static inline uint64_t QEMU_ALWAYS_INLINE
1510 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1511             uintptr_t retaddr, MemOp op, bool code_read,
1512             FullLoadHelper *full_load)
1513 {
1514     uintptr_t mmu_idx = get_mmuidx(oi);
1515     uintptr_t index = tlb_index(env, mmu_idx, addr);
1516     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1517     target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1518     const size_t tlb_off = code_read ?
1519         offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1520     const MMUAccessType access_type =
1521         code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1522     unsigned a_bits = get_alignment_bits(get_memop(oi));
1523     void *haddr;
1524     uint64_t res;
1525     size_t size = memop_size(op);
1526 
1527     /* Handle CPU specific unaligned behaviour */
1528     if (addr & ((1 << a_bits) - 1)) {
1529         cpu_unaligned_access(env_cpu(env), addr, access_type,
1530                              mmu_idx, retaddr);
1531     }
1532 
1533     /* If the TLB entry is for a different page, reload and try again.  */
1534     if (!tlb_hit(tlb_addr, addr)) {
1535         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1536                             addr & TARGET_PAGE_MASK)) {
1537             tlb_fill(env_cpu(env), addr, size,
1538                      access_type, mmu_idx, retaddr);
1539             index = tlb_index(env, mmu_idx, addr);
1540             entry = tlb_entry(env, mmu_idx, addr);
1541         }
1542         tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1543         tlb_addr &= ~TLB_INVALID_MASK;
1544     }
1545 
1546     /* Handle anything that isn't just a straight memory access.  */
1547     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1548         CPUIOTLBEntry *iotlbentry;
1549         bool need_swap;
1550 
1551         /* For anything that is unaligned, recurse through full_load.  */
1552         if ((addr & (size - 1)) != 0) {
1553             goto do_unaligned_access;
1554         }
1555 
1556         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1557 
1558         /* Handle watchpoints.  */
1559         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1560             /* On watchpoint hit, this will longjmp out.  */
1561             cpu_check_watchpoint(env_cpu(env), addr, size,
1562                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
1563         }
1564 
1565         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1566 
1567         /* Handle I/O access.  */
1568         if (likely(tlb_addr & TLB_MMIO)) {
1569             return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1570                             access_type, op ^ (need_swap * MO_BSWAP));
1571         }
1572 
1573         haddr = (void *)((uintptr_t)addr + entry->addend);
1574 
1575         /*
1576          * Keep these two load_memop separate to ensure that the compiler
1577          * is able to fold the entire function to a single instruction.
1578          * There is a build-time assert inside to remind you of this.  ;-)
1579          */
1580         if (unlikely(need_swap)) {
1581             return load_memop(haddr, op ^ MO_BSWAP);
1582         }
1583         return load_memop(haddr, op);
1584     }
1585 
1586     /* Handle slow unaligned access (it spans two pages or IO).  */
1587     if (size > 1
1588         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1589                     >= TARGET_PAGE_SIZE)) {
1590         target_ulong addr1, addr2;
1591         uint64_t r1, r2;
1592         unsigned shift;
1593     do_unaligned_access:
1594         addr1 = addr & ~((target_ulong)size - 1);
1595         addr2 = addr1 + size;
1596         r1 = full_load(env, addr1, oi, retaddr);
1597         r2 = full_load(env, addr2, oi, retaddr);
1598         shift = (addr & (size - 1)) * 8;
1599 
1600         if (memop_big_endian(op)) {
1601             /* Big-endian combine.  */
1602             res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1603         } else {
1604             /* Little-endian combine.  */
1605             res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1606         }
1607         return res & MAKE_64BIT_MASK(0, size * 8);
1608     }
1609 
1610     haddr = (void *)((uintptr_t)addr + entry->addend);
1611     return load_memop(haddr, op);
1612 }
1613 
1614 /*
1615  * For the benefit of TCG generated code, we want to avoid the
1616  * complication of ABI-specific return type promotion and always
1617  * return a value extended to the register size of the host. This is
1618  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1619  * data, and for that we always have uint64_t.
1620  *
1621  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1622  */
1623 
1624 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1625                               TCGMemOpIdx oi, uintptr_t retaddr)
1626 {
1627     return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1628 }
1629 
1630 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1631                                      TCGMemOpIdx oi, uintptr_t retaddr)
1632 {
1633     return full_ldub_mmu(env, addr, oi, retaddr);
1634 }
1635 
1636 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1637                                  TCGMemOpIdx oi, uintptr_t retaddr)
1638 {
1639     return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1640                        full_le_lduw_mmu);
1641 }
1642 
1643 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1644                                     TCGMemOpIdx oi, uintptr_t retaddr)
1645 {
1646     return full_le_lduw_mmu(env, addr, oi, retaddr);
1647 }
1648 
1649 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1650                                  TCGMemOpIdx oi, uintptr_t retaddr)
1651 {
1652     return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1653                        full_be_lduw_mmu);
1654 }
1655 
1656 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1657                                     TCGMemOpIdx oi, uintptr_t retaddr)
1658 {
1659     return full_be_lduw_mmu(env, addr, oi, retaddr);
1660 }
1661 
1662 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1663                                  TCGMemOpIdx oi, uintptr_t retaddr)
1664 {
1665     return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1666                        full_le_ldul_mmu);
1667 }
1668 
1669 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1670                                     TCGMemOpIdx oi, uintptr_t retaddr)
1671 {
1672     return full_le_ldul_mmu(env, addr, oi, retaddr);
1673 }
1674 
1675 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1676                                  TCGMemOpIdx oi, uintptr_t retaddr)
1677 {
1678     return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1679                        full_be_ldul_mmu);
1680 }
1681 
1682 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1683                                     TCGMemOpIdx oi, uintptr_t retaddr)
1684 {
1685     return full_be_ldul_mmu(env, addr, oi, retaddr);
1686 }
1687 
1688 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1689                            TCGMemOpIdx oi, uintptr_t retaddr)
1690 {
1691     return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1692                        helper_le_ldq_mmu);
1693 }
1694 
1695 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1696                            TCGMemOpIdx oi, uintptr_t retaddr)
1697 {
1698     return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1699                        helper_be_ldq_mmu);
1700 }
1701 
1702 /*
1703  * Provide signed versions of the load routines as well.  We can of course
1704  * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1705  */
1706 
1707 
1708 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1709                                      TCGMemOpIdx oi, uintptr_t retaddr)
1710 {
1711     return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1712 }
1713 
1714 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1715                                     TCGMemOpIdx oi, uintptr_t retaddr)
1716 {
1717     return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1718 }
1719 
1720 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1721                                     TCGMemOpIdx oi, uintptr_t retaddr)
1722 {
1723     return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1724 }
1725 
1726 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1727                                     TCGMemOpIdx oi, uintptr_t retaddr)
1728 {
1729     return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1730 }
1731 
1732 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1733                                     TCGMemOpIdx oi, uintptr_t retaddr)
1734 {
1735     return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1736 }
1737 
1738 /*
1739  * Load helpers for cpu_ldst.h.
1740  */
1741 
1742 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
1743                                        int mmu_idx, uintptr_t retaddr,
1744                                        MemOp op, FullLoadHelper *full_load)
1745 {
1746     uint16_t meminfo;
1747     TCGMemOpIdx oi;
1748     uint64_t ret;
1749 
1750     meminfo = trace_mem_get_info(op, mmu_idx, false);
1751     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1752 
1753     op &= ~MO_SIGN;
1754     oi = make_memop_idx(op, mmu_idx);
1755     ret = full_load(env, addr, oi, retaddr);
1756 
1757     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1758 
1759     return ret;
1760 }
1761 
1762 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1763                             int mmu_idx, uintptr_t ra)
1764 {
1765     return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
1766 }
1767 
1768 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1769                        int mmu_idx, uintptr_t ra)
1770 {
1771     return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
1772                                    full_ldub_mmu);
1773 }
1774 
1775 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1776                                int mmu_idx, uintptr_t ra)
1777 {
1778     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
1779 }
1780 
1781 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1782                           int mmu_idx, uintptr_t ra)
1783 {
1784     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
1785                                     full_be_lduw_mmu);
1786 }
1787 
1788 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1789                               int mmu_idx, uintptr_t ra)
1790 {
1791     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
1792 }
1793 
1794 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1795                               int mmu_idx, uintptr_t ra)
1796 {
1797     return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
1798 }
1799 
1800 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1801                                int mmu_idx, uintptr_t ra)
1802 {
1803     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
1804 }
1805 
1806 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1807                           int mmu_idx, uintptr_t ra)
1808 {
1809     return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
1810                                     full_le_lduw_mmu);
1811 }
1812 
1813 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1814                               int mmu_idx, uintptr_t ra)
1815 {
1816     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
1817 }
1818 
1819 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1820                               int mmu_idx, uintptr_t ra)
1821 {
1822     return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
1823 }
1824 
1825 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
1826                           uintptr_t retaddr)
1827 {
1828     return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1829 }
1830 
1831 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1832 {
1833     return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1834 }
1835 
1836 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
1837                              uintptr_t retaddr)
1838 {
1839     return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1840 }
1841 
1842 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1843 {
1844     return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1845 }
1846 
1847 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
1848                             uintptr_t retaddr)
1849 {
1850     return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1851 }
1852 
1853 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
1854                             uintptr_t retaddr)
1855 {
1856     return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1857 }
1858 
1859 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
1860                              uintptr_t retaddr)
1861 {
1862     return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1863 }
1864 
1865 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1866 {
1867     return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1868 }
1869 
1870 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
1871                             uintptr_t retaddr)
1872 {
1873     return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1874 }
1875 
1876 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
1877                             uintptr_t retaddr)
1878 {
1879     return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1880 }
1881 
1882 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
1883 {
1884     return cpu_ldub_data_ra(env, ptr, 0);
1885 }
1886 
1887 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
1888 {
1889     return cpu_ldsb_data_ra(env, ptr, 0);
1890 }
1891 
1892 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
1893 {
1894     return cpu_lduw_be_data_ra(env, ptr, 0);
1895 }
1896 
1897 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
1898 {
1899     return cpu_ldsw_be_data_ra(env, ptr, 0);
1900 }
1901 
1902 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
1903 {
1904     return cpu_ldl_be_data_ra(env, ptr, 0);
1905 }
1906 
1907 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
1908 {
1909     return cpu_ldq_be_data_ra(env, ptr, 0);
1910 }
1911 
1912 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
1913 {
1914     return cpu_lduw_le_data_ra(env, ptr, 0);
1915 }
1916 
1917 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
1918 {
1919     return cpu_ldsw_le_data_ra(env, ptr, 0);
1920 }
1921 
1922 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
1923 {
1924     return cpu_ldl_le_data_ra(env, ptr, 0);
1925 }
1926 
1927 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
1928 {
1929     return cpu_ldq_le_data_ra(env, ptr, 0);
1930 }
1931 
1932 /*
1933  * Store Helpers
1934  */
1935 
1936 static inline void QEMU_ALWAYS_INLINE
1937 store_memop(void *haddr, uint64_t val, MemOp op)
1938 {
1939     switch (op) {
1940     case MO_UB:
1941         stb_p(haddr, val);
1942         break;
1943     case MO_BEUW:
1944         stw_be_p(haddr, val);
1945         break;
1946     case MO_LEUW:
1947         stw_le_p(haddr, val);
1948         break;
1949     case MO_BEUL:
1950         stl_be_p(haddr, val);
1951         break;
1952     case MO_LEUL:
1953         stl_le_p(haddr, val);
1954         break;
1955     case MO_BEQ:
1956         stq_be_p(haddr, val);
1957         break;
1958     case MO_LEQ:
1959         stq_le_p(haddr, val);
1960         break;
1961     default:
1962         qemu_build_not_reached();
1963     }
1964 }
1965 
1966 static inline void QEMU_ALWAYS_INLINE
1967 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1968              TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
1969 {
1970     uintptr_t mmu_idx = get_mmuidx(oi);
1971     uintptr_t index = tlb_index(env, mmu_idx, addr);
1972     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1973     target_ulong tlb_addr = tlb_addr_write(entry);
1974     const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
1975     unsigned a_bits = get_alignment_bits(get_memop(oi));
1976     void *haddr;
1977     size_t size = memop_size(op);
1978 
1979     /* Handle CPU specific unaligned behaviour */
1980     if (addr & ((1 << a_bits) - 1)) {
1981         cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1982                              mmu_idx, retaddr);
1983     }
1984 
1985     /* If the TLB entry is for a different page, reload and try again.  */
1986     if (!tlb_hit(tlb_addr, addr)) {
1987         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1988             addr & TARGET_PAGE_MASK)) {
1989             tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
1990                      mmu_idx, retaddr);
1991             index = tlb_index(env, mmu_idx, addr);
1992             entry = tlb_entry(env, mmu_idx, addr);
1993         }
1994         tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
1995     }
1996 
1997     /* Handle anything that isn't just a straight memory access.  */
1998     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1999         CPUIOTLBEntry *iotlbentry;
2000         bool need_swap;
2001 
2002         /* For anything that is unaligned, recurse through byte stores.  */
2003         if ((addr & (size - 1)) != 0) {
2004             goto do_unaligned_access;
2005         }
2006 
2007         iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
2008 
2009         /* Handle watchpoints.  */
2010         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2011             /* On watchpoint hit, this will longjmp out.  */
2012             cpu_check_watchpoint(env_cpu(env), addr, size,
2013                                  iotlbentry->attrs, BP_MEM_WRITE, retaddr);
2014         }
2015 
2016         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
2017 
2018         /* Handle I/O access.  */
2019         if (tlb_addr & TLB_MMIO) {
2020             io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
2021                       op ^ (need_swap * MO_BSWAP));
2022             return;
2023         }
2024 
2025         /* Ignore writes to ROM.  */
2026         if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
2027             return;
2028         }
2029 
2030         /* Handle clean RAM pages.  */
2031         if (tlb_addr & TLB_NOTDIRTY) {
2032             notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
2033         }
2034 
2035         haddr = (void *)((uintptr_t)addr + entry->addend);
2036 
2037         /*
2038          * Keep these two store_memop separate to ensure that the compiler
2039          * is able to fold the entire function to a single instruction.
2040          * There is a build-time assert inside to remind you of this.  ;-)
2041          */
2042         if (unlikely(need_swap)) {
2043             store_memop(haddr, val, op ^ MO_BSWAP);
2044         } else {
2045             store_memop(haddr, val, op);
2046         }
2047         return;
2048     }
2049 
2050     /* Handle slow unaligned access (it spans two pages or IO).  */
2051     if (size > 1
2052         && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
2053                      >= TARGET_PAGE_SIZE)) {
2054         int i;
2055         uintptr_t index2;
2056         CPUTLBEntry *entry2;
2057         target_ulong page2, tlb_addr2;
2058         size_t size2;
2059 
2060     do_unaligned_access:
2061         /*
2062          * Ensure the second page is in the TLB.  Note that the first page
2063          * is already guaranteed to be filled, and that the second page
2064          * cannot evict the first.
2065          */
2066         page2 = (addr + size) & TARGET_PAGE_MASK;
2067         size2 = (addr + size) & ~TARGET_PAGE_MASK;
2068         index2 = tlb_index(env, mmu_idx, page2);
2069         entry2 = tlb_entry(env, mmu_idx, page2);
2070         tlb_addr2 = tlb_addr_write(entry2);
2071         if (!tlb_hit_page(tlb_addr2, page2)) {
2072             if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
2073                 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
2074                          mmu_idx, retaddr);
2075                 index2 = tlb_index(env, mmu_idx, page2);
2076                 entry2 = tlb_entry(env, mmu_idx, page2);
2077             }
2078             tlb_addr2 = tlb_addr_write(entry2);
2079         }
2080 
2081         /*
2082          * Handle watchpoints.  Since this may trap, all checks
2083          * must happen before any store.
2084          */
2085         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2086             cpu_check_watchpoint(env_cpu(env), addr, size - size2,
2087                                  env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
2088                                  BP_MEM_WRITE, retaddr);
2089         }
2090         if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
2091             cpu_check_watchpoint(env_cpu(env), page2, size2,
2092                                  env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
2093                                  BP_MEM_WRITE, retaddr);
2094         }
2095 
2096         /*
2097          * XXX: not efficient, but simple.
2098          * This loop must go in the forward direction to avoid issues
2099          * with self-modifying code in Windows 64-bit.
2100          */
2101         for (i = 0; i < size; ++i) {
2102             uint8_t val8;
2103             if (memop_big_endian(op)) {
2104                 /* Big-endian extract.  */
2105                 val8 = val >> (((size - 1) * 8) - (i * 8));
2106             } else {
2107                 /* Little-endian extract.  */
2108                 val8 = val >> (i * 8);
2109             }
2110             helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2111         }
2112         return;
2113     }
2114 
2115     haddr = (void *)((uintptr_t)addr + entry->addend);
2116     store_memop(haddr, val, op);
2117 }
2118 
2119 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2120                         TCGMemOpIdx oi, uintptr_t retaddr)
2121 {
2122     store_helper(env, addr, val, oi, retaddr, MO_UB);
2123 }
2124 
2125 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2126                        TCGMemOpIdx oi, uintptr_t retaddr)
2127 {
2128     store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2129 }
2130 
2131 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2132                        TCGMemOpIdx oi, uintptr_t retaddr)
2133 {
2134     store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2135 }
2136 
2137 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2138                        TCGMemOpIdx oi, uintptr_t retaddr)
2139 {
2140     store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2141 }
2142 
2143 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2144                        TCGMemOpIdx oi, uintptr_t retaddr)
2145 {
2146     store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2147 }
2148 
2149 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2150                        TCGMemOpIdx oi, uintptr_t retaddr)
2151 {
2152     store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2153 }
2154 
2155 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2156                        TCGMemOpIdx oi, uintptr_t retaddr)
2157 {
2158     store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2159 }
2160 
2161 /*
2162  * Store Helpers for cpu_ldst.h
2163  */
2164 
2165 static inline void QEMU_ALWAYS_INLINE
2166 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2167                  int mmu_idx, uintptr_t retaddr, MemOp op)
2168 {
2169     TCGMemOpIdx oi;
2170     uint16_t meminfo;
2171 
2172     meminfo = trace_mem_get_info(op, mmu_idx, true);
2173     trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2174 
2175     oi = make_memop_idx(op, mmu_idx);
2176     store_helper(env, addr, val, oi, retaddr, op);
2177 
2178     qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2179 }
2180 
2181 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2182                        int mmu_idx, uintptr_t retaddr)
2183 {
2184     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2185 }
2186 
2187 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2188                           int mmu_idx, uintptr_t retaddr)
2189 {
2190     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
2191 }
2192 
2193 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2194                           int mmu_idx, uintptr_t retaddr)
2195 {
2196     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
2197 }
2198 
2199 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2200                           int mmu_idx, uintptr_t retaddr)
2201 {
2202     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
2203 }
2204 
2205 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2206                           int mmu_idx, uintptr_t retaddr)
2207 {
2208     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
2209 }
2210 
2211 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2212                           int mmu_idx, uintptr_t retaddr)
2213 {
2214     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
2215 }
2216 
2217 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2218                           int mmu_idx, uintptr_t retaddr)
2219 {
2220     cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
2221 }
2222 
2223 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2224                      uint32_t val, uintptr_t retaddr)
2225 {
2226     cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2227 }
2228 
2229 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
2230                         uint32_t val, uintptr_t retaddr)
2231 {
2232     cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2233 }
2234 
2235 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
2236                         uint32_t val, uintptr_t retaddr)
2237 {
2238     cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2239 }
2240 
2241 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
2242                         uint64_t val, uintptr_t retaddr)
2243 {
2244     cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2245 }
2246 
2247 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
2248                         uint32_t val, uintptr_t retaddr)
2249 {
2250     cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2251 }
2252 
2253 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
2254                         uint32_t val, uintptr_t retaddr)
2255 {
2256     cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2257 }
2258 
2259 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
2260                         uint64_t val, uintptr_t retaddr)
2261 {
2262     cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2263 }
2264 
2265 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2266 {
2267     cpu_stb_data_ra(env, ptr, val, 0);
2268 }
2269 
2270 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2271 {
2272     cpu_stw_be_data_ra(env, ptr, val, 0);
2273 }
2274 
2275 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2276 {
2277     cpu_stl_be_data_ra(env, ptr, val, 0);
2278 }
2279 
2280 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2281 {
2282     cpu_stq_be_data_ra(env, ptr, val, 0);
2283 }
2284 
2285 void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2286 {
2287     cpu_stw_le_data_ra(env, ptr, val, 0);
2288 }
2289 
2290 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2291 {
2292     cpu_stl_le_data_ra(env, ptr, val, 0);
2293 }
2294 
2295 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2296 {
2297     cpu_stq_le_data_ra(env, ptr, val, 0);
2298 }
2299 
2300 /* First set of helpers allows passing in of OI and RETADDR.  This makes
2301    them callable from other helpers.  */
2302 
2303 #define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
2304 #define ATOMIC_NAME(X) \
2305     HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
2306 #define ATOMIC_MMU_DECLS
2307 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
2308 #define ATOMIC_MMU_CLEANUP
2309 #define ATOMIC_MMU_IDX   get_mmuidx(oi)
2310 
2311 #include "atomic_common.inc.c"
2312 
2313 #define DATA_SIZE 1
2314 #include "atomic_template.h"
2315 
2316 #define DATA_SIZE 2
2317 #include "atomic_template.h"
2318 
2319 #define DATA_SIZE 4
2320 #include "atomic_template.h"
2321 
2322 #ifdef CONFIG_ATOMIC64
2323 #define DATA_SIZE 8
2324 #include "atomic_template.h"
2325 #endif
2326 
2327 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
2328 #define DATA_SIZE 16
2329 #include "atomic_template.h"
2330 #endif
2331 
2332 /* Second set of helpers are directly callable from TCG as helpers.  */
2333 
2334 #undef EXTRA_ARGS
2335 #undef ATOMIC_NAME
2336 #undef ATOMIC_MMU_LOOKUP
2337 #define EXTRA_ARGS         , TCGMemOpIdx oi
2338 #define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
2339 #define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())
2340 
2341 #define DATA_SIZE 1
2342 #include "atomic_template.h"
2343 
2344 #define DATA_SIZE 2
2345 #include "atomic_template.h"
2346 
2347 #define DATA_SIZE 4
2348 #include "atomic_template.h"
2349 
2350 #ifdef CONFIG_ATOMIC64
2351 #define DATA_SIZE 8
2352 #include "atomic_template.h"
2353 #endif
2354 #undef ATOMIC_MMU_IDX
2355 
2356 /* Code access functions.  */
2357 
2358 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
2359                                TCGMemOpIdx oi, uintptr_t retaddr)
2360 {
2361     return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2362 }
2363 
2364 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2365 {
2366     TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2367     return full_ldub_code(env, addr, oi, 0);
2368 }
2369 
2370 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2371                                TCGMemOpIdx oi, uintptr_t retaddr)
2372 {
2373     return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2374 }
2375 
2376 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2377 {
2378     TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2379     return full_lduw_code(env, addr, oi, 0);
2380 }
2381 
2382 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2383                               TCGMemOpIdx oi, uintptr_t retaddr)
2384 {
2385     return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2386 }
2387 
2388 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2389 {
2390     TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2391     return full_ldl_code(env, addr, oi, 0);
2392 }
2393 
2394 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2395                               TCGMemOpIdx oi, uintptr_t retaddr)
2396 {
2397     return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2398 }
2399 
2400 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2401 {
2402     TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2403     return full_ldq_code(env, addr, oi, 0);
2404 }
2405