/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
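
/*
 * Editor's note, illustrative only: the per-thread counter makes the
 * lock recursive within a single thread, so nested critical sections
 * are safe:
 *
 *     mmap_lock();     (count 0 -> 1, mutex acquired)
 *     mmap_lock();     (count 1 -> 2, mutex NOT re-acquired)
 *     ...
 *     mmap_unlock();   (count 2 -> 1, mutex still held)
 *     mmap_unlock();   (count 1 -> 0, mutex released)
 */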

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
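
/*
 * Editor's sketch of the expected caller shape (hypothetical; the real
 * caller is QEMU's fork_end() path):
 *
 *     mmap_fork_start();
 *     pid = fork();
 *     mmap_fork_end(pid == 0);
 *
 * The child re-initialises the mutex it inherited in the locked state,
 * while the parent simply unlocks it.
 */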

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
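
/*
 * Worked example (editor's note): on a target without extra prot bits,
 * validate_prot_to_pageflags(PROT_READ | PROT_WRITE) yields
 * PAGE_READ | PAGE_WRITE | PAGE_VALID, while a request carrying an
 * undefined prot bit falls outside 'valid' and returns 0, which the
 * callers below turn into EINVAL.
 */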

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}
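
/*
 * Worked example (editor's note): for a guest PROT_EXEC-only mapping,
 * target_to_host_prot(PROT_EXEC) == PROT_READ.  The guest code is never
 * executed directly on the host; QEMU only needs to read it in order to
 * translate it, and execution happens from the generated host code.
 */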

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
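
/*
 * Worked scenario (editor's sketch, assuming 4K guest pages on a 64K
 * host page kernel): an mprotect() of guest pages [0x11000, 0x13000)
 * touches only part of host page [0x10000, 0x20000).  The single-page
 * branch above ORs the flags of the untouched neighbouring guest pages
 * into prot1, so the one host mprotect() call keeps those neighbours
 * accessible, while page_set_flags() records the new protection for
 * the requested guest pages only.
 */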

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}
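
/*
 * Worked scenario (editor's sketch, same 4K-guest/64K-host assumption):
 * a file mapping of guest page [0x10000, 0x11000) covers only a
 * fragment of host page [0x10000, 0x20000).  mmap_frag() keeps or
 * anonymously allocates the host page, temporarily widens it to
 * writable, pread()s the file contents into the fragment, and restores
 * the combined protection: the mapping is emulated by copying rather
 * than by a host-level file mmap().
 */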

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    /*
     * Start at the top of the address space, ignoring the last page.
     * If reserved_va == UINT32_MAX, then end_addr wraps to 0,
     * throwing the rest of the calculations off.
     * TODO: rewrite using last_addr instead.
     * TODO: use the interval tree instead of probing every page.
     */
    if (start > reserved_va - size) {
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space (see above). */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}
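
/*
 * Illustrative walk-through (editor's note): with a reserved 4G guest
 * space and a 64K request, the loop probes downward one host page at a
 * time.  Hitting a used page at ADDR restarts the candidate window just
 * below it, at ((ADDR - size) & -align) + size; underflowing past zero
 * sends the scan back to the top once, and if that happens a second
 * time the whole space has been searched and the function fails.
 */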

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Returns -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                     ? TARGET_PAGE_ALIGN(mmap_min_addr)
                     : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
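
/*
 * Editor's sketch of the intended use (hypothetical call sequence; the
 * real callers are target_mmap() and target_mremap()): the PROT_NONE
 * reservation made above is a placeholder that the caller replaces
 * atomically with the real mapping:
 *
 *     abi_ulong addr = mmap_find_vma(0, size, TARGET_PAGE_SIZE);
 *     if (addr != (abi_ulong)-1) {
 *         mmap(g2h_untagged(addr), size, host_prot,
 *              host_flags | MAP_FIXED, fd, offset);
 *     }
 *
 * MAP_FIXED discards the placeholder, which avoids a race against
 * other threads mapping the same range.
 */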

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned with
             * the host's real page size. Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address. It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* Validate that the chosen range is empty. */
        if ((flags & MAP_FIXED_NOREPLACE)
            && !page_check_range_empty(start, end - 1)) {
            errno = EEXIST;
            goto fail;
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                if (!mmap_frag(real_start, start, end - 1,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            if (!mmap_frag(real_end - qemu_host_page_size,
                           real_end - qemu_host_page_size, end - 1,
                           target_prot, flags, fd,
                           offset + real_end - qemu_host_page_size - start)) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            off_t offset1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     target_to_host_prot(target_prot), flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len - 1, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end - 1,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len - 1, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
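
/*
 * Worked scenario (editor's sketch): mapping a 100-byte file on a 4K
 * page host while emulating an 8K page target.  The code above first
 * creates an anonymous mapping covering the full 8K, then maps the file
 * over its first 4K only (len was truncated to REAL_HOST_PAGE_ALIGN of
 * the file size).  Guest accesses between 4K and 8K therefore read
 * zeros from the anonymous backing instead of raising SIGBUS, matching
 * the behaviour of a real 8K page kernel.
 */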

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
                 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                 -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len - 1, 0);
    }
    mmap_unlock();
    return ret;
}
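
/*
 * Editor's note (illustrative): with reserved_va in effect, "unmapping"
 * is really mmap_reserve(), which re-installs a PROT_NONE MAP_NORESERVE
 * mapping over the range.  The host addresses stay owned by QEMU, so a
 * later mmap_find_vma() can hand the range out again without racing
 * other host allocations.
 */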

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}
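
/*
 * Editor's note (illustrative): the in-place path above shows a revert
 * trick: if the host mremap() succeeds but the grown block no longer
 * fits the guest address space, a second mremap() with the sizes
 * swapped shrinks it back where it was, and the guest sees ENOMEM as
 * if nothing had happened.
 */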

static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}
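
/*
 * Editor's note (illustrative): PAGE_PASSTHROUGH is set by target_mmap()
 * only for ranges it really mapped 1:1 onto the host.  A range that was
 * emulated by pread() into anonymous memory lacks the flag, so this
 * check fails and madvise() is not forwarded for it.
 */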

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() helps to check if a passthrough is possible by
     * comparing mappings that are known to have the same semantics in the host
     * and the guest. In this case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
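
/*
 * Worked example (editor's note): an alpha guest issues madvise() with
 * alpha's TARGET_MADV_DONTNEED value, which differs from the host's.
 * The first switch above rewrites it to the host MADV_DONTNEED; if every
 * page in the range is PAGE_PASSTHROUGH the call is then forwarded to
 * the host and the range's per-page target data is reset.
 */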
1057