xref: /qemu/linux-user/mmap.c (revision dcaaf2bf)
1 /*
2  *  mmap support for qemu
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
23 #include "user-internals.h"
24 #include "user-mmap.h"
25 #include "target_mman.h"
26 
27 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
28 static __thread int mmap_lock_count;
29 
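/*
 * The mmap lock is recursive per thread: only the outermost
 * mmap_lock()/mmap_unlock() pair takes or releases the mutex,
 * so nested lock/unlock pairs are safe.
 */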
30 void mmap_lock(void)
31 {
32     if (mmap_lock_count++ == 0) {
33         pthread_mutex_lock(&mmap_mutex);
34     }
35 }
36 
37 void mmap_unlock(void)
38 {
39     if (--mmap_lock_count == 0) {
40         pthread_mutex_unlock(&mmap_mutex);
41     }
42 }
43 
44 bool have_mmap_lock(void)
45 {
46     return mmap_lock_count > 0;
47 }
48 
49 /* Grab lock to make sure things are in a consistent state after fork().  */
50 void mmap_fork_start(void)
51 {
52     if (mmap_lock_count)
53         abort();
54     pthread_mutex_lock(&mmap_mutex);
55 }
56 
57 void mmap_fork_end(int child)
58 {
59     if (child) {
60         pthread_mutex_init(&mmap_mutex, NULL);
61     } else {
62         pthread_mutex_unlock(&mmap_mutex);
63     }
64 }
65 
66 /*
67  * Validate target prot bitmask.
68  * Return 0 if the target prot bitmask is invalid, otherwise
69  * the internal qemu page_flags (which will include PAGE_VALID).
70  * The host prot bitmask is derived separately by target_to_host_prot().
71  */
72 static int validate_prot_to_pageflags(int prot)
73 {
74     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
75     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
76 
77 #ifdef TARGET_AARCH64
78     {
79         ARMCPU *cpu = ARM_CPU(thread_cpu);
80 
81         /*
82          * The PROT_BTI bit is only accepted if the cpu supports the feature.
83          * Since this is the unusual case, don't bother checking unless
84          * the bit has been requested.  If set and valid, record the bit
85          * within QEMU's page_flags.
86          */
87         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
88             valid |= TARGET_PROT_BTI;
89             page_flags |= PAGE_BTI;
90         }
91         /* Similarly for the PROT_MTE bit. */
92         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
93             valid |= TARGET_PROT_MTE;
94             page_flags |= PAGE_MTE;
95         }
96     }
97 #elif defined(TARGET_HPPA)
98     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
99 #endif
100 
101     return prot & ~valid ? 0 : page_flags;
102 }
103 
104 /*
105  * For the host, we need not pass anything except read/write/exec.
106  * While PROT_SEM is allowed by all hosts, it is also ignored, so
107  * don't bother transforming the guest bit to the host bit.  Any other
108  * target-specific prot bits will not be understood by the host
109  * and will need to be encoded into page_flags for qemu emulation.
110  *
111  * Pages that are executable by the guest will never be executed
112  * by the host, but the host will need to be able to read them.
113  */
114 static int target_to_host_prot(int prot)
115 {
116     return (prot & (PROT_READ | PROT_WRITE)) |
117            (prot & PROT_EXEC ? PROT_READ : 0);
118 }
119 
120 /* NOTE: all the constants are the HOST ones, but addresses are target. */
121 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
122 {
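    /*
     * When the host page size is larger than the target page size, a single
     * guest range may require up to three host mprotect() ranges: an
     * unaligned head, an unaligned tail, and an aligned middle.
     */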
123     abi_ulong starts[3];
124     abi_ulong lens[3];
125     int prots[3];
126     abi_ulong host_start, host_last, last;
127     int prot1, ret, page_flags, nranges;
128 
129     trace_target_mprotect(start, len, target_prot);
130 
131     if ((start & ~TARGET_PAGE_MASK) != 0) {
132         return -TARGET_EINVAL;
133     }
134     page_flags = validate_prot_to_pageflags(target_prot);
135     if (!page_flags) {
136         return -TARGET_EINVAL;
137     }
138     if (len == 0) {
139         return 0;
140     }
141     len = TARGET_PAGE_ALIGN(len);
142     if (!guest_range_valid_untagged(start, len)) {
143         return -TARGET_ENOMEM;
144     }
145 
146     last = start + len - 1;
147     host_start = start & qemu_host_page_mask;
148     host_last = HOST_PAGE_ALIGN(last) - 1;
149     nranges = 0;
150 
151     mmap_lock();
152 
153     if (host_last - host_start < qemu_host_page_size) {
154         /* Single host page contains all guest pages: sum the prot. */
155         prot1 = target_prot;
156         for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
157             prot1 |= page_get_flags(a);
158         }
159         for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
160             prot1 |= page_get_flags(a + 1);
161         }
162         starts[nranges] = host_start;
163         lens[nranges] = qemu_host_page_size;
164         prots[nranges] = prot1;
165         nranges++;
166     } else {
167         if (host_start < start) {
168             /* Host page contains more than one guest page: sum the prot. */
169             prot1 = target_prot;
170             for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
171                 prot1 |= page_get_flags(a);
172             }
173             /* If the resulting sum differs, create a new range. */
174             if (prot1 != target_prot) {
175                 starts[nranges] = host_start;
176                 lens[nranges] = qemu_host_page_size;
177                 prots[nranges] = prot1;
178                 nranges++;
179                 host_start += qemu_host_page_size;
180             }
181         }
182 
183         if (last < host_last) {
184             /* Host page contains more than one guest page: sum the prot. */
185             prot1 = target_prot;
186             for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
187                 prot1 |= page_get_flags(a + 1);
188             }
189             /* If the resulting sum differs, create a new range. */
190             if (prot1 != target_prot) {
191                 host_last -= qemu_host_page_size;
192                 starts[nranges] = host_last + 1;
193                 lens[nranges] = qemu_host_page_size;
194                 prots[nranges] = prot1;
195                 nranges++;
196             }
197         }
198 
199         /* Create a range for the middle, if any remains. */
200         if (host_start < host_last) {
201             starts[nranges] = host_start;
202             lens[nranges] = host_last - host_start + 1;
203             prots[nranges] = target_prot;
204             nranges++;
205         }
206     }
207 
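    /* Apply the computed protection to each host range. */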
208     for (int i = 0; i < nranges; ++i) {
209         ret = mprotect(g2h_untagged(starts[i]), lens[i],
210                        target_to_host_prot(prots[i]));
211         if (ret != 0) {
212             goto error;
213         }
214     }
215 
216     page_set_flags(start, last, page_flags);
217     ret = 0;
218 
219  error:
220     mmap_unlock();
221     return ret;
222 }
223 
224 /* Map an incomplete host page, preserving any guest pages already on it. */
225 static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
226                       int prot, int flags, int fd, off_t offset)
227 {
228     abi_ulong real_last;
229     void *host_start;
230     int prot_old, prot_new;
231     int host_prot_old, host_prot_new;
232 
233     if (!(flags & MAP_ANONYMOUS)
234         && (flags & MAP_TYPE) == MAP_SHARED
235         && (prot & PROT_WRITE)) {
236         /*
237          * msync() won't work on the partial page, so return an error
238          * if the mapping is shared and writable.
239          */
240         errno = EINVAL;
241         return false;
242     }
243 
244     real_last = real_start + qemu_host_page_size - 1;
245     host_start = g2h_untagged(real_start);
246 
247     /* Get the protection of the target pages outside the mapping. */
248     prot_old = 0;
249     for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
250         prot_old |= page_get_flags(a);
251     }
252     for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
253         prot_old |= page_get_flags(a);
254     }
255 
256     if (prot_old == 0) {
257         /*
258          * Since !(prot_old & PAGE_VALID), there were no guest pages
259          * outside of the fragment we need to map.  Allocate a new host
260          * page to cover, discarding whatever else may have been present.
261          */
262         void *p = mmap(host_start, qemu_host_page_size,
263                        target_to_host_prot(prot),
264                        flags | MAP_ANONYMOUS, -1, 0);
265         if (p == MAP_FAILED) {
266             return false;
267         }
268         prot_old = prot;
269     }
270     prot_new = prot | prot_old;
271 
272     host_prot_old = target_to_host_prot(prot_old);
273     host_prot_new = target_to_host_prot(prot_new);
274 
275     /* Adjust protection to be able to write. */
276     if (!(host_prot_old & PROT_WRITE)) {
277         host_prot_old |= PROT_WRITE;
278         mprotect(host_start, qemu_host_page_size, host_prot_old);
279     }
280 
281     /* Read or zero the new guest pages. */
282     if (flags & MAP_ANONYMOUS) {
283         memset(g2h_untagged(start), 0, last - start + 1);
284     } else {
285         if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
286             return false;
287         }
288     }
289 
290     /* Put final protection */
291     if (host_prot_new != host_prot_old) {
292         mprotect(host_start, qemu_host_page_size, host_prot_new);
293     }
294     return true;
295 }
296 
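/*
 * Default address at which mmap_find_vma() starts searching when the guest
 * does not request a specific address, akin to the kernel's
 * TASK_UNMAPPED_BASE.
 */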
297 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
298 #ifdef TARGET_AARCH64
299 # define TASK_UNMAPPED_BASE  0x5500000000
300 #else
301 # define TASK_UNMAPPED_BASE  (1ul << 38)
302 #endif
303 #else
304 #ifdef TARGET_HPPA
305 # define TASK_UNMAPPED_BASE  0xfa000000
306 #else
307 # define TASK_UNMAPPED_BASE  0x40000000
308 #endif
309 #endif
310 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
311 
312 unsigned long last_brk;
313 
314 /*
315  * Subroutine of mmap_find_vma, used when we have pre-allocated
316  * a chunk of guest address space.
317  */
318 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
319                                         abi_ulong align)
320 {
321     target_ulong ret;
322 
323     ret = page_find_range_empty(start, reserved_va, size, align);
324     if (ret == -1 && start > mmap_min_addr) {
325         /* Restart at the beginning of the address space. */
326         ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
327     }
328 
329     return ret;
330 }
331 
332 /*
333  * Find and reserve a free memory area of size 'size'. The search
334  * starts at 'start'.
335  * It must be called with mmap_lock() held.
336  * Return -1 on error.
337  */
338 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
339 {
340     void *ptr, *prev;
341     abi_ulong addr;
342     int wrapped, repeat;
343 
344     align = MAX(align, qemu_host_page_size);
345 
346     /* If 'start' == 0, then a default start address is used. */
347     if (start == 0) {
348         start = mmap_next_start;
349     } else {
350         start &= qemu_host_page_mask;
351     }
352     start = ROUND_UP(start, align);
353 
354     size = HOST_PAGE_ALIGN(size);
355 
356     if (reserved_va) {
357         return mmap_find_vma_reserved(start, size, align);
358     }
359 
360     addr = start;
361     wrapped = repeat = 0;
362     prev = 0;
363 
364     for (;; prev = ptr) {
365         /*
366          * Reserve needed memory area to avoid a race.
367          * It should be discarded using:
368          *  - mmap() with MAP_FIXED flag
369          *  - mremap() with MREMAP_FIXED flag
370          *  - shmat() with SHM_REMAP flag
371          */
372         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
373                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
374 
375         /* ENOMEM if the host address space has no memory left. */
376         if (ptr == MAP_FAILED) {
377             return (abi_ulong)-1;
378         }
379 
380         /*
381          * Count the number of sequential returns of the same address.
382          * This is used to modify the search algorithm below.
383          */
384         repeat = (ptr == prev ? repeat + 1 : 0);
385 
386         if (h2g_valid(ptr + size - 1)) {
387             addr = h2g(ptr);
388 
389             if ((addr & (align - 1)) == 0) {
390                 /* Success.  */
391                 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
392                     mmap_next_start = addr + size;
393                 }
394                 return addr;
395             }
396 
397             /* The address is not properly aligned for the target.  */
398             switch (repeat) {
399             case 0:
400                 /*
401                  * Assume the result that the kernel gave us is the
402                  * first with enough free space, so start again at the
403                  * next higher target page.
404                  */
405                 addr = ROUND_UP(addr, align);
406                 break;
407             case 1:
408                 /*
409                  * Sometimes the kernel decides to perform the allocation
410                  * at the top end of memory instead.
411                  */
412                 addr &= -align;
413                 break;
414             case 2:
415                 /* Start over at low memory.  */
416                 addr = 0;
417                 break;
418             default:
419                 /* Fail.  This unaligned block must be the last.  */
420                 addr = -1;
421                 break;
422             }
423         } else {
424             /*
425              * Since the result the kernel gave didn't fit, start
426              * again at low memory.  If any repetition, fail.
427              */
428             addr = (repeat ? -1 : 0);
429         }
430 
431         /* Unmap and try again.  */
432         munmap(ptr, size);
433 
434         /* ENOMEM if we checked the whole of the target address space.  */
435         if (addr == (abi_ulong)-1) {
436             return (abi_ulong)-1;
437         } else if (addr == 0) {
438             if (wrapped) {
439                 return (abi_ulong)-1;
440             }
441             wrapped = 1;
442             /*
443              * Don't actually use 0 when wrapping; instead, indicate
444              * that we'd truly like an allocation in low memory.
445              */
446             addr = (mmap_min_addr > TARGET_PAGE_SIZE
447                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
448                      : TARGET_PAGE_SIZE);
449         } else if (wrapped && addr >= start) {
450             return (abi_ulong)-1;
451         }
452     }
453 }
454 
455 /* NOTE: all the constants are the HOST ones */
456 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
457                      int flags, int fd, off_t offset)
458 {
459     abi_ulong ret, last, real_start, real_last, retaddr, host_len;
460     abi_ulong passthrough_start = -1, passthrough_last = 0;
461     int page_flags;
462     off_t host_offset;
463 
464     mmap_lock();
465     trace_target_mmap(start, len, target_prot, flags, fd, offset);
466 
467     if (!len) {
468         errno = EINVAL;
469         goto fail;
470     }
471 
472     page_flags = validate_prot_to_pageflags(target_prot);
473     if (!page_flags) {
474         errno = EINVAL;
475         goto fail;
476     }
477 
478     /* Also check for overflows... */
479     len = TARGET_PAGE_ALIGN(len);
480     if (!len) {
481         errno = ENOMEM;
482         goto fail;
483     }
484 
485     if (offset & ~TARGET_PAGE_MASK) {
486         errno = EINVAL;
487         goto fail;
488     }
489 
490     /*
491      * If we're mapping shared memory, ensure we generate code for parallel
492      * execution and flush old translations.  This will work up to the level
493      * supported by the host -- anything that requires EXCP_ATOMIC will not
494      * be atomic with respect to an external process.
495      */
496     if (flags & MAP_SHARED) {
497         CPUState *cpu = thread_cpu;
498         if (!(cpu->tcg_cflags & CF_PARALLEL)) {
499             cpu->tcg_cflags |= CF_PARALLEL;
500             tb_flush(cpu);
501         }
502     }
503 
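    /*
     * Round the guest start address and the file offset down to host page
     * boundaries, since the host mmap() below operates on host pages.
     */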
504     real_start = start & qemu_host_page_mask;
505     host_offset = offset & qemu_host_page_mask;
506 
507     /*
508      * If the user is asking for the kernel to find a location, do that
509      * before we truncate the length for mapping files below.
510      */
511     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
512         host_len = len + offset - host_offset;
513         host_len = HOST_PAGE_ALIGN(host_len);
514         start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
515         if (start == (abi_ulong)-1) {
516             errno = ENOMEM;
517             goto fail;
518         }
519     }
520 
521     /*
522      * When mapping files into a memory area larger than the file, accesses
523      * to pages beyond the file size will cause a SIGBUS.
524      *
525      * For example, if mmapping a file of 100 bytes on a host with 4K pages
526      * emulating a target with 8K pages, the target expects to be able to
527      * access the first 8K. But the host will trap us on any access beyond
528      * 4K.
529      *
530      * When emulating a target with a larger page size than the host's, we
531      * may need to truncate file maps at EOF and add extra anonymous pages
532      * up to the target's page boundary.
533      */
534     if ((qemu_real_host_page_size() < qemu_host_page_size) &&
535         !(flags & MAP_ANONYMOUS)) {
536         struct stat sb;
537 
538         if (fstat(fd, &sb) == -1) {
539             goto fail;
540         }
541 
542         /* Are we trying to create a map beyond EOF? */
543         if (offset + len > sb.st_size) {
544             /*
545              * If so, truncate the file map at EOF, aligned to the
546              * host's real page size.  Additional anonymous maps
547              * will be created beyond EOF.
548              */
549             len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
550         }
551     }
552 
553     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
554         uintptr_t host_start;
555         int host_prot;
556         void *p;
557 
558         host_len = len + offset - host_offset;
559         host_len = HOST_PAGE_ALIGN(host_len);
560         host_prot = target_to_host_prot(target_prot);
561 
562         /*
563          * Note: we prefer to control the mapping address. It is
564          * especially important if qemu_host_page_size >
565          * qemu_real_host_page_size.
566          */
567         p = mmap(g2h_untagged(start), host_len, host_prot,
568                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
569         if (p == MAP_FAILED) {
570             goto fail;
571         }
572         /* update start so that it points to the file position at 'offset' */
573         host_start = (uintptr_t)p;
574         if (!(flags & MAP_ANONYMOUS)) {
575             p = mmap(g2h_untagged(start), len, host_prot,
576                      flags | MAP_FIXED, fd, host_offset);
577             if (p == MAP_FAILED) {
578                 munmap(g2h_untagged(start), host_len);
579                 goto fail;
580             }
581             host_start += offset - host_offset;
582         }
583         start = h2g(host_start);
584         last = start + len - 1;
585         passthrough_start = start;
586         passthrough_last = last;
587     } else {
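        /*
         * MAP_FIXED or MAP_FIXED_NOREPLACE: map at the requested guest
         * address, using mmap_frag() for host pages that are only partially
         * covered at either end.
         */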
588         if (start & ~TARGET_PAGE_MASK) {
589             errno = EINVAL;
590             goto fail;
591         }
592         last = start + len - 1;
593         real_last = HOST_PAGE_ALIGN(last) - 1;
594 
595         /*
596          * Test if the requested memory area fits the target address space.
597          * This can fail only on a 64-bit host with a 32-bit target; on any
598          * other target/host combination the host mmap() handles it correctly.
599          */
600         if (last < start || !guest_range_valid_untagged(start, len)) {
601             errno = ENOMEM;
602             goto fail;
603         }
604 
605         /* Validate that the chosen range is empty. */
606         if ((flags & MAP_FIXED_NOREPLACE)
607             && !page_check_range_empty(start, last)) {
608             errno = EEXIST;
609             goto fail;
610         }
611 
612         /*
613          * Worst case: we cannot map the file because its offset within the
614          * host page differs from that of the guest address, so read the data instead.
615          */
616         if (!(flags & MAP_ANONYMOUS) &&
617             (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
618             /*
619              * msync() won't work here, so return an error if the
620              * mapping is shared and writable.
621              */
622             if ((flags & MAP_TYPE) == MAP_SHARED
623                 && (target_prot & PROT_WRITE)) {
624                 errno = EINVAL;
625                 goto fail;
626             }
627             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
628                                   (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
629                                   | MAP_PRIVATE | MAP_ANONYMOUS,
630                                   -1, 0);
631             if (retaddr == -1) {
632                 goto fail;
633             }
634             if (pread(fd, g2h_untagged(start), len, offset) == -1) {
635                 goto fail;
636             }
637             if (!(target_prot & PROT_WRITE)) {
638                 ret = target_mprotect(start, len, target_prot);
639                 assert(ret == 0);
640             }
641             goto the_end;
642         }
643 
644         /* handle the start of the mapping */
645         if (start > real_start) {
646             if (real_last == real_start + qemu_host_page_size - 1) {
647                 /* one single host page */
648                 if (!mmap_frag(real_start, start, last,
649                                target_prot, flags, fd, offset)) {
650                     goto fail;
651                 }
652                 goto the_end1;
653             }
654             if (!mmap_frag(real_start, start,
655                            real_start + qemu_host_page_size - 1,
656                            target_prot, flags, fd, offset)) {
657                 goto fail;
658             }
659             real_start += qemu_host_page_size;
660         }
661         /* handle the end of the mapping */
662         if (last < real_last) {
663             abi_ulong real_page = real_last - qemu_host_page_size + 1;
664             if (!mmap_frag(real_page, real_page, last,
665                            target_prot, flags, fd,
666                            offset + real_page - start)) {
667                 goto fail;
668             }
669             real_last -= qemu_host_page_size;
670         }
671 
672         /* map the middle (easier) */
673         if (real_start < real_last) {
674             void *p;
675             off_t offset1;
676 
677             if (flags & MAP_ANONYMOUS) {
678                 offset1 = 0;
679             } else {
680                 offset1 = offset + real_start - start;
681             }
682             p = mmap(g2h_untagged(real_start), real_last - real_start + 1,
683                      target_to_host_prot(target_prot), flags, fd, offset1);
684             if (p == MAP_FAILED) {
685                 goto fail;
686             }
687             passthrough_start = real_start;
688             passthrough_last = real_last;
689         }
690     }
691  the_end1:
692     if (flags & MAP_ANONYMOUS) {
693         page_flags |= PAGE_ANON;
694     }
695     page_flags |= PAGE_RESET;
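    /*
     * Mark the pages whose host mapping matches the guest mapping exactly
     * as PAGE_PASSTHROUGH; target_madvise() uses this to decide when a
     * host madvise() is safe.
     */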
696     if (passthrough_start > passthrough_last) {
697         page_set_flags(start, last, page_flags);
698     } else {
699         if (start < passthrough_start) {
700             page_set_flags(start, passthrough_start - 1, page_flags);
701         }
702         page_set_flags(passthrough_start, passthrough_last,
703                        page_flags | PAGE_PASSTHROUGH);
704         if (passthrough_last < last) {
705             page_set_flags(passthrough_last + 1, last, page_flags);
706         }
707     }
708  the_end:
709     trace_target_mmap_complete(start);
710     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
711         FILE *f = qemu_log_trylock();
712         if (f) {
713             fprintf(f, "page layout changed following mmap\n");
714             page_dump(f);
715             qemu_log_unlock(f);
716         }
717     }
718     mmap_unlock();
719     return start;
720 fail:
721     mmap_unlock();
722     return -1;
723 }
724 
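/*
 * Release a guest memory range.  With reserved_va, the host range is
 * re-reserved as PROT_NONE so that it remains claimed for the guest;
 * otherwise it is truly munmap()ed.
 */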
725 static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
726 {
727     abi_ulong real_start;
728     abi_ulong real_last;
729     abi_ulong real_len;
730     abi_ulong last;
731     abi_ulong a;
732     void *host_start;
733     int prot;
734 
735     last = start + len - 1;
736     real_start = start & qemu_host_page_mask;
737     real_last = HOST_PAGE_ALIGN(last) - 1;
738 
739     /*
740      * If guest pages remain on the first or last host pages,
741      * adjust the deallocation to retain those guest pages.
742      * The single page special case is required for the last page,
743      * lest real_start overflow to zero.
744      */
745     if (real_last - real_start < qemu_host_page_size) {
746         prot = 0;
747         for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
748             prot |= page_get_flags(a);
749         }
750         for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
751             prot |= page_get_flags(a + 1);
752         }
753         if (prot != 0) {
754             return;
755         }
756     } else {
757         for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
758             prot |= page_get_flags(a);
759         }
760         if (prot != 0) {
761             real_start += qemu_host_page_size;
762         }
763 
764         for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
765             prot |= page_get_flags(a + 1);
766         }
767         if (prot != 0) {
768             real_last -= qemu_host_page_size;
769         }
770 
771         if (real_last < real_start) {
772             return;
773         }
774     }
775 
776     real_len = real_last - real_start + 1;
777     host_start = g2h_untagged(real_start);
778 
779     if (reserved_va) {
780         void *ptr = mmap(host_start, real_len, PROT_NONE,
781                          MAP_FIXED | MAP_ANONYMOUS
782                          | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
783         assert(ptr == host_start);
784     } else {
785         int ret = munmap(host_start, real_len);
786         assert(ret == 0);
787     }
788 }
789 
790 int target_munmap(abi_ulong start, abi_ulong len)
791 {
792     trace_target_munmap(start, len);
793 
794     if (start & ~TARGET_PAGE_MASK) {
795         return -TARGET_EINVAL;
796     }
797     len = TARGET_PAGE_ALIGN(len);
798     if (len == 0 || !guest_range_valid_untagged(start, len)) {
799         return -TARGET_EINVAL;
800     }
801 
802     mmap_lock();
803     mmap_reserve_or_unmap(start, len);
804     page_set_flags(start, start + len - 1, 0);
805     mmap_unlock();
806 
807     return 0;
808 }
809 
810 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
811                        abi_ulong new_size, unsigned long flags,
812                        abi_ulong new_addr)
813 {
814     int prot;
815     void *host_addr;
816 
817     if (!guest_range_valid_untagged(old_addr, old_size) ||
818         ((flags & MREMAP_FIXED) &&
819          !guest_range_valid_untagged(new_addr, new_size)) ||
820         ((flags & MREMAP_MAYMOVE) == 0 &&
821          !guest_range_valid_untagged(old_addr, new_size))) {
822         errno = ENOMEM;
823         return -1;
824     }
825 
826     mmap_lock();
827 
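    /*
     * Three cases: MREMAP_FIXED moves the mapping to the caller-supplied
     * address, MREMAP_MAYMOVE lets us pick a new range via mmap_find_vma(),
     * and otherwise the mapping is resized in place.
     */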
828     if (flags & MREMAP_FIXED) {
829         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
830                            flags, g2h_untagged(new_addr));
831 
832         if (reserved_va && host_addr != MAP_FAILED) {
833             /*
834              * If new and old addresses overlap then the above mremap will
835              * already have failed with EINVAL.
836              */
837             mmap_reserve_or_unmap(old_addr, old_size);
838         }
839     } else if (flags & MREMAP_MAYMOVE) {
840         abi_ulong mmap_start;
841 
842         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
843 
844         if (mmap_start == -1) {
845             errno = ENOMEM;
846             host_addr = MAP_FAILED;
847         } else {
848             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
849                                flags | MREMAP_FIXED,
850                                g2h_untagged(mmap_start));
851             if (reserved_va) {
852                 mmap_reserve_or_unmap(old_addr, old_size);
853             }
854         }
855     } else {
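        /*
         * Resize in place.  With reserved_va, refuse to grow unless the
         * guest pages just beyond the old mapping are unused.
         */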
856         int prot = 0;
857         if (reserved_va && old_size < new_size) {
858             abi_ulong addr;
859             for (addr = old_addr + old_size;
860                  addr < old_addr + new_size;
861                  addr++) {
862                 prot |= page_get_flags(addr);
863             }
864         }
865         if (prot == 0) {
866             host_addr = mremap(g2h_untagged(old_addr),
867                                old_size, new_size, flags);
868 
869             if (host_addr != MAP_FAILED) {
870                 /* Check if address fits target address space */
871                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
872                     /* Revert mremap() changes */
873                     host_addr = mremap(g2h_untagged(old_addr),
874                                        new_size, old_size, flags);
875                     errno = ENOMEM;
876                     host_addr = MAP_FAILED;
877                 } else if (reserved_va && old_size > new_size) {
878                     mmap_reserve_or_unmap(old_addr + old_size,
879                                           old_size - new_size);
880                 }
881             }
882         } else {
883             errno = ENOMEM;
884             host_addr = MAP_FAILED;
885         }
886     }
887 
888     if (host_addr == MAP_FAILED) {
889         new_addr = -1;
890     } else {
891         new_addr = h2g(host_addr);
892         prot = page_get_flags(old_addr);
893         page_set_flags(old_addr, old_addr + old_size - 1, 0);
894         page_set_flags(new_addr, new_addr + new_size - 1,
895                        prot | PAGE_VALID | PAGE_RESET);
896     }
897     mmap_unlock();
898     return new_addr;
899 }
900 
901 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
902 {
903     abi_ulong len;
904     int ret = 0;
905 
906     if (start & ~TARGET_PAGE_MASK) {
907         return -TARGET_EINVAL;
908     }
909     if (len_in == 0) {
910         return 0;
911     }
912     len = TARGET_PAGE_ALIGN(len_in);
913     if (len == 0 || !guest_range_valid_untagged(start, len)) {
914         return -TARGET_EINVAL;
915     }
916 
917     /* Translate for some architectures which have different MADV_xxx values */
918     switch (advice) {
919     case TARGET_MADV_DONTNEED:      /* alpha */
920         advice = MADV_DONTNEED;
921         break;
922     case TARGET_MADV_WIPEONFORK:    /* parisc */
923         advice = MADV_WIPEONFORK;
924         break;
925     case TARGET_MADV_KEEPONFORK:    /* parisc */
926         advice = MADV_KEEPONFORK;
927         break;
928     /* we do not care about the other MADV_xxx values yet */
929     }
930 
931     /*
932      * Most advice values are hints, so ignoring and returning success is ok.
933      *
934      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
935      * MADV_KEEPONFORK are not hints and need to be emulated.
936      *
937      * A straight passthrough for those may not be safe because qemu sometimes
938      * turns private file-backed mappings into anonymous mappings.
939      * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
940      * same semantics for the host as for the guest.
941      *
942      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
943      * return failure if not.
944      *
945      * MADV_DONTNEED is passed through as well, if possible.
946      * If passthrough isn't possible, we nevertheless (wrongly!) return
947      * success, which is broken but some userspace programs fail to work
948      * otherwise. Completely implementing such emulation is quite complicated
949      * though.
950      */
951     mmap_lock();
952     switch (advice) {
953     case MADV_WIPEONFORK:
954     case MADV_KEEPONFORK:
955         ret = -EINVAL;
956         /* fall through */
957     case MADV_DONTNEED:
958         if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
959             ret = get_errno(madvise(g2h_untagged(start), len, advice));
960             if ((advice == MADV_DONTNEED) && (ret == 0)) {
961                 page_reset_target_data(start, start + len - 1);
962             }
963         }
964     }
965     mmap_unlock();
966 
967     return ret;
968 }
969