xref: /qemu/linux-user/mmap.c (revision c925f40a)
1 /*
2  *  mmap support for qemu
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
23 #include "user-internals.h"
24 #include "user-mmap.h"
25 #include "target_mman.h"
26 
/*
 * Mutex protecting updates to the guest memory map, together with a
 * per-thread nesting depth that lets one thread take the lock recursively.
 */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

/*
 * Acquire the mmap lock.  Only the outermost call on a thread takes the
 * mutex; nested calls just increase the per-thread depth.
 */
void mmap_lock(void)
{
    bool outermost = (mmap_lock_count == 0);

    mmap_lock_count++;
    if (outermost) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/*
 * Release one level of the mmap lock.  The mutex itself is dropped when
 * the per-thread depth returns to zero.
 */
void mmap_unlock(void)
{
    mmap_lock_count--;
    if (mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Return true if the calling thread currently holds the mmap lock. */
bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
48 
49 /* Grab lock to make sure things are in a consistent state after fork().  */
50 void mmap_fork_start(void)
51 {
52     if (mmap_lock_count)
53         abort();
54     pthread_mutex_lock(&mmap_mutex);
55 }
56 
57 void mmap_fork_end(int child)
58 {
59     if (child)
60         pthread_mutex_init(&mmap_mutex, NULL);
61     else
62         pthread_mutex_unlock(&mmap_mutex);
63 }
64 
65 /*
66  * Validate target prot bitmask.
67  * Return the prot bitmask for the host in *HOST_PROT.
68  * Return 0 if the target prot bitmask is invalid, otherwise
69  * the internal qemu page_flags (which will include PAGE_VALID).
70  */
71 static int validate_prot_to_pageflags(int *host_prot, int prot)
72 {
73     int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
74     int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
75 
76     /*
77      * For the host, we need not pass anything except read/write/exec.
78      * While PROT_SEM is allowed by all hosts, it is also ignored, so
79      * don't bother transforming guest bit to host bit.  Any other
80      * target-specific prot bits will not be understood by the host
81      * and will need to be encoded into page_flags for qemu emulation.
82      *
83      * Pages that are executable by the guest will never be executed
84      * by the host, but the host will need to be able to read them.
85      */
86     *host_prot = (prot & (PROT_READ | PROT_WRITE))
87                | (prot & PROT_EXEC ? PROT_READ : 0);
88 
89 #ifdef TARGET_AARCH64
90     {
91         ARMCPU *cpu = ARM_CPU(thread_cpu);
92 
93         /*
94          * The PROT_BTI bit is only accepted if the cpu supports the feature.
95          * Since this is the unusual case, don't bother checking unless
96          * the bit has been requested.  If set and valid, record the bit
97          * within QEMU's page_flags.
98          */
99         if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
100             valid |= TARGET_PROT_BTI;
101             page_flags |= PAGE_BTI;
102         }
103         /* Similarly for the PROT_MTE bit. */
104         if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
105             valid |= TARGET_PROT_MTE;
106             page_flags |= PAGE_MTE;
107         }
108     }
109 #elif defined(TARGET_HPPA)
110     valid |= PROT_GROWSDOWN | PROT_GROWSUP;
111 #endif
112 
113     return prot & ~valid ? 0 : page_flags;
114 }
115 
116 /* NOTE: all the constants are the HOST ones, but addresses are target. */
117 int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
118 {
119     abi_ulong end, host_start, host_end, addr;
120     int prot1, ret, page_flags, host_prot;
121 
122     trace_target_mprotect(start, len, target_prot);
123 
124     if ((start & ~TARGET_PAGE_MASK) != 0) {
125         return -TARGET_EINVAL;
126     }
127     page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
128     if (!page_flags) {
129         return -TARGET_EINVAL;
130     }
131     len = TARGET_PAGE_ALIGN(len);
132     end = start + len;
133     if (!guest_range_valid_untagged(start, len)) {
134         return -TARGET_ENOMEM;
135     }
136     if (len == 0) {
137         return 0;
138     }
139 
140     mmap_lock();
141     host_start = start & qemu_host_page_mask;
142     host_end = HOST_PAGE_ALIGN(end);
143     if (start > host_start) {
144         /* handle host page containing start */
145         prot1 = host_prot;
146         for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
147             prot1 |= page_get_flags(addr);
148         }
149         if (host_end == host_start + qemu_host_page_size) {
150             for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
151                 prot1 |= page_get_flags(addr);
152             }
153             end = host_end;
154         }
155         ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
156                        prot1 & PAGE_BITS);
157         if (ret != 0) {
158             goto error;
159         }
160         host_start += qemu_host_page_size;
161     }
162     if (end < host_end) {
163         prot1 = host_prot;
164         for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
165             prot1 |= page_get_flags(addr);
166         }
167         ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
168                        qemu_host_page_size, prot1 & PAGE_BITS);
169         if (ret != 0) {
170             goto error;
171         }
172         host_end -= qemu_host_page_size;
173     }
174 
175     /* handle the pages in the middle */
176     if (host_start < host_end) {
177         ret = mprotect(g2h_untagged(host_start),
178                        host_end - host_start, host_prot);
179         if (ret != 0) {
180             goto error;
181         }
182     }
183 
184     page_set_flags(start, start + len - 1, page_flags);
185     ret = 0;
186 
187 error:
188     mmap_unlock();
189     return ret;
190 }
191 
192 /* map an incomplete host page */
193 static int mmap_frag(abi_ulong real_start,
194                      abi_ulong start, abi_ulong end,
195                      int prot, int flags, int fd, abi_ulong offset)
196 {
197     abi_ulong real_end, addr;
198     void *host_start;
199     int prot1, prot_new;
200 
201     real_end = real_start + qemu_host_page_size;
202     host_start = g2h_untagged(real_start);
203 
204     /* get the protection of the target pages outside the mapping */
205     prot1 = 0;
206     for(addr = real_start; addr < real_end; addr++) {
207         if (addr < start || addr >= end)
208             prot1 |= page_get_flags(addr);
209     }
210 
211     if (prot1 == 0) {
212         /* no page was there, so we allocate one */
213         void *p = mmap(host_start, qemu_host_page_size, prot,
214                        flags | MAP_ANONYMOUS, -1, 0);
215         if (p == MAP_FAILED)
216             return -1;
217         prot1 = prot;
218     }
219     prot1 &= PAGE_BITS;
220 
221     prot_new = prot | prot1;
222     if (!(flags & MAP_ANONYMOUS)) {
223         /* msync() won't work here, so we return an error if write is
224            possible while it is a shared mapping */
225         if ((flags & MAP_TYPE) == MAP_SHARED &&
226             (prot & PROT_WRITE))
227             return -1;
228 
229         /* adjust protection to be able to read */
230         if (!(prot1 & PROT_WRITE))
231             mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
232 
233         /* read the corresponding file data */
234         if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
235             return -1;
236 
237         /* put final protection */
238         if (prot_new != (prot1 | PROT_WRITE))
239             mprotect(host_start, qemu_host_page_size, prot_new);
240     } else {
241         if (prot_new != prot1) {
242             mprotect(host_start, qemu_host_page_size, prot_new);
243         }
244         if (prot_new & PROT_WRITE) {
245             memset(g2h_untagged(start), 0, end - start);
246         }
247     }
248     return 0;
249 }
250 
251 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
252 #ifdef TARGET_AARCH64
253 # define TASK_UNMAPPED_BASE  0x5500000000
254 #else
255 # define TASK_UNMAPPED_BASE  (1ul << 38)
256 #endif
257 #else
258 #ifdef TARGET_HPPA
259 # define TASK_UNMAPPED_BASE  0xfa000000
260 #else
261 # define TASK_UNMAPPED_BASE  0x40000000
262 #endif
263 #endif
264 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
265 
266 unsigned long last_brk;
267 
268 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
269    of guest address space.  */
270 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
271                                         abi_ulong align)
272 {
273     abi_ulong addr, end_addr, incr = qemu_host_page_size;
274     int prot;
275     bool looped = false;
276 
277     if (size > reserved_va) {
278         return (abi_ulong)-1;
279     }
280 
281     /* Note that start and size have already been aligned by mmap_find_vma. */
282 
283     end_addr = start + size;
284     /*
285      * Start at the top of the address space, ignoring the last page.
286      * If reserved_va == UINT32_MAX, then end_addr wraps to 0,
287      * throwing the rest of the calculations off.
288      * TODO: rewrite using last_addr instead.
289      * TODO: use the interval tree instead of probing every page.
290      */
291     if (start > reserved_va - size) {
292         end_addr = ((reserved_va - size) & -align) + size;
293         looped = true;
294     }
295 
296     /* Search downward from END_ADDR, checking to see if a page is in use.  */
297     addr = end_addr;
298     while (1) {
299         addr -= incr;
300         if (addr > end_addr) {
301             if (looped) {
302                 /* Failure.  The entire address space has been searched.  */
303                 return (abi_ulong)-1;
304             }
305             /* Re-start at the top of the address space (see above). */
306             addr = end_addr = ((reserved_va - size) & -align) + size;
307             looped = true;
308         } else {
309             prot = page_get_flags(addr);
310             if (prot) {
311                 /* Page in use.  Restart below this page.  */
312                 addr = end_addr = ((addr - size) & -align) + size;
313             } else if (addr && addr + size == end_addr) {
314                 /* Success!  All pages between ADDR and END_ADDR are free.  */
315                 if (start == mmap_next_start) {
316                     mmap_next_start = addr;
317                 }
318                 return addr;
319             }
320         }
321     }
322 }
323 
324 /*
325  * Find and reserve a free memory area of size 'size'. The search
326  * starts at 'start'.
327  * It must be called with mmap_lock() held.
328  * Return -1 if error.
329  */
330 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
331 {
332     void *ptr, *prev;
333     abi_ulong addr;
334     int wrapped, repeat;
335 
336     align = MAX(align, qemu_host_page_size);
337 
338     /* If 'start' == 0, then a default start address is used. */
339     if (start == 0) {
340         start = mmap_next_start;
341     } else {
342         start &= qemu_host_page_mask;
343     }
344     start = ROUND_UP(start, align);
345 
346     size = HOST_PAGE_ALIGN(size);
347 
348     if (reserved_va) {
349         return mmap_find_vma_reserved(start, size, align);
350     }
351 
352     addr = start;
353     wrapped = repeat = 0;
354     prev = 0;
355 
356     for (;; prev = ptr) {
357         /*
358          * Reserve needed memory area to avoid a race.
359          * It should be discarded using:
360          *  - mmap() with MAP_FIXED flag
361          *  - mremap() with MREMAP_FIXED flag
362          *  - shmat() with SHM_REMAP flag
363          */
364         ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
365                    MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
366 
367         /* ENOMEM, if host address space has no memory */
368         if (ptr == MAP_FAILED) {
369             return (abi_ulong)-1;
370         }
371 
372         /* Count the number of sequential returns of the same address.
373            This is used to modify the search algorithm below.  */
374         repeat = (ptr == prev ? repeat + 1 : 0);
375 
376         if (h2g_valid(ptr + size - 1)) {
377             addr = h2g(ptr);
378 
379             if ((addr & (align - 1)) == 0) {
380                 /* Success.  */
381                 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
382                     mmap_next_start = addr + size;
383                 }
384                 return addr;
385             }
386 
387             /* The address is not properly aligned for the target.  */
388             switch (repeat) {
389             case 0:
390                 /* Assume the result that the kernel gave us is the
391                    first with enough free space, so start again at the
392                    next higher target page.  */
393                 addr = ROUND_UP(addr, align);
394                 break;
395             case 1:
396                 /* Sometimes the kernel decides to perform the allocation
397                    at the top end of memory instead.  */
398                 addr &= -align;
399                 break;
400             case 2:
401                 /* Start over at low memory.  */
402                 addr = 0;
403                 break;
404             default:
405                 /* Fail.  This unaligned block must the last.  */
406                 addr = -1;
407                 break;
408             }
409         } else {
410             /* Since the result the kernel gave didn't fit, start
411                again at low memory.  If any repetition, fail.  */
412             addr = (repeat ? -1 : 0);
413         }
414 
415         /* Unmap and try again.  */
416         munmap(ptr, size);
417 
418         /* ENOMEM if we checked the whole of the target address space.  */
419         if (addr == (abi_ulong)-1) {
420             return (abi_ulong)-1;
421         } else if (addr == 0) {
422             if (wrapped) {
423                 return (abi_ulong)-1;
424             }
425             wrapped = 1;
426             /* Don't actually use 0 when wrapping, instead indicate
427                that we'd truly like an allocation in low memory.  */
428             addr = (mmap_min_addr > TARGET_PAGE_SIZE
429                      ? TARGET_PAGE_ALIGN(mmap_min_addr)
430                      : TARGET_PAGE_SIZE);
431         } else if (wrapped && addr >= start) {
432             return (abi_ulong)-1;
433         }
434     }
435 }
436 
437 /* NOTE: all the constants are the HOST ones */
438 abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
439                      int flags, int fd, abi_ulong offset)
440 {
441     abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
442               passthrough_start = -1, passthrough_end = -1;
443     int page_flags, host_prot;
444 
445     mmap_lock();
446     trace_target_mmap(start, len, target_prot, flags, fd, offset);
447 
448     if (!len) {
449         errno = EINVAL;
450         goto fail;
451     }
452 
453     page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
454     if (!page_flags) {
455         errno = EINVAL;
456         goto fail;
457     }
458 
459     /* Also check for overflows... */
460     len = TARGET_PAGE_ALIGN(len);
461     if (!len) {
462         errno = ENOMEM;
463         goto fail;
464     }
465 
466     if (offset & ~TARGET_PAGE_MASK) {
467         errno = EINVAL;
468         goto fail;
469     }
470 
471     /*
472      * If we're mapping shared memory, ensure we generate code for parallel
473      * execution and flush old translations.  This will work up to the level
474      * supported by the host -- anything that requires EXCP_ATOMIC will not
475      * be atomic with respect to an external process.
476      */
477     if (flags & MAP_SHARED) {
478         CPUState *cpu = thread_cpu;
479         if (!(cpu->tcg_cflags & CF_PARALLEL)) {
480             cpu->tcg_cflags |= CF_PARALLEL;
481             tb_flush(cpu);
482         }
483     }
484 
485     real_start = start & qemu_host_page_mask;
486     host_offset = offset & qemu_host_page_mask;
487 
488     /* If the user is asking for the kernel to find a location, do that
489        before we truncate the length for mapping files below.  */
490     if (!(flags & MAP_FIXED)) {
491         host_len = len + offset - host_offset;
492         host_len = HOST_PAGE_ALIGN(host_len);
493         start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
494         if (start == (abi_ulong)-1) {
495             errno = ENOMEM;
496             goto fail;
497         }
498     }
499 
500     /* When mapping files into a memory area larger than the file, accesses
501        to pages beyond the file size will cause a SIGBUS.
502 
503        For example, if mmaping a file of 100 bytes on a host with 4K pages
504        emulating a target with 8K pages, the target expects to be able to
505        access the first 8K. But the host will trap us on any access beyond
506        4K.
507 
508        When emulating a target with a larger page-size than the hosts, we
509        may need to truncate file maps at EOF and add extra anonymous pages
510        up to the targets page boundary.  */
511 
512     if ((qemu_real_host_page_size() < qemu_host_page_size) &&
513         !(flags & MAP_ANONYMOUS)) {
514         struct stat sb;
515 
516        if (fstat (fd, &sb) == -1)
517            goto fail;
518 
519        /* Are we trying to create a map beyond EOF?.  */
520        if (offset + len > sb.st_size) {
521            /* If so, truncate the file map at eof aligned with
522               the hosts real pagesize. Additional anonymous maps
523               will be created beyond EOF.  */
524            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
525        }
526     }
527 
528     if (!(flags & MAP_FIXED)) {
529         unsigned long host_start;
530         void *p;
531 
532         host_len = len + offset - host_offset;
533         host_len = HOST_PAGE_ALIGN(host_len);
534 
535         /* Note: we prefer to control the mapping address. It is
536            especially important if qemu_host_page_size >
537            qemu_real_host_page_size */
538         p = mmap(g2h_untagged(start), host_len, host_prot,
539                  flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
540         if (p == MAP_FAILED) {
541             goto fail;
542         }
543         /* update start so that it points to the file position at 'offset' */
544         host_start = (unsigned long)p;
545         if (!(flags & MAP_ANONYMOUS)) {
546             p = mmap(g2h_untagged(start), len, host_prot,
547                      flags | MAP_FIXED, fd, host_offset);
548             if (p == MAP_FAILED) {
549                 munmap(g2h_untagged(start), host_len);
550                 goto fail;
551             }
552             host_start += offset - host_offset;
553         }
554         start = h2g(host_start);
555         passthrough_start = start;
556         passthrough_end = start + len;
557     } else {
558         if (start & ~TARGET_PAGE_MASK) {
559             errno = EINVAL;
560             goto fail;
561         }
562         end = start + len;
563         real_end = HOST_PAGE_ALIGN(end);
564 
565         /*
566          * Test if requested memory area fits target address space
567          * It can fail only on 64-bit host with 32-bit target.
568          * On any other target/host host mmap() handles this error correctly.
569          */
570         if (end < start || !guest_range_valid_untagged(start, len)) {
571             errno = ENOMEM;
572             goto fail;
573         }
574 
575         /* worst case: we cannot map the file because the offset is not
576            aligned, so we read it */
577         if (!(flags & MAP_ANONYMOUS) &&
578             (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
579             /* msync() won't work here, so we return an error if write is
580                possible while it is a shared mapping */
581             if ((flags & MAP_TYPE) == MAP_SHARED &&
582                 (host_prot & PROT_WRITE)) {
583                 errno = EINVAL;
584                 goto fail;
585             }
586             retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
587                                   MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
588                                   -1, 0);
589             if (retaddr == -1)
590                 goto fail;
591             if (pread(fd, g2h_untagged(start), len, offset) == -1)
592                 goto fail;
593             if (!(host_prot & PROT_WRITE)) {
594                 ret = target_mprotect(start, len, target_prot);
595                 assert(ret == 0);
596             }
597             goto the_end;
598         }
599 
600         /* handle the start of the mapping */
601         if (start > real_start) {
602             if (real_end == real_start + qemu_host_page_size) {
603                 /* one single host page */
604                 ret = mmap_frag(real_start, start, end,
605                                 host_prot, flags, fd, offset);
606                 if (ret == -1)
607                     goto fail;
608                 goto the_end1;
609             }
610             ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
611                             host_prot, flags, fd, offset);
612             if (ret == -1)
613                 goto fail;
614             real_start += qemu_host_page_size;
615         }
616         /* handle the end of the mapping */
617         if (end < real_end) {
618             ret = mmap_frag(real_end - qemu_host_page_size,
619                             real_end - qemu_host_page_size, end,
620                             host_prot, flags, fd,
621                             offset + real_end - qemu_host_page_size - start);
622             if (ret == -1)
623                 goto fail;
624             real_end -= qemu_host_page_size;
625         }
626 
627         /* map the middle (easier) */
628         if (real_start < real_end) {
629             void *p;
630             unsigned long offset1;
631             if (flags & MAP_ANONYMOUS)
632                 offset1 = 0;
633             else
634                 offset1 = offset + real_start - start;
635             p = mmap(g2h_untagged(real_start), real_end - real_start,
636                      host_prot, flags, fd, offset1);
637             if (p == MAP_FAILED)
638                 goto fail;
639             passthrough_start = real_start;
640             passthrough_end = real_end;
641         }
642     }
643  the_end1:
644     if (flags & MAP_ANONYMOUS) {
645         page_flags |= PAGE_ANON;
646     }
647     page_flags |= PAGE_RESET;
648     if (passthrough_start == passthrough_end) {
649         page_set_flags(start, start + len - 1, page_flags);
650     } else {
651         if (start < passthrough_start) {
652             page_set_flags(start, passthrough_start - 1, page_flags);
653         }
654         page_set_flags(passthrough_start, passthrough_end - 1,
655                        page_flags | PAGE_PASSTHROUGH);
656         if (passthrough_end < start + len) {
657             page_set_flags(passthrough_end, start + len - 1, page_flags);
658         }
659     }
660  the_end:
661     trace_target_mmap_complete(start);
662     if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
663         FILE *f = qemu_log_trylock();
664         if (f) {
665             fprintf(f, "page layout changed following mmap\n");
666             page_dump(f);
667             qemu_log_unlock(f);
668         }
669     }
670     mmap_unlock();
671     return start;
672 fail:
673     mmap_unlock();
674     return -1;
675 }
676 
677 static void mmap_reserve(abi_ulong start, abi_ulong size)
678 {
679     abi_ulong real_start;
680     abi_ulong real_end;
681     abi_ulong addr;
682     abi_ulong end;
683     int prot;
684 
685     real_start = start & qemu_host_page_mask;
686     real_end = HOST_PAGE_ALIGN(start + size);
687     end = start + size;
688     if (start > real_start) {
689         /* handle host page containing start */
690         prot = 0;
691         for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
692             prot |= page_get_flags(addr);
693         }
694         if (real_end == real_start + qemu_host_page_size) {
695             for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
696                 prot |= page_get_flags(addr);
697             }
698             end = real_end;
699         }
700         if (prot != 0)
701             real_start += qemu_host_page_size;
702     }
703     if (end < real_end) {
704         prot = 0;
705         for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
706             prot |= page_get_flags(addr);
707         }
708         if (prot != 0)
709             real_end -= qemu_host_page_size;
710     }
711     if (real_start != real_end) {
712         mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
713                  MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
714                  -1, 0);
715     }
716 }
717 
718 int target_munmap(abi_ulong start, abi_ulong len)
719 {
720     abi_ulong end, real_start, real_end, addr;
721     int prot, ret;
722 
723     trace_target_munmap(start, len);
724 
725     if (start & ~TARGET_PAGE_MASK)
726         return -TARGET_EINVAL;
727     len = TARGET_PAGE_ALIGN(len);
728     if (len == 0 || !guest_range_valid_untagged(start, len)) {
729         return -TARGET_EINVAL;
730     }
731 
732     mmap_lock();
733     end = start + len;
734     real_start = start & qemu_host_page_mask;
735     real_end = HOST_PAGE_ALIGN(end);
736 
737     if (start > real_start) {
738         /* handle host page containing start */
739         prot = 0;
740         for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
741             prot |= page_get_flags(addr);
742         }
743         if (real_end == real_start + qemu_host_page_size) {
744             for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
745                 prot |= page_get_flags(addr);
746             }
747             end = real_end;
748         }
749         if (prot != 0)
750             real_start += qemu_host_page_size;
751     }
752     if (end < real_end) {
753         prot = 0;
754         for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
755             prot |= page_get_flags(addr);
756         }
757         if (prot != 0)
758             real_end -= qemu_host_page_size;
759     }
760 
761     ret = 0;
762     /* unmap what we can */
763     if (real_start < real_end) {
764         if (reserved_va) {
765             mmap_reserve(real_start, real_end - real_start);
766         } else {
767             ret = munmap(g2h_untagged(real_start), real_end - real_start);
768         }
769     }
770 
771     if (ret == 0) {
772         page_set_flags(start, start + len - 1, 0);
773     }
774     mmap_unlock();
775     return ret;
776 }
777 
778 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
779                        abi_ulong new_size, unsigned long flags,
780                        abi_ulong new_addr)
781 {
782     int prot;
783     void *host_addr;
784 
785     if (!guest_range_valid_untagged(old_addr, old_size) ||
786         ((flags & MREMAP_FIXED) &&
787          !guest_range_valid_untagged(new_addr, new_size)) ||
788         ((flags & MREMAP_MAYMOVE) == 0 &&
789          !guest_range_valid_untagged(old_addr, new_size))) {
790         errno = ENOMEM;
791         return -1;
792     }
793 
794     mmap_lock();
795 
796     if (flags & MREMAP_FIXED) {
797         host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
798                            flags, g2h_untagged(new_addr));
799 
800         if (reserved_va && host_addr != MAP_FAILED) {
801             /* If new and old addresses overlap then the above mremap will
802                already have failed with EINVAL.  */
803             mmap_reserve(old_addr, old_size);
804         }
805     } else if (flags & MREMAP_MAYMOVE) {
806         abi_ulong mmap_start;
807 
808         mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
809 
810         if (mmap_start == -1) {
811             errno = ENOMEM;
812             host_addr = MAP_FAILED;
813         } else {
814             host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
815                                flags | MREMAP_FIXED,
816                                g2h_untagged(mmap_start));
817             if (reserved_va) {
818                 mmap_reserve(old_addr, old_size);
819             }
820         }
821     } else {
822         int prot = 0;
823         if (reserved_va && old_size < new_size) {
824             abi_ulong addr;
825             for (addr = old_addr + old_size;
826                  addr < old_addr + new_size;
827                  addr++) {
828                 prot |= page_get_flags(addr);
829             }
830         }
831         if (prot == 0) {
832             host_addr = mremap(g2h_untagged(old_addr),
833                                old_size, new_size, flags);
834 
835             if (host_addr != MAP_FAILED) {
836                 /* Check if address fits target address space */
837                 if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
838                     /* Revert mremap() changes */
839                     host_addr = mremap(g2h_untagged(old_addr),
840                                        new_size, old_size, flags);
841                     errno = ENOMEM;
842                     host_addr = MAP_FAILED;
843                 } else if (reserved_va && old_size > new_size) {
844                     mmap_reserve(old_addr + old_size, old_size - new_size);
845                 }
846             }
847         } else {
848             errno = ENOMEM;
849             host_addr = MAP_FAILED;
850         }
851     }
852 
853     if (host_addr == MAP_FAILED) {
854         new_addr = -1;
855     } else {
856         new_addr = h2g(host_addr);
857         prot = page_get_flags(old_addr);
858         page_set_flags(old_addr, old_addr + old_size - 1, 0);
859         page_set_flags(new_addr, new_addr + new_size - 1,
860                        prot | PAGE_VALID | PAGE_RESET);
861     }
862     mmap_unlock();
863     return new_addr;
864 }
865 
866 static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
867 {
868     ulong addr;
869 
870     if ((start | end) & ~qemu_host_page_mask) {
871         return false;
872     }
873 
874     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
875         if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
876             return false;
877         }
878     }
879 
880     return true;
881 }
882 
883 abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
884 {
885     abi_ulong len, end;
886     int ret = 0;
887 
888     if (start & ~TARGET_PAGE_MASK) {
889         return -TARGET_EINVAL;
890     }
891     len = TARGET_PAGE_ALIGN(len_in);
892 
893     if (len_in && !len) {
894         return -TARGET_EINVAL;
895     }
896 
897     end = start + len;
898     if (end < start) {
899         return -TARGET_EINVAL;
900     }
901 
902     if (end == start) {
903         return 0;
904     }
905 
906     if (!guest_range_valid_untagged(start, len)) {
907         return -TARGET_EINVAL;
908     }
909 
910     /* Translate for some architectures which have different MADV_xxx values */
911     switch (advice) {
912     case TARGET_MADV_DONTNEED:      /* alpha */
913         advice = MADV_DONTNEED;
914         break;
915     case TARGET_MADV_WIPEONFORK:    /* parisc */
916         advice = MADV_WIPEONFORK;
917         break;
918     case TARGET_MADV_KEEPONFORK:    /* parisc */
919         advice = MADV_KEEPONFORK;
920         break;
921     /* we do not care about the other MADV_xxx values yet */
922     }
923 
924     /*
925      * Most advice values are hints, so ignoring and returning success is ok.
926      *
927      * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
928      * MADV_KEEPONFORK are not hints and need to be emulated.
929      *
930      * A straight passthrough for those may not be safe because qemu sometimes
931      * turns private file-backed mappings into anonymous mappings.
932      * can_passthrough_madvise() helps to check if a passthrough is possible by
933      * comparing mappings that are known to have the same semantics in the host
934      * and the guest. In this case passthrough is safe.
935      *
936      * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
937      * return failure if not.
938      *
939      * MADV_DONTNEED is passed through as well, if possible.
940      * If passthrough isn't possible, we nevertheless (wrongly!) return
941      * success, which is broken but some userspace programs fail to work
942      * otherwise. Completely implementing such emulation is quite complicated
943      * though.
944      */
945     mmap_lock();
946     switch (advice) {
947     case MADV_WIPEONFORK:
948     case MADV_KEEPONFORK:
949         ret = -EINVAL;
950         /* fall through */
951     case MADV_DONTNEED:
952         if (can_passthrough_madvise(start, end)) {
953             ret = get_errno(madvise(g2h_untagged(start), len, advice));
954             if ((advice == MADV_DONTNEED) && (ret == 0)) {
955                 page_reset_target_data(start, start + len - 1);
956             }
957         }
958     }
959     mmap_unlock();
960 
961     return ret;
962 }
963