/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"
#include "qemu/interval-tree.h"

#ifdef TARGET_ARM
#include "target/arm/cpu-features.h"
#endif

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
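
/*
 * Note that mmap_lock is a counted lock: the helpers above let the same
 * thread take it recursively, which is relied upon below when, for
 * example, target_mmap() calls target_mprotect() or itself while already
 * holding the lock.  Only the final mmap_unlock() releases the pthread
 * mutex.
 */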

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Protected by mmap_lock. */
static IntervalTreeRoot shm_regions;

static void shm_region_add(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);

    i->start = start;
    i->last = last;
    interval_tree_insert(i, &shm_regions);
}

static abi_ptr shm_region_find(abi_ptr start)
{
    IntervalTreeNode *i;

    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
         i = interval_tree_iter_next(i, start, start)) {
        if (i->start == start) {
            return i->last;
        }
    }
    return 0;
}

static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i, *n;

    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
        n = interval_tree_iter_next(i, start, last);
        if (i->start >= start && i->last <= last) {
            interval_tree_remove(i, &shm_regions);
            g_free(i);
        }
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}
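
/*
 * For example, a guest mapping requested with only PROT_EXEC is created
 * on the host with PROT_READ: the host never executes guest code
 * directly, it only needs to read those pages in order to translate
 * them.  Likewise, bits such as TARGET_PROT_BTI or TARGET_PROT_MTE are
 * never forwarded to the host; they are only recorded in page_flags
 * (PAGE_BTI, PAGE_MTE) by validate_prot_to_pageflags() above.
 */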

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
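
/*
 * Worked example: with 64K host pages and 4K target pages, a guest
 * mprotect() of [0x13000, 0x34fff] touches host pages [0x10000, 0x3ffff].
 * The head host page [0x10000, 0x1ffff] and the tail host page
 * [0x30000, 0x3ffff] also contain guest pages outside the request, so
 * they receive the OR of the new protection and their neighbours'
 * existing flags, while the middle [0x20000, 0x2ffff] receives exactly
 * target_prot -- which is why up to three ranges are built above.
 */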

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, qemu_host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}
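
/*
 * For example, with 4K target pages on a 64K-page host, a guest mmap()
 * whose first guest page is 0x13000 only partially covers host page
 * [0x10000, 0x1ffff].  mmap_frag() keeps (or, if no other guest pages
 * live there, freshly allocates) that single host page, temporarily
 * makes it writable, zeroes or pread()s just the requested fragment
 * into place, and then restores a protection that is the OR of the old
 * and new guest protections for the page.
 */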

abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}
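
/*
 * With reserved_va, the whole guest address space has already been
 * reserved from the host up front, so the search above only consults
 * QEMU's own page flags via page_find_range_empty() rather than probing
 * the host with mmap(); mmap_reserve_or_unmap() below is what restores
 * such reservations when ranges are released.
 */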

/*
 * Find and reserve a free memory area of size 'size'. The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
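
/*
 * Within this file, mmap_find_vma() is called with align = TARGET_PAGE_SIZE
 * by target_mmap() and target_mremap() when the guest lets the kernel pick
 * an address, and with align = MAX(SHMLBA, shmlba) by target_shmat() so
 * that the host's own shmat() alignment requirement is honoured as well.
 */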

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K. But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
    shm_region_rm_complete(start, last);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}
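
/*
 * The [passthrough_start, passthrough_last] range recorded above covers
 * exactly those pages whose host mapping matches the guest mapping
 * one-to-one; they are flagged PAGE_PASSTHROUGH, which is what later
 * allows target_madvise() to forward advice such as MADV_DONTNEED
 * directly to the host for those pages.
 */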

static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return 0;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return 0;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        return ptr == host_start ? 0 : -1;
    }
    return munmap(host_start, real_len);
}
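
/*
 * Note the two modes above: without reserved_va the host pages are truly
 * munmap()ed, while with reserved_va they are replaced by a fresh
 * PROT_NONE, MAP_NORESERVE reservation so that the hole stays owned by
 * QEMU and is not reused by unrelated host allocations.
 */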

int target_munmap(abi_ulong start, abi_ulong len)
{
    int ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        errno = EINVAL;
        return -1;
    }

    mmap_lock();
    ret = mmap_reserve_or_unmap(start, len);
    if (likely(ret == 0)) {
        page_set_flags(start, start + len - 1, 0);
        shm_region_rm_complete(start, start + len - 1);
    }
    mmap_unlock();

    return ret;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int page_flags = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                page_flags |= page_get_flags(addr);
            }
        }
        if (page_flags == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        shm_region_rm_complete(old_addr, old_addr + old_size - 1);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
        shm_region_rm_complete(new_addr, new_addr + new_size - 1);
    }
    mmap_unlock();
    return new_addr;
}
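
/*
 * Summary of the three paths above: MREMAP_FIXED hands the fixed
 * destination straight to the host mremap(); MREMAP_MAYMOVE first
 * reserves a suitable guest range with mmap_find_vma() and then forces
 * the host to move there; and the in-place case only attempts the host
 * mremap() when either reserved_va is off or the guest pages just after
 * the old mapping are unused.
 */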

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise. Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
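
/*
 * For instance, MADV_WIPEONFORK on a range that QEMU had to emulate
 * without a one-to-one host mapping (no PAGE_PASSTHROUGH) fails with
 * -EINVAL rather than being silently ignored, whereas MADV_DONTNEED on
 * such a range returns success without doing anything, as described in
 * the comment above.
 */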

#ifndef TARGET_FORCE_SHMLBA
/*
 * For most architectures, SHMLBA is the same as the page size;
 * some architectures have larger values, in which case they should
 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
 * and defining its own value for SHMLBA.
 *
 * The kernel also permits SHMLBA to be set by the architecture to a
 * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
 * this means that addresses are rounded to the large size if
 * SHM_RND is set but addresses not aligned to that size are not rejected
 * as long as they are at least page-aligned.  Since the only architecture
 * which uses this is ia64, this code doesn't provide for that oddity.
 */
static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
{
    return TARGET_PAGE_SIZE;
}
#endif

abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                       abi_ulong shmaddr, int shmflg)
{
    CPUState *cpu = env_cpu(cpu_env);
    abi_ulong raddr;
    struct shmid_ds shm_info;
    int ret;
    abi_ulong shmlba;

    /* shmat pointers are always untagged */

    /* find out the length of the shared memory segment */
    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
    if (is_error(ret)) {
        /* can't get length, bail out */
        return ret;
    }

    shmlba = target_shmlba(cpu_env);

    if (shmaddr & (shmlba - 1)) {
        if (shmflg & SHM_RND) {
            shmaddr &= ~(shmlba - 1);
        } else {
            return -TARGET_EINVAL;
        }
    }
    if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
        return -TARGET_EINVAL;
    }

    WITH_MMAP_LOCK_GUARD() {
        void *host_raddr;
        abi_ulong last;

        if (shmaddr) {
            host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
        } else {
            abi_ulong mmap_start;

            /* In order to use the host shmat, we need to honor host SHMLBA. */
            mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
                                       MAX(SHMLBA, shmlba));

            if (mmap_start == -1) {
                return -TARGET_ENOMEM;
            }
            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
                               shmflg | SHM_REMAP);
        }

        if (host_raddr == (void *)-1) {
            return get_errno(-1);
        }
        raddr = h2g(host_raddr);
        last = raddr + shm_info.shm_segsz - 1;

        page_set_flags(raddr, last,
                       PAGE_VALID | PAGE_RESET | PAGE_READ |
                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));

        shm_region_rm_complete(raddr, last);
        shm_region_add(raddr, last);
    }

    /*
     * We're mapping shared memory, so ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (!(cpu->tcg_cflags & CF_PARALLEL)) {
        cpu->tcg_cflags |= CF_PARALLEL;
        tb_flush(cpu);
    }

    return raddr;
}
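
/*
 * Each successful attach is recorded in shm_regions above, so that
 * target_shmdt() below can recover the segment size from nothing but
 * the attach address, as needed to clear the page flags and to
 * re-reserve or unmap the range after the host shmdt().
 */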

abi_long target_shmdt(abi_ulong shmaddr)
{
    abi_long rv;

    /* shmdt pointers are always untagged */

    WITH_MMAP_LOCK_GUARD() {
        abi_ulong last = shm_region_find(shmaddr);
        if (last == 0) {
            return -TARGET_EINVAL;
        }

        rv = get_errno(shmdt(g2h_untagged(shmaddr)));
        if (rv == 0) {
            abi_ulong size = last - shmaddr + 1;

            page_set_flags(shmaddr, last, 0);
            shm_region_rm_complete(shmaddr, last);
            mmap_reserve_or_unmap(shmaddr, size);
        }
    }
    return rv;
}