/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"
#include "qemu/interval-tree.h"

#ifdef TARGET_ARM
#include "target/arm/cpu-features.h"
#endif

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Protected by mmap_lock. */
static IntervalTreeRoot shm_regions;

static void shm_region_add(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);

    i->start = start;
    i->last = last;
    interval_tree_insert(i, &shm_regions);
}

static abi_ptr shm_region_find(abi_ptr start)
{
    IntervalTreeNode *i;

    for (i = interval_tree_iter_first(&shm_regions, start, start); i;
         i = interval_tree_iter_next(i, start, start)) {
        if (i->start == start) {
            return i->last;
        }
    }
    return 0;
}

static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
{
    IntervalTreeNode *i, *n;

    for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
        n = interval_tree_iter_next(i, start, last);
        if (i->start >= start && i->last <= last) {
            interval_tree_remove(i, &shm_regions);
            g_free(i);
        }
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
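
/*
 * Illustrative example (editor's sketch, not part of the original file):
 * PAGE_READ/PAGE_WRITE/PAGE_EXEC share their numeric values with the
 * host PROT_* constants, so an ordinary read/write request maps straight
 * through to page flags, while any bit outside 'valid' rejects the whole
 * mask:
 *
 *     assert(validate_prot_to_pageflags(PROT_READ | PROT_WRITE)
 *            == (PAGE_READ | PAGE_WRITE | PAGE_VALID));
 *     assert(validate_prot_to_pageflags(-1) == 0);
 */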

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}
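
/*
 * Illustrative example (editor's sketch, not part of the original file):
 * guest-executable pages are only ever read by the translator, so
 * PROT_EXEC is folded into PROT_READ for the host mapping:
 *
 *     assert(target_to_host_prot(PROT_READ | PROT_EXEC) == PROT_READ);
 *     assert(target_to_host_prot(PROT_WRITE | PROT_EXEC)
 *            == (PROT_READ | PROT_WRITE));
 */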

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & -host_page_size;
    host_last = ROUND_UP(last, host_page_size) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p != host_start) {
            if (p != MAP_FAILED) {
                munmap(p, host_page_size);
                errno = EEXIST;
            }
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, host_page_size, host_prot_new);
    }
    return true;
}
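
/*
 * Worked example for mmap_frag (editor's sketch, not part of the original
 * file): on a 64K-page host emulating a 4K-page target, a guest mapping of
 * [0x11000, 0x11fff] shares host page [0x10000, 0x1ffff] with any guest
 * pages already present there:
 *
 *     mmap_frag(0x10000, 0x11000, 0x11fff, prot, flags, fd, offset);
 *
 * If no other guest page on the host page is valid, the whole host page
 * is (re)allocated anonymously; otherwise the existing host page is kept,
 * made temporarily writable if necessary, and only the guest's fragment
 * is zeroed (MAP_ANONYMOUS) or filled via pread() before the combined
 * protection of old and new guest pages is applied.
 */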

abi_ulong task_unmapped_base;
abi_ulong elf_et_dyn_base;
abi_ulong mmap_next_start;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    int host_page_size = qemu_real_host_page_size();
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= -host_page_size;
    }
    start = ROUND_UP(start, align);
    size = ROUND_UP(size, host_page_size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = NULL;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= task_unmapped_base) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
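
/*
 * Usage sketch (editor's example, not part of the original file): callers
 * hold the mmap lock and ask for a page-aligned hole; the PROT_NONE
 * reservation made above keeps a concurrent host mmap() from racing into
 * the chosen range until the caller replaces it with MAP_FIXED:
 *
 *     mmap_lock();
 *     abi_ulong addr = mmap_find_vma(0, 16 * TARGET_PAGE_SIZE,
 *                                    TARGET_PAGE_SIZE);
 *     if (addr != (abi_ulong)-1) {
 *         void *p = mmap(g2h_untagged(addr), 16 * TARGET_PAGE_SIZE,
 *                        PROT_READ | PROT_WRITE,
 *                        MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *     }
 *     mmap_unlock();
 */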

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & -host_page_size;
    host_offset = offset & -host_page_size;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = ROUND_UP(host_len, host_page_size);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if (host_page_size < TARGET_PAGE_SIZE && !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = ROUND_UP(sb.st_size - offset, host_page_size);
        }
    }
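
    /*
     * Editor's note (illustrative, not part of the original file): the
     * !MAP_FIXED path below reserves host_len bytes anonymously at the
     * address found above, then, for file mappings, overlays the file
     * with MAP_FIXED at the host-page-aligned offset.  E.g. mapping 8K
     * from file offset 0x1800 on a 4K-page host first reserves three host
     * pages, then maps the file at offset 0x1000 inside that reservation,
     * and finally points 'start' at reservation + 0x800.
     */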

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = ROUND_UP(host_len, host_page_size);
        host_prot = target_to_host_prot(target_prot);

        /* Note: we prefer to control the mapping address. */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = ROUND_UP(last, host_page_size) - 1;

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        if (flags & MAP_FIXED_NOREPLACE) {
            /* Validate that the chosen range is empty. */
            if (!page_check_range_empty(start, last)) {
                errno = EEXIST;
                goto fail;
            }

            /*
             * With reserved_va, the entire address space is mmaped in the
             * host to ensure it isn't accidentally used for something else.
             * We have just checked that the guest address is not mapped
             * within the guest, but need to replace the host reservation.
             *
             * Without reserved_va, despite the guest address check above,
             * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
             * any host address mappings.
             */
            if (reserved_va) {
                flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
            }
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & (host_page_size - 1)) != (start & (host_page_size - 1))) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= host_page_size;
        }
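
        /*
         * Editor's worked example (illustrative, not part of the original
         * file): with 4K guest pages on a 64K-page host, a fixed mapping
         * of [0x19000, 0x3afff] is handled in three pieces: a leading
         * fragment [0x19000, 0x1ffff] and a trailing fragment
         * [0x30000, 0x3afff] via mmap_frag() above, leaving the host-page
         * aligned middle [0x20000, 0x2ffff] for the direct mmap() below.
         */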

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p, *want_p;
            off_t offset1;
            size_t len1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            len1 = real_last - real_start + 1;
            want_p = g2h_untagged(real_start);

            p = mmap(want_p, len1, target_to_host_prot(target_prot),
                     flags, fd, offset1);
            if (p != want_p) {
                if (p != MAP_FAILED) {
                    munmap(p, len1);
                    errno = EEXIST;
                }
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
    shm_region_rm_complete(start, last);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}
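
/*
 * Usage sketch (editor's example, not part of the original file): a guest
 * anonymous mmap(2) arrives here roughly as
 *
 *     abi_long ret = target_mmap(0, 8192, PROT_READ | PROT_WRITE,
 *                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 * which finds a hole with mmap_find_vma(), performs the host mapping, and
 * records the page flags (including PAGE_ANON and PAGE_RESET) for the
 * guest range before returning the guest address.
 */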

static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    int host_page_size = qemu_real_host_page_size();
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & -host_page_size;
    real_last = ROUND_UP(last, host_page_size) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return 0;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= host_page_size;
        }

        if (real_last < real_start) {
            return 0;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        return ptr == host_start ? 0 : -1;
    }
    return munmap(host_start, real_len);
}
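
/*
 * Editor's worked example (illustrative, not part of the original file):
 * unmapping guest range [0x19000, 0x1afff] with 4K guest pages on a
 * 64K-page host touches only host page [0x10000, 0x1ffff].  If any other
 * guest page on that host page is still valid, nothing can be released
 * and the function returns 0; otherwise, with reserved_va the host page
 * is re-reserved PROT_NONE rather than munmap()ed, keeping the guest
 * address space claimed on the host.
 */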

int target_munmap(abi_ulong start, abi_ulong len)
{
    int ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        return -1;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        errno = EINVAL;
        return -1;
    }

    mmap_lock();
    ret = mmap_reserve_or_unmap(start, len);
    if (likely(ret == 0)) {
        page_set_flags(start, start + len - 1, 0);
        shm_region_rm_complete(start, start + len - 1);
    }
    mmap_unlock();

    return ret;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int page_flags = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                page_flags |= page_get_flags(addr);
            }
        }
        if (page_flags == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        shm_region_rm_complete(old_addr, old_addr + old_size - 1);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
        shm_region_rm_complete(new_addr, new_addr + new_size - 1);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
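
/*
 * Illustrative example (editor's sketch, not part of the original file):
 * MADV_DONTNEED is only forwarded when every page in the range is
 * PAGE_PASSTHROUGH, i.e. the host mapping really is what the guest
 * believes it is:
 *
 *     if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
 *         madvise(g2h_untagged(start), len, MADV_DONTNEED);
 *     }
 *
 * Otherwise the call is swallowed and 0 is returned, matching the
 * deliberately lax behaviour documented above.
 */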

#ifndef TARGET_FORCE_SHMLBA
/*
 * For most architectures, SHMLBA is the same as the page size;
 * some architectures have larger values, in which case they should
 * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
 * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
 * and defining its own value for SHMLBA.
 *
 * The kernel also permits SHMLBA to be set by the architecture to a
 * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
 * this means that addresses are rounded to the large size if
 * SHM_RND is set but addresses not aligned to that size are not rejected
 * as long as they are at least page-aligned.  Since the only architecture
 * which uses this is ia64 this code doesn't provide for that oddity.
 */
static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
{
    return TARGET_PAGE_SIZE;
}
#endif

abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
                       abi_ulong shmaddr, int shmflg)
{
    CPUState *cpu = env_cpu(cpu_env);
    abi_ulong raddr;
    struct shmid_ds shm_info;
    int ret;
    abi_ulong shmlba;

    /* shmat pointers are always untagged */

    /* find out the length of the shared memory segment */
    ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
    if (is_error(ret)) {
        /* can't get length, bail out */
        return ret;
    }

    shmlba = target_shmlba(cpu_env);

    if (shmaddr & (shmlba - 1)) {
        if (shmflg & SHM_RND) {
            shmaddr &= ~(shmlba - 1);
        } else {
            return -TARGET_EINVAL;
        }
    }
    if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
        return -TARGET_EINVAL;
    }

    WITH_MMAP_LOCK_GUARD() {
        void *host_raddr;
        abi_ulong last;

        if (shmaddr) {
            host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
        } else {
            abi_ulong mmap_start;

            /* In order to use the host shmat, we need to honor host SHMLBA. */
            mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
                                       MAX(SHMLBA, shmlba));

            if (mmap_start == -1) {
                return -TARGET_ENOMEM;
            }
            host_raddr = shmat(shmid, g2h_untagged(mmap_start),
                               shmflg | SHM_REMAP);
        }

        if (host_raddr == (void *)-1) {
            return get_errno(-1);
        }
        raddr = h2g(host_raddr);
        last = raddr + shm_info.shm_segsz - 1;

        page_set_flags(raddr, last,
                       PAGE_VALID | PAGE_RESET | PAGE_READ |
                       (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));

        shm_region_rm_complete(raddr, last);
        shm_region_add(raddr, last);
    }

    /*
     * We're mapping shared memory, so ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (!(cpu->tcg_cflags & CF_PARALLEL)) {
        cpu->tcg_cflags |= CF_PARALLEL;
        tb_flush(cpu);
    }

    return raddr;
}

abi_long target_shmdt(abi_ulong shmaddr)
{
    abi_long rv;

    /* shmdt pointers are always untagged */

    WITH_MMAP_LOCK_GUARD() {
        abi_ulong last = shm_region_find(shmaddr);
        if (last == 0) {
            return -TARGET_EINVAL;
        }

        rv = get_errno(shmdt(g2h_untagged(shmaddr)));
        if (rv == 0) {
            abi_ulong size = last - shmaddr + 1;

            page_set_flags(shmaddr, last, 0);
            shm_region_rm_complete(shmaddr, last);
            mmap_reserve_or_unmap(shmaddr, size);
        }
    }
    return rv;
}