/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
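
/*
 * Worked example (illustrative only, assuming a target without BTI/MTE
 * and the usual Linux identity between the PROT_* and PAGE_* bit values):
 * validate_prot_to_pageflags(PROT_READ | PROT_WRITE) yields
 * PAGE_READ | PAGE_WRITE | PAGE_VALID, since PAGE_BITS covers the
 * read/write/exec bits; a request carrying any bit outside VALID yields
 * 0, which the callers turn into -TARGET_EINVAL.
 */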

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
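
/*
 * Worked example for the splitting above (illustrative only, assuming
 * 8K host pages and 4K target pages): target_mprotect(0x2000, 0x1000,
 * PROT_READ) lands in the host page covering 0x2000-0x3fff.  The guest
 * page at 0x3000 shares that host page, so its flags are OR-ed into
 * prot1 and the host mprotect() uses the union, while page_set_flags()
 * records PROT_READ for 0x2000-0x2fff only.
 */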

/* Map an incomplete host page. */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection. */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}
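
/*
 * Example of the fragment case (illustrative only, assuming 64K host
 * pages and 4K target pages): mapping guest range 0x11000-0x11fff
 * touches only part of host page 0x10000-0x1ffff.  If no guest page
 * outside the fragment is valid, a fresh anonymous host page is mapped
 * first; either way the host page is made writable if needed, the
 * fragment is zeroed or pread() from the file, and the union of old and
 * new protections is applied at the end.
 */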

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    /*
     * Start at the top of the address space, ignoring the last page.
     * If reserved_va == UINT32_MAX, then end_addr wraps to 0,
     * throwing the rest of the calculations off.
     * TODO: rewrite using last_addr instead.
     * TODO: use the interval tree instead of probing every page.
     */
    if (start > reserved_va - size) {
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use. */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched. */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space (see above). */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page. */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free. */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}
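
/*
 * Illustrative walk-through (assuming reserved_va = 0xffffffff,
 * size = 0x10000, align = 0x10000): a start too close to the top gives
 * end_addr = ((0xffffffff - 0x10000) & -0x10000) + 0x10000 = 0xffff0000,
 * and the loop then probes downward one host page at a time.  On hitting
 * a used page it restarts just below it, and at most one wrap back to
 * the top is attempted before the search returns -1.
 */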

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
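
/*
 * Typical use (illustrative): target_mremap() below calls
 * mmap_find_vma(0, new_size, TARGET_PAGE_SIZE), so the search begins at
 * mmap_next_start; the PROT_NONE reservation returned here is then
 * immediately replaced by the caller with mremap(..., MREMAP_FIXED, ...)
 * or mmap(..., MAP_FIXED, ...), as the comment in the loop describes.
 */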

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on 64-bit host with 32-bit target.
         * On any other target/host combination, the host mmap()
         * handles this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* Validate that the chosen range is empty. */
        if ((flags & MAP_FIXED_NOREPLACE)
            && !page_check_range_empty(start, end - 1)) {
            errno = EEXIST;
            goto fail;
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                if (!mmap_frag(real_start, start, end - 1,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            if (!mmap_frag(real_end - qemu_host_page_size,
                           real_end - qemu_host_page_size, end - 1,
                           target_prot, flags, fd,
                           offset + real_end - qemu_host_page_size - start)) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            off_t offset1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     target_to_host_prot(target_prot), flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
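
    /*
     * At this point [passthrough_start, passthrough_end) covers the part
     * of the guest range that was mapped directly by the host mmap()
     * calls above, as opposed to fragments synthesized via mmap_frag()
     * or the unaligned-offset pread() path.  Only those pages keep host
     * semantics and are marked PAGE_PASSTHROUGH below, which
     * can_passthrough_madvise() later relies on.
     */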
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len - 1, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end - 1,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len - 1, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len - 1, 0);
    }
    mmap_unlock();
    return ret;
}
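
/*
 * Example of the trimming above (illustrative only, assuming 8K host
 * pages and 4K target pages): munmapping guest range 0x1000-0x2fff when
 * the guest page at 0x0000 is still mapped but 0x3000 is not.  real_start
 * advances from 0x0000 to 0x2000 to protect the live neighbour, real_end
 * stays 0x4000, so the host munmap() (or, with reserved_va, the PROT_NONE
 * re-reservation) covers 0x2000-0x3fff; the partial host page keeps its
 * mapping and only the QEMU page flags for 0x1000-0x2fff are cleared.
 */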

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}
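
/*
 * Note (illustrative): a range qualifies for passthrough only if it is
 * host-page aligned at both ends and every target page in it was mapped
 * directly by the host (PAGE_PASSTHROUGH), i.e. was not synthesized from
 * anonymous memory by mmap_frag() or the unaligned-offset pread() path
 * in target_mmap().
 */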

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() helps to check if a passthrough is possible by
     * comparing mappings that are known to have the same semantics in the host
     * and the guest.  In this case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}
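
/*
 * Behavior sketch (illustrative): MADV_WIPEONFORK on a range that QEMU
 * synthesized from anonymous memory fails with -EINVAL, since wiping it
 * would not match the guest's view, while MADV_DONTNEED on the same
 * range returns 0 without doing anything, trading correctness for
 * compatibility as described above.
 */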