/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.10 (Berkeley) 02/19/95
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
/* Debug trace mask; OR of the MDB_* bits below, settable from a debugger. */
int mmapdebug = 0;
#define MDB_FOLLOW	0x01	/* trace entry to each syscall in this file */
#define MDB_SYNC	0x02	/* trace msync cleaning/flushing ranges */
#define MDB_MAPIT	0x04	/* trace vm_mmap mapping establishment */
#endif

/*
 * sbrk system call: grow/shrink the data segment.
 * Not supported by this VM system; always fails.
 */
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/*
 * sstk system call: grow/shrink the stack segment.
 * Not supported by this VM system; always fails.
 */
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args /* {
		syscallarg(int) incr;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Old getpagesize system call: return the VM page size to the caller.
 */
/* ARGSUSED */
int
compat_43_getpagesize(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

#ifdef COMPAT_43
/*
 * Old (4.3BSD) mmap system call: translate the old argument structure
 * and old OMAP_* flag/protection encodings into the current mmap_args
 * layout, then hand off to the current mmap() below.
 */
int
compat_43_mmap(p, uap, retval)
	struct proc *p;
	register struct compat_43_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pos;
	} */ *uap;
	register_t *retval;
{
	struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ nargs;
	/*
	 * Map the old protection bit encoding (bit 0 = exec, bit 1 = write,
	 * bit 2 = read) to the current PROT_* values, indexed by the low
	 * three bits of the old prot argument.
	 */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
/* Old 4.3BSD mmap flag values. */
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	SCARG(&nargs, addr) = SCARG(uap, addr);
	SCARG(&nargs, len) = SCARG(uap, len);
	SCARG(&nargs, prot) = cvtbsdprot[SCARG(uap, prot)&0x7];
	SCARG(&nargs, flags) = 0;
	if (SCARG(uap, flags) & OMAP_ANON)
		SCARG(&nargs, flags) |= MAP_ANON;
	if (SCARG(uap, flags) & OMAP_COPY)
		SCARG(&nargs, flags) |= MAP_COPY;
	if (SCARG(uap, flags) & OMAP_SHARED)
		SCARG(&nargs, flags) |= MAP_SHARED;
	else
		SCARG(&nargs, flags) |= MAP_PRIVATE;
	if (SCARG(uap, flags) & OMAP_FIXED)
		SCARG(&nargs, flags) |= MAP_FIXED;
	if (SCARG(uap, flags) & OMAP_INHERIT)
		SCARG(&nargs, flags) |= MAP_INHERIT;
	SCARG(&nargs, fd) = SCARG(uap, fd);
	SCARG(&nargs, pos) = SCARG(uap, pos);
	return (mmap(p, &nargs, retval));
}
#endif

/*
 * mmap system call: validate the user's arguments (alignment, length,
 * address range, file descriptor and file/memory protection
 * compatibility), then establish the mapping via vm_mmap() below.
 * On success the chosen address is returned to the user in *retval.
 */
int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr, pos;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	pos = SCARG(uap, pos);
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len), prot,
		       flags, SCARG(uap, fd), pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 *
	 * XXX most (all?) vendors require that the file offset be
	 * page aligned as well.  However, we already have applications
	 * (e.g. nlist) that rely on unrestricted alignment.  Since we
	 * support it, let it happen.
	 */
	addr = (vm_offset_t) SCARG(uap, addr);
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
#if 0
	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
#endif
	    (ssize_t)SCARG(uap, len) < 0 ||
	    ((flags & MAP_ANON) && SCARG(uap, fd) != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(SCARG(uap, len));
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
		if (addr > addr + size)
			return (EINVAL);
	}
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)SCARG(uap, fd)) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon
		 * memory (ala SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?
			 * What if proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				/* Private mapping: writes never reach the file. */
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t)vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos);
	if (error == 0)
		*retval = (register_t)addr;
	return (error);
}

/*
 * msync system call: flush (and here also invalidate) the pages covering
 * [addr, addr+len) back to their backing object.  A zero length means
 * "the whole region containing addr" (approximated by the map entry).
 */
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	/* Address must be page aligned and the range must not wrap. */
	if (((vm_offset_t)SCARG(uap, addr) & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t)SCARG(uap, addr);
	size = (vm_size_t)SCARG(uap, len);
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (!rv)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		       addr, addr+size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement
	 * Sun's MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.  Otherwise, we could
	 * pass this in to implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}

/*
 * munmap system call: remove the mapping covering [addr, addr+len)
 * from the process address space.  The length is rounded up to a
 * page boundary; a zero length is a successful no-op.
 */
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif

	addr = (vm_offset_t) SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
		return(EINVAL);
	size = (vm_size_t) round_page(SCARG(uap, len));
	if (size == 0)
		return(0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 * XXX this seemed overly restrictive, so we relaxed it.
	 */
#if 0
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return(EINVAL);
#endif
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr+size);
	return(0);
}

/*
 * Clear the UF_MAPPED flag on descriptor fd; called when the last
 * reference to a mapped descriptor goes away.  Note that the regions
 * themselves are NOT deallocated (see the XXX below).
 */
void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

/*
 * mprotect system call: change the current protection of the pages
 * covering [addr, addr+len) to prot (clipped to VM_PROT_ALL).
 */
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len),
		       SCARG(uap, prot));
#endif

	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
		return(EINVAL);
	size = (vm_size_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/* FALSE: set current protection only, not the maximum. */
	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

/*
 * madvise system call: advise the VM system of expected access
 * behavior.  Not supported by this VM system; always fails.
 */
/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) behav;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/*
 * mincore system call: report core residency of a range of pages.
 * Not supported by this VM system; always fails.
 */
/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(char *) vec;
	} */ *uap;
	register_t *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/*
 * mlock system call: wire the pages covering [addr, addr+len) into
 * physical memory, subject to the system-wide wired-page limit and
 * either the per-process RLIMIT_MEMLOCK limit or superuser privilege
 * (depending on whether pmap_wired_count is available).
 */
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	/* Address must be page aligned and the range must not wrap. */
	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
	size = round_page((vm_size_t)SCARG(uap, len));
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	/* FALSE: wire the pages (make them not pageable). */
	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * munlock system call: unwire the pages covering [addr, addr+len),
 * making them pageable again.  Without pmap_wired_count this
 * operation requires superuser privilege, mirroring mlock above.
 */
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
	} */ *uap;
	register_t *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
#endif
	/* Address must be page aligned and the range must not wrap. */
	addr = (vm_offset_t)SCARG(uap, addr);
	if ((addr & PAGE_MASK) ||
	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
		return (EINVAL);
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)SCARG(uap, len));

	/* TRUE: make the pages pageable (unwire them). */
	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		/* Non-fixed: let the VM system find space at/after the hint. */
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		/* Fixed: blow away any existing mapping in the range first. */
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existance of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was sucessful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
						    fitit, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
					 addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Build a throw-away map with the file mapped at
			 * offset zero, then copy it into the target range;
			 * the copy machinery establishes the COW semantics.
			 */
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
					     VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
						    TRUE, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that we need to guarentee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleeze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
					 FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
					      &tentry, &tobject, &toffset,
					      &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	/* Translate VM-system KERN_* status into an errno for the caller. */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}