/*
 * (MPSAFE)
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $FreeBSD: src/sys/vm/vm_mmap.c,v 1.108.2.6 2002/07/02 20:06:19 dillon Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysmsg.h>
#include <sys/filedesc.h>
#include <sys/kern_syscall.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <sys/file2.h>
#include <sys/thread.h>
#include <vm/vm_page2.h>

static int max_proc_mmap = 1000000;
SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, "");
int vkernel_enable;
SYSCTL_INT(_vm, OID_AUTO, vkernel_enable, CTLFLAG_RW, &vkernel_enable, 0, "");

/*
 * sstk_args(int incr)
 *
 * MPSAFE
 */
int
sys_sstk(struct sysmsg *sysmsg, const struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

/*
 * mmap_args(void *addr, size_t len, int prot, int flags, int fd,
 *	      long pad, off_t pos)
 *
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 *
 * Block devices can be mmap'd no matter what they represent.  Cache coherency
 * is maintained as long as you do not write directly to the underlying
 * character device.
 *
 * No requirements
 */
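/*
 * Illustrative userland usage (editorial sketch only, not part of the
 * build; "fd" and "hint" are placeholders):
 *
 *	// Offset not page aligned: the kernel maps from trunc_page(pos)
 *	// and the returned pointer is bumped by (pos & PAGE_MASK).
 *	char *p = mmap(NULL, 1000, PROT_READ, MAP_SHARED, fd, 4096 + 100);
 *
 *	// With MAP_FIXED the address and the offset must be congruent
 *	// modulo PAGE_SIZE or the call fails with EINVAL.
 *	char *q = mmap(hint + 100, 1000, PROT_READ, MAP_SHARED | MAP_FIXED,
 *		       fd, 4096 + 100);
 */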
int
kern_mmap(struct vmspace *vms, caddr_t uaddr, size_t ulen,
	  int uprot, int uflags, int fd, off_t upos, void **res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp = NULL;
	struct vnode *vp;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	void *handle;
	int flags, error;
	off_t pos;
	vm_object_t obj;

	KKASSERT(p);

	addr = (vm_offset_t) uaddr;
	size = ulen;
	prot = uprot & VM_PROT_ALL;
	flags = uflags;
	pos = upos;

	/*
	 * Make sure mapping fits into numeric range etc.
	 *
	 * NOTE: We support the full unsigned range for size now.
	 */
	if (((flags & MAP_ANON) && (fd != -1 || pos != 0)))
		return (EINVAL);

	if (size == 0)
		return (EINVAL);

	if (flags & MAP_STACK) {
		if (fd != -1)
			return (EINVAL);
		if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}

	/*
	 * Virtual page table support has been removed and now always
	 * returns EOPNOTSUPP.
	 */
	if (flags & MAP_VPAGETABLE)
		return (EOPNOTSUPP);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Adjust size for rounding (on both ends). */
	size += pageoff;			/* low end... */
	size = (vm_size_t) round_page(size);	/* hi end */
	if (size < ulen)			/* wrap */
		return(EINVAL);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & (MAP_FIXED | MAP_TRYFIXED)) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/*
		 * Address range must be all in user VM space and not wrap.
		 */
		tmpaddr = addr + size;
		if (tmpaddr < addr)
			return (EINVAL);
		if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
			return (EINVAL);
		if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
			return (EINVAL);
	} else {
		/*
		 * Get a hint of where to map.  It also provides mmap offset
		 * randomization if enabled.
		 */
		addr = vm_map_hint(p, addr, prot);
	}

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation.  Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		fp = holdfp(td, fd, -1);
		if (fp == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE) {
			error = EINVAL;
			goto done;
		}
		/*
		 * POSIX shared-memory objects are defined to have
		 * kernel persistence, and are not defined to support
		 * read(2)/write(2) -- or even open(2).  Thus, we can
		 * use MAP_NOSYNC to trade on-disk coherence for speed.
		 * The shm_open(3) library routine turns on the FPOSIXSHM
		 * flag to request this behavior.
		 */
		if (fp->f_flag & FPOSIXSHM)
			flags |= MAP_NOSYNC;
		vp = (struct vnode *) fp->f_data;

		/*
		 * Validate the vnode for the operation.
		 */
		switch(vp->v_type) {
		case VREG:
			/*
			 * Get the proper underlying object
			 */
			if ((obj = vp->v_object) == NULL) {
				error = EINVAL;
				goto done;
			}
			KKASSERT((struct vnode *)obj->handle == vp);
			break;
		case VCHR:
			/*
			 * Make sure a device has not been revoked.
			 * Mappability is handled by the device layer.
			 */
			if (vp->v_rdev == NULL) {
				error = EBADF;
				goto done;
			}
			break;
		default:
			/*
			 * Nothing else is mappable.
			 */
			error = EINVAL;
			goto done;
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
			pos = 0;
		} else {
			/*
			 * cdevs do not provide private mappings of any kind.
			 */
			if (vp->v_type == VCHR &&
			    (flags & (MAP_PRIVATE|MAP_COPY))) {
				error = EINVAL;
				goto done;
			}
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?  What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;
			if (fp->f_flag & FREAD) {
				maxprot |= VM_PROT_READ;
			} else if (prot & PROT_READ) {
				error = EACCES;
				goto done;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.  Check for superuser, only if
			 * we're at securelevel < 1, to allow the XIG X server
			 * to continue to work.
			 *
			 * PROT_WRITE + MAP_SHARED
			 */
			if ((flags & MAP_SHARED) != 0 || vp->v_type == VCHR) {
				if ((fp->f_flag & FWRITE) != 0) {
					struct vattr va;
					if ((error = VOP_GETATTR(vp, &va))) {
						goto done;
					}
					if ((va.va_flags &
					     (IMMUTABLE|APPEND)) == 0) {
						maxprot |= VM_PROT_WRITE;

						/*
						 * SHARED+RW regular file mmap()
						 * updates v_lastwrite_ts.
						 */
						if ((prot & PROT_WRITE) &&
						    vp->v_type == VREG &&
						    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY) == 0) {
							vfs_timestamp(&vp->v_lastwrite_ts);
							vsetflags(vp, VLASTWRITETS);
							vn_unlock(vp);
						}
					} else if (prot & PROT_WRITE) {
						error = EPERM;
						goto done;
					}
				} else if ((prot & PROT_WRITE) != 0) {
					error = EACCES;
					goto done;
				}
			} else {
				maxprot |= VM_PROT_WRITE;
			}
			handle = (void *)vp;
		}
	}
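
	/*
	 * Editorial sketch of how the checks above combine for a regular
	 * file opened O_RDONLY ("fd" and "len" are placeholders):
	 *
	 *	mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd, 0)  -> EACCES,
	 *	    because FWRITE is not set and maxprot never gains
	 *	    VM_PROT_WRITE;
	 *	mmap(NULL, len, PROT_WRITE, MAP_PRIVATE, fd, 0) -> allowed,
	 *	    since a private mapping is copy-on-write and never
	 *	    writes back to the file.
	 */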

	lwkt_gettoken(&vms->vm_map.token);

	/*
	 * Do not allow more than a certain number of vm_map_entry structures
	 * per process.  0 to disable.
	 */
	if (max_proc_mmap && vms->vm_map.nentries >= max_proc_mmap) {
		error = ENOMEM;
		lwkt_reltoken(&vms->vm_map.token);
		goto done;
	}

	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
			flags, handle, pos, fp);
	if (error == 0)
		*res = (void *)(addr + pageoff);

	lwkt_reltoken(&vms->vm_map.token);
done:
	if (fp)
		dropfp(td, fd, fp);

	return (error);
}

/*
 * mmap system call handler
 *
 * No requirements.
 */
int
sys_mmap(struct sysmsg *sysmsg, const struct mmap_args *uap)
{
	int error;
	int flags = uap->flags;
	off_t upos = uap->pos;

	/*
	 * Work around fairly serious problems with trying to have an
	 * auto-grow stack segment related to other unrelated calls to
	 * mmap() potentially getting addresses within such segments.
	 *
	 * Our attempt to use TRYFIXED to mediate the problem basically
	 * failed.  For example, rtld-elf uses it to try to optimize
	 * shlib placement, but could run afoul of this issue.
	 *
	 * The only remaining true MAP_STACK we allow is the user stack as
	 * created by the exec code.  All userland MAP_STACK's are converted
	 * to normal mmap()s right here.
	 */
	if (flags & MAP_STACK) {
		if (uap->fd != -1)
			return (EINVAL);
		if ((uap->prot & (PROT_READ|PROT_WRITE)) !=
		    (PROT_READ|PROT_WRITE)) {
			return (EINVAL);
		}
		flags &= ~MAP_STACK;
		flags |= MAP_ANON;
		upos = 0;
	}

	error = kern_mmap(curproc->p_vmspace, uap->addr, uap->len,
			  uap->prot, flags,
			  uap->fd, upos, &sysmsg->sysmsg_resultp);

	return (error);
}

/*
 * msync system call handler
 *
 * msync_args(void *addr, size_t len, int flags)
 *
 * No requirements
 */
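/*
 * Editorial usage sketch (not part of the build; "p" and "len" are
 * placeholders):
 *
 *	msync(p, 0, MS_SYNC);		// len 0: flush the whole map entry
 *					// containing p (see below)
 *	msync(p, len, MS_ASYNC | MS_INVALIDATE);	// always EINVAL
 */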
int
sys_msync(struct sysmsg *sysmsg, const struct msync_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (size < uap->len)		/* wrap */
		return(EINVAL);
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr)		/* wrap */
		return(EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * map->token serializes extracting the address range for size == 0
	 * msyncs with the vm_map_clean call; if the token were not held
	 * across the two calls, an intervening munmap/mmap pair, for example,
	 * could cause msync to occur on a wrong region.
	 */
	lwkt_gettoken(&map->token);

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		if (rv == FALSE) {
			vm_map_unlock_read(map);
			rv = KERN_INVALID_ADDRESS;
			goto done;
		}
		addr = entry->ba.start;
		size = entry->ba.end - entry->ba.start;
		vm_map_unlock_read(map);
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
			  (flags & MS_INVALIDATE) != 0);
done:
	lwkt_reltoken(&map->token);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

/*
 * munmap system call handler
 *
 * munmap_args(void *addr, size_t len)
 *
 * No requirements
 */
int
sys_munmap(struct sysmsg *sysmsg, const struct munmap_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (size < uap->len)		/* wrap */
		return(EINVAL);
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr)		/* wrap */
		return(EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/* map->token serializes between the map check and the actual unmap */
	lwkt_gettoken(&map->token);

	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size,
				     VM_PROT_NONE, FALSE)) {
		lwkt_reltoken(&map->token);
		return (EINVAL);
	}
	/* returns nothing but KERN_SUCCESS anyway */
	vm_map_remove(map, addr, addr + size);
	lwkt_reltoken(&map->token);
	return (0);
}

/*
 * mprotect_args(const void *addr, size_t len, int prot)
 *
 * No requirements.
 */
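/*
 * Editorial note (sketch): vm_map_protect() will not raise a mapping's
 * protection beyond the max_protection recorded when the mapping was
 * created, so e.g. on a MAP_SHARED mapping of a descriptor opened O_RDONLY
 *
 *	mprotect(p, len, PROT_READ | PROT_WRITE);
 *
 * comes back as KERN_PROTECTION_FAILURE and is reported as EACCES.
 */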
int
sys_mprotect(struct sysmsg *sysmsg, const struct mprotect_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_prot_t prot;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (size < uap->len)		/* wrap */
		return(EINVAL);
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr)		/* wrap */
		return(EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size,
			       prot, FALSE)) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * minherit system call handler
 *
 * minherit_args(void *addr, size_t len, int inherit)
 *
 * No requirements.
 */
int
sys_minherit(struct sysmsg *sysmsg, const struct minherit_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (size < uap->len)		/* wrap */
		return(EINVAL);
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr)		/* wrap */
		return(EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr,
			       addr + size, inherit)) {
	case KERN_SUCCESS:
		error = 0;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * madvise system call handler
 *
 * madvise_args(void *addr, size_t len, int behav)
 *
 * No requirements.
 */
int
sys_madvise(struct sysmsg *sysmsg, const struct madvise_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav >= MADV_CONTROL_END)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (tmpaddr < (vm_offset_t)uap->addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t)uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&p->p_vmspace->vm_map, start, end,
			       uap->behav, 0);
	return (error);
}

/*
 * mcontrol system call handler
 *
 * mcontrol_args(void *addr, size_t len, int behav, off_t value)
 *
 * No requirements
 */
int
sys_mcontrol(struct sysmsg *sysmsg, const struct mcontrol_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	/*
	 * Check for illegal behavior
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END)
		return (EINVAL);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (tmpaddr < (vm_offset_t) uap->addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t)uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&p->p_vmspace->vm_map, start, end,
			       uap->behav, uap->value);
	return (error);
}


/*
 * mincore system call handler
 *
 * mincore_args(const void *addr, size_t len, char *vec)
 *
 * No requirements
 */
int
sys_mincore(struct sysmsg *sysmsg, const struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (end < addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	lwkt_gettoken(&map->token);
	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = RB_MIN(vm_map_rb_tree, &map->rb_root);

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
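	/*
	 * Editorial sketch of the user interface serviced below ("addr"
	 * is a placeholder): one byte of vec[] is filled in per page of
	 * the page-rounded request, e.g.
	 *
	 *	char vec[3];
	 *	mincore(addr, 3 * PAGE_SIZE, vec);
	 *
	 * Each byte carries MINCORE_INCORE plus, possibly,
	 * MINCORE_MODIFIED_OTHER and/or MINCORE_REFERENCED_OTHER; pages
	 * in skipped or unmapped ranges are reported as 0.
	 */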
	lastvecindex = -1;
	for (current = entry;
	     current && current->ba.start < end;
	     current = vm_map_rb_tree_RB_NEXT(current)) {
		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL)
			continue;
		if (current->ba.object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->ba.start)
			addr = current->ba.start;
		cend = current->ba.end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->ba.offset +
					 (addr - current->ba.start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather
				 * information about it.  spl protection is
				 * required to maintain the object
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 *
				 * XXX vm_token - legacy for pmap_ts_referenced
				 * in x86 and vkernel pmap code.
				 */
				lwkt_gettoken(&vm_token);
				vm_object_hold(current->ba.object);
				m = vm_page_lookup(current->ba.object, pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty || pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
				vm_object_drop(current->ba.object);
				lwkt_reltoken(&vm_token);
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make
			 * sure that the byte vector is zeroed for those
			 * skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done;
			}

			/*
			 * If the map has changed, due to the subyte,
			 * the previous output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	error = 0;
done:
	lwkt_reltoken(&map->token);
	return (error);
}

/*
 * mlock system call handler
 *
 * mlock_args(const void *addr, size_t len)
 *
 * No requirements
 */
int
sys_mlock(struct sysmsg *sysmsg, const struct mlock_args *uap)
{
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (size < uap->len)		/* wrap */
		return (EINVAL);
	if (size == 0)			/* silently allow 0 size */
		return (0);
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr)		/* wrap */
		return (EINVAL);

	if (atop(size) + vmstats.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

	/*
	 * We do not need to synchronize against other threads updating ucred;
	 * they update p->ucred, which is synchronized into td_ucred by the
	 * thread itself.
	 */
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) {
		return (ENOMEM);
	}
#else
	error = priv_check_cred(td->td_ucred, PRIV_ROOT, 0);
	if (error) {
		return (error);
	}
#endif
	error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * mlockall(int how)
 *
 * No requirements
 */
int
sys_mlockall(struct sysmsg *sysmsg, const struct mlockall_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int how = uap->how;
	int rc = KERN_SUCCESS;

	if (((how & MCL_CURRENT) == 0) && ((how & MCL_FUTURE) == 0))
		return (EINVAL);

	rc = priv_check_cred(td->td_ucred, PRIV_ROOT, 0);
	if (rc)
		return (rc);

	vm_map_lock(map);
	do {
		if (how & MCL_CURRENT) {
			RB_FOREACH(entry, vm_map_rb_tree, &map->rb_root) {
				;	/* NOT IMPLEMENTED YET */
			}
			rc = ENOSYS;
			break;
		}
		if (how & MCL_FUTURE)
			map->flags |= MAP_WIREFUTURE;
	} while(0);
	vm_map_unlock(map);

	return (rc);
}
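
/*
 * Editorial note (sketch): MCL_CURRENT is not implemented above and comes
 * back as ENOSYS; MCL_FUTURE only sets MAP_WIREFUTURE on the map, which
 * vm_mmap() below honors by wiring each new mapping as it is created.
 */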

/*
 * munlockall(void)
 *
 * Unwire all user-wired map entries, cancel MCL_FUTURE.
 *
 * No requirements
 */
int
sys_munlockall(struct sysmsg *sysmsg, const struct munlockall_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int rc = KERN_SUCCESS;

	vm_map_lock(map);

	/* Clear MAP_WIREFUTURE to cancel mlockall(MCL_FUTURE) */
	map->flags &= ~MAP_WIREFUTURE;

retry:
	RB_FOREACH(entry, vm_map_rb_tree, &map->rb_root) {
		if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
			continue;

		/*
		 * If we encounter an in-transition entry, we release the
		 * map lock and retry the scan; we do not decrement any
		 * wired_count more than once because we do not touch
		 * any entries with MAP_ENTRY_USER_WIRED not set.
		 *
		 * There is a potential interleaving with concurrent
		 * mlockall()s here -- if we abort a scan, an mlockall()
		 * could start, wire a number of entries before our
		 * current position, and then stall itself on this
		 * or any other in-transition entry.  If that occurs, when
		 * we resume, we will unwire those entries.
		 */
		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			++mycpu->gd_cnt.v_intrans_coll;
			++mycpu->gd_cnt.v_intrans_wait;
			vm_map_transition_wait(map, 1);
			goto retry;
		}

		KASSERT(entry->wired_count > 0,
			("wired_count was 0 with USER_WIRED set! %p", entry));

		/* Drop wired count, if it hits zero, unwire the entry */
		entry->eflags &= ~MAP_ENTRY_USER_WIRED;
		entry->wired_count--;
		if (entry->wired_count == 0)
			vm_fault_unwire(map, entry);
	}

	vm_map_unlock(map);

	return (rc);
}

/*
 * munlock system call handler
 *
 * munlock_args(const void *addr, size_t len)
 *
 * No requirements
 */
int
sys_munlock(struct sysmsg *sysmsg, const struct munlock_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	tmpaddr = addr + size;
	if (tmpaddr < addr)		/* wrap */
		return (EINVAL);
	if (size == 0)			/* silently allow 0 size */
		return (0);

#ifndef pmap_wired_count
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);
#endif

	error = vm_map_unwire(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 *
 * No requirements
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags, void *handle, vm_ooffset_t foff,
	struct file *fp)
{
	boolean_t fitit;
	vm_object_t object;
	vm_offset_t eaddr;
	vm_size_t esize;
	vm_size_t align;
	int (*uksmap)(vm_map_backing_t ba, int op, cdev_t dev, vm_page_t fake);
	struct vnode *vp;
	struct thread *td = curthread;
	struct proc *p;
	int rv = KERN_SUCCESS;
	off_t objsize;
	int docow;
	int error;

	if (size == 0)
		return (0);

	objsize = round_page(size);
	if (objsize < size)
		return (EINVAL);
	size = objsize;

	lwkt_gettoken(&map->token);

	/*
	 * XXX messy code, fixme
	 *
	 * NOTE: Overflow checks require discrete statements or GCC4
	 *	 will optimize it out.
	 */
	if ((p = curproc) != NULL && map == &p->p_vmspace->vm_map) {
		esize = map->size + size;	/* workaround gcc4 opt */
		if (esize < map->size ||
		    esize > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
			lwkt_reltoken(&map->token);
			return(ENOMEM);
		}
	}

	/*
	 * We currently can only deal with page aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmap
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies...so we want to be sure to
	 * disallow this in all cases.
	 *
	 * NOTE: Overflow checks require discrete statements or GCC4
	 *	 will optimize it out.
	 */
	if (foff & PAGE_MASK) {
		lwkt_reltoken(&map->token);
		return (EINVAL);
	}

	/*
	 * Handle alignment.  For large memory maps it is possible
	 * that the MMU can optimize the page table so align anything
	 * that is a multiple of SEG_SIZE to SEG_SIZE.
	 *
	 * Also align any large mapping (bigger than 16x SEG_SIZE) to a
	 * SEG_SIZE address boundary.
	 */
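	/*
	 * Editorial note (sketch): the MAP_SIZEALIGN test below,
	 * (align ^ (align - 1)) != (align << 1) - 1, is a power-of-two
	 * check.  E.g. align = 8 gives 8 ^ 7 == 15 == (8 << 1) - 1 and
	 * is accepted, while align = 12 gives 12 ^ 11 == 7 != 23 and is
	 * rejected with EINVAL.
	 */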
	if (flags & MAP_SIZEALIGN) {
		align = size;
		if ((align ^ (align - 1)) != (align << 1) - 1) {
			lwkt_reltoken(&map->token);
			return (EINVAL);
		}
	} else if ((flags & MAP_FIXED) == 0 &&
		   ((size & SEG_MASK) == 0 || size > SEG_SIZE * 16)) {
		align = SEG_SIZE;
	} else {
		align = PAGE_SIZE;
	}

	if ((flags & (MAP_FIXED | MAP_TRYFIXED)) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr)) {
			lwkt_reltoken(&map->token);
			return (EINVAL);
		}
		eaddr = *addr + size;
		if (eaddr < *addr) {
			lwkt_reltoken(&map->token);
			return (EINVAL);
		}
		fitit = FALSE;
		if ((flags & MAP_TRYFIXED) == 0)
			vm_map_remove(map, *addr, *addr + size);
	}

	uksmap = NULL;

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle) {
			/*
			 * Default memory object
			 */
			object = default_pager_alloc(handle, objsize,
						     prot, foff);
			if (object == NULL) {
				lwkt_reltoken(&map->token);
				return(ENOMEM);
			}
			docow = MAP_PREFAULT_PARTIAL;
		} else {
			/*
			 * Implicit single instance of a default memory
			 * object, so we don't need a VM object yet.
			 */
			foff = 0;
			object = NULL;
			docow = 0;
		}
		vp = NULL;
	} else {
		vp = (struct vnode *)handle;

		/*
		 * Non-anonymous mappings of VCHR (aka not /dev/zero)
		 * cannot specify MAP_STACK.
		 */
		if (vp->v_type == VCHR) {
			if (flags & MAP_STACK) {
				lwkt_reltoken(&map->token);
				return(EINVAL);
			}
		}

		if (vp->v_type == VCHR && vp->v_rdev->si_ops->d_uksmap) {
			/*
			 * Device mappings without a VM object, typically
			 * sharing permanently allocated kernel memory or
			 * process-context-specific (per-process) data.
			 *
			 * The object offset for uksmap represents the
			 * lwp_tid that did the mapping.
			 *
			 * Force them to be shared.
			 */
			uksmap = vp->v_rdev->si_ops->d_uksmap;
			object = NULL;
			docow = MAP_PREFAULT_PARTIAL;
			flags &= ~(MAP_PRIVATE|MAP_COPY);
			flags |= MAP_SHARED;
		} else if (vp->v_type == VCHR) {
			/*
			 * Device mappings (device size unknown?).
			 * Force them to be shared.
			 */
			error = dev_dmmap_single(vp->v_rdev, &foff, objsize,
						 &object, prot, fp);

			if (error == ENODEV) {
				handle = (void *)(intptr_t)vp->v_rdev;
				object = dev_pager_alloc(handle, objsize, prot, foff);
				if (object == NULL) {
					lwkt_reltoken(&map->token);
					return(EINVAL);
				}
			} else if (error) {
				lwkt_reltoken(&map->token);
				return(error);
			}

			docow = MAP_PREFAULT_PARTIAL;
			flags &= ~(MAP_PRIVATE|MAP_COPY);
			flags |= MAP_SHARED;
		} else {
			/*
			 * Regular file mapping (typically).  The attribute
			 * check is for the link count test only.  mmapable
			 * vnodes must already have a VM object assigned.
			 */
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat);
			if (error) {
				lwkt_reltoken(&map->token);
				return (error);
			}
			docow = MAP_PREFAULT_PARTIAL;
			object = vnode_pager_reference(vp);
			if (object == NULL && vp->v_type == VREG) {
				lwkt_reltoken(&map->token);
				kprintf("Warning: cannot mmap vnode %p, no "
					"object\n", vp);
				return(EINVAL);
			}

			/*
			 * If it is a regular file without any references
			 * we do not need to sync it.
			 */
			if (vp->v_type == VREG && vat.va_nlink == 0) {
				flags |= MAP_NOSYNC;
			}
		}
	}

	/*
	 * Deal with the adjusted flags
	 */
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

	/*
	 * This may place the area in its own page directory if (size) is
	 * large enough, otherwise it typically returns its argument.
	 *
	 * (object can be NULL)
	 */
	if (fitit) {
		*addr = pmap_addr_hint(object, *addr, size);
	}

	/*
	 * Stack mappings need special attention.
	 */
	if (uksmap) {
		rv = vm_map_find(map, uksmap, vp->v_rdev,
				 foff, addr, size,
				 align, fitit,
				 VM_MAPTYPE_UKSMAP, VM_SUBSYS_MMAP,
				 prot, maxprot, docow);
	} else if (flags & MAP_STACK) {
		rv = vm_map_stack(map, addr, size, flags,
				  prot, maxprot, docow);
	} else {
		rv = vm_map_find(map, object, NULL,
				 foff, addr, size,
				 align, fitit,
				 VM_MAPTYPE_NORMAL, VM_SUBSYS_MMAP,
				 prot, maxprot, docow);
	}

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 *
		 * (NOTE: object can be NULL)
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}

	/* If a process has marked all future mappings for wiring, do so */
	if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE))
		vm_map_unwire(map, *addr, *addr + size, FALSE);

	/*
	 * Set the access time on the vnode
	 */
	if (vp != NULL)
		vn_mark_atime(vp, td);
out:
	lwkt_reltoken(&map->token);

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

/*
 * Translate a Mach VM return code to zero on success or the appropriate errno
 * on failure.
 */
int
vm_mmap_to_errno(int rv)
{

	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}