/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.14 2007/08/15 03:15:07 dillon Exp $
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>

#include <sys/spinlock2.h>
#include <sys/sysref2.h>
#include <sys/mplock2.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
						  void *id);
static void vmspace_entry_delete(struct vmspace_entry *ve,
				 struct vkernel_proc *vkp);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * MPALMOSTSAFE
 */
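/*
 * Illustrative usage from a virtual kernel process.  The id token and
 * error handling here are hypothetical; only the call signature comes
 * from this file:
 *
 *	void *id = &some_unique_object;		(any non-NULL pointer)
 *	if (vmspace_create(id, 0, NULL) < 0)
 *		err(1, "vmspace_create");
 */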
int
sys_vmspace_create(struct vmspace_create_args *uap)
{
	struct vmspace_entry *ve;
	struct vkernel_proc *vkp;
	int error;

	if (vkernel_enable == 0)
		return (EOPNOTSUPP);

	/*
	 * Create a virtual kernel side-structure for the process if one
	 * does not exist.
	 */
	get_mplock();
	if ((vkp = curproc->p_vkernel) == NULL) {
		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
		vkp->refs = 1;
		spin_init(&vkp->spin);
		RB_INIT(&vkp->root);
		curproc->p_vkernel = vkp;
	}

	/*
	 * Create a new VMSPACE
	 *
	 * XXX race if kmalloc blocks
	 */
	if (vkernel_find_vmspace(vkp, uap->id)) {
		error = EEXIST;
		goto done;
	}
	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	ve->id = uap->id;
	pmap_pinit2(vmspace_pmap(ve->vmspace));
	RB_INSERT(vmspace_rb_tree, &vkp->root, ve);
	error = 0;
done:
	rel_mplock();
	return (error);
}

/*
 * vmspace_destroy (void *id)
 *
 * Destroy a VMSPACE.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_destroy(struct vmspace_destroy_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	get_mplock();
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done;
	}
	if (ve->refs) {
		error = EBUSY;
		goto done;
	}
	vmspace_entry_delete(ve, vkp);
	error = 0;
done:
	rel_mplock();
	return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * MPALMOSTSAFE
 */
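/*
 * Illustrative run loop from a virtual kernel.  The frame setup and
 * dispatch step are hypothetical; only vmspace_ctl() and
 * VMSPACE_CTL_RUN come from this file:
 *
 *	struct trapframe tf;	(guest register state, filled in by caller)
 *	struct vextframe vf;	(guest TLS state)
 *	for (;;) {
 *		vmspace_ctl(id, VMSPACE_CTL_RUN, &tf, &vf);
 *		(dispatch the page fault, signal, trap, or system
 *		 call now recorded in tf, then loop)
 *	}
 */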
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
	struct vkernel_proc *vkp;
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve;
	struct lwp *lp;
	struct proc *p;
	int framesz;
	int error;

	lp = curthread->td_lwp;
	p = lp->lwp_proc;

	get_mplock();
	if ((vkp = p->p_vkernel) == NULL) {
		error = EINVAL;
		goto done;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done;
	}

	/*
	 * Signal mailbox interlock
	 */
	if (p->p_flag & P_MAILBOX) {
		p->p_flag &= ~P_MAILBOX;
		error = EINTR;
		goto done;
	}

	switch(uap->cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context.  Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the context.
		 */
		atomic_add_int(&ve->refs, 1);
		framesz = sizeof(struct trapframe);
		if ((vklp = lp->lwp_vkernel) == NULL) {
			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
				       M_WAITOK|M_ZERO);
			lp->lwp_vkernel = vklp;
		}
		vklp->user_trapframe = uap->tframe;
		vklp->user_vextframe = uap->vframe;
		bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
		      sizeof(vklp->save_vextframe.vx_tls));
		error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
		if (error == 0)
			error = copyin(&uap->vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		if (error == 0)
			error = cpu_sanitize_frame(uap->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			bcopy(&vklp->save_trapframe, uap->sysmsg_frame,
			      framesz);
			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
			      sizeof(vklp->save_vextframe.vx_tls));
			set_user_TLS();
			atomic_subtract_int(&ve->refs, 1);
		} else {
			vklp->ve = ve;
			pmap_setlwpvm(lp, ve->vmspace);
			set_user_TLS();
			set_vkernel_fp(uap->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
done:
	rel_mplock();
	return(error);
}

/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * Map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_mmap(struct vmspace_mmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	get_mplock();
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done;
	}
	error = kern_mmap(ve->vmspace, uap->addr, uap->len,
			  uap->prot, uap->flags,
			  uap->fd, uap->offset, &uap->sysmsg_resultp);
done:
	rel_mplock();
	return (error);
}
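/*
 * Illustrative call creating a MAP_VPAGETABLE mapping for guest RAM.
 * The fd, size, and flags combination is hypothetical; only the syscall
 * itself and the MAP_VPAGETABLE use case are documented above:
 *
 *	vmspace_mmap(id, NULL, ram_size, PROT_READ|PROT_WRITE,
 *		     MAP_VPAGETABLE|MAP_SHARED, ram_fd, 0);
 */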
/*
 * vmspace_munmap(id, addr, len)
 *
 * Unmap memory within a VMSPACE.
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_munmap(struct vmspace_munmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int error;

	get_mplock();
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done;
	}

	/*
	 * Copied from sys_munmap()
	 */
	addr = (vm_offset_t)uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t)round_page(size);
	if (size < uap->len) {		/* wrap */
		error = EINVAL;
		goto done;
	}
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr) {		/* wrap */
		error = EINVAL;
		goto done;
	}
	if (size == 0) {
		error = 0;
		goto done;
	}

	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done;
	}
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done;
	}
	map = &ve->vmspace->vm_map;
	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE)) {
		error = EINVAL;
		goto done;
	}
	vm_map_remove(map, addr, addr + size);
	error = 0;
done:
	rel_mplock();
	return (error);
}

/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_pread(struct vmspace_pread_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	get_mplock();
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done;
	}
	error = EINVAL;
done:
	rel_mplock();
	return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 *
 * MPALMOSTSAFE
 */
int
sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	get_mplock();
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done;
	}
	error = EINVAL;
done:
	rel_mplock();
	return (error);
}

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * MPALMOSTSAFE
 */
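/*
 * Illustrative call (hypothetical range; MADV_INVAL is assumed here as
 * the behavior a vkernel would use to flush stale mappings after it
 * edits a virtual page table):
 *
 *	vmspace_mcontrol(id, va, bytes, MADV_INVAL, 0);
 */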
int
sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	get_mplock();
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done;
	}

	/*
	 * This code is basically copied from sys_mcontrol()
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
		error = EINVAL;
		goto done;
	}

	if (tmpaddr < (vm_offset_t)uap->addr) {
		error = EINVAL;
		goto done;
	}
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done;
	}
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done;
	}

	start = trunc_page((vm_offset_t)uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
			       uap->behav, uap->value);
done:
	rel_mplock();
	return (error);
}

/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * a->id is the comparison key and is the only field that has to be
 * initialized for a lookup.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
	if ((char *)a->id < (char *)b->id)
		return(-1);
	else if ((char *)a->id > (char *)b->id)
		return(1);
	return(0);
}

static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
	struct vkernel_proc *vkp = data;

	KKASSERT(ve->refs == 0);
	vmspace_entry_delete(ve, vkp);
	return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.
 */
static
void
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp)
{
	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

	pmap_remove_pages(vmspace_pmap(ve->vmspace),
			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_remove(&ve->vmspace->vm_map,
		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	sysref_put(&ve->vmspace->vm_sysref);
	kfree(ve, M_VKERNEL);
}

static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id)
{
	struct vmspace_entry *ve;
	struct vmspace_entry key;

	key.id = id;
	ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
	return (ve);
}

/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
	struct vkernel_proc *vkp;

	vkp = p1->p_vkernel;
	KKASSERT(vkp->refs > 0);
	atomic_add_int(&vkp->refs, 1);
	p2->p_vkernel = vkp;
}
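/*
 * Sketch of the fork path (hypothetical caller; the comment above only
 * states that these functions are used at fork/exit time):
 *
 *	if (p1->p_vkernel)
 *		vkernel_inherit(p1, p2);
 */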
void
vkernel_exit(struct proc *p)
{
	struct vkernel_proc *vkp;
	struct lwp *lp;
	int freeme = 0;

	vkp = p->p_vkernel;

	/*
	 * Restore the original VM context if we are killed while running
	 * a different one.
	 *
	 * This isn't supposed to happen.  What is supposed to happen is
	 * that the process should enter vkernel_trap() before handling
	 * the signal.
	 */
	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
		vkernel_lwp_exit(lp);
	}

	/*
	 * Dereference the common area
	 */
	p->p_vkernel = NULL;
	KKASSERT(vkp->refs > 0);
	spin_lock_wr(&vkp->spin);
	if (--vkp->refs == 0)
		freeme = 1;
	spin_unlock_wr(&vkp->spin);

	if (freeme) {
		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
			rb_vmspace_delete, vkp);
		kfree(vkp, M_VKERNEL);
	}
}

void
vkernel_lwp_exit(struct lwp *lp)
{
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve;

	if ((vklp = lp->lwp_vkernel) != NULL) {
		if ((ve = vklp->ve) != NULL) {
			kprintf("Warning, pid %d killed with "
				"active VC!\n", lp->lwp_proc->p_pid);
			print_backtrace();
			pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
			vklp->ve = NULL;
			KKASSERT(ve->refs > 0);
			atomic_subtract_int(&ve->refs, 1);
		}
		lp->lwp_vkernel = NULL;
		kfree(vklp, M_VKERNEL);
	}
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;
	struct vmspace_entry *ve;
	struct vkernel_lwp *vklp;
	int error;

	/*
	 * Which vmspace entry was running?
	 */
	vklp = lp->lwp_vkernel;
	KKASSERT(vklp);
	ve = vklp->ve;
	KKASSERT(ve != NULL);

	/*
	 * Switch the LWP vmspace back to the virtual kernel's VM space.
	 */
	vklp->ve = NULL;
	pmap_setlwpvm(lp, p->p_vmspace);
	KKASSERT(ve->refs > 0);
	atomic_subtract_int(&ve->refs, 1);

	/*
	 * Copy the emulated process frame to the virtual kernel process.
	 * The emulated process cannot change TLS descriptors so don't
	 * bother saving them, we already have a copy.
	 *
	 * Restore the virtual kernel's saved context so the virtual kernel
	 * process can resume.
	 */
	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
	      sizeof(vklp->save_vextframe.vx_tls));
	set_user_TLS();
	cpu_vkernel_trap(frame, error);
}
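/*
 * Illustrative end-to-end flow from the virtual kernel's point of view
 * (hypothetical pseudo-code; only the syscall names and the return path
 * through vkernel_trap() come from this file):
 *
 *	vmspace_create(id, 0, NULL);
 *	vmspace_mmap(id, ...);		(populate guest memory)
 *	for (;;) {
 *		vmspace_ctl(id, VMSPACE_CTL_RUN, &tf, &vf);
 *		(vkernel_trap() restored our context; handle the guest
 *		 fault, trap, or syscall recorded in tf and loop)
 *	}
 *	vmspace_destroy(id);
 */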