/*
 * (MPSAFE)
 *
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
                        void *id, int havetoken);
static int vmspace_entry_delete(struct vmspace_entry *ve,
                        struct vkernel_proc *vkp, int refs);
static void vmspace_entry_cache_ref(struct vmspace_entry *ve);
static void vmspace_entry_cache_drop(struct vmspace_entry *ve);
static void vmspace_entry_drop(struct vmspace_entry *ve);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * No requirements.
 */
int
sys_vmspace_create(struct vmspace_create_args *uap)
{
        struct vmspace_entry *ve;
        struct vkernel_proc *vkp;
        struct proc *p = curproc;
        int error;

        if (vkernel_enable == 0)
                return (EOPNOTSUPP);

        /*
         * Create a virtual kernel side-structure for the process if one
         * does not exist.
         *
         * Implement a simple resolution for SMP races.
         */
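        /*
         * The side-structure is allocated speculatively before taking
         * p_token; if another thread installed p_vkernel in the meantime,
         * the local allocation is freed and the existing structure is
         * adopted instead.
         */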
        if ((vkp = p->p_vkernel) == NULL) {
                vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
                lwkt_gettoken(&p->p_token);
                if (p->p_vkernel == NULL) {
                        vkp->refs = 1;
                        lwkt_token_init(&vkp->token, "vkernel");
                        RB_INIT(&vkp->root);
                        p->p_vkernel = vkp;
                } else {
                        kfree(vkp, M_VKERNEL);
                        vkp = p->p_vkernel;
                }
                lwkt_reltoken(&p->p_token);
        }

        if (curthread->td_vmm)
                return 0;

        /*
         * Create a new VMSPACE, disallow conflicting ids
         */
        ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
        ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        ve->id = uap->id;
        ve->refs = 0;           /* active refs (none) */
        ve->cache_refs = 1;     /* on-tree, not deleted (prevent kfree) */
        pmap_pinit2(vmspace_pmap(ve->vmspace));

        lwkt_gettoken(&vkp->token);
        if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
                vmspace_rel(ve->vmspace);
                ve->vmspace = NULL;             /* safety */
                kfree(ve, M_VKERNEL);
                error = EEXIST;
        } else {
                error = 0;
        }
        lwkt_reltoken(&vkp->token);

        return (error);
}

/*
 * Destroy a VMSPACE given its identifier.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct vmspace_destroy_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL)
                return EINVAL;

        /*
         * vkp->token protects the deletion against a new RB tree search.
         */
        lwkt_gettoken(&vkp->token);
        error = ENOENT;
        if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) != NULL) {
                error = vmspace_entry_delete(ve, vkp, 1);
                if (error == 0)
                        vmspace_entry_cache_drop(ve);
        }
        lwkt_reltoken(&vkp->token);

        return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *              struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
        struct vkernel_proc *vkp;
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve = NULL;
        struct lwp *lp;
        struct proc *p;
        int framesz;
        int error;

        lp = curthread->td_lwp;
        p = lp->lwp_proc;

        if ((vkp = p->p_vkernel) == NULL)
                return (EINVAL);

        /*
         * ve only matters when VMM is not used.
         */
        if (curthread->td_vmm == NULL) {
                if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                        error = ENOENT;
                        goto done;
                }
        }

        switch(uap->cmd) {
        case VMSPACE_CTL_RUN:
                /*
                 * Save the caller's register context, swap VM spaces, and
                 * install the passed register context.  Return with
                 * EJUSTRETURN so the syscall code doesn't adjust the context.
                 */
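                /*
                 * The block below first saves the vkernel's trapframe and
                 * TLS into the per-lwp vkernel_lwp structure, then copies
                 * in and sanitizes the guest context.  On any failure the
                 * saved context is restored so the caller sees a normal
                 * error return instead of a corrupt frame.
                 */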
                framesz = sizeof(struct trapframe);
                if ((vklp = lp->lwp_vkernel) == NULL) {
                        vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
                                       M_WAITOK|M_ZERO);
                        lp->lwp_vkernel = vklp;
                }
                if (ve && vklp->ve_cache != ve) {
                        vmspace_entry_cache_ref(ve);
                        if (vklp->ve_cache)
                                vmspace_entry_cache_drop(vklp->ve_cache);
                        vklp->ve_cache = ve;
                }
                vklp->user_trapframe = uap->tframe;
                vklp->user_vextframe = uap->vframe;
                bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
                bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
                      sizeof(vklp->save_vextframe.vx_tls));
                error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
                if (error == 0) {
                        error = copyin(&uap->vframe->vx_tls,
                                       &curthread->td_tls,
                                       sizeof(struct savetls));
                }
                if (error == 0)
                        error = cpu_sanitize_frame(uap->sysmsg_frame);
                if (error == 0)
                        error = cpu_sanitize_tls(&curthread->td_tls);
                if (error) {
                        bcopy(&vklp->save_trapframe, uap->sysmsg_frame,
                              framesz);
                        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
                              sizeof(vklp->save_vextframe.vx_tls));
                        set_user_TLS();
                } else {
                        /*
                         * If it's a VMM thread just set the CR3.  We also
                         * set vklp->ve so we can tell whether a vkernel
                         * user process is currently running (non-NULL) or
                         * not (NULL).
                         */
                        if (curthread->td_vmm == NULL) {
                                vklp->ve = ve;
                                atomic_add_int(&ve->refs, 1);
                                pmap_setlwpvm(lp, ve->vmspace);
                        } else {
                                vklp->ve = uap->id;
                                vmm_vm_set_guest_cr3((register_t)uap->id);
                        }
                        set_user_TLS();
                        set_vkernel_fp(uap->sysmsg_frame);
                        error = EJUSTRETURN;
                }
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }
done:
        if (ve)
                vmspace_entry_drop(ve);

        return(error);
}

/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * Map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct vmspace_mmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done2;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done2;
        }

        error = kern_mmap(ve->vmspace, uap->addr, uap->len,
                          uap->prot, uap->flags,
                          uap->fd, uap->offset, &uap->sysmsg_resultp);

        vmspace_entry_drop(ve);
done2:
        return (error);
}

/*
 * vmspace_munmap(id, addr, len)
 *
 * Unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct vmspace_munmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        vm_offset_t addr;
        vm_offset_t tmpaddr;
        vm_size_t size, pageoff;
        vm_map_t map;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done2;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done2;
        }

        /*
         * NOTE: kern_munmap() can block so we need to temporarily
         *       ref ve->refs.
         */
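        /*
         * The active ref obtained by vkernel_find_vmspace() above keeps
         * ve and ve->vmspace valid across the blocking operation; a
         * concurrent vmspace_entry_delete() fails with EBUSY while the
         * ref is held.
         */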

        /*
         * Copied from sys_munmap()
         */
        addr = (vm_offset_t)uap->addr;
        size = uap->len;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t)round_page(size);
        if (size < uap->len) {          /* wrap */
                error = EINVAL;
                goto done1;
        }
        tmpaddr = addr + size;          /* workaround gcc4 opt */
        if (tmpaddr < addr) {           /* wrap */
                error = EINVAL;
                goto done1;
        }
        if (size == 0) {
                error = 0;
                goto done1;
        }

        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        map = &ve->vmspace->vm_map;
        if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
                error = EINVAL;
                goto done1;
        }
        vm_map_remove(map, addr, addr + size);
        error = 0;
done1:
        vmspace_entry_drop(ve);
done2:
        return (error);
}

/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pread(struct vmspace_pread_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }
        vmspace_entry_drop(ve);
        error = EINVAL;
done3:
        return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }
        vmspace_entry_drop(ve);
        error = EINVAL;
done3:
        return (error);
}

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
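/*
 * Note: the range checks below mirror sys_mcontrol(); the only difference
 * is that the madvise operation is applied to the target vmspace's map
 * rather than to the calling process's map.
 */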
int
sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        struct lwp *lp;
        vm_offset_t start, end;
        vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
        int error;

        lp = curthread->td_lwp;
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }

        /*
         * This code is basically copied from sys_mcontrol()
         */
        if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
                error = EINVAL;
                goto done1;
        }

        if (tmpaddr < (vm_offset_t)uap->addr) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }

        start = trunc_page((vm_offset_t) uap->addr);
        end = round_page(tmpaddr);

        error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
                               uap->behav, uap->value);
done1:
        vmspace_entry_drop(ve);
done3:
        return (error);
}

/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * a->id is the comparison key and is the only field that needs to be
 * initialized for a lookup.
 *
 * The caller must hold vkp->token.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
        if ((char *)a->id < (char *)b->id)
                return(-1);
        else if ((char *)a->id > (char *)b->id)
                return(1);
        return(0);
}

/*
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
        struct vkernel_proc *vkp = data;

        if (vmspace_entry_delete(ve, vkp, 0) == 0)
                vmspace_entry_cache_drop(ve);
        else
                panic("rb_vmspace_delete: invalid refs %d", ve->refs);
        return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.  We gain control of
 * the associated cache_refs ref, which the caller will drop for us.
 *
 * The ve must not have any active references other than those from the
 * caller.  If it does, EBUSY is returned.  The ve may still maintain
 * any number of cache references which will drop as the related LWPs
 * execute vmspace operations or exit.
 *
 * 0 is returned on success, EBUSY on failure.  On success the caller must
 * drop the last cache_refs.  We have dropped the caller's active refs.
 *
 * The caller must hold vkp->token.
 */
static
int
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp,
                     int refs)
{
        /*
         * Interlocked by vkp->token.
         *
         * Drop the caller's refs and set VKE_REF_DELETED atomically, if
         * the remaining refs match exactly.  Dropping refs and setting
         * the DELETED flag atomically protects other threads from trying
         * to use the ve.
         *
         * The caller now owns the final cache_ref that was previously
         * associated with the live state of the ve.
         */
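        /*
         * The cmpset only succeeds when ve->refs is exactly 'refs',
         * i.e. when no other thread holds an active reference.  On
         * success refs becomes VKE_REF_DELETED in the same atomic
         * operation, so lookups that race with the deletion observe
         * the DELETED flag and back off.
         */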
        if (atomic_cmpset_int(&ve->refs, refs, VKE_REF_DELETED) == 0) {
                KKASSERT(ve->refs >= refs);
                return EBUSY;
        }
        RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

        pmap_remove_pages(vmspace_pmap(ve->vmspace),
                          VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        vm_map_remove(&ve->vmspace->vm_map,
                      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        vmspace_rel(ve->vmspace);
        ve->vmspace = NULL;             /* safety */

        return 0;
}

/*
 * Ref a ve for cache purposes
 */
static
void
vmspace_entry_cache_ref(struct vmspace_entry *ve)
{
        atomic_add_int(&ve->cache_refs, 1);
}

/*
 * The ve cache_drop is the final word for a ve.  It gains an extra ref
 * representing it being on the RB tree and not being in a deleted state.
 * Removal from the RB tree and deletion manipulate this ref.  The last
 * drop will thus include full deletion of the ve in addition to the last
 * cached user going away.
 */
static
void
vmspace_entry_cache_drop(struct vmspace_entry *ve)
{
        if (atomic_fetchadd_int(&ve->cache_refs, -1) == 1) {
                KKASSERT(ve->refs & VKE_REF_DELETED);
                kfree(ve, M_VKERNEL);
        }
}

/*
 * Drop primary reference.  The ve cannot be freed on the 1->0 transition.
 * Instead, ve deletion interlocks the final kfree() via cache_refs.
 */
static
void
vmspace_entry_drop(struct vmspace_entry *ve)
{
        atomic_fetchadd_int(&ve->refs, -1);
}

/*
 * Locate the ve for (id), return the ve or NULL.  If found this function
 * will bump ve->refs which prevents the ve from being immediately destroyed
 * (but it can still be removed).
 *
 * The cache can potentially contain a stale ve, check by testing ve->vmspace.
 *
 * The caller must hold vkp->token if excl is non-zero.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
{
        struct vmspace_entry *ve;
        struct vmspace_entry key;
        struct vkernel_lwp *vklp;
        struct lwp *lp = curthread->td_lwp;

        /*
         * Cache check.  Since we already hold a ref on the cache entry
         * the ve cannot be ripped out from under us while we cycle
         * ve->refs.
         */
        if ((vklp = lp->lwp_vkernel) != NULL) {
                ve = vklp->ve_cache;
                if (ve && ve->id == id) {
                        uint32_t n;

                        /*
                         * Bump active refs, check to see if the cache
                         * entry is stale.  If not, we are good.
                         */
                        n = atomic_fetchadd_int(&ve->refs, 1);
                        if ((n & VKE_REF_DELETED) == 0) {
                                KKASSERT(ve->vmspace);
                                return ve;
                        }

                        /*
                         * Cache is stale, clean it out and fall through
                         * to a normal search.
                         */
                        vklp->ve_cache = NULL;
                        vmspace_entry_drop(ve);
                        vmspace_entry_cache_drop(ve);
                }
        }

        /*
         * Normal search protected by vkp->token.  No new ve's can be marked
         * DELETED while we hold the token so we are safe.
         */
        if (excl == 0)
                lwkt_gettoken_shared(&vkp->token);
        key.id = id;
        ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
        if (ve) {
                if (atomic_fetchadd_int(&ve->refs, 1) & VKE_REF_DELETED) {
                        vmspace_entry_drop(ve);
                        ve = NULL;
                }
        }
        if (excl == 0)
                lwkt_reltoken(&vkp->token);
        return (ve);
}

/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 *
 * No requirements.
 */
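/*
 * Note: fork() does not copy the vmspace entries; the child simply gains
 * a reference on the parent's shared vkernel_proc and therefore sees the
 * same RB tree of vmspace entries.
 */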
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
        struct vkernel_proc *vkp;

        vkp = p1->p_vkernel;
        KKASSERT(vkp->refs > 0);
        atomic_add_int(&vkp->refs, 1);
        p2->p_vkernel = vkp;
}

/*
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
        struct vkernel_proc *vkp;
        struct lwp *lp;

        vkp = p->p_vkernel;

        /*
         * Restore the original VM context if we are killed while running
         * a different one.
         *
         * This isn't supposed to happen.  What is supposed to happen is
         * that the process should enter vkernel_trap() before handling
         * the signal.
         */
        RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
                vkernel_lwp_exit(lp);
        }

        /*
         * Dereference the common area
         */
        p->p_vkernel = NULL;
        KKASSERT(vkp->refs > 0);

        if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
                lwkt_gettoken(&vkp->token);
                RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
                        rb_vmspace_delete, vkp);
                lwkt_reltoken(&vkp->token);
                kfree(vkp, M_VKERNEL);
        }
}

/*
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve;

        if ((vklp = lp->lwp_vkernel) != NULL) {
                if (lp->lwp_thread->td_vmm == NULL) {
                        /*
                         * vkernel thread
                         */
                        if ((ve = vklp->ve) != NULL) {
                                kprintf("Warning, pid %d killed with "
                                        "active VC!\n", lp->lwp_proc->p_pid);
                                pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
                                vklp->ve = NULL;
                                KKASSERT(ve->refs > 0);
                                vmspace_entry_drop(ve);
                        }
                } else {
                        /*
                         * guest thread
                         */
                        vklp->ve = NULL;
                }
                if ((ve = vklp->ve_cache) != NULL) {
                        vklp->ve_cache = NULL;
                        vmspace_entry_cache_drop(ve);
                }

                lp->lwp_vkernel = NULL;
                kfree(vklp, M_VKERNEL);
        }
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
        struct proc *p = lp->lwp_proc;
        struct vmspace_entry *ve;
        struct vkernel_lwp *vklp;
        int error;

        /*
         * Which vmspace entry was running?
         */
        vklp = lp->lwp_vkernel;
        KKASSERT(vklp);

        /* If it's a VMM thread just set the vkernel CR3 back */
        if (curthread->td_vmm == NULL) {
                ve = vklp->ve;
                KKASSERT(ve != NULL);

                /*
                 * Switch the LWP vmspace back to the virtual kernel's
                 * VM space.
                 */
                vklp->ve = NULL;
                pmap_setlwpvm(lp, p->p_vmspace);
                KKASSERT(ve->refs > 0);
                vmspace_entry_drop(ve);
                /* ve is invalid once we kill our ref */
        } else {
                vklp->ve = NULL;
                vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
        }

        /*
         * Copy the emulated process frame to the virtual kernel process.
         * The emulated process cannot change TLS descriptors so don't
         * bother saving them, we already have a copy.
         *
         * Restore the virtual kernel's saved context so the virtual kernel
         * process can resume.
         */
        error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
        bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
              sizeof(vklp->save_vextframe.vx_tls));
        set_user_TLS();
        cpu_vkernel_trap(frame, error);
}