/*
 * (MPSAFE)
 *
 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>

#include <sys/sysref2.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
						  void *id, int havetoken);
static int vmspace_entry_delete(struct vmspace_entry *ve,
				struct vkernel_proc *vkp, int refs);
static void vmspace_entry_cache_ref(struct vmspace_entry *ve);
static void vmspace_entry_cache_drop(struct vmspace_entry *ve);
static void vmspace_entry_drop(struct vmspace_entry *ve);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * No requirements.
 */
int
sys_vmspace_create(struct vmspace_create_args *uap)
{
	struct vmspace_entry *ve;
	struct vkernel_proc *vkp;
	struct proc *p = curproc;
	int error;

	if (vkernel_enable == 0)
		return (EOPNOTSUPP);

	/*
	 * Create a virtual kernel side-structure for the process if one
	 * does not exist.
	 *
	 * Implement a simple resolution for SMP races.
	 */
	if ((vkp = p->p_vkernel) == NULL) {
		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
		lwkt_gettoken(&p->p_token);
		if (p->p_vkernel == NULL) {
			vkp->refs = 1;
			lwkt_token_init(&vkp->token, "vkernel");
			RB_INIT(&vkp->root);
			p->p_vkernel = vkp;
		} else {
			kfree(vkp, M_VKERNEL);
			vkp = p->p_vkernel;
		}
		lwkt_reltoken(&p->p_token);
	}

	if (curthread->td_vmm)
		return 0;

	/*
	 * Create a new VMSPACE, disallow conflicting ids
	 */
	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	ve->id = uap->id;
	ve->refs = 0;		/* active refs (none) */
	ve->cache_refs = 1;	/* on-tree, not deleted (prevent kfree) */
	pmap_pinit2(vmspace_pmap(ve->vmspace));

	lwkt_gettoken(&vkp->token);
	if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
		vmspace_rel(ve->vmspace);
		ve->vmspace = NULL; /* safety */
		kfree(ve, M_VKERNEL);
		error = EEXIST;
	} else {
		error = 0;
	}
	lwkt_reltoken(&vkp->token);

	return (error);
}

/*
 * Destroy a VMSPACE given its identifier.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct vmspace_destroy_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL)
		return EINVAL;

	/*
	 * vkp->token protects the deletion against a new RB tree search.
	 */
	lwkt_gettoken(&vkp->token);
	error = ENOENT;
	if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) != NULL) {
		error = vmspace_entry_delete(ve, vkp, 1);
		if (error == 0)
			vmspace_entry_cache_drop(ve);
	}
	lwkt_reltoken(&vkp->token);

	return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
	struct vkernel_proc *vkp;
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve = NULL;
	struct lwp *lp;
	struct proc *p;
	int framesz;
	int error;

	lp = curthread->td_lwp;
	p = lp->lwp_proc;

	if ((vkp = p->p_vkernel) == NULL)
		return (EINVAL);

	/*
	 * ve only matters when VMM is not used.
	 */
	if (curthread->td_vmm == NULL) {
		if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
			error = ENOENT;
			goto done;
		}
	}

	switch(uap->cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context.  Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the context.
		 */
		framesz = sizeof(struct trapframe);
		if ((vklp = lp->lwp_vkernel) == NULL) {
			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
				       M_WAITOK|M_ZERO);
			lp->lwp_vkernel = vklp;
		}
		if (ve && vklp->ve_cache != ve) {
			vmspace_entry_cache_ref(ve);
			if (vklp->ve_cache)
				vmspace_entry_cache_drop(vklp->ve_cache);
			vklp->ve_cache = ve;
		}
		vklp->user_trapframe = uap->tframe;
		vklp->user_vextframe = uap->vframe;
		bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
		      sizeof(vklp->save_vextframe.vx_tls));
		error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
		if (error == 0) {
			error = copyin(&uap->vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		}
		if (error == 0)
			error = cpu_sanitize_frame(uap->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			bcopy(&vklp->save_trapframe, uap->sysmsg_frame,
			      framesz);
			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
			      sizeof(vklp->save_vextframe.vx_tls));
			set_user_TLS();
		} else {
			/*
			 * If it's a VMM thread just set the CR3.  We also
			 * set vklp->ve as a key so we can distinguish when
			 * a vkernel user process is running (it is NULL
			 * when one is not).
			 */
			if (curthread->td_vmm == NULL) {
				vklp->ve = ve;
				atomic_add_int(&ve->refs, 1);
				pmap_setlwpvm(lp, ve->vmspace);
			} else {
				vklp->ve = uap->id;
				vmm_vm_set_guest_cr3((register_t)uap->id);
			}
			set_user_TLS();
			set_vkernel_fp(uap->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
done:
	if (ve)
		vmspace_entry_drop(ve);

	return(error);
}

/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct vmspace_mmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	error = kern_mmap(ve->vmspace, uap->addr, uap->len,
			  uap->prot, uap->flags,
			  uap->fd, uap->offset, &uap->sysmsg_resultp);

	vmspace_entry_drop(ve);
done2:
	return (error);
}

/*
 * vmspace_munmap(id, addr, len)
 *
 * unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct vmspace_munmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	/*
	 * NOTE: kern_munmap() can block so we need to temporarily
	 *	 ref ve->refs.
	 */

	/*
	 * Copied from sys_munmap()
	 */
	addr = (vm_offset_t)uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t)round_page(size);
	if (size < uap->len) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	if (size == 0) {
		error = 0;
		goto done1;
	}

	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	map = &ve->vmspace->vm_map;
	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
		error = EINVAL;
		goto done1;
	}
	vm_map_remove(map, addr, addr + size);
	error = 0;
done1:
	vmspace_entry_drop(ve);
done2:
	return (error);
}

/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pread(struct vmspace_pread_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}
	vmspace_entry_drop(ve);
	error = EINVAL;
done3:
	return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}
	vmspace_entry_drop(ve);
	error = EINVAL;
done3:
	return (error);
}

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
int
sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	struct lwp *lp;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	lp = curthread->td_lwp;
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}

	/*
	 * This code is basically copied from sys_mcontrol()
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
		error = EINVAL;
		goto done1;
	}

	if (tmpaddr < (vm_offset_t)uap->addr) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}

	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
			       uap->behav, uap->value);
done1:
	vmspace_entry_drop(ve);
done3:
	return (error);
}

/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * Compare entries by their id (an address).  Only the id field of the
 * key needs to be initialized.
 *
 * The caller must hold vkp->token.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
	if ((char *)a->id < (char *)b->id)
		return(-1);
	else if ((char *)a->id > (char *)b->id)
		return(1);
	return(0);
}

/*
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
	struct vkernel_proc *vkp = data;

	if (vmspace_entry_delete(ve, vkp, 0) == 0)
		vmspace_entry_cache_drop(ve);
	else
		panic("rb_vmspace_delete: invalid refs %d", ve->refs);
	return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.  We gain control of
 * the associated cache_refs ref, which the caller will drop for us.
 *
 * The ve must not have any active references other than those from the
 * caller.  If it does, EBUSY is returned.  The ve may still maintain
 * any number of cache references which will drop as the related LWPs
 * execute vmspace operations or exit.
 *
 * 0 is returned on success, EBUSY on failure.  On success the caller must
 * drop the last cache_refs.  We have dropped the caller's active refs.
 *
 * The caller must hold vkp->token.
 */
static
int
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp,
		     int refs)
{
	/*
	 * Interlocked by vkp->token.
	 *
	 * Drop the caller's refs and set VKE_REF_DELETED atomically, if
	 * the remaining refs match exactly.  Dropping refs and setting
	 * the DELETED flag atomically protects other threads from trying
	 * to use the ve.
	 *
	 * The caller now owns the final cache_ref that was previously
	 * associated with the live state of the ve.
	 */
	if (atomic_cmpset_int(&ve->refs, refs, VKE_REF_DELETED) == 0) {
		KKASSERT(ve->refs >= refs);
		return EBUSY;
	}
	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

	pmap_remove_pages(vmspace_pmap(ve->vmspace),
			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_remove(&ve->vmspace->vm_map,
		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vmspace_rel(ve->vmspace);
	ve->vmspace = NULL; /* safety */

	return 0;
}

/*
 * Ref a ve for cache purposes
 */
static
void
vmspace_entry_cache_ref(struct vmspace_entry *ve)
{
	atomic_add_int(&ve->cache_refs, 1);
}

/*
 * The ve cache_drop is the final word for a ve.  It gains an extra ref
 * representing it being on the RB tree and not being in a deleted state.
 * Removal from the RB tree and deletion manipulate this ref.  The last
 * drop will thus include full deletion of the ve in addition to the last
 * cached user going away.
 */
static
void
vmspace_entry_cache_drop(struct vmspace_entry *ve)
{
	if (atomic_fetchadd_int(&ve->cache_refs, -1) == 1) {
		KKASSERT(ve->refs & VKE_REF_DELETED);
		kfree(ve, M_VKERNEL);
	}
}

/*
 * Drop primary reference.  The ve cannot be freed on the 1->0 transition.
 * Instead, ve deletion interlocks the final kfree() via cache_refs.
 */
static
void
vmspace_entry_drop(struct vmspace_entry *ve)
{
	atomic_fetchadd_int(&ve->refs, -1);
}

/*
 * Locate the ve for (id), return the ve or NULL.  If found this function
 * will bump ve->refs which prevents the ve from being immediately destroyed
 * (but it can still be removed).
 *
 * The cache can potentially contain a stale ve, check by testing ve->vmspace.
 *
 * The caller must hold vkp->token if excl is non-zero.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
{
	struct vmspace_entry *ve;
	struct vmspace_entry key;
	struct vkernel_lwp *vklp;
	struct lwp *lp = curthread->td_lwp;

	/*
	 * Cache check.  Since we already hold a ref on the cache entry
	 * the ve cannot be ripped out from under us while we cycle
	 * ve->refs.
	 */
	if ((vklp = lp->lwp_vkernel) != NULL) {
		ve = vklp->ve_cache;
		if (ve && ve->id == id) {
			uint32_t n;

			/*
			 * Bump active refs, check to see if the cache
			 * entry is stale.  If not, we are good.
			 */
			n = atomic_fetchadd_int(&ve->refs, 1);
			if ((n & VKE_REF_DELETED) == 0) {
				KKASSERT(ve->vmspace);
				return ve;
			}

			/*
			 * Cache is stale, clean it out and fall through
			 * to a normal search.
			 */
			vklp->ve_cache = NULL;
			vmspace_entry_drop(ve);
			vmspace_entry_cache_drop(ve);
		}
	}

	/*
	 * Normal search protected by vkp->token.  No new ve's can be marked
	 * DELETED while we hold the token so we are safe.
	 */
	if (excl == 0)
		lwkt_gettoken_shared(&vkp->token);
	key.id = id;
	ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
	if (ve) {
		if (atomic_fetchadd_int(&ve->refs, 1) & VKE_REF_DELETED) {
			vmspace_entry_drop(ve);
			ve = NULL;
		}
	}
	if (excl == 0)
		lwkt_reltoken(&vkp->token);
	return (ve);
}

/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 *
 * No requirements.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
	struct vkernel_proc *vkp;

	vkp = p1->p_vkernel;
	KKASSERT(vkp->refs > 0);
	atomic_add_int(&vkp->refs, 1);
	p2->p_vkernel = vkp;
}

/*
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
	struct vkernel_proc *vkp;
	struct lwp *lp;

	vkp = p->p_vkernel;

	/*
	 * Restore the original VM context if we are killed while running
	 * a different one.
	 *
	 * This isn't supposed to happen.  What is supposed to happen is
	 * that the process should enter vkernel_trap() before handling
	 * the signal.
	 */
	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
		vkernel_lwp_exit(lp);
	}

	/*
	 * Dereference the common area
	 */
	p->p_vkernel = NULL;
	KKASSERT(vkp->refs > 0);

	if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
		lwkt_gettoken(&vkp->token);
		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
			rb_vmspace_delete, vkp);
		lwkt_reltoken(&vkp->token);
		kfree(vkp, M_VKERNEL);
	}
}

/*
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve;

	if ((vklp = lp->lwp_vkernel) != NULL) {
		if (lp->lwp_thread->td_vmm == NULL) {
			/*
			 * vkernel thread
			 */
			if ((ve = vklp->ve) != NULL) {
				kprintf("Warning, pid %d killed with "
					"active VC!\n", lp->lwp_proc->p_pid);
				pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
				vklp->ve = NULL;
				KKASSERT(ve->refs > 0);
				vmspace_entry_drop(ve);
			}
		} else {
			/*
			 * guest thread
			 */
			vklp->ve = NULL;
		}
		if ((ve = vklp->ve_cache) != NULL) {
			vklp->ve_cache = NULL;
			vmspace_entry_cache_drop(ve);
		}

		lp->lwp_vkernel = NULL;
		kfree(vklp, M_VKERNEL);
	}
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;
	struct vmspace_entry *ve;
	struct vkernel_lwp *vklp;
	int error;

	/*
	 * Which vmspace entry was running?
	 */
	vklp = lp->lwp_vkernel;
	KKASSERT(vklp);

	/* If it's a VMM thread just set the vkernel CR3 back */
	if (curthread->td_vmm == NULL) {
		ve = vklp->ve;
		KKASSERT(ve != NULL);

		/*
		 * Switch the LWP vmspace back to the virtual kernel's
		 * VM space.
		 */
		vklp->ve = NULL;
		pmap_setlwpvm(lp, p->p_vmspace);
		KKASSERT(ve->refs > 0);
		vmspace_entry_drop(ve);
		/* ve is invalid once we kill our ref */
	} else {
		vklp->ve = NULL;
		vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
	}

	/*
	 * Copy the emulated process frame to the virtual kernel process.
	 * The emulated process cannot change TLS descriptors so don't
	 * bother saving them, we already have a copy.
	 *
	 * Restore the virtual kernel's saved context so the virtual kernel
	 * process can resume.
	 */
	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
	      sizeof(vklp->save_vextframe.vx_tls));
	set_user_TLS();
	cpu_vkernel_trap(frame, error);
}
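
#if 0
/*
 * Illustrative userland sketch (excluded from the kernel build): a minimal
 * lifecycle for the vmspace_*() system calls documented above, showing the
 * create -> mmap -> VMSPACE_CTL_RUN -> destroy sequence a vkernel would
 * drive.  The argument lists follow the comments in this file; the exact
 * userland prototypes and headers, and the example id/address/length
 * values, are assumptions made for illustration only.  A real vkernel
 * fills in a meaningful trapframe/vextframe before VMSPACE_CTL_RUN and
 * handles the eventual return from vmspace_ctl() (trap, signal, or guest
 * system call) in its trap path.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/vkernel.h>
#include <string.h>

static void
vmspace_usage_sketch(void)
{
	void *id = (void *)0x1;		/* any non-NULL identifier */
	struct trapframe tf;
	struct vextframe vf;

	memset(&tf, 0, sizeof(tf));
	memset(&vf, 0, sizeof(vf));

	/* Create an empty VM space keyed by id; type and data must be 0. */
	if (vmspace_create(id, 0, NULL) < 0)
		return;

	/* Map a page of anonymous memory into the guest VM space. */
	vmspace_mmap(id, (void *)0x1000, 4096, PROT_READ | PROT_WRITE,
		     MAP_ANON | MAP_FIXED, -1, 0);

	/*
	 * Run the guest context; control returns when it faults, traps,
	 * receives a signal, or makes a system call.
	 */
	vmspace_ctl(id, VMSPACE_CTL_RUN, &tf, &vf);

	/* Tear the VM space down again. */
	vmspace_destroy(id);
}
#endif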