/*
 * (MPSAFE)
 *
 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysmsg.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
			void *id, int havetoken);
static int vmspace_entry_delete(struct vmspace_entry *ve,
			struct vkernel_proc *vkp, int refs);
static void vmspace_entry_cache_ref(struct vmspace_entry *ve);
static void vmspace_entry_cache_drop(struct vmspace_entry *ve);
static void vmspace_entry_drop(struct vmspace_entry *ve);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used. The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty. Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * No requirements.
 */
int
sys_vmspace_create(struct sysmsg *sysmsg,
		   const struct vmspace_create_args *uap)
{
	struct vmspace_entry *ve;
	struct vkernel_proc *vkp;
	struct proc *p = curproc;
	int error;

	if (vkernel_enable == 0)
		return (EOPNOTSUPP);

	/*
	 * Create a virtual kernel side-structure for the process if one
	 * does not exist.
	 *
	 * Implement a simple resolution for SMP races.
	 */
	if ((vkp = p->p_vkernel) == NULL) {
		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
		lwkt_gettoken(&p->p_token);
		if (p->p_vkernel == NULL) {
			vkp->refs = 1;
			lwkt_token_init(&vkp->token, "vkernel");
			RB_INIT(&vkp->root);
			p->p_vkernel = vkp;
		} else {
			kfree(vkp, M_VKERNEL);
			vkp = p->p_vkernel;
		}
		lwkt_reltoken(&p->p_token);
	}

	if (curthread->td_vmm)
		return 0;

	/*
	 * Create a new VMSPACE, disallow conflicting ids
	 */
	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	ve->id = uap->id;
	ve->refs = 0;		/* active refs (none) */
	ve->cache_refs = 1;	/* on-tree, not deleted (prevent kfree) */
	pmap_pinit2(vmspace_pmap(ve->vmspace));

	lwkt_gettoken(&vkp->token);
	if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
		vmspace_rel(ve->vmspace);
		ve->vmspace = NULL; /* safety */
		kfree(ve, M_VKERNEL);
		error = EEXIST;
	} else {
		error = 0;
	}
	lwkt_reltoken(&vkp->token);

	return (error);
}
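/*
 * Usage sketch (illustrative only, not part of this file): a virtual
 * kernel typically creates one VMSPACE per emulated process, keying it
 * by a stable userland pointer.  Assuming the userland vmspace_create()
 * and vmspace_destroy() syscall wrappers and a hypothetical per-process
 * structure "ep" owned by the vkernel:
 *
 *	if (vmspace_create((void *)ep, 0, NULL) < 0)
 *		err(1, "vmspace_create");
 *	...
 *	vmspace_destroy((void *)ep);
 *
 * The type and data arguments must currently be 0 and NULL.
 */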

/*
 * Destroy a VMSPACE given its identifier.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct sysmsg *sysmsg,
		    const struct vmspace_destroy_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL)
		return EINVAL;

	/*
	 * vkp->token protects the deletion against a new RB tree search.
	 */
	lwkt_gettoken(&vkp->token);
	error = ENOENT;
	if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) != NULL) {
		error = vmspace_entry_delete(ve, vkp, 1);
		if (error == 0)
			vmspace_entry_cache_drop(ve);
	}
	lwkt_reltoken(&vkp->token);

	return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE. Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs. The context is updated as appropriate.
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct sysmsg *sysmsg,
		const struct vmspace_ctl_args *uap)
{
	struct vkernel_proc *vkp;
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve = NULL;
	struct lwp *lp;
	struct proc *p;
	int framesz;
	int error;

	lp = curthread->td_lwp;
	p = lp->lwp_proc;

	if ((vkp = p->p_vkernel) == NULL)
		return (EINVAL);

	/*
	 * ve only matters when VMM is not used.
	 */
	if (curthread->td_vmm == NULL) {
		if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
			error = ENOENT;
			goto done;
		}
	}

	switch(uap->cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context. Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the
		 * context.
		 */
		framesz = sizeof(struct trapframe);
		if ((vklp = lp->lwp_vkernel) == NULL) {
			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
				       M_WAITOK|M_ZERO);
			lp->lwp_vkernel = vklp;
		}
		if (ve && vklp->ve_cache != ve) {
			vmspace_entry_cache_ref(ve);
			if (vklp->ve_cache)
				vmspace_entry_cache_drop(vklp->ve_cache);
			vklp->ve_cache = ve;
		}
		vklp->user_trapframe = uap->tframe;
		vklp->user_vextframe = uap->vframe;
		bcopy(sysmsg->sysmsg_frame, &vklp->save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
		      sizeof(vklp->save_vextframe.vx_tls));
		error = copyin(uap->tframe, sysmsg->sysmsg_frame, framesz);
		if (error == 0) {
			error = copyin(&uap->vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		}
		if (error == 0)
			error = cpu_sanitize_frame(sysmsg->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			bcopy(&vklp->save_trapframe, sysmsg->sysmsg_frame,
			      framesz);
			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
			      sizeof(vklp->save_vextframe.vx_tls));
			set_user_TLS();
		} else {
			/*
			 * If it's a VMM thread just set the CR3. We also
			 * set vklp->ve as a key so we can distinguish
			 * whether a vkernel user process is running
			 * (non-NULL) or not (NULL).
			 */
			if (curthread->td_vmm == NULL) {
				vklp->ve = ve;
				atomic_add_int(&ve->refs, 1);
				pmap_setlwpvm(lp, ve->vmspace);
			} else {
				vklp->ve = uap->id;
				vmm_vm_set_guest_cr3((register_t)uap->id);
			}
			set_user_TLS();
			set_vkernel_fp(sysmsg->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
done:
	if (ve)
		vmspace_entry_drop(ve);

	return(error);
}
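/*
 * Usage sketch (illustrative only): the virtual kernel drives an
 * emulated context by looping on VMSPACE_CTL_RUN.  Each call installs
 * the supplied register context and returns, via vkernel_trap(), when
 * the context faults, traps, receives a signal, or issues a system
 * call, with "tf" updated in place.  "ep" and handle_event() are
 * hypothetical vkernel-side constructs:
 *
 *	struct trapframe tf;
 *	struct vextframe vf;
 *
 *	// tf/vf primed with the emulated process's register state
 *	for (;;) {
 *		vmspace_ctl((void *)ep, VMSPACE_CTL_RUN, &tf, &vf);
 *		if (handle_event(ep, &tf))	// page fault/syscall/etc
 *			break;
 *	}
 */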
/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * Map memory within a VMSPACE. This function is just like a normal mmap()
 * but operates on the vmspace's memory map. Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct sysmsg *sysmsg,
		 const struct vmspace_mmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	error = kern_mmap(ve->vmspace, uap->addr, uap->len,
			  uap->prot, uap->flags,
			  uap->fd, uap->offset, &sysmsg->sysmsg_resultp);

	vmspace_entry_drop(ve);
done2:
	return (error);
}

/*
 * vmspace_munmap(id, addr, len)
 *
 * Unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct sysmsg *sysmsg,
		   const struct vmspace_munmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	/*
	 * NOTE: kern_munmap() can block so we need to temporarily
	 *	 ref ve->refs.
	 */

	/*
	 * Copied from sys_munmap()
	 */
	addr = (vm_offset_t)uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t)round_page(size);
	if (size < uap->len) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	if (size == 0) {
		error = 0;
		goto done1;
	}

	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	map = &ve->vmspace->vm_map;
	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
		error = EINVAL;
		goto done1;
	}
	vm_map_remove(map, addr, addr + size);
	error = 0;
done1:
	vmspace_entry_drop(ve);
done2:
	return (error);
}
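/*
 * Usage sketch (illustrative only): a vkernel commonly backs guest
 * memory with a descriptor to its memory image and maps it with
 * MAP_VPAGETABLE so the guest's page tables remain under its control.
 * "ep", "memfd", and GUEST_SIZE are assumptions for the sketch:
 *
 *	void *base;
 *
 *	base = vmspace_mmap((void *)ep, NULL, GUEST_SIZE,
 *			    PROT_READ | PROT_WRITE,
 *			    MAP_SHARED | MAP_VPAGETABLE,
 *			    memfd, 0);
 *	...
 *	vmspace_munmap((void *)ep, base, GUEST_SIZE);
 */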
/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace. The number of bytes read is returned or
 * -1 if an unrecoverable error occurred. If the number of bytes read is
 * less than the requested size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pread(struct sysmsg *sysmsg,
		  const struct vmspace_pread_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}
	vmspace_entry_drop(ve);
	error = EINVAL;
done3:
	return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace. The number of bytes written is returned or
 * -1 if an unrecoverable error occurred. If the number of bytes written is
 * less than the requested size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pwrite(struct sysmsg *sysmsg,
		   const struct vmspace_pwrite_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}
	vmspace_entry_drop(ve);
	error = EINVAL;
done3:
	return (error);
}

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
int
sys_vmspace_mcontrol(struct sysmsg *sysmsg,
		     const struct vmspace_mcontrol_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	struct lwp *lp;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	lp = curthread->td_lwp;
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}

	/*
	 * This code is basically copied from sys_mcontrol()
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
		error = EINVAL;
		goto done1;
	}

	if (tmpaddr < (vm_offset_t)uap->addr) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}

	start = trunc_page((vm_offset_t)uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
			       uap->behav, uap->value);
done1:
	vmspace_entry_drop(ve);
done3:
	return (error);
}
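/*
 * Usage sketch (illustrative only): after the vkernel modifies a guest
 * page table entry it must invalidate the affected range in the
 * VMSPACE, much as mcontrol(2) does for a regular MAP_VPAGETABLE
 * mapping.  "ep" and "gva" are assumptions for the sketch:
 *
 *	vmspace_mcontrol((void *)ep, (void *)gva, PAGE_SIZE,
 *			 MADV_INVAL, 0);
 */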
/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * a->id is the address key and the only field that has to be initialized.
 *
 * The caller must hold vkp->token.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
	if ((char *)a->id < (char *)b->id)
		return(-1);
	else if ((char *)a->id > (char *)b->id)
		return(1);
	return(0);
}

/*
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
	struct vkernel_proc *vkp = data;

	if (vmspace_entry_delete(ve, vkp, 0) == 0)
		vmspace_entry_cache_drop(ve);
	else
		panic("rb_vmspace_delete: invalid refs %d", ve->refs);
	return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it. We have to clean
 * up the pmap, the vm_map, then destroy the vmspace. We gain control of
 * the associated cache_refs ref, which the caller will drop for us.
 *
 * The ve must not have any active references other than those from the
 * caller. If it does, EBUSY is returned. The ve may still maintain
 * any number of cache references which will drop as the related LWPs
 * execute vmspace operations or exit.
 *
 * 0 is returned on success, EBUSY on failure. On success the caller must
 * drop the last cache_refs. We have dropped the caller's active refs.
 *
 * The caller must hold vkp->token.
 */
static
int
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp,
		     int refs)
{
	/*
	 * Interlocked by vkp->token.
	 *
	 * Drop the caller's refs and set VKE_REF_DELETED atomically, if
	 * the remaining refs match exactly. Dropping refs and setting
	 * the DELETED flag atomically protects other threads from trying
	 * to use the ve.
	 *
	 * The caller now owns the final cache_ref that was previously
	 * associated with the live state of the ve.
	 */
	if (atomic_cmpset_int(&ve->refs, refs, VKE_REF_DELETED) == 0) {
		KKASSERT(ve->refs >= refs);
		return EBUSY;
	}
	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

	pmap_remove_pages(vmspace_pmap(ve->vmspace),
			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_remove(&ve->vmspace->vm_map,
		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vmspace_rel(ve->vmspace);
	ve->vmspace = NULL; /* safety */

	return 0;
}
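/*
 * Lifecycle illustration for the two counters above (derived from the
 * code in this file, not additional API):
 *
 *	creation:	refs = 0, cache_refs = 1  (on-tree ref)
 *	lookup:		refs++			  (vkernel_find_vmspace)
 *	lwp caching:	cache_refs++		  (vklp->ve_cache = ve)
 *	deletion:	refs -> VKE_REF_DELETED; the caller inherits
 *			the on-tree cache_ref and drops it
 *	final drop:	cache_refs 1 -> 0 in vmspace_entry_cache_drop()
 *			performs the kfree()
 */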
/*
 * Ref a ve for cache purposes
 */
static
void
vmspace_entry_cache_ref(struct vmspace_entry *ve)
{
	atomic_add_int(&ve->cache_refs, 1);
}

/*
 * The ve cache_drop is the final word for a ve. cache_refs includes an
 * extra ref representing the ve being on the RB tree and not being in a
 * deleted state. Removal from the RB tree and deletion manipulate this
 * ref. The last drop thus includes full deletion of the ve in addition
 * to the last cached user going away.
 */
static
void
vmspace_entry_cache_drop(struct vmspace_entry *ve)
{
	if (atomic_fetchadd_int(&ve->cache_refs, -1) == 1) {
		KKASSERT(ve->refs & VKE_REF_DELETED);
		kfree(ve, M_VKERNEL);
	}
}

/*
 * Drop primary reference. The ve cannot be freed on the 1->0 transition.
 * Instead, ve deletion interlocks the final kfree() via cache_refs.
 */
static
void
vmspace_entry_drop(struct vmspace_entry *ve)
{
	atomic_fetchadd_int(&ve->refs, -1);
}

/*
 * Locate the ve for (id), return the ve or NULL. If found this function
 * will bump ve->refs which prevents the ve from being immediately destroyed
 * (but it can still be removed).
 *
 * The cache can potentially contain a stale ve; check by testing
 * ve->vmspace.
 *
 * The caller must hold vkp->token if excl is non-zero.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
{
	struct vmspace_entry *ve;
	struct vmspace_entry key;
	struct vkernel_lwp *vklp;
	struct lwp *lp = curthread->td_lwp;

	/*
	 * Cache check. Since we already hold a ref on the cache entry
	 * the ve cannot be ripped out from under us while we cycle
	 * ve->refs.
	 */
	if ((vklp = lp->lwp_vkernel) != NULL) {
		ve = vklp->ve_cache;
		if (ve && ve->id == id) {
			uint32_t n;

			/*
			 * Bump active refs, check to see if the cache
			 * entry is stale. If not, we are good.
			 */
			n = atomic_fetchadd_int(&ve->refs, 1);
			if ((n & VKE_REF_DELETED) == 0) {
				KKASSERT(ve->vmspace);
				return ve;
			}

			/*
			 * Cache is stale, clean it out and fall through
			 * to a normal search.
			 */
			vklp->ve_cache = NULL;
			vmspace_entry_drop(ve);
			vmspace_entry_cache_drop(ve);
		}
	}

	/*
	 * Normal search protected by vkp->token. No new ve's can be marked
	 * DELETED while we hold the token so we are safe.
	 */
	if (excl == 0)
		lwkt_gettoken_shared(&vkp->token);
	key.id = id;
	ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
	if (ve) {
		if (atomic_fetchadd_int(&ve->refs, 1) & VKE_REF_DELETED) {
			vmspace_entry_drop(ve);
			ve = NULL;
		}
	}
	if (excl == 0)
		lwkt_reltoken(&vkp->token);
	return (ve);
}
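/*
 * Canonical caller pattern for the lookup above, as used by the
 * vmspace_*() syscalls in this file:
 *
 *	if ((ve = vkernel_find_vmspace(vkp, id, 0)) == NULL)
 *		return (ENOENT);
 *	... operate on ve->vmspace, possibly blocking ...
 *	vmspace_entry_drop(ve);
 *
 * The active ref keeps ve->vmspace intact while the caller blocks; it
 * does not prevent the ve from being removed from the RB tree in the
 * meantime.
 */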
/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 *
 * No requirements.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
	struct vkernel_proc *vkp;

	vkp = p1->p_vkernel;
	KKASSERT(vkp->refs > 0);
	atomic_add_int(&vkp->refs, 1);
	p2->p_vkernel = vkp;
}

/*
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
	struct vkernel_proc *vkp;
	struct lwp *lp;

	vkp = p->p_vkernel;

	/*
	 * Restore the original VM context if we are killed while running
	 * a different one.
	 *
	 * This isn't supposed to happen. What is supposed to happen is
	 * that the process should enter vkernel_trap() before handling
	 * the signal.
	 */
	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
		vkernel_lwp_exit(lp);
	}

	/*
	 * Dereference the common area
	 */
	p->p_vkernel = NULL;
	KKASSERT(vkp->refs > 0);

	if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
		lwkt_gettoken(&vkp->token);
		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
			rb_vmspace_delete, vkp);
		lwkt_reltoken(&vkp->token);
		kfree(vkp, M_VKERNEL);
	}
}

/*
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve;

	if ((vklp = lp->lwp_vkernel) != NULL) {
		if (lp->lwp_thread->td_vmm == NULL) {
			/*
			 * vkernel thread
			 */
			if ((ve = vklp->ve) != NULL) {
				kprintf("Warning, pid %d killed with "
					"active VC!\n", lp->lwp_proc->p_pid);
				pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
				vklp->ve = NULL;
				KKASSERT(ve->refs > 0);
				vmspace_entry_drop(ve);
			}
		} else {
			/*
			 * guest thread
			 */
			vklp->ve = NULL;
		}
		if ((ve = vklp->ve_cache) != NULL) {
			vklp->ve_cache = NULL;
			vmspace_entry_cache_drop(ve);
		}

		lp->lwp_vkernel = NULL;
		kfree(vklp, M_VKERNEL);
	}
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;
	struct vmspace_entry *ve;
	struct vkernel_lwp *vklp;
	int error;

	/*
	 * Which vmspace entry was running?
	 */
	vklp = lp->lwp_vkernel;
	KKASSERT(vklp);

	/* If it's a VMM thread just set the vkernel CR3 back */
	if (curthread->td_vmm == NULL) {
		ve = vklp->ve;
		KKASSERT(ve != NULL);

		/*
		 * Switch the LWP vmspace back to the virtual kernel's
		 * VM space.
		 */
		vklp->ve = NULL;
		pmap_setlwpvm(lp, p->p_vmspace);
		KKASSERT(ve->refs > 0);
		vmspace_entry_drop(ve);
		/* ve is invalid once we kill our ref */
	} else {
		vklp->ve = NULL;
		vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
	}

	/*
	 * Copy the emulated process frame to the virtual kernel process.
	 * The emulated process cannot change TLS descriptors so don't
	 * bother saving them, we already have a copy.
	 *
	 * Restore the virtual kernel's saved context so the virtual kernel
	 * process can resume.
	 */
	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
	      sizeof(vklp->save_vextframe.vx_tls));
	set_user_TLS();
	cpu_vkernel_trap(frame, error);
}
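/*
 * End-to-end flow (summary of the code above): an event taken while an
 * emulated context is installed funnels through vkernel_trap(), which
 * copies the emulated register state out to vklp->user_trapframe,
 * restores the vkernel's saved frame and TLS, and resumes the vkernel
 * just after its vmspace_ctl() call:
 *
 *	vmspace_ctl(id, VMSPACE_CTL_RUN, &tf, &vf);
 *	// returns here via vkernel_trap(); tf now holds the emulated
 *	// context captured at the time of the fault/trap/syscall
 */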