/*
 * (MPSAFE)
 *
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysmsg.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
					void *id, int havetoken);
static int vmspace_entry_delete(struct vmspace_entry *ve,
					struct vkernel_proc *vkp, int refs);
static void vmspace_entry_cache_ref(struct vmspace_entry *ve);
static void vmspace_entry_cache_drop(struct vmspace_entry *ve);
static void vmspace_entry_drop(struct vmspace_entry *ve);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * No requirements.
 */
int
sys_vmspace_create(struct sysmsg *sysmsg,
		   const struct vmspace_create_args *uap)
{
	struct vmspace_entry *ve;
	struct vkernel_proc *vkp;
	struct proc *p = curproc;
	int error;

	if (vkernel_enable == 0)
		return (EOPNOTSUPP);

	/*
	 * Create a virtual kernel side-structure for the process if one
	 * does not exist.
	 *
	 * Implement a simple resolution for SMP races.
	 */
	if ((vkp = p->p_vkernel) == NULL) {
		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
		lwkt_gettoken(&p->p_token);
		if (p->p_vkernel == NULL) {
			vkp->refs = 1;
			lwkt_token_init(&vkp->token, "vkernel");
			RB_INIT(&vkp->root);
			p->p_vkernel = vkp;
		} else {
			kfree(vkp, M_VKERNEL);
			vkp = p->p_vkernel;
		}
		lwkt_reltoken(&p->p_token);
	}

	if (curthread->td_vmm)
		return 0;

	/*
	 * Create a new VMSPACE, disallow conflicting ids
	 */
	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	ve->id = uap->id;
	ve->refs = 0;		/* active refs (none) */
	ve->cache_refs = 1;	/* on-tree, not deleted (prevent kfree) */
	pmap_pinit2(vmspace_pmap(ve->vmspace));

	lwkt_gettoken(&vkp->token);
	if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
		vmspace_rel(ve->vmspace);
		ve->vmspace = NULL;	/* safety */
		kfree(ve, M_VKERNEL);
		error = EEXIST;
	} else {
		error = 0;
	}
	lwkt_reltoken(&vkp->token);

	return (error);
}
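
/*
 * Usage sketch (hypothetical userland caller, not part of this file):
 * the id is any unique non-NULL pointer chosen by the vkernel, and the
 * type/data arguments must currently be 0/NULL per the comment above.
 *
 *	void *id = &guest_context;	// any unique userland address
 *
 *	if (vmspace_create(id, 0, NULL) < 0)
 *		err(1, "vmspace_create");
 *	...
 *	vmspace_destroy(id);
 */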

/*
 * Destroy a VMSPACE given its identifier.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct sysmsg *sysmsg,
		    const struct vmspace_destroy_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL)
		return EINVAL;

	/*
	 * vkp->token protects the deletion against a new RB tree search.
	 */
	lwkt_gettoken(&vkp->token);
	error = ENOENT;
	if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) != NULL) {
		error = vmspace_entry_delete(ve, vkp, 1);
		if (error == 0)
			vmspace_entry_cache_drop(ve);
	}
	lwkt_reltoken(&vkp->token);

	return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct sysmsg *sysmsg,
		const struct vmspace_ctl_args *uap)
{
	struct vmspace_ctl_args ua = *uap;
	struct vkernel_proc *vkp;
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve = NULL;
	struct lwp *lp;
	struct proc *p;
	int framesz;
	int error;

	lp = curthread->td_lwp;
	p = lp->lwp_proc;

	if ((vkp = p->p_vkernel) == NULL)
		return (EINVAL);

	/*
	 * ve only matters when VMM is not used.
	 *
	 * NOTE: We have to copy *uap into ua because uap is an aliased
	 *	 pointer into the sysframe, which we are replacing.
	 */
	if (curthread->td_vmm == NULL) {
		if ((ve = vkernel_find_vmspace(vkp, ua.id, 0)) == NULL) {
			error = ENOENT;
			goto done;
		}
	}

	switch(ua.cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context.  Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the
		 * context.
		 */
		framesz = sizeof(struct trapframe);
		if ((vklp = lp->lwp_vkernel) == NULL) {
			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
				       M_WAITOK|M_ZERO);
			lp->lwp_vkernel = vklp;
		}
		if (ve && vklp->ve_cache != ve) {
			vmspace_entry_cache_ref(ve);
			if (vklp->ve_cache)
				vmspace_entry_cache_drop(vklp->ve_cache);
			vklp->ve_cache = ve;
		}
		vklp->user_trapframe = ua.tframe;
		vklp->user_vextframe = ua.vframe;
		bcopy(sysmsg->sysmsg_frame, &vklp->save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
		      sizeof(vklp->save_vextframe.vx_tls));
		error = copyin(ua.tframe, sysmsg->sysmsg_frame, framesz);
		if (error == 0) {
			error = copyin(&ua.vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		}
		if (error == 0)
			error = cpu_sanitize_frame(sysmsg->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			bcopy(&vklp->save_trapframe, sysmsg->sysmsg_frame,
			      framesz);
			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
			      sizeof(vklp->save_vextframe.vx_tls));
			set_user_TLS();
		} else {
			/*
			 * If this is a VMM thread we just set the CR3.  We
			 * also set vklp->ve to a non-NULL key so we can
			 * distinguish when a vkernel user process is
			 * running (vklp->ve is NULL otherwise).
			 */
			if (curthread->td_vmm == NULL) {
				vklp->ve = ve;
				atomic_add_int(&ve->refs, 1);
				pmap_setlwpvm(lp, ve->vmspace);
			} else {
				vklp->ve = ua.id;
				vmm_vm_set_guest_cr3((register_t)ua.id);
			}
			set_user_TLS();
			set_vkernel_fp(sysmsg->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
done:
	if (ve)
		vmspace_entry_drop(ve);

	return(error);
}
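
/*
 * Usage sketch (hypothetical vkernel caller; setup_guest_frames() is a
 * made-up helper that fills in a trapframe/vextframe pair for the guest):
 *
 *	struct trapframe tf;	// guest register state
 *	struct vextframe vf;	// guest TLS state
 *
 *	setup_guest_frames(&tf, &vf);
 *	vmspace_ctl(id, VMSPACE_CTL_RUN, &tf, &vf);
 *	// returns when the guest faults, traps, or makes a syscall;
 *	// tf/vf then hold the guest's context at that point.
 */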

/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * Map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct sysmsg *sysmsg,
		 const struct vmspace_mmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	error = kern_mmap(ve->vmspace, uap->addr, uap->len,
			  uap->prot, uap->flags,
			  uap->fd, uap->offset, &sysmsg->sysmsg_resultp);

	vmspace_entry_drop(ve);
done2:
	return (error);
}

/*
 * vmspace_munmap(id, addr, len)
 *
 * Unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct sysmsg *sysmsg,
		   const struct vmspace_munmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done2;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	/*
	 * NOTE: kern_munmap() can block so we need to temporarily
	 *	 ref ve->refs.
	 */

	/*
	 * Copied from sys_munmap()
	 */
	addr = (vm_offset_t)uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t)round_page(size);
	if (size < uap->len) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	if (size == 0) {
		error = 0;
		goto done1;
	}

	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	map = &ve->vmspace->vm_map;
	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
		error = EINVAL;
		goto done1;
	}
	vm_map_remove(map, addr, addr + size);
	error = 0;
done1:
	vmspace_entry_drop(ve);
done2:
	return (error);
}
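
/*
 * Worked example of the rounding above (assuming 4 KiB pages, so
 * PAGE_MASK is 0xfff):
 *
 *	addr = 0x1003, len = 0x100
 *	pageoff = 0x1003 & 0xfff = 3
 *	addr -= 3        -> 0x1000
 *	size = 0x100 + 3 -> 0x103; round_page(0x103) -> 0x1000
 *
 * so the request is widened to the single page [0x1000, 0x2000).
 */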

/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pread(struct sysmsg *sysmsg,
		  const struct vmspace_pread_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}
	vmspace_entry_drop(ve);
	error = EINVAL;
done3:
	return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pwrite(struct sysmsg *sysmsg,
		   const struct vmspace_pwrite_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}
	vmspace_entry_drop(ve);
	error = EINVAL;
done3:
	return (error);
}
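
/*
 * Intended usage sketch once implemented (hypothetical; follows the
 * semantics documented above, where a short transfer means the caller
 * must resolve a page fault in the target vmspace and retry; the
 * resolve_fault() helper is made up for illustration):
 *
 *	ssize_t n;
 *
 *	while (nbyte) {
 *		n = vmspace_pread(id, buf, nbyte, 0, off);
 *		if (n < 0)
 *			err(1, "vmspace_pread");
 *		if ((size_t)n < nbyte)
 *			resolve_fault(id, off + n);
 *		buf += n; off += n; nbyte -= n;
 *	}
 */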

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
int
sys_vmspace_mcontrol(struct sysmsg *sysmsg,
		     const struct vmspace_mcontrol_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	struct lwp *lp;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	lp = curthread->td_lwp;
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
		error = ENOENT;
		goto done3;
	}

	/*
	 * This code is basically copied from sys_mcontrol()
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
		error = EINVAL;
		goto done1;
	}

	if (tmpaddr < (vm_offset_t)uap->addr) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}

	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
			       uap->behav, uap->value);
done1:
	vmspace_entry_drop(ve);
done3:
	return (error);
}

/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * a->id is the address and is the only field that has to be initialized.
 *
 * The caller must hold vkp->token.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
	if ((char *)a->id < (char *)b->id)
		return(-1);
	else if ((char *)a->id > (char *)b->id)
		return(1);
	return(0);
}

/*
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
	struct vkernel_proc *vkp = data;

	if (vmspace_entry_delete(ve, vkp, 0) == 0)
		vmspace_entry_cache_drop(ve);
	else
		panic("rb_vmspace_delete: invalid refs %d", ve->refs);
	return(0);
}
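
/*
 * Reference-count lifecycle sketch (a summary of the code below, for
 * orientation):
 *
 *	ve->refs	active references.  vkernel_find_vmspace() bumps
 *			this, vmspace_entry_drop() releases it, and
 *			vmspace_entry_delete() atomically swaps the
 *			remaining count for VKE_REF_DELETED.
 *
 *	ve->cache_refs	one ref held for the live on-tree entry plus one
 *			per lwp ve_cache pointer; the final
 *			vmspace_entry_cache_drop() kfree()s the ve.
 */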

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.  We gain control of
 * the associated cache_refs ref, which the caller will drop for us.
 *
 * The ve must not have any active references other than those from the
 * caller.  If it does, EBUSY is returned.  The ve may still maintain
 * any number of cache references which will drop as the related LWPs
 * execute vmspace operations or exit.
 *
 * 0 is returned on success, EBUSY on failure.  On success the caller must
 * drop the last cache_refs.  We have dropped the caller's active refs.
 *
 * The caller must hold vkp->token.
 */
static
int
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp,
		     int refs)
{
	/*
	 * Interlocked by vkp->token.
	 *
	 * Drop the caller's refs and set VKE_REF_DELETED atomically, if
	 * the remaining refs match exactly.  Dropping refs and setting
	 * the DELETED flag atomically protects other threads from trying
	 * to use the ve.
	 *
	 * The caller now owns the final cache_ref that was previously
	 * associated with the live state of the ve.
	 */
	if (atomic_cmpset_int(&ve->refs, refs, VKE_REF_DELETED) == 0) {
		KKASSERT(ve->refs >= refs);
		return EBUSY;
	}
	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

	pmap_remove_pages(vmspace_pmap(ve->vmspace),
			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_remove(&ve->vmspace->vm_map,
		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vmspace_rel(ve->vmspace);
	ve->vmspace = NULL;	/* safety */

	return 0;
}

/*
 * Ref a ve for cache purposes
 */
static
void
vmspace_entry_cache_ref(struct vmspace_entry *ve)
{
	atomic_add_int(&ve->cache_refs, 1);
}

/*
 * The ve cache_drop is the final word for a ve.  It gains an extra ref
 * representing it being on the RB tree and not being in a deleted state.
 * Removal from the RB tree and deletion manipulate this ref.  The last
 * drop will thus include full deletion of the ve in addition to the last
 * cached user going away.
 */
static
void
vmspace_entry_cache_drop(struct vmspace_entry *ve)
{
	if (atomic_fetchadd_int(&ve->cache_refs, -1) == 1) {
		KKASSERT(ve->refs & VKE_REF_DELETED);
		kfree(ve, M_VKERNEL);
	}
}

/*
 * Drop primary reference.  The ve cannot be freed on the 1->0 transition.
 * Instead, ve deletion interlocks the final kfree() via cache_refs.
 */
static
void
vmspace_entry_drop(struct vmspace_entry *ve)
{
	atomic_fetchadd_int(&ve->refs, -1);
}

/*
 * Locate the ve for (id), return the ve or NULL.  If found this function
 * will bump ve->refs which prevents the ve from being immediately destroyed
 * (but it can still be removed).
 *
 * The cache can potentially contain a stale ve; check by testing
 * ve->vmspace.
 *
 * The caller must hold vkp->token if excl is non-zero.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
{
	struct vmspace_entry *ve;
	struct vmspace_entry key;
	struct vkernel_lwp *vklp;
	struct lwp *lp = curthread->td_lwp;

	/*
	 * Cache check.  Since we already hold a ref on the cache entry
	 * the ve cannot be ripped out from under us while we cycle
	 * ve->refs.
	 */
	if ((vklp = lp->lwp_vkernel) != NULL) {
		ve = vklp->ve_cache;
		if (ve && ve->id == id) {
			uint32_t n;

			/*
			 * Bump active refs, check to see if the cache
			 * entry is stale.  If not, we are good.
			 */
			n = atomic_fetchadd_int(&ve->refs, 1);
			if ((n & VKE_REF_DELETED) == 0) {
				KKASSERT(ve->vmspace);
				return ve;
			}

			/*
			 * Cache is stale, clean it out and fall through
			 * to a normal search.
			 */
			vklp->ve_cache = NULL;
			vmspace_entry_drop(ve);
			vmspace_entry_cache_drop(ve);
		}
	}

	/*
	 * Normal search protected by vkp->token.  No new ve's can be marked
	 * DELETED while we hold the token so we are safe.
	 */
	if (excl == 0)
		lwkt_gettoken_shared(&vkp->token);
	key.id = id;
	ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
	if (ve) {
		if (atomic_fetchadd_int(&ve->refs, 1) & VKE_REF_DELETED) {
			vmspace_entry_drop(ve);
			ve = NULL;
		}
	}
	if (excl == 0)
		lwkt_reltoken(&vkp->token);
	return (ve);
}
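
/*
 * Typical caller pattern (sketch; this mirrors the syscall bodies above):
 *
 *	if ((ve = vkernel_find_vmspace(vkp, id, 0)) == NULL)
 *		return (ENOENT);
 *	... operate on ve->vmspace, possibly blocking ...
 *	vmspace_entry_drop(ve);
 *
 * The active ref taken by the find keeps the ve's vmspace from being
 * destroyed while the caller is blocked, even if the ve is concurrently
 * removed from the RB tree.
 */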

/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 *
 * No requirements.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
	struct vkernel_proc *vkp;

	vkp = p1->p_vkernel;
	KKASSERT(vkp->refs > 0);
	atomic_add_int(&vkp->refs, 1);
	p2->p_vkernel = vkp;
}

/*
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
	struct vkernel_proc *vkp;
	struct lwp *lp;

	vkp = p->p_vkernel;

	/*
	 * Restore the original VM context if we are killed while running
	 * a different one.
	 *
	 * This isn't supposed to happen.  What is supposed to happen is
	 * that the process should enter vkernel_trap() before handling
	 * the signal.
	 */
	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
		vkernel_lwp_exit(lp);
	}

	/*
	 * Dereference the common area
	 */
	p->p_vkernel = NULL;
	KKASSERT(vkp->refs > 0);

	if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
		lwkt_gettoken(&vkp->token);
		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
			rb_vmspace_delete, vkp);
		lwkt_reltoken(&vkp->token);
		kfree(vkp, M_VKERNEL);
	}
}

/*
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve;

	if ((vklp = lp->lwp_vkernel) != NULL) {
		if (lp->lwp_thread->td_vmm == NULL) {
			/*
			 * vkernel thread
			 */
			if ((ve = vklp->ve) != NULL) {
				kprintf("Warning, pid %d killed with "
					"active VC!\n", lp->lwp_proc->p_pid);
				pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
				vklp->ve = NULL;
				KKASSERT(ve->refs > 0);
				vmspace_entry_drop(ve);
			}
		} else {
			/*
			 * guest thread
			 */
			vklp->ve = NULL;
		}
		if ((ve = vklp->ve_cache) != NULL) {
			vklp->ve_cache = NULL;
			vmspace_entry_cache_drop(ve);
		}

		lp->lwp_vkernel = NULL;
		kfree(vklp, M_VKERNEL);
	}
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;
	struct vmspace_entry *ve;
	struct vkernel_lwp *vklp;
	int error;

	/*
	 * Which vmspace entry was running?
	 */
	vklp = lp->lwp_vkernel;
	KKASSERT(vklp);

	/* If it's a VMM thread just set the vkernel CR3 back */
	if (curthread->td_vmm == NULL) {
		ve = vklp->ve;
		KKASSERT(ve != NULL);

		/*
		 * Switch the LWP vmspace back to the virtual kernel's
		 * VM space.
		 */
		vklp->ve = NULL;
		pmap_setlwpvm(lp, p->p_vmspace);
		KKASSERT(ve->refs > 0);
		vmspace_entry_drop(ve);
		/* ve is invalid once we kill our ref */
	} else {
		vklp->ve = NULL;
		vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
	}

	/*
	 * Copy the emulated process frame to the virtual kernel process.
	 * The emulated process cannot change TLS descriptors so don't
	 * bother saving them, we already have a copy.
	 *
	 * Restore the virtual kernel's saved context so the virtual kernel
	 * process can resume.
	 */
	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
	      sizeof(vklp->save_vextframe.vx_tls));
	set_user_TLS();
	cpu_vkernel_trap(frame, error);
}