/*
 * (MPSAFE)
 *
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysmsg.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
                        void *id, int havetoken);
static int vmspace_entry_delete(struct vmspace_entry *ve,
                        struct vkernel_proc *vkp, int refs);
static void vmspace_entry_cache_ref(struct vmspace_entry *ve);
static void vmspace_entry_cache_drop(struct vmspace_entry *ve);
static void vmspace_entry_drop(struct vmspace_entry *ve);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap().
 *
 * No requirements.
 */
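/*
 * Illustrative sketch (not part of this file, names are hypothetical):
 * a vkernel typically passes the address of one of its own per-guest
 * structures as the id, then reuses that id for all later vmspace_*()
 * calls on the same guest VM space:
 *
 *	struct guest_vm gvm;
 *
 *	if (vmspace_create(&gvm, 0, NULL) != 0)
 *		err(1, "vmspace_create");
 */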
int
sys_vmspace_create(struct sysmsg *sysmsg,
                   const struct vmspace_create_args *uap)
{
        struct vmspace_entry *ve;
        struct vkernel_proc *vkp;
        struct proc *p = curproc;
        int error;

        if (vkernel_enable == 0)
                return (EOPNOTSUPP);

        /*
         * Create a virtual kernel side-structure for the process if one
         * does not exist.
         *
         * Implement a simple resolution for SMP races.
         */
        if ((vkp = p->p_vkernel) == NULL) {
                vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
                lwkt_gettoken(&p->p_token);
                if (p->p_vkernel == NULL) {
                        vkp->refs = 1;
                        lwkt_token_init(&vkp->token, "vkernel");
                        RB_INIT(&vkp->root);
                        p->p_vkernel = vkp;
                } else {
                        kfree(vkp, M_VKERNEL);
                        vkp = p->p_vkernel;
                }
                lwkt_reltoken(&p->p_token);
        }

        if (curthread->td_vmm)
                return 0;

        /*
         * Create a new VMSPACE, disallow conflicting ids
         */
        ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
        ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        ve->id = uap->id;
        ve->refs = 0;           /* active refs (none) */
        ve->cache_refs = 1;     /* on-tree, not deleted (prevent kfree) */
        pmap_pinit2(vmspace_pmap(ve->vmspace));

        lwkt_gettoken(&vkp->token);
        if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
                vmspace_rel(ve->vmspace);
                ve->vmspace = NULL;             /* safety */
                kfree(ve, M_VKERNEL);
                error = EEXIST;
        } else {
                error = 0;
        }
        lwkt_reltoken(&vkp->token);

        return (error);
}

/*
 * Destroy a VMSPACE given its identifier.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct sysmsg *sysmsg,
                    const struct vmspace_destroy_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL)
                return EINVAL;

        /*
         * vkp->token protects the deletion against a new RB tree search.
         */
        lwkt_gettoken(&vkp->token);
        error = ENOENT;
        if ((ve = vkernel_find_vmspace(vkp, uap->id, 1)) != NULL) {
                error = vmspace_entry_delete(ve, vkp, 1);
                if (error == 0)
                        vmspace_entry_cache_drop(ve);
        }
        lwkt_reltoken(&vkp->token);

        return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *              struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * No requirements.
 */
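/*
 * Illustrative sketch (not part of this file, helper names are
 * hypothetical): the vkernel usually runs a guest context in a loop,
 * resuming it with VMSPACE_CTL_RUN and emulating whatever event caused
 * control to return:
 *
 *	struct trapframe tf;
 *	struct vextframe vf;
 *
 *	setup_guest_context(&tf, &vf);
 *	for (;;) {
 *		vmspace_ctl(&gvm, VMSPACE_CTL_RUN, &tf, &vf);
 *		handle_guest_event(&tf);
 *	}
 */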
int
sys_vmspace_ctl(struct sysmsg *sysmsg,
                const struct vmspace_ctl_args *uap)
{
        struct vmspace_ctl_args ua = *uap;
        struct vkernel_proc *vkp;
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve = NULL;
        struct lwp *lp;
        struct proc *p;
        int framesz;
        int error;

        lp = curthread->td_lwp;
        p = lp->lwp_proc;

        if ((vkp = p->p_vkernel) == NULL)
                return (EINVAL);

        /*
         * ve only matters when VMM is not used.
         *
         * NOTE: We have to copy *uap into ua because uap is an aliased
         *       pointer into the sysframe, which we are replacing.
         */
        if (curthread->td_vmm == NULL) {
                if ((ve = vkernel_find_vmspace(vkp, ua.id, 0)) == NULL) {
                        error = ENOENT;
                        goto done;
                }
        }

        switch(ua.cmd) {
        case VMSPACE_CTL_RUN:
                /*
                 * Save the caller's register context, swap VM spaces, and
                 * install the passed register context.  Return with
                 * EJUSTRETURN so the syscall code doesn't adjust the context.
                 */
                framesz = sizeof(struct trapframe);
                if ((vklp = lp->lwp_vkernel) == NULL) {
                        vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
                                       M_WAITOK|M_ZERO);
                        lp->lwp_vkernel = vklp;
                }
                if (ve && vklp->ve_cache != ve) {
                        vmspace_entry_cache_ref(ve);
                        if (vklp->ve_cache)
                                vmspace_entry_cache_drop(vklp->ve_cache);
                        vklp->ve_cache = ve;
                }
                vklp->user_trapframe = ua.tframe;
                vklp->user_vextframe = ua.vframe;
                bcopy(sysmsg->sysmsg_frame, &vklp->save_trapframe, framesz);
                bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
                      sizeof(vklp->save_vextframe.vx_tls));
                error = copyin(ua.tframe, sysmsg->sysmsg_frame, framesz);
                if (error == 0) {
                        error = copyin(&ua.vframe->vx_tls,
                                       &curthread->td_tls,
                                       sizeof(struct savetls));
                }
                if (error == 0)
                        error = cpu_sanitize_frame(sysmsg->sysmsg_frame);
                if (error == 0)
                        error = cpu_sanitize_tls(&curthread->td_tls);
                if (error) {
                        bcopy(&vklp->save_trapframe, sysmsg->sysmsg_frame,
                              framesz);
                        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
                              sizeof(vklp->save_vextframe.vx_tls));
                        set_user_TLS();
                } else {
                        /*
                         * If this is a VMM thread just set the CR3.  We also
                         * set vklp->ve so we can distinguish when a vkernel
                         * user process is running (it is NULL otherwise).
                         */
                        if (curthread->td_vmm == NULL) {
                                vklp->ve = ve;
                                atomic_add_int(&ve->refs, 1);
                                pmap_setlwpvm(lp, ve->vmspace);
                        } else {
                                vklp->ve = ua.id;
                                vmm_vm_set_guest_cr3((register_t)ua.id);
                        }
                        set_user_TLS();
                        set_vkernel_fp(sysmsg->sysmsg_frame);
                        error = EJUSTRETURN;
                }
                break;
        default:
                error = EOPNOTSUPP;
                break;
        }
done:
        if (ve)
                vmspace_entry_drop(ve);

        return(error);
}

/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct sysmsg *sysmsg,
                 const struct vmspace_mmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done2;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done2;
        }

        error = kern_mmap(ve->vmspace, uap->addr, uap->len,
                          uap->prot, uap->flags,
                          uap->fd, uap->offset, &sysmsg->sysmsg_resultp);

        vmspace_entry_drop(ve);
done2:
        return (error);
}
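/*
 * Illustrative sketch (not part of this file, variable names are
 * hypothetical): guest memory is normally populated by mapping a
 * vkernel-side backing object, such as a memory file, into the guest
 * VM space at the desired guest virtual address:
 *
 *	vmspace_mmap(&gvm, (void *)guest_va, len,
 *		     PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
 *		     mem_fd, backing_off);
 */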
/*
 * vmspace_munmap(id, addr, len)
 *
 * unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct sysmsg *sysmsg,
                   const struct vmspace_munmap_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        vm_offset_t addr;
        vm_offset_t tmpaddr;
        vm_size_t size, pageoff;
        vm_map_t map;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done2;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done2;
        }

        /*
         * NOTE: kern_munmap() can block so we need to temporarily
         *       ref ve->refs.
         */

        /*
         * Copied from sys_munmap()
         */
        addr = (vm_offset_t)uap->addr;
        size = uap->len;

        pageoff = (addr & PAGE_MASK);
        addr -= pageoff;
        size += pageoff;
        size = (vm_size_t)round_page(size);
        if (size < uap->len) {          /* wrap */
                error = EINVAL;
                goto done1;
        }
        tmpaddr = addr + size;          /* workaround gcc4 opt */
        if (tmpaddr < addr) {           /* wrap */
                error = EINVAL;
                goto done1;
        }
        if (size == 0) {
                error = 0;
                goto done1;
        }

        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        map = &ve->vmspace->vm_map;
        if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
                error = EINVAL;
                goto done1;
        }
        vm_map_remove(map, addr, addr + size);
        error = 0;
done1:
        vmspace_entry_drop(ve);
done2:
        return (error);
}
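/*
 * Illustrative sketch (not part of this file): tearing down a guest
 * mapping, and eventually the guest VM space itself, mirrors the normal
 * munmap() pattern:
 *
 *	vmspace_munmap(&gvm, (void *)guest_va, len);
 *	...
 *	vmspace_destroy(&gvm);
 */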
/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pread(struct sysmsg *sysmsg,
                  const struct vmspace_pread_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }
        vmspace_entry_drop(ve);
        error = EINVAL;
done3:
        return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pwrite(struct sysmsg *sysmsg,
                   const struct vmspace_pwrite_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        int error;

        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }
        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }
        vmspace_entry_drop(ve);
        error = EINVAL;
done3:
        return (error);
}

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
int
sys_vmspace_mcontrol(struct sysmsg *sysmsg,
                     const struct vmspace_mcontrol_args *uap)
{
        struct vkernel_proc *vkp;
        struct vmspace_entry *ve;
        struct lwp *lp;
        vm_offset_t start, end;
        vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
        int error;

        lp = curthread->td_lwp;
        if ((vkp = curproc->p_vkernel) == NULL) {
                error = EINVAL;
                goto done3;
        }

        if ((ve = vkernel_find_vmspace(vkp, uap->id, 0)) == NULL) {
                error = ENOENT;
                goto done3;
        }

        /*
         * This code is basically copied from sys_mcontrol()
         */
        if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
                error = EINVAL;
                goto done1;
        }

        if (tmpaddr < (vm_offset_t)uap->addr) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }
        if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
                error = EINVAL;
                goto done1;
        }

        start = trunc_page((vm_offset_t)uap->addr);
        end = round_page(tmpaddr);

        error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
                               uap->behav, uap->value);
done1:
        vmspace_entry_drop(ve);
done3:
        return (error);
}
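/*
 * Illustrative sketch (not part of this file): the vkernel can use this
 * to push madvise()-style hints into the guest VM space, for example to
 * invalidate a page it has unmapped on the guest side (assuming the
 * MADV_INVAL behavior is appropriate for the situation):
 *
 *	vmspace_mcontrol(&gvm, (void *)guest_va, PAGE_SIZE, MADV_INVAL, 0);
 */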
/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * a->id is the address being compared and is the only field that has to
 * be initialized.
 *
 * The caller must hold vkp->token.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
        if ((char *)a->id < (char *)b->id)
                return(-1);
        else if ((char *)a->id > (char *)b->id)
                return(1);
        return(0);
}

/*
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
        struct vkernel_proc *vkp = data;

        if (vmspace_entry_delete(ve, vkp, 0) == 0)
                vmspace_entry_cache_drop(ve);
        else
                panic("rb_vmspace_delete: invalid refs %d", ve->refs);
        return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.  We gain control of
 * the associated cache_refs ref, which the caller will drop for us.
 *
 * The ve must not have any active references other than those from the
 * caller.  If it does, EBUSY is returned.  The ve may still maintain
 * any number of cache references which will drop as the related LWPs
 * execute vmspace operations or exit.
 *
 * 0 is returned on success, EBUSY on failure.  On success the caller must
 * drop the last cache_refs.  We have dropped the caller's active refs.
 *
 * The caller must hold vkp->token.
 */
static
int
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp,
                     int refs)
{
        /*
         * Interlocked by vkp->token.
         *
         * Drop the caller's refs and set VKE_REF_DELETED atomically, if
         * the remaining refs match exactly.  Dropping refs and setting
         * the DELETED flag atomically protects other threads from trying
         * to use the ve.
         *
         * The caller now owns the final cache_ref that was previously
         * associated with the live state of the ve.
         */
        if (atomic_cmpset_int(&ve->refs, refs, VKE_REF_DELETED) == 0) {
                KKASSERT(ve->refs >= refs);
                return EBUSY;
        }
        RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

        pmap_remove_pages(vmspace_pmap(ve->vmspace),
                          VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        vm_map_remove(&ve->vmspace->vm_map,
                      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
        vmspace_rel(ve->vmspace);
        ve->vmspace = NULL;             /* safety */

        return 0;
}

/*
 * Ref a ve for cache purposes
 */
static
void
vmspace_entry_cache_ref(struct vmspace_entry *ve)
{
        atomic_add_int(&ve->cache_refs, 1);
}

/*
 * The ve cache_drop is the final word for a ve.  It gains an extra ref
 * representing it being on the RB tree and not being in a deleted state.
 * Removal from the RB tree and deletion manipulate this ref.  The last
 * drop will thus include full deletion of the ve in addition to the last
 * cached user going away.
 */
static
void
vmspace_entry_cache_drop(struct vmspace_entry *ve)
{
        if (atomic_fetchadd_int(&ve->cache_refs, -1) == 1) {
                KKASSERT(ve->refs & VKE_REF_DELETED);
                kfree(ve, M_VKERNEL);
        }
}

/*
 * Drop primary reference.  The ve cannot be freed on the 1->0 transition.
 * Instead, ve deletion interlocks the final kfree() via cache_refs.
 */
static
void
vmspace_entry_drop(struct vmspace_entry *ve)
{
        atomic_fetchadd_int(&ve->refs, -1);
}

/*
 * Locate the ve for (id), return the ve or NULL.  If found this function
 * will bump ve->refs which prevents the ve from being immediately destroyed
 * (but it can still be removed).
 *
 * The cache can potentially contain a stale ve; this is detected by
 * checking VKE_REF_DELETED when the active ref is bumped.
 *
 * The caller must hold vkp->token if excl is non-zero.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id, int excl)
{
        struct vmspace_entry *ve;
        struct vmspace_entry key;
        struct vkernel_lwp *vklp;
        struct lwp *lp = curthread->td_lwp;

        /*
         * Cache check.  Since we already hold a ref on the cache entry
         * the ve cannot be ripped out from under us while we cycle
         * ve->refs.
         */
        if ((vklp = lp->lwp_vkernel) != NULL) {
                ve = vklp->ve_cache;
                if (ve && ve->id == id) {
                        uint32_t n;

                        /*
                         * Bump active refs, check to see if the cache
                         * entry is stale.  If not, we are good.
                         */
                        n = atomic_fetchadd_int(&ve->refs, 1);
                        if ((n & VKE_REF_DELETED) == 0) {
                                KKASSERT(ve->vmspace);
                                return ve;
                        }

                        /*
                         * Cache is stale, clean it out and fall through
                         * to a normal search.
                         */
                        vklp->ve_cache = NULL;
                        vmspace_entry_drop(ve);
                        vmspace_entry_cache_drop(ve);
                }
        }

        /*
         * Normal search protected by vkp->token.  No new ve's can be marked
         * DELETED while we hold the token so we are safe.
         */
        if (excl == 0)
                lwkt_gettoken_shared(&vkp->token);
        key.id = id;
        ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
        if (ve) {
                if (atomic_fetchadd_int(&ve->refs, 1) & VKE_REF_DELETED) {
                        vmspace_entry_drop(ve);
                        ve = NULL;
                }
        }
        if (excl == 0)
                lwkt_reltoken(&vkp->token);
        return (ve);
}
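/*
 * Summary of the reference model implemented above (descriptive only):
 *
 *	sys_vmspace_create()		ve->refs = 0, ve->cache_refs = 1
 *					(the on-tree ref)
 *	vkernel_find_vmspace()		++ve->refs, dropped by the caller
 *					via vmspace_entry_drop()
 *	VMSPACE_CTL_RUN			++ve->refs for vklp->ve, plus a
 *					vmspace_entry_cache_ref() for
 *					vklp->ve_cache
 *	vmspace_entry_delete()		ve->refs -> VKE_REF_DELETED, the
 *					on-tree cache ref is handed to the
 *					caller
 *	vmspace_entry_cache_drop()	kfree()s the ve on the final
 *					cache ref
 */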
/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 *
 * No requirements.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
        struct vkernel_proc *vkp;

        vkp = p1->p_vkernel;
        KKASSERT(vkp->refs > 0);
        atomic_add_int(&vkp->refs, 1);
        p2->p_vkernel = vkp;
}

/*
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
        struct vkernel_proc *vkp;
        struct lwp *lp;

        vkp = p->p_vkernel;

        /*
         * Restore the original VM context if we are killed while running
         * a different one.
         *
         * This isn't supposed to happen.  What is supposed to happen is
         * that the process should enter vkernel_trap() before handling
         * the signal.
         */
        RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
                vkernel_lwp_exit(lp);
        }

        /*
         * Dereference the common area
         */
        p->p_vkernel = NULL;
        KKASSERT(vkp->refs > 0);

        if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
                lwkt_gettoken(&vkp->token);
                RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
                        rb_vmspace_delete, vkp);
                lwkt_reltoken(&vkp->token);
                kfree(vkp, M_VKERNEL);
        }
}

/*
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
        struct vkernel_lwp *vklp;
        struct vmspace_entry *ve;

        if ((vklp = lp->lwp_vkernel) != NULL) {
                if (lp->lwp_thread->td_vmm == NULL) {
                        /*
                         * vkernel thread
                         */
                        if ((ve = vklp->ve) != NULL) {
                                kprintf("Warning, pid %d killed with "
                                        "active VC!\n", lp->lwp_proc->p_pid);
                                pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
                                vklp->ve = NULL;
                                KKASSERT(ve->refs > 0);
                                vmspace_entry_drop(ve);
                        }
                } else {
                        /*
                         * guest thread
                         */
                        vklp->ve = NULL;
                }
                if ((ve = vklp->ve_cache) != NULL) {
                        vklp->ve_cache = NULL;
                        vmspace_entry_cache_drop(ve);
                }

                lp->lwp_vkernel = NULL;
                kfree(vklp, M_VKERNEL);
        }
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
        struct proc *p = lp->lwp_proc;
        struct vmspace_entry *ve;
        struct vkernel_lwp *vklp;
        int error;

        /*
         * Which vmspace entry was running?
         */
        vklp = lp->lwp_vkernel;
        KKASSERT(vklp);

        /* If it's a VMM thread just set the vkernel CR3 back */
        if (curthread->td_vmm == NULL) {
                ve = vklp->ve;
                KKASSERT(ve != NULL);

                /*
                 * Switch the LWP vmspace back to the virtual kernel's
                 * VM space.
                 */
                vklp->ve = NULL;
                pmap_setlwpvm(lp, p->p_vmspace);
                KKASSERT(ve->refs > 0);
                vmspace_entry_drop(ve);
                /* ve is invalid once we kill our ref */
        } else {
                vklp->ve = NULL;
                vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
        }

        /*
         * Copy the emulated process frame to the virtual kernel process.
         * The emulated process cannot change TLS descriptors so don't
         * bother saving them, we already have a copy.
         *
         * Restore the virtual kernel's saved context so the virtual kernel
         * process can resume.
         */
        error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
        bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
        bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
              sizeof(vklp->save_vextframe.vx_tls));
        set_user_TLS();
        cpu_vkernel_trap(frame, error);
}