xref: /dragonfly/sys/vm/vm_vmspace.c (revision 3568afc1)
1 /*
2  * (MPSAFE)
3  *
4  * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
5  *
6  * This code is derived from software contributed to The DragonFly Project
7  * by Matthew Dillon <dillon@backplane.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/kernel.h>
39 #include <sys/systm.h>
40 #include <sys/sysproto.h>
41 #include <sys/kern_syscall.h>
42 #include <sys/mman.h>
43 #include <sys/thread.h>
44 #include <sys/proc.h>
45 #include <sys/malloc.h>
46 #include <sys/sysctl.h>
47 #include <sys/vkernel.h>
48 #include <sys/vmspace.h>
49 
50 #include <vm/vm_extern.h>
51 #include <vm/pmap.h>
52 
53 #include <machine/vmparam.h>
54 #include <machine/vmm.h>
55 
56 #include <sys/sysref2.h>
57 
/*
 * Forward declarations for file-local helpers that are defined further
 * down in this file but used by the system calls above them.
 */
static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
						  void *id);
static void vmspace_entry_delete(struct vmspace_entry *ve,
				 struct vkernel_proc *vkp);

/*
 * Malloc type used for every kmalloc()/kfree() in this file
 * (vkernel_proc, vkernel_lwp and vmspace_entry structures).
 */
static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");
64 
65 /*
66  * vmspace_create (void *id, int type, void *data)
67  *
68  * Create a VMSPACE under the control of the caller with the specified id.
69  * An id of NULL cannot be used.  The type and data fields must currently
70  * be 0.
71  *
72  * The vmspace starts out completely empty.  Memory may be mapped into the
73  * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
74  * with vmspace_mcontrol().
75  *
76  * No requirements.
77  */
78 int
79 sys_vmspace_create(struct vmspace_create_args *uap)
80 {
81 	struct vmspace_entry *ve;
82 	struct vkernel_proc *vkp;
83 	struct proc *p = curproc;
84 	int error;
85 
86 	if (vkernel_enable == 0)
87 		return (EOPNOTSUPP);
88 
89 	/*
90 	 * Create a virtual kernel side-structure for the process if one
91 	 * does not exist.
92 	 *
93 	 * Implement a simple resolution for SMP races.
94 	 */
95 	if ((vkp = p->p_vkernel) == NULL) {
96 		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
97 		lwkt_gettoken(&p->p_token);
98 		if (p->p_vkernel == NULL) {
99 			vkp->refs = 1;
100 			lwkt_token_init(&vkp->token, "vkernel");
101 			RB_INIT(&vkp->root);
102 			p->p_vkernel = vkp;
103 		} else {
104 			kfree(vkp, M_VKERNEL);
105 			vkp = p->p_vkernel;
106 		}
107 		lwkt_reltoken(&p->p_token);
108 	}
109 
110 	if (curthread->td_vmm)
111 		return 0;
112 
113 	/*
114 	 * Create a new VMSPACE, disallow conflicting ids
115 	 */
116 	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
117 	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
118 	ve->id = uap->id;
119 	pmap_pinit2(vmspace_pmap(ve->vmspace));
120 
121 	lwkt_gettoken(&vkp->token);
122 	if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
123 		vmspace_rel(ve->vmspace);
124 		ve->vmspace = NULL; /* safety */
125 		kfree(ve, M_VKERNEL);
126 		error = EEXIST;
127 	} else {
128 		error = 0;
129 	}
130 	lwkt_reltoken(&vkp->token);
131 
132 	return (error);
133 }
134 
135 /*
136  * Destroy a VMSPACE given its identifier.
137  *
138  * No requirements.
139  */
140 int
141 sys_vmspace_destroy(struct vmspace_destroy_args *uap)
142 {
143 	struct vkernel_proc *vkp;
144 	struct vmspace_entry *ve;
145 	int error;
146 
147 	if ((vkp = curproc->p_vkernel) == NULL) {
148 		error = EINVAL;
149 		goto done3;
150 	}
151 	lwkt_gettoken(&vkp->token);
152 	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
153 		error = ENOENT;
154 		goto done2;
155 	}
156 	if (ve->refs) {
157 		error = EBUSY;
158 		goto done2;
159 	}
160 	vmspace_entry_delete(ve, vkp);
161 	error = 0;
162 done2:
163 	lwkt_reltoken(&vkp->token);
164 done3:
165 	return(error);
166 }
167 
/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 * NOTE(review): no timeout argument is consumed by this function; the
 * "number of microseconds" wording should be confirmed against the
 * syscall documentation.
 *
 * Arguments (fields of uap used here):
 *	id	identifier of the VMSPACE to run.  For a thread with
 *		td_vmm set it is instead handed to vmm_vm_set_guest_cr3().
 *	cmd	only VMSPACE_CTL_RUN is implemented.
 *	tframe	user pointer to the register frame to install; also
 *		remembered so vkernel_trap() can copy the frame back out.
 *	vframe	user pointer to the extended frame; only vx_tls is read.
 *
 * Returns:
 *	EINVAL		the process has no vkernel side-structure
 *	ENOENT		(non-VMM only) no VMSPACE is registered under id
 *	EOPNOTSUPP	cmd is not VMSPACE_CTL_RUN
 *	EJUSTRETURN	the context switch succeeded
 *	other		error from copyin(), cpu_sanitize_frame() or
 *			cpu_sanitize_tls(); the caller's context is restored
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
	struct vkernel_proc *vkp;
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve = NULL;
	struct lwp *lp;
	struct proc *p;
	int framesz;
	int error;

	lp = curthread->td_lwp;
	p = lp->lwp_proc;

	if ((vkp = p->p_vkernel) == NULL)
		return (EINVAL);

	/*
	 * ve only matters when VMM is not used.
	 *
	 * In the non-VMM case vkp->token is acquired here and stays held
	 * until the 'done' label, including on the ENOENT path.
	 */
	if (curthread->td_vmm == NULL) {
		lwkt_gettoken(&vkp->token);
		if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
			error = ENOENT;
			goto done;
		}
	}

	switch(uap->cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context.  Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the context.
		 */
		/*
		 * Reference the entry for as long as it is running.  The
		 * reference is dropped below on failure, otherwise by
		 * vkernel_trap() or vkernel_lwp_exit().
		 */
		if (curthread->td_vmm == NULL)
			atomic_add_int(&ve->refs, 1);

		framesz = sizeof(struct trapframe);
		/*
		 * The per-lwp vkernel structure is allocated on first use
		 * and freed by vkernel_lwp_exit().
		 */
		if ((vklp = lp->lwp_vkernel) == NULL) {
			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
				       M_WAITOK|M_ZERO);
			lp->lwp_vkernel = vklp;
		}
		/*
		 * Remember the user frame pointers and stash the caller's
		 * own trapframe and TLS so they can be restored later.
		 */
		vklp->user_trapframe = uap->tframe;
		vklp->user_vextframe = uap->vframe;
		bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
		      sizeof(vklp->save_vextframe.vx_tls));
		/*
		 * Overwrite the live syscall frame and thread TLS with the
		 * user-supplied values, then sanitize both.  Each step only
		 * runs if the previous one succeeded.
		 */
		error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
		if (error == 0) {
			error = copyin(&uap->vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		}
		if (error == 0)
			error = cpu_sanitize_frame(uap->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			/*
			 * Failure: put the caller's saved frame and TLS
			 * back and drop the reference taken above.
			 */
			bcopy(&vklp->save_trapframe, uap->sysmsg_frame,
			      framesz);
			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
			      sizeof(vklp->save_vextframe.vx_tls));
			set_user_TLS();
			if (curthread->td_vmm == NULL)
				atomic_subtract_int(&ve->refs, 1);
		} else {
			/*
			 * If it's a VMM thread just set the CR3. We also set
			 * the vklp->ve to a key to be able to distinguish
			 * when a vkernel user process runs and when not
			 * (when it's NULL)
			 */
			if (curthread->td_vmm == NULL) {
				vklp->ve = ve;
				pmap_setlwpvm(lp, ve->vmspace);
			} else {
				vklp->ve = uap->id;
				vmm_vm_set_guest_cr3((register_t)uap->id);
			}
			set_user_TLS();
			set_vkernel_fp(uap->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
done:
	/*
	 * The token was only acquired in the non-VMM case, so only release
	 * it in that case.
	 */
	if (curthread->td_vmm == NULL)
		lwkt_reltoken(&vkp->token);
	return(error);
}
273 
274 /*
275  * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
276  *
277  * map memory within a VMSPACE.  This function is just like a normal mmap()
278  * but operates on the vmspace's memory map.  Most callers use this to create
279  * a MAP_VPAGETABLE mapping.
280  *
281  * No requirements.
282  */
283 int
284 sys_vmspace_mmap(struct vmspace_mmap_args *uap)
285 {
286 	struct vkernel_proc *vkp;
287 	struct vmspace_entry *ve;
288 	int error;
289 
290 	/*
291 	 * We hold the vmspace token to serialize calls to vkernel_find_vmspace.
292 	 */
293 	lwkt_gettoken(&vmspace_token);
294 	if ((vkp = curproc->p_vkernel) == NULL) {
295 		error = EINVAL;
296 		goto done3;
297 	}
298 
299 	/*
300 	 * NOTE: kern_mmap() can block so we need to temporarily ref ve->refs.
301 	 */
302 	lwkt_gettoken(&vkp->token);
303 	if ((ve = vkernel_find_vmspace(vkp, uap->id)) != NULL) {
304 		atomic_add_int(&ve->refs, 1);
305 		error = kern_mmap(ve->vmspace, uap->addr, uap->len,
306 				  uap->prot, uap->flags,
307 				  uap->fd, uap->offset, &uap->sysmsg_resultp);
308 		atomic_subtract_int(&ve->refs, 1);
309 	} else {
310 		error = ENOENT;
311 	}
312 	lwkt_reltoken(&vkp->token);
313 done3:
314 	lwkt_reltoken(&vmspace_token);
315 	return (error);
316 }
317 
318 /*
319  * vmspace_munmap(id, addr, len)
320  *
321  * unmap memory within a VMSPACE.
322  *
323  * No requirements.
324  */
325 int
326 sys_vmspace_munmap(struct vmspace_munmap_args *uap)
327 {
328 	struct vkernel_proc *vkp;
329 	struct vmspace_entry *ve;
330 	vm_offset_t addr;
331 	vm_offset_t tmpaddr;
332 	vm_size_t size, pageoff;
333 	vm_map_t map;
334 	int error;
335 
336 	if ((vkp = curproc->p_vkernel) == NULL) {
337 		error = EINVAL;
338 		goto done3;
339 	}
340 	lwkt_gettoken(&vkp->token);
341 	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
342 		error = ENOENT;
343 		goto done2;
344 	}
345 
346 	/*
347 	 * NOTE: kern_munmap() can block so we need to temporarily
348 	 *	 ref ve->refs.
349 	 */
350 	atomic_add_int(&ve->refs, 1);
351 
352 	/*
353 	 * Copied from sys_munmap()
354 	 */
355 	addr = (vm_offset_t)uap->addr;
356 	size = uap->len;
357 
358 	pageoff = (addr & PAGE_MASK);
359 	addr -= pageoff;
360 	size += pageoff;
361 	size = (vm_size_t)round_page(size);
362 	if (size < uap->len) {		/* wrap */
363 		error = EINVAL;
364 		goto done1;
365 	}
366 	tmpaddr = addr + size;		/* workaround gcc4 opt */
367 	if (tmpaddr < addr) {		/* wrap */
368 		error = EINVAL;
369 		goto done1;
370 	}
371 	if (size == 0) {
372 		error = 0;
373 		goto done1;
374 	}
375 
376 	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
377 		error = EINVAL;
378 		goto done1;
379 	}
380 	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
381 		error = EINVAL;
382 		goto done1;
383 	}
384 	map = &ve->vmspace->vm_map;
385 	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
386 		error = EINVAL;
387 		goto done1;
388 	}
389 	vm_map_remove(map, addr, addr + size);
390 	error = 0;
391 done1:
392 	atomic_subtract_int(&ve->refs, 1);
393 done2:
394 	lwkt_reltoken(&vkp->token);
395 done3:
396 	return (error);
397 }
398 
399 /*
400  * vmspace_pread(id, buf, nbyte, flags, offset)
401  *
402  * Read data from a vmspace.  The number of bytes read is returned or
403  * -1 if an unrecoverable error occured.  If the number of bytes read is
404  * less then the request size, a page fault occured in the VMSPACE which
405  * the caller must resolve in order to proceed.
406  *
407  * (not implemented yet)
408  * No requirements.
409  */
410 int
411 sys_vmspace_pread(struct vmspace_pread_args *uap)
412 {
413 	struct vkernel_proc *vkp;
414 	struct vmspace_entry *ve;
415 	int error;
416 
417 	if ((vkp = curproc->p_vkernel) == NULL) {
418 		error = EINVAL;
419 		goto done3;
420 	}
421 	lwkt_gettoken(&vkp->token);
422 	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
423 		error = ENOENT;
424 		goto done2;
425 	}
426 	error = EINVAL;
427 done2:
428 	lwkt_reltoken(&vkp->token);
429 done3:
430 	return (error);
431 }
432 
433 /*
434  * vmspace_pwrite(id, buf, nbyte, flags, offset)
435  *
436  * Write data to a vmspace.  The number of bytes written is returned or
437  * -1 if an unrecoverable error occured.  If the number of bytes written is
438  * less then the request size, a page fault occured in the VMSPACE which
439  * the caller must resolve in order to proceed.
440  *
441  * (not implemented yet)
442  * No requirements.
443  */
444 int
445 sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
446 {
447 	struct vkernel_proc *vkp;
448 	struct vmspace_entry *ve;
449 	int error;
450 
451 	if ((vkp = curproc->p_vkernel) == NULL) {
452 		error = EINVAL;
453 		goto done3;
454 	}
455 	lwkt_gettoken(&vkp->token);
456 	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
457 		error = ENOENT;
458 		goto done2;
459 	}
460 	error = EINVAL;
461 done2:
462 	lwkt_reltoken(&vkp->token);
463 done3:
464 	return (error);
465 }
466 
467 /*
468  * vmspace_mcontrol(id, addr, len, behav, value)
469  *
470  * madvise/mcontrol support for a vmspace.
471  *
472  * No requirements.
473  */
474 int
475 sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
476 {
477 	struct vkernel_proc *vkp;
478 	struct vmspace_entry *ve;
479 	vm_offset_t start, end;
480 	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
481 	int error;
482 
483 	if ((vkp = curproc->p_vkernel) == NULL) {
484 		error = EINVAL;
485 		goto done3;
486 	}
487 	lwkt_gettoken(&vkp->token);
488 	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
489 		error = ENOENT;
490 		goto done2;
491 	}
492 
493 	/*
494 	 * NOTE: kern_madvise() can block so we need to temporarily
495 	 *	 ref ve->refs.
496 	 */
497 	atomic_add_int(&ve->refs, 1);
498 
499 	/*
500 	 * This code is basically copied from sys_mcontrol()
501 	 */
502 	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
503 		error = EINVAL;
504 		goto done1;
505 	}
506 
507 	if (tmpaddr < (vm_offset_t)uap->addr) {
508 		error = EINVAL;
509 		goto done1;
510 	}
511 	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
512 		error = EINVAL;
513 		goto done1;
514 	}
515         if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
516 		error = EINVAL;
517 		goto done1;
518 	}
519 
520 	start = trunc_page((vm_offset_t) uap->addr);
521 	end = round_page(tmpaddr);
522 
523 	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
524 				uap->behav, uap->value);
525 done1:
526 	atomic_subtract_int(&ve->refs, 1);
527 done2:
528 	lwkt_reltoken(&vkp->token);
529 done3:
530 	return (error);
531 }
532 
/*
 * Red black tree functions
 *
 * RB_GENERATE emits the vmspace_rb_tree implementation that backs the
 * RB_INSERT/RB_FIND/RB_REMOVE/RB_SCAN calls made on vkp->root in this
 * file.  Entries are linked through their rb_entry field and ordered by
 * rb_vmspace_compare().
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);
538 
539 /*
540  * a->start is address, and the only field has to be initialized.
541  * The caller must hold vkp->token.
542  *
543  * The caller must hold vkp->token.
544  */
545 static int
546 rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
547 {
548         if ((char *)a->id < (char *)b->id)
549                 return(-1);
550         else if ((char *)a->id > (char *)b->id)
551                 return(1);
552         return(0);
553 }
554 
555 /*
556  * The caller must hold vkp->token.
557  */
558 static
559 int
560 rb_vmspace_delete(struct vmspace_entry *ve, void *data)
561 {
562 	struct vkernel_proc *vkp = data;
563 
564 	KKASSERT(ve->refs == 0);
565 	vmspace_entry_delete(ve, vkp);
566 	return(0);
567 }
568 
569 /*
570  * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
571  * up the pmap, the vm_map, then destroy the vmspace.
572  *
573  * This function must remove the ve immediately before it might potentially
574  * block.
575  *
576  * The caller must hold vkp->token.
577  */
578 static
579 void
580 vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp)
581 {
582 	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);
583 
584 	pmap_remove_pages(vmspace_pmap(ve->vmspace),
585 			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
586 	vm_map_remove(&ve->vmspace->vm_map,
587 		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
588 	vmspace_rel(ve->vmspace);
589 	ve->vmspace = NULL; /* safety */
590 	kfree(ve, M_VKERNEL);
591 }
592 
593 /*
594  * Locate the ve for (id), return the ve or NULL.  If found this function
595  * will bump ve->refs which prevents the ve from being immediately destroyed
596  * (but it can still be removed).
597  *
598  * The caller must hold vkp->token.
599  */
600 static
601 struct vmspace_entry *
602 vkernel_find_vmspace(struct vkernel_proc *vkp, void *id)
603 {
604 	struct vmspace_entry *ve;
605 	struct vmspace_entry key;
606 
607 	key.id = id;
608 	ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
609 	return (ve);
610 }
611 
612 /*
613  * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
614  * a vkernel process.
615  *
616  * No requirements.
617  */
618 void
619 vkernel_inherit(struct proc *p1, struct proc *p2)
620 {
621 	struct vkernel_proc *vkp;
622 
623 	vkp = p1->p_vkernel;
624 	KKASSERT(vkp->refs > 0);
625 	atomic_add_int(&vkp->refs, 1);
626 	p2->p_vkernel = vkp;
627 }
628 
629 /*
630  * No requirements.
631  */
632 void
633 vkernel_exit(struct proc *p)
634 {
635 	struct vkernel_proc *vkp;
636 	struct lwp *lp;
637 
638 	vkp = p->p_vkernel;
639 
640 	/*
641 	 * Restore the original VM context if we are killed while running
642 	 * a different one.
643 	 *
644 	 * This isn't supposed to happen.  What is supposed to happen is
645 	 * that the process should enter vkernel_trap() before the handling
646 	 * the signal.
647 	 */
648 	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
649 		vkernel_lwp_exit(lp);
650 	}
651 
652 	/*
653 	 * Dereference the common area
654 	 */
655 	p->p_vkernel = NULL;
656 	KKASSERT(vkp->refs > 0);
657 
658 	if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
659 		lwkt_gettoken(&vkp->token);
660 		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
661 			rb_vmspace_delete, vkp);
662 		lwkt_reltoken(&vkp->token);
663 		kfree(vkp, M_VKERNEL);
664 	}
665 }
666 
667 /*
668  * No requirements.
669  */
670 void
671 vkernel_lwp_exit(struct lwp *lp)
672 {
673 	struct vkernel_lwp *vklp;
674 	struct vmspace_entry *ve;
675 
676 	if ((vklp = lp->lwp_vkernel) != NULL) {
677 		if (lp->lwp_thread->td_vmm == NULL) {
678 			/*
679 			 * vkernel thread
680 			 */
681 			if ((ve = vklp->ve) != NULL) {
682 				kprintf("Warning, pid %d killed with "
683 					"active VC!\n", lp->lwp_proc->p_pid);
684 				pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
685 				vklp->ve = NULL;
686 				KKASSERT(ve->refs > 0);
687 				atomic_subtract_int(&ve->refs, 1);
688 			}
689 		} else {
690 			/*
691 			 * guest thread
692 			 */
693 			vklp->ve = NULL;
694 		}
695 		lp->lwp_vkernel = NULL;
696 		kfree(vklp, M_VKERNEL);
697 	}
698 }
699 
700 /*
701  * A VM space under virtual kernel control trapped out or made a system call
702  * or otherwise needs to return control to the virtual kernel context.
703  *
704  * No requirements.
705  */
706 void
707 vkernel_trap(struct lwp *lp, struct trapframe *frame)
708 {
709 	struct proc *p = lp->lwp_proc;
710 	struct vmspace_entry *ve;
711 	struct vkernel_lwp *vklp;
712 	int error;
713 
714 	/*
715 	 * Which vmspace entry was running?
716 	 */
717 	vklp = lp->lwp_vkernel;
718 	KKASSERT(vklp);
719 
720 	/* If it's a VMM thread just set the vkernel CR3 back */
721 	if (curthread->td_vmm == NULL) {
722 		ve = vklp->ve;
723 		KKASSERT(ve != NULL);
724 
725 		/*
726 		 * Switch the LWP vmspace back to the virtual kernel's VM space.
727 		 */
728 		vklp->ve = NULL;
729 		pmap_setlwpvm(lp, p->p_vmspace);
730 		KKASSERT(ve->refs > 0);
731 		atomic_subtract_int(&ve->refs, 1);
732 		/* ve is invalid once we kill our ref */
733 	} else {
734 		vklp->ve = NULL;
735 		vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
736 	}
737 
738 	/*
739 	 * Copy the emulated process frame to the virtual kernel process.
740 	 * The emulated process cannot change TLS descriptors so don't
741 	 * bother saving them, we already have a copy.
742 	 *
743 	 * Restore the virtual kernel's saved context so the virtual kernel
744 	 * process can resume.
745 	 */
746 	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
747 	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
748 	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
749 	      sizeof(vklp->save_vextframe.vx_tls));
750 	set_user_TLS();
751 	cpu_vkernel_trap(frame, error);
752 }
753