xref: /dragonfly/sys/vm/vm_vmspace.c (revision c4d6eff4)
/*
 * (MPSAFE)
 *
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>

#include <sys/sysref2.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_proc *vkp,
						  void *id);
static void vmspace_entry_delete(struct vmspace_entry *ve,
				 struct vkernel_proc *vkp);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

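/*
 * Locking/refcount summary for the code below:
 *
 * - vkp->token serializes all operations on a process' vmspace_rb_tree.
 * - ve->refs is bumped temporarily around operations that can block
 *   (kern_mmap(), vm_map_remove(), vm_map_madvise()) and for the duration
 *   of VMSPACE_CTL_RUN, so the entry cannot be destroyed underneath the
 *   caller; sys_vmspace_destroy() returns EBUSY while ve->refs != 0.
 */
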
/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * No requirements.
 */
int
sys_vmspace_create(struct vmspace_create_args *uap)
{
	struct vmspace_entry *ve;
	struct vkernel_proc *vkp;
	struct proc *p = curproc;
	int error;

	if (vkernel_enable == 0)
		return (EOPNOTSUPP);

	/*
	 * Create a virtual kernel side-structure for the process if one
	 * does not exist.
	 *
	 * Implement a simple resolution for SMP races.
	 */
	if ((vkp = p->p_vkernel) == NULL) {
		vkp = kmalloc(sizeof(*vkp), M_VKERNEL, M_WAITOK|M_ZERO);
		lwkt_gettoken(&p->p_token);
		if (p->p_vkernel == NULL) {
			vkp->refs = 1;
			lwkt_token_init(&vkp->token, "vkernel");
			RB_INIT(&vkp->root);
			p->p_vkernel = vkp;
		} else {
			kfree(vkp, M_VKERNEL);
			vkp = p->p_vkernel;
		}
		lwkt_reltoken(&p->p_token);
	}

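	/*
	 * When the thread runs under hardware virtualization (td_vmm),
	 * no vmspace_entry is created; the guest context is driven via
	 * the VMM guest CR3 instead (see VMSPACE_CTL_RUN below).
	 */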
	if (curthread->td_vmm)
		return 0;

	/*
	 * Create a new VMSPACE, disallow conflicting ids
	 */
	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	ve->id = uap->id;
	pmap_pinit2(vmspace_pmap(ve->vmspace));

	lwkt_gettoken(&vkp->token);
	if (RB_INSERT(vmspace_rb_tree, &vkp->root, ve)) {
		vmspace_rel(ve->vmspace);
		ve->vmspace = NULL; /* safety */
		kfree(ve, M_VKERNEL);
		error = EEXIST;
	} else {
		error = 0;
	}
	lwkt_reltoken(&vkp->token);

	return (error);
}

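/*
 * Illustrative userland call sequence (a sketch only, not part of this
 * file; wrapper names follow the syscall descriptions in this file and
 * MADV_SETMAP is assumed as the usual mcontrol op for MAP_VPAGETABLE):
 *
 *	vmspace_create(id, 0, NULL);
 *	vmspace_mmap(id, addr, len, prot, MAP_VPAGETABLE | flags, fd, off);
 *	vmspace_mcontrol(id, addr, len, MADV_SETMAP, pte);
 *	vmspace_ctl(id, VMSPACE_CTL_RUN, &tframe, &vframe);
 *	...
 *	vmspace_destroy(id);
 */
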
/*
 * Destroy a VMSPACE given its identifier.
 *
 * No requirements.
 */
int
sys_vmspace_destroy(struct vmspace_destroy_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	lwkt_gettoken(&vkp->token);
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done2;
	}
	if (ve->refs) {
		error = EBUSY;
		goto done2;
	}
	vmspace_entry_delete(ve, vkp);
	error = 0;
done2:
	lwkt_reltoken(&vkp->token);
done3:
	return(error);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 *
 * No requirements.
 */
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
	struct vkernel_proc *vkp;
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve = NULL;
	struct lwp *lp;
	struct proc *p;
	int framesz;
	int error;

	lp = curthread->td_lwp;
	p = lp->lwp_proc;

	if ((vkp = p->p_vkernel) == NULL)
		return (EINVAL);

	/*
	 * ve only matters when VMM is not used.
	 */
	if (curthread->td_vmm == NULL) {
		lwkt_gettoken(&vkp->token);
		if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
			error = ENOENT;
			goto done;
		}
	}

	switch(uap->cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context.  Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the context.
		 */
		if (curthread->td_vmm == NULL)
			atomic_add_int(&ve->refs, 1);

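		/*
		 * The reference taken above is dropped on the error path
		 * below, or later by vkernel_trap() (or vkernel_lwp_exit()
		 * if the lwp is torn down with an active VC) once control
		 * returns to the virtual kernel.
		 */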
		framesz = sizeof(struct trapframe);
		if ((vklp = lp->lwp_vkernel) == NULL) {
			vklp = kmalloc(sizeof(*vklp), M_VKERNEL,
				       M_WAITOK|M_ZERO);
			lp->lwp_vkernel = vklp;
		}
		vklp->user_trapframe = uap->tframe;
		vklp->user_vextframe = uap->vframe;
		bcopy(uap->sysmsg_frame, &vklp->save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vklp->save_vextframe.vx_tls,
		      sizeof(vklp->save_vextframe.vx_tls));
		error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
		if (error == 0) {
			error = copyin(&uap->vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		}
		if (error == 0)
			error = cpu_sanitize_frame(uap->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			bcopy(&vklp->save_trapframe, uap->sysmsg_frame,
			      framesz);
			bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
			      sizeof(vklp->save_vextframe.vx_tls));
			set_user_TLS();
			if (curthread->td_vmm == NULL)
				atomic_subtract_int(&ve->refs, 1);
		} else {
			/*
			 * If it's a VMM thread just set the CR3.  We also
			 * set vklp->ve so we can tell whether a vkernel
			 * user process is currently running (it is NULL
			 * when one is not).
			 */
			if (curthread->td_vmm == NULL) {
				vklp->ve = ve;
				pmap_setlwpvm(lp, ve->vmspace);
			} else {
				vklp->ve = uap->id;
				vmm_vm_set_guest_cr3((register_t)uap->id);
			}
			set_user_TLS();
			set_vkernel_fp(uap->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
done:
	if (curthread->td_vmm == NULL)
		lwkt_reltoken(&vkp->token);
	return(error);
}

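/*
 * Note on the VMSPACE_CTL_RUN return path: on success the syscall returns
 * EJUSTRETURN with the guest context installed, so the thread resumes in
 * the target vmspace instead of returning to the vkernel.  When the guest
 * next faults, traps, or makes a system call, vkernel_trap() below copies
 * the guest frame out to the saved uap->tframe pointer and restores the
 * vkernel's own context, making the original vmspace_ctl() call appear to
 * return normally to the virtual kernel.
 */
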
/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * Map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 *
 * No requirements.
 */
int
sys_vmspace_mmap(struct vmspace_mmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	/*
	 * We hold the vmspace token to serialize calls to vkernel_find_vmspace.
	 */
	lwkt_gettoken(&vmspace_token);
	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}

	/*
	 * NOTE: kern_mmap() can block so we need to temporarily ref ve->refs.
	 */
	lwkt_gettoken(&vkp->token);
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) != NULL) {
		atomic_add_int(&ve->refs, 1);
		error = kern_mmap(ve->vmspace, uap->addr, uap->len,
				  uap->prot, uap->flags,
				  uap->fd, uap->offset, &uap->sysmsg_resultp);
		atomic_subtract_int(&ve->refs, 1);
	} else {
		error = ENOENT;
	}
	lwkt_reltoken(&vkp->token);
done3:
	lwkt_reltoken(&vmspace_token);
	return (error);
}

/*
 * vmspace_munmap(id, addr, len)
 *
 * Unmap memory within a VMSPACE.
 *
 * No requirements.
 */
int
sys_vmspace_munmap(struct vmspace_munmap_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t addr;
	vm_offset_t tmpaddr;
	vm_size_t size, pageoff;
	vm_map_t map;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	lwkt_gettoken(&vkp->token);
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	/*
	 * NOTE: kern_munmap() can block so we need to temporarily
	 *	 ref ve->refs.
	 */
	atomic_add_int(&ve->refs, 1);

	/*
	 * Copied from sys_munmap()
	 */
	addr = (vm_offset_t)uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t)round_page(size);
	if (size < uap->len) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	tmpaddr = addr + size;		/* workaround gcc4 opt */
	if (tmpaddr < addr) {		/* wrap */
		error = EINVAL;
		goto done1;
	}
	if (size == 0) {
		error = 0;
		goto done1;
	}

	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	map = &ve->vmspace->vm_map;
	if (!vm_map_check_protection(map, addr, tmpaddr, VM_PROT_NONE, FALSE)) {
		error = EINVAL;
		goto done1;
	}
	vm_map_remove(map, addr, addr + size);
	error = 0;
done1:
	atomic_subtract_int(&ve->refs, 1);
done2:
	lwkt_reltoken(&vkp->token);
done3:
	return (error);
}

/*
 * vmspace_pread(id, buf, nbyte, flags, offset)
 *
 * Read data from a vmspace.  The number of bytes read is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes read is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pread(struct vmspace_pread_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	lwkt_gettoken(&vkp->token);
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done2;
	}
	error = EINVAL;
done2:
	lwkt_reltoken(&vkp->token);
done3:
	return (error);
}

/*
 * vmspace_pwrite(id, buf, nbyte, flags, offset)
 *
 * Write data to a vmspace.  The number of bytes written is returned or
 * -1 if an unrecoverable error occurred.  If the number of bytes written is
 * less than the request size, a page fault occurred in the VMSPACE which
 * the caller must resolve in order to proceed.
 *
 * (not implemented yet)
 * No requirements.
 */
int
sys_vmspace_pwrite(struct vmspace_pwrite_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	lwkt_gettoken(&vkp->token);
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done2;
	}
	error = EINVAL;
done2:
	lwkt_reltoken(&vkp->token);
done3:
	return (error);
}

/*
 * vmspace_mcontrol(id, addr, len, behav, value)
 *
 * madvise/mcontrol support for a vmspace.
 *
 * No requirements.
 */
int
sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
{
	struct vkernel_proc *vkp;
	struct vmspace_entry *ve;
	vm_offset_t start, end;
	vm_offset_t tmpaddr = (vm_offset_t)uap->addr + uap->len;
	int error;

	if ((vkp = curproc->p_vkernel) == NULL) {
		error = EINVAL;
		goto done3;
	}
	lwkt_gettoken(&vkp->token);
	if ((ve = vkernel_find_vmspace(vkp, uap->id)) == NULL) {
		error = ENOENT;
		goto done2;
	}

	/*
	 * NOTE: kern_madvise() can block so we need to temporarily
	 *	 ref ve->refs.
	 */
	atomic_add_int(&ve->refs, 1);

	/*
	 * This code is basically copied from sys_mcontrol()
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END) {
		error = EINVAL;
		goto done1;
	}

	if (tmpaddr < (vm_offset_t)uap->addr) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MAX_USER_ADDRESS > 0 && tmpaddr > VM_MAX_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}
	if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS) {
		error = EINVAL;
		goto done1;
	}

	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page(tmpaddr);

	error = vm_map_madvise(&ve->vmspace->vm_map, start, end,
				uap->behav, uap->value);
done1:
	atomic_subtract_int(&ve->refs, 1);
done2:
	lwkt_reltoken(&vkp->token);
done3:
	return (error);
}

/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * a->id is the address key and is the only field that has to be
 * initialized in the lookup structure.
 *
 * The caller must hold vkp->token.
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
	if ((char *)a->id < (char *)b->id)
		return(-1);
	else if ((char *)a->id > (char *)b->id)
		return(1);
	return(0);
}

/*
 * The caller must hold vkp->token.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
	struct vkernel_proc *vkp = data;

	KKASSERT(ve->refs == 0);
	vmspace_entry_delete(ve, vkp);
	return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.
 *
 * This function must remove the ve from the RB tree immediately, before
 * it does anything that might potentially block.
 *
 * The caller must hold vkp->token.
 */
static
void
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_proc *vkp)
{
	RB_REMOVE(vmspace_rb_tree, &vkp->root, ve);

	pmap_remove_pages(vmspace_pmap(ve->vmspace),
			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_remove(&ve->vmspace->vm_map,
		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vmspace_rel(ve->vmspace);
	ve->vmspace = NULL; /* safety */
	kfree(ve, M_VKERNEL);
}

/*
 * Locate the ve for (id) and return it, or NULL if no match is found.
 * Note that this function does not bump ve->refs; callers that may block
 * must take a temporary reference themselves.
 *
 * The caller must hold vkp->token.
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_proc *vkp, void *id)
{
	struct vmspace_entry *ve;
	struct vmspace_entry key;

	key.id = id;
	ve = RB_FIND(vmspace_rb_tree, &vkp->root, &key);
	return (ve);
}

/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 *
 * No requirements.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
	struct vkernel_proc *vkp;

	vkp = p1->p_vkernel;
	KKASSERT(vkp->refs > 0);
	atomic_add_int(&vkp->refs, 1);
	p2->p_vkernel = vkp;
}

/*
 * No requirements.
 */
void
vkernel_exit(struct proc *p)
{
	struct vkernel_proc *vkp;
	struct lwp *lp;

	vkp = p->p_vkernel;

	/*
	 * Restore the original VM context if we are killed while running
	 * a different one.
	 *
	 * This isn't supposed to happen.  What is supposed to happen is
	 * that the process should enter vkernel_trap() before handling
	 * the signal.
	 */
	RB_FOREACH(lp, lwp_rb_tree, &p->p_lwp_tree) {
		vkernel_lwp_exit(lp);
	}

	/*
	 * Dereference the common area
	 */
	p->p_vkernel = NULL;
	KKASSERT(vkp->refs > 0);

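	/*
	 * atomic_fetchadd_int() returns the value prior to the subtract,
	 * so a result of 1 means this was the last reference and the
	 * remaining vmspace entries and the side-structure itself must
	 * be destroyed.
	 */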
	if (atomic_fetchadd_int(&vkp->refs, -1) == 1) {
		lwkt_gettoken(&vkp->token);
		RB_SCAN(vmspace_rb_tree, &vkp->root, NULL,
			rb_vmspace_delete, vkp);
		lwkt_reltoken(&vkp->token);
		kfree(vkp, M_VKERNEL);
	}
}

/*
 * No requirements.
 */
void
vkernel_lwp_exit(struct lwp *lp)
{
	struct vkernel_lwp *vklp;
	struct vmspace_entry *ve;

	if ((vklp = lp->lwp_vkernel) != NULL) {
		if ((ve = vklp->ve) != NULL) {
			kprintf("Warning, pid %d killed with "
				"active VC!\n", lp->lwp_proc->p_pid);
			pmap_setlwpvm(lp, lp->lwp_proc->p_vmspace);
			vklp->ve = NULL;
			KKASSERT(ve->refs > 0);
			atomic_subtract_int(&ve->refs, 1);
		}
		lp->lwp_vkernel = NULL;
		kfree(vklp, M_VKERNEL);
	}
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 *
 * No requirements.
 */
void
vkernel_trap(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;
	struct vmspace_entry *ve;
	struct vkernel_lwp *vklp;
	int error;

	/*
	 * Which vmspace entry was running?
	 */
	vklp = lp->lwp_vkernel;
	KKASSERT(vklp);

	/* If it's a VMM thread just set the vkernel CR3 back */
	if (curthread->td_vmm == NULL) {
		ve = vklp->ve;
		KKASSERT(ve != NULL);

		/*
		 * Switch the LWP vmspace back to the virtual kernel's VM space.
		 */
		vklp->ve = NULL;
		pmap_setlwpvm(lp, p->p_vmspace);
		KKASSERT(ve->refs > 0);
		atomic_subtract_int(&ve->refs, 1);
		/* ve is invalid once we kill our ref */
	} else {
		vklp->ve = NULL;
		vmm_vm_set_guest_cr3(p->p_vkernel->vkernel_cr3);
	}
	/*
	 * Copy the emulated process frame to the virtual kernel process.
	 * The emulated process cannot change TLS descriptors so don't
	 * bother saving them, we already have a copy.
	 *
	 * Restore the virtual kernel's saved context so the virtual kernel
	 * process can resume.
	 */
	error = copyout(frame, vklp->user_trapframe, sizeof(*frame));
	bcopy(&vklp->save_trapframe, frame, sizeof(*frame));
	bcopy(&vklp->save_vextframe.vx_tls, &curthread->td_tls,
	      sizeof(vklp->save_vextframe.vx_tls));
	set_user_TLS();
	cpu_vkernel_trap(frame, error);
}