/* $OpenBSD: vmm.c,v 1.2 2023/05/13 23:15:28 dv Exp $ */
/*
 * Copyright (c) 2014-2023 Mike Larkin <mlarkin@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/pool.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/ioctl.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>

#include <machine/vmmvar.h>

#include <dev/vmm/vmm.h>

struct vmm_softc *vmm_softc;
struct pool vm_pool;
struct pool vcpu_pool;

struct cfdriver vmm_cd = {
	NULL, "vmm", DV_DULL, CD_SKIPHIBERNATE
};

const struct cfattach vmm_ca = {
	sizeof(struct vmm_softc), vmm_probe, vmm_attach, NULL, vmm_activate
};

int
vmm_probe(struct device *parent, void *match, void *aux)
{
	const char **busname = (const char **)aux;

	if (strcmp(*busname, vmm_cd.cd_name) != 0)
		return (0);
	return (1);
}

void
vmm_attach(struct device *parent, struct device *self, void *aux)
{
	struct vmm_softc *sc = (struct vmm_softc *)self;

	rw_init(&sc->sc_slock, "vmmslk");
	sc->sc_status = VMM_ACTIVE;
	refcnt_init(&sc->sc_refcnt);

	sc->vcpu_ct = 0;
	sc->vcpu_max = VMM_MAX_VCPUS;
	sc->vm_ct = 0;
	sc->vm_idx = 0;

	SLIST_INIT(&sc->vm_list);
	rw_init(&sc->vm_lock, "vm_list");

	pool_init(&vm_pool, sizeof(struct vm), 0, IPL_MPFLOOR, PR_WAITOK,
	    "vmpool", NULL);
	pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_MPFLOOR, PR_WAITOK,
	    "vcpupl", NULL);

	vmm_attach_machdep(parent, self, aux);

	vmm_softc = sc;
	printf("\n");
}

int
vmm_activate(struct device *self, int act)
{
	switch (act) {
	case DVACT_QUIESCE:
		/* Block device users as we're suspending operation. */
		rw_enter_write(&vmm_softc->sc_slock);
		KASSERT(vmm_softc->sc_status == VMM_ACTIVE);
		vmm_softc->sc_status = VMM_SUSPENDED;
		rw_exit_write(&vmm_softc->sc_slock);

		/* Wait for any device users to finish. */
		refcnt_finalize(&vmm_softc->sc_refcnt, "vmmsusp");

		vmm_activate_machdep(self, act);
		break;
	case DVACT_WAKEUP:
		vmm_activate_machdep(self, act);

		/* Set the device back to active. */
		rw_enter_write(&vmm_softc->sc_slock);
		KASSERT(vmm_softc->sc_status == VMM_SUSPENDED);
		refcnt_init(&vmm_softc->sc_refcnt);
		vmm_softc->sc_status = VMM_ACTIVE;
		rw_exit_write(&vmm_softc->sc_slock);

		/* Notify any waiting device users. */
		wakeup(&vmm_softc->sc_status);
		break;
	}

	return (0);
}
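
/*
 * The suspend protocol above pairs with the pattern used by device users
 * such as vmmioctl() below. The following is an illustrative sketch (not a
 * new function in this driver) of how a device user acquires and drops its
 * reference so that refcnt_finalize() in DVACT_QUIESCE can wait for all
 * in-flight operations:
 *
 *	rw_enter(&vmm_softc->sc_slock, RW_READ | RW_INTR);
 *	while (vmm_softc->sc_status != VMM_ACTIVE)
 *		rwsleep_nsec(&vmm_softc->sc_status, &vmm_softc->sc_slock,
 *		    PWAIT | PCATCH, "vmmresume", INFSLP);
 *	refcnt_take(&vmm_softc->sc_refcnt);
 *	rw_exit(&vmm_softc->sc_slock);
 *	... perform the operation ...
 *	refcnt_rele_wake(&vmm_softc->sc_refcnt);
 */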

/*
 * vmmopen
 *
 * Called during open of /dev/vmm.
 *
 * Parameters:
 *  dev, flag, mode, p: These come from the character device and are
 *   all unused for this function
 *
 * Return values:
 *  ENODEV: if vmm(4) didn't attach or no supported CPUs detected
 *  0: successful open
 */
int
vmmopen(dev_t dev, int flag, int mode, struct proc *p)
{
	/* Don't allow open if we didn't attach */
	if (vmm_softc == NULL)
		return (ENODEV);

	/* Don't allow open if we didn't detect any supported CPUs */
	if (vmm_softc->mode == VMM_MODE_UNKNOWN)
		return (ENODEV);

	return 0;
}

/*
 * vmmclose
 *
 * Called when /dev/vmm is closed. Presently unused.
 */
int
vmmclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return 0;
}

/*
 * vm_find
 *
 * Function to find an existing VM by its identifier. Takes the global
 * vm_lock internally. On success, a reference to the VM is taken and the
 * caller is responsible for dropping it with refcnt_rele_wake().
 *
 * Parameters:
 *  id: The VM identifier.
 *  *res: A pointer to the VM or NULL if not found
 *
 * Return values:
 *  0: if successful
 *  ENOENT: if the VM defined by 'id' cannot be found
 *  EPERM: if the VM cannot be accessed by the current process
 */
int
vm_find(uint32_t id, struct vm **res)
{
	struct proc *p = curproc;
	struct vm *vm;
	int ret = ENOENT;

	*res = NULL;

	rw_enter_read(&vmm_softc->vm_lock);
	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
		if (vm->vm_id == id) {
			/*
			 * In a pledged VM process, only allow finding the
			 * VM that is running in the current process. The
			 * managing parent process in vmd, indicated by
			 * PLEDGE_PROC, may look up all VMs.
			 */
			if (((p->p_p->ps_pledge &
			    (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
			    (vm->vm_creator_pid != p->p_p->ps_pid))
				ret = EPERM;
			else {
				refcnt_take(&vm->vm_refcnt);
				*res = vm;
				ret = 0;
			}
			break;
		}
	}
	rw_exit_read(&vmm_softc->vm_lock);

	if (ret == EPERM)
		return (pledge_fail(p, EPERM, PLEDGE_VMM));
	return (ret);
}
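
/*
 * Illustrative sketch (not a function in this file) of the reference
 * discipline expected of vm_find() callers, mirroring vm_resetcpu() and
 * vm_share_mem() below: a successful lookup returns with a reference held,
 * which must be dropped with refcnt_rele_wake() when done.
 *
 *	struct vm *vm;
 *	int error;
 *
 *	error = vm_find(id, &vm);
 *	if (error)
 *		return (error);
 *	... operate on vm ...
 *	refcnt_rele_wake(&vm->vm_refcnt);
 */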

/*
 * vmmioctl
 *
 * Main ioctl dispatch routine for /dev/vmm. Parses ioctl type and calls
 * appropriate lower level handler routine. Returns result to ioctl caller.
 */
int
vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int ret;

	KERNEL_UNLOCK();

	ret = rw_enter(&vmm_softc->sc_slock, RW_READ | RW_INTR);
	if (ret != 0)
		goto out;
	while (vmm_softc->sc_status != VMM_ACTIVE) {
		ret = rwsleep_nsec(&vmm_softc->sc_status, &vmm_softc->sc_slock,
		    PWAIT | PCATCH, "vmmresume", INFSLP);
		if (ret != 0) {
			rw_exit(&vmm_softc->sc_slock);
			goto out;
		}
	}
	refcnt_take(&vmm_softc->sc_refcnt);
	rw_exit(&vmm_softc->sc_slock);

	switch (cmd) {
	case VMM_IOC_CREATE:
		if ((ret = vmm_start()) != 0) {
			vmm_stop();
			break;
		}
		ret = vm_create((struct vm_create_params *)data, p);
		break;
	case VMM_IOC_RUN:
		ret = vm_run((struct vm_run_params *)data);
		break;
	case VMM_IOC_INFO:
		ret = vm_get_info((struct vm_info_params *)data);
		break;
	case VMM_IOC_TERM:
		ret = vm_terminate((struct vm_terminate_params *)data);
		break;
	case VMM_IOC_RESETCPU:
		ret = vm_resetcpu((struct vm_resetcpu_params *)data);
		break;
	case VMM_IOC_READREGS:
		ret = vm_rwregs((struct vm_rwregs_params *)data, 0);
		break;
	case VMM_IOC_WRITEREGS:
		ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
		break;
	case VMM_IOC_READVMPARAMS:
		ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
		break;
	case VMM_IOC_WRITEVMPARAMS:
		ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1);
		break;
	case VMM_IOC_SHAREMEM:
		ret = vm_share_mem((struct vm_sharemem_params *)data, p);
		break;
	default:
		ret = vmmioctl_machdep(dev, cmd, data, flag, p);
		break;
	}

	refcnt_rele_wake(&vmm_softc->sc_refcnt);
out:
	KERNEL_LOCK();

	return (ret);
}
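
/*
 * From userland, the handlers above are reached via open(2) and ioctl(2)
 * on /dev/vmm. A minimal, hypothetical consumer terminating a VM might
 * look like the sketch below (the usual <fcntl.h>/<err.h> userland
 * includes are assumed, 'id' is assumed to have been obtained beforehand,
 * e.g. from a VMM_IOC_INFO query, and the ioctl names and parameter
 * structures come from this driver's headers):
 *
 *	int fd;
 *	struct vm_terminate_params vtp;
 *
 *	if ((fd = open("/dev/vmm", O_RDWR)) == -1)
 *		err(1, "open");
 *	memset(&vtp, 0, sizeof(vtp));
 *	vtp.vtp_vm_id = id;
 *	if (ioctl(fd, VMM_IOC_TERM, &vtp) == -1)
 *		err(1, "VMM_IOC_TERM");
 *	close(fd);
 */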

/*
 * pledge_ioctl_vmm
 *
 * Restrict the allowed ioctls in a pledged process context.
 * Is called from pledge_ioctl().
 */
int
pledge_ioctl_vmm(struct proc *p, long com)
{
	switch (com) {
	case VMM_IOC_CREATE:
	case VMM_IOC_INFO:
	case VMM_IOC_SHAREMEM:
		/* The "parent" process in vmd forks and manages VMs */
		if (p->p_p->ps_pledge & PLEDGE_PROC)
			return (0);
		break;
	case VMM_IOC_TERM:
		/* XXX VM processes should only terminate themselves */
	case VMM_IOC_RUN:
	case VMM_IOC_RESETCPU:
	case VMM_IOC_READREGS:
	case VMM_IOC_WRITEREGS:
	case VMM_IOC_READVMPARAMS:
	case VMM_IOC_WRITEVMPARAMS:
		return (0);
	default:
		return pledge_ioctl_vmm_machdep(p, com);
	}

	return (EPERM);
}
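
/*
 * The PLEDGE_VMM and PLEDGE_PROC bits checked above correspond to the
 * "vmm" and "proc" pledge(2) promises. An illustrative sketch (promise
 * strings abbreviated, not necessarily what vmd uses verbatim): the
 * managing parent keeps "proc" so it can create and enumerate VMs, while
 * a per-VM child drops it and is then limited to operating on itself.
 *
 *	In the managing parent:
 *		if (pledge("stdio vmm proc", NULL) == -1)
 *			err(1, "pledge");
 *
 *	In a per-VM child:
 *		if (pledge("stdio vmm", NULL) == -1)
 *			err(1, "pledge");
 */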

/*
 * vm_find_vcpu
 *
 * Lookup VMM VCPU by ID number
 *
 * Parameters:
 *  vm: vm structure
 *  id: index id of vcpu
 *
 * Returns pointer to vcpu structure if successful, NULL otherwise
 */
struct vcpu *
vm_find_vcpu(struct vm *vm, uint32_t id)
{
	struct vcpu *vcpu;

	if (vm == NULL)
		return (NULL);

	SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
		if (vcpu->vc_id == id)
			return (vcpu);
	}

	return (NULL);
}

/*
 * vm_create
 *
 * Creates the in-memory VMM structures for the VM defined by 'vcp'. The
 * parent of this VM shall be the process defined by 'p'.
 * This function does not start the VCPU(s) - see vm_start.
 *
 * Return Values:
 *  0: the create operation was successful
 *  ENOMEM: out of memory
 *  various other errors from vcpu_init/vm_impl_init
 */
int
vm_create(struct vm_create_params *vcp, struct proc *p)
{
	int i, ret;
	size_t memsize;
	struct vm *vm;
	struct vcpu *vcpu;

	memsize = vm_create_check_mem_ranges(vcp);
	if (memsize == 0)
		return (EINVAL);

	/* XXX - support UP only (for now) */
	if (vcp->vcp_ncpus != 1)
		return (EINVAL);

	/* Bail early if we're already at vcpu capacity. */
	rw_enter_read(&vmm_softc->vm_lock);
	if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > vmm_softc->vcpu_max) {
		DPRINTF("%s: maximum vcpus (%lu) reached\n", __func__,
		    vmm_softc->vcpu_max);
		rw_exit_read(&vmm_softc->vm_lock);
		return (ENOMEM);
	}
	rw_exit_read(&vmm_softc->vm_lock);

	/* Instantiate and configure the new vm. */
	vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);

	vm->vm_creator_pid = p->p_p->ps_pid;
	vm->vm_nmemranges = vcp->vcp_nmemranges;
	memcpy(vm->vm_memranges, vcp->vcp_memranges,
	    vm->vm_nmemranges * sizeof(vm->vm_memranges[0]));
	vm->vm_memory_size = memsize;
	strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN - 1);

	if (vm_impl_init(vm, p)) {
		printf("failed to init arch-specific features for vm %p\n", vm);
		vm_teardown(&vm);
		return (ENOMEM);
	}

	vm->vm_vcpu_ct = 0;

	/* Initialize each VCPU defined in 'vcp' */
	SLIST_INIT(&vm->vm_vcpu_list);
	for (i = 0; i < vcp->vcp_ncpus; i++) {
		vcpu = pool_get(&vcpu_pool, PR_WAITOK | PR_ZERO);

		vcpu->vc_parent = vm;
		if ((ret = vcpu_init(vcpu)) != 0) {
			printf("failed to init vcpu %d for vm %p\n", i, vm);
			vm_teardown(&vm);
			return (ret);
		}
		vcpu->vc_id = vm->vm_vcpu_ct;
		vm->vm_vcpu_ct++;
		/* Publish vcpu to list, inheriting the reference. */
		SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link);
	}

	/* Attempt to register the vm now that it's configured. */
	rw_enter_write(&vmm_softc->vm_lock);

	if (vmm_softc->vcpu_ct + vm->vm_vcpu_ct > vmm_softc->vcpu_max) {
		/* Someone already took our capacity. */
		printf("%s: maximum vcpus (%lu) reached\n", __func__,
		    vmm_softc->vcpu_max);
		rw_exit_write(&vmm_softc->vm_lock);
		vm_teardown(&vm);
		return (ENOMEM);
	}

	/* Update the global index and identify the vm. */
	vmm_softc->vm_idx++;
	vm->vm_id = vmm_softc->vm_idx;
	vcp->vcp_id = vm->vm_id;

	/* Publish the vm into the list and update counts. */
	refcnt_init(&vm->vm_refcnt);
	SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link);
	vmm_softc->vm_ct++;
	vmm_softc->vcpu_ct += vm->vm_vcpu_ct;

	rw_exit_write(&vmm_softc->vm_lock);

	return (0);
}
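
/*
 * A hypothetical userland sketch of VM creation matching the checks above:
 * a single vcpu, one page-aligned memory range, and the kernel-assigned id
 * read back from vcp_id after the ioctl returns. Field and ioctl names are
 * as used in this file; 'fd' is an open /dev/vmm descriptor and 'guest_va'
 * stands in for the page-aligned address of a buffer already mapped in the
 * calling process.
 *
 *	struct vm_create_params vcp;
 *
 *	memset(&vcp, 0, sizeof(vcp));
 *	strlcpy(vcp.vcp_name, "example-vm", sizeof(vcp.vcp_name));
 *	vcp.vcp_ncpus = 1;
 *	vcp.vcp_nmemranges = 1;
 *	vcp.vcp_memranges[0].vmr_gpa = 0;
 *	vcp.vcp_memranges[0].vmr_va = guest_va;
 *	vcp.vcp_memranges[0].vmr_size = 64 * 1024 * 1024;
 *	if (ioctl(fd, VMM_IOC_CREATE, &vcp) == -1)
 *		err(1, "VMM_IOC_CREATE");
 *	printf("created vm %u\n", vcp.vcp_id);
 */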

/*
 * vm_create_check_mem_ranges
 *
 * Make sure that the guest physical memory ranges given by the user process
 * do not overlap and are in ascending order.
 *
 * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE.
 *
 * Return Values:
 *   The total memory size in bytes if the checks were successful
 *   0: One of the memory ranges was invalid or VMM_MAX_VM_MEM_SIZE was
 *   exceeded
 */
size_t
vm_create_check_mem_ranges(struct vm_create_params *vcp)
{
	size_t i, memsize = 0;
	struct vm_mem_range *vmr, *pvmr;
	const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE;

	if (vcp->vcp_nmemranges == 0 ||
	    vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES) {
		DPRINTF("invalid number of guest memory ranges\n");
		return (0);
	}

	for (i = 0; i < vcp->vcp_nmemranges; i++) {
		vmr = &vcp->vcp_memranges[i];

		/* Only page-aligned addresses and sizes are permitted */
		if ((vmr->vmr_gpa & PAGE_MASK) || (vmr->vmr_va & PAGE_MASK) ||
		    (vmr->vmr_size & PAGE_MASK) || vmr->vmr_size == 0) {
			DPRINTF("memory range %zu is not page aligned\n", i);
			return (0);
		}

		/* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */
		if (vmr->vmr_gpa >= maxgpa ||
		    vmr->vmr_size > maxgpa - vmr->vmr_gpa) {
			DPRINTF("exceeded max memory size\n");
			return (0);
		}

		/*
		 * Make sure that all virtual addresses are within the address
		 * space of the process and that they do not wrap around.
		 * Calling uvm_share() when creating the VM will take care of
		 * further checks.
		 */
		if (vmr->vmr_va < VM_MIN_ADDRESS ||
		    vmr->vmr_va >= VM_MAXUSER_ADDRESS ||
		    vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) {
			DPRINTF("guest va not within range or wraps\n");
			return (0);
		}

		/*
		 * Make sure that guest physical memory ranges do not overlap
		 * and that they are ascending.
		 */
		if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa) {
			DPRINTF("guest range %zu overlaps or !ascending\n", i);
			return (0);
		}

		/*
		 * No memory is mappable in MMIO ranges, so they don't count
		 * towards the total guest memory size.
		 */
		if (vmr->vmr_type != VM_MEM_MMIO)
			memsize += vmr->vmr_size;
		pvmr = vmr;
	}

	return (memsize);
}
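
/*
 * A worked example of a layout that passes the checks above (addresses are
 * illustrative only): three page-aligned guest ranges in ascending,
 * non-overlapping order, with an MMIO hole that contributes nothing to the
 * reported memory size.
 *
 *	range 0: vmr_gpa 0x00000000, vmr_size 0x000a0000, regular
 *	range 1: vmr_gpa 0x000a0000, vmr_size 0x00060000, VM_MEM_MMIO
 *	range 2: vmr_gpa 0x00100000, vmr_size 0x0ff00000, regular
 *
 * The returned memsize is 0x000a0000 + 0x0ff00000 (the MMIO range is
 * skipped); the function returns 0 instead if any range overlapped, was
 * not page aligned, or extended past VMM_MAX_VM_MEM_SIZE.
 */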

/*
 * vm_teardown
 *
 * Tears down (destroys) the vm indicated by 'vm'.
 *
 * Assumes the vm is already removed from the global vm list (or was never
 * added).
 *
 * Parameters:
 *  vm: vm to be torn down
 */
void
vm_teardown(struct vm **target)
{
	size_t nvcpu = 0;
	struct vcpu *vcpu, *tmp;
	struct vm *vm = *target;
	struct vmspace *vm_vmspace;

	KERNEL_ASSERT_UNLOCKED();

	/* Free VCPUs */
	SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp) {
		SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link);
		vcpu_deinit(vcpu);

		pool_put(&vcpu_pool, vcpu);
		nvcpu++;
	}

	vm_impl_deinit(vm);

	/* teardown guest vmspace */
	KERNEL_LOCK();
	vm_vmspace = vm->vm_vmspace;
	if (vm_vmspace != NULL) {
		vm->vm_vmspace = NULL;
		uvmspace_free(vm_vmspace);
	}
	KERNEL_UNLOCK();

	pool_put(&vm_pool, vm);
	*target = NULL;
}

/*
 * vm_get_info
 *
 * Returns information about the VMs registered with vmm(4). The 'vip_size'
 * field in the 'vip' parameter is used to indicate the size of the caller's
 * buffer. If insufficient space exists in that buffer, the required size is
 * returned in vip_size and the number of VM information structures returned
 * in vip_info_ct is set to 0. The caller should then try the ioctl again
 * after allocating a sufficiently large buffer.
 *
 * Parameters:
 *  vip: information structure describing the query and destination buffer
 *
 * Return values:
 *  0: the operation succeeded
 *  ENOMEM: memory allocation error during processing
 *  EFAULT: error copying data to user process
 */
int
vm_get_info(struct vm_info_params *vip)
{
	struct vm_info_result *out;
	struct vm *vm;
	struct vcpu *vcpu;
	int i = 0, j;
	size_t need, vm_ct;

	rw_enter_read(&vmm_softc->vm_lock);
	vm_ct = vmm_softc->vm_ct;
	rw_exit_read(&vmm_softc->vm_lock);

	need = vm_ct * sizeof(struct vm_info_result);
	if (vip->vip_size < need) {
		vip->vip_info_ct = 0;
		vip->vip_size = need;
		return (0);
	}

	out = malloc(need, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (out == NULL) {
		vip->vip_info_ct = 0;
		return (ENOMEM);
	}

	vip->vip_info_ct = vm_ct;

	rw_enter_read(&vmm_softc->vm_lock);
	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
		refcnt_take(&vm->vm_refcnt);

		out[i].vir_memory_size = vm->vm_memory_size;
		out[i].vir_used_size =
		    pmap_resident_count(vm->vm_map->pmap) * PAGE_SIZE;
		out[i].vir_ncpus = vm->vm_vcpu_ct;
		out[i].vir_id = vm->vm_id;
		out[i].vir_creator_pid = vm->vm_creator_pid;
		strlcpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN);

		for (j = 0; j < vm->vm_vcpu_ct; j++) {
			out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN;
			SLIST_FOREACH(vcpu, &vm->vm_vcpu_list,
			    vc_vcpu_link) {
				if (vcpu->vc_id == j)
					out[i].vir_vcpu_state[j] =
					    vcpu->vc_state;
			}
		}

		refcnt_rele_wake(&vm->vm_refcnt);
		i++;
		if (i == vm_ct)
			break;	/* Truncate to keep within bounds of 'out'. */
	}
	rw_exit_read(&vmm_softc->vm_lock);

	if (copyout(out, vip->vip_info, need) == EFAULT) {
		free(out, M_DEVBUF, need);
		return (EFAULT);
	}

	free(out, M_DEVBUF, need);
	return (0);
}
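
/*
 * The size negotiation above maps to a simple query-then-retry pattern in
 * userland. A hypothetical sketch, assuming 'fd' is an open /dev/vmm
 * descriptor: the first ioctl is issued with vip_size 0 so the kernel
 * reports the required size, then the call is repeated with a buffer of
 * that size.
 *
 *	struct vm_info_params vip;
 *	struct vm_info_result *info;
 *	size_t i;
 *
 *	memset(&vip, 0, sizeof(vip));
 *	if (ioctl(fd, VMM_IOC_INFO, &vip) == -1)
 *		err(1, "VMM_IOC_INFO");
 *	if ((info = calloc(1, vip.vip_size)) == NULL)
 *		err(1, "calloc");
 *	vip.vip_info = info;
 *	if (ioctl(fd, VMM_IOC_INFO, &vip) == -1)
 *		err(1, "VMM_IOC_INFO");
 *	for (i = 0; i < vip.vip_info_ct; i++)
 *		printf("vm %u: %s\n", info[i].vir_id, info[i].vir_name);
 *	free(info);
 */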

/*
 * vm_terminate
 *
 * Terminates the VM indicated by 'vtp'.
 *
 * Parameters:
 *  vtp: structure defining the VM to terminate
 *
 * Return values:
 *  0: the VM was terminated
 *  !0: the VM could not be located
 */
int
vm_terminate(struct vm_terminate_params *vtp)
{
	struct vm *vm;
	int error, nvcpu, vm_id;

	/*
	 * Find desired VM
	 */
	error = vm_find(vtp->vtp_vm_id, &vm);
	if (error)
		return (error);

	/* Pop the vm out of the global vm list. */
	rw_enter_write(&vmm_softc->vm_lock);
	SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link);
	rw_exit_write(&vmm_softc->vm_lock);

	/* Drop the vm_list's reference to the vm. */
	if (refcnt_rele(&vm->vm_refcnt))
		panic("%s: vm %d(%p) vm_list refcnt drop was the last",
		    __func__, vm->vm_id, vm);
	/* Wait until our reference (taken in vm_find) is the last one active. */
	refcnt_finalize(&vm->vm_refcnt, __func__);

	vm_id = vm->vm_id;
	nvcpu = vm->vm_vcpu_ct;

	vm_teardown(&vm);

	if (vm_id > 0) {
		rw_enter_write(&vmm_softc->vm_lock);
		vmm_softc->vm_ct--;
		vmm_softc->vcpu_ct -= nvcpu;
		if (vmm_softc->vm_ct < 1)
			vmm_stop();
		rw_exit_write(&vmm_softc->vm_lock);
	}

	return (0);
}

/*
 * vm_resetcpu
 *
 * Resets the vcpu defined in 'vrp' to power-on-init register state
 *
 * Parameters:
 *  vrp: ioctl structure defining the vcpu to reset (see vmmvar.h)
 *
 * Returns 0 if successful, or various error codes on failure:
 *  ENOENT if the VM id contained in 'vrp' refers to an unknown VM or
 *      if vrp describes an unknown vcpu for this VM
 *  EBUSY if the indicated VCPU is not stopped
 *  EIO if the indicated VCPU failed to reset
 */
int
vm_resetcpu(struct vm_resetcpu_params *vrp)
{
	struct vm *vm;
	struct vcpu *vcpu;
	int error, ret = 0;

	/* Find the desired VM */
	error = vm_find(vrp->vrp_vm_id, &vm);

	/* Not found? exit. */
	if (error != 0) {
		DPRINTF("%s: vm id %u not found\n", __func__,
		    vrp->vrp_vm_id);
		return (error);
	}

	vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);

	if (vcpu == NULL) {
		DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
		    vrp->vrp_vcpu_id, vrp->vrp_vm_id);
		ret = ENOENT;
		goto out;
	}

	rw_enter_write(&vcpu->vc_lock);
	if (vcpu->vc_state != VCPU_STATE_STOPPED)
		ret = EBUSY;
	else {
		if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) {
			printf("%s: failed\n", __func__);
#ifdef VMM_DEBUG
			dump_vcpu(vcpu);
#endif /* VMM_DEBUG */
			ret = EIO;
		}
	}
	rw_exit_write(&vcpu->vc_lock);
out:
	refcnt_rele_wake(&vm->vm_refcnt);

	return (ret);
}

/*
 * vcpu_must_stop
 *
 * Check if we need to (temporarily) stop running the VCPU for some reason,
 * such as:
 * - the VM was requested to terminate
 * - the proc running this VCPU has pending signals
 *
 * Parameters:
 *  vcpu: the VCPU to check
 *
 * Return values:
 *  1: the VM owning this VCPU should stop
 *  0: no stop is needed
 */
int
vcpu_must_stop(struct vcpu *vcpu)
{
	struct proc *p = curproc;

	if (vcpu->vc_state == VCPU_STATE_REQTERM)
		return (1);
	if (SIGPENDING(p) != 0)
		return (1);
	return (0);
}
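
/*
 * A hypothetical sketch of how a machine-dependent run loop (not part of
 * this file) might consult vcpu_must_stop(): keep re-entering the guest
 * until termination is requested or the controlling process has a signal
 * to deliver, then bounce back out to userland. run_one_guest_entry() is
 * a placeholder name for the arch-specific VM entry, not a real function.
 *
 *	while (ret == 0) {
 *		if (vcpu_must_stop(vcpu)) {
 *			ret = EINTR;
 *			break;
 *		}
 *		ret = run_one_guest_entry(vcpu);
 *	}
 */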

/*
 * vm_share_mem
 *
 * Share a uvm mapping for the vm guest memory ranges into the calling process.
 *
 * Return values:
 *  0: if successful
 *  ENOENT: if the vm cannot be found by vm_find
 *  EPERM: if the vm cannot be accessed by the current process
 *  EINVAL: if the provided memory ranges fail validation
 *  ENOMEM: if uvm_share fails to find available memory in the destination map
 */
int
vm_share_mem(struct vm_sharemem_params *vsp, struct proc *p)
{
	int ret = EINVAL;
	size_t i, n;
	struct vm *vm;
	struct vm_mem_range *src, *dst;

	ret = vm_find(vsp->vsp_vm_id, &vm);
	if (ret)
		return (ret);

	/* Check we have the expected number of ranges. */
	if (vm->vm_nmemranges != vsp->vsp_nmemranges)
		goto out;
	n = vm->vm_nmemranges;

	/* Check their types, sizes, and gpa's (implying page alignment). */
	for (i = 0; i < n; i++) {
		src = &vm->vm_memranges[i];
		dst = &vsp->vsp_memranges[i];

		/*
		 * The vm memranges were already checked during creation, so
		 * compare to them to confirm validity of mapping request.
		 */
		if (src->vmr_type != dst->vmr_type)
			goto out;
		if (src->vmr_gpa != dst->vmr_gpa)
			goto out;
		if (src->vmr_size != dst->vmr_size)
			goto out;

		/* Check our intended destination is page-aligned. */
		if (dst->vmr_va & PAGE_MASK)
			goto out;
	}

	/*
	 * Share each range individually with the calling process. We do
	 * not need PROC_EXEC as the emulated devices do not need to execute
	 * instructions from guest memory.
	 */
	for (i = 0; i < n; i++) {
		src = &vm->vm_memranges[i];
		dst = &vsp->vsp_memranges[i];

		/* Skip MMIO range. */
		if (src->vmr_type == VM_MEM_MMIO)
			continue;

		DPRINTF("sharing gpa=0x%lx for pid %d @ va=0x%lx\n",
		    src->vmr_gpa, p->p_p->ps_pid, dst->vmr_va);
		ret = uvm_share(&p->p_vmspace->vm_map, dst->vmr_va,
		    PROT_READ | PROT_WRITE, vm->vm_map, src->vmr_gpa,
		    src->vmr_size);
		if (ret) {
			printf("%s: uvm_share failed (%d)\n", __func__, ret);
			goto out;
		}
	}
	ret = 0;
out:
	refcnt_rele_wake(&vm->vm_refcnt);
	return (ret);
}
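
/*
 * A hypothetical userland sketch of VMM_IOC_SHAREMEM, which must echo back
 * the guest's memory ranges exactly and supply page-aligned destination
 * addresses. Here 'fd' is an open /dev/vmm descriptor, 'vcp' is the
 * vm_create_params used earlier so the ranges are simply copied from it,
 * and pick_free_va() stands in for however the caller chooses a free,
 * page-aligned address in its own address space.
 *
 *	struct vm_sharemem_params vsp;
 *	size_t i;
 *
 *	memset(&vsp, 0, sizeof(vsp));
 *	vsp.vsp_vm_id = vcp.vcp_id;
 *	vsp.vsp_nmemranges = vcp.vcp_nmemranges;
 *	for (i = 0; i < vcp.vcp_nmemranges; i++) {
 *		vsp.vsp_memranges[i] = vcp.vcp_memranges[i];
 *		vsp.vsp_memranges[i].vmr_va = pick_free_va(i);
 *	}
 *	if (ioctl(fd, VMM_IOC_SHAREMEM, &vsp) == -1)
 *		err(1, "VMM_IOC_SHAREMEM");
 */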