1 /* $OpenBSD: vm.c,v 1.110 2024/11/21 13:25:30 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/param.h> /* PAGE_SIZE, MAXCOMLEN */
20 #include <sys/types.h>
21 #include <sys/ioctl.h>
22 #include <sys/mman.h>
23 #include <sys/resource.h>
24
25 #include <dev/vmm/vmm.h>
26
27 #include <errno.h>
28 #include <event.h>
29 #include <fcntl.h>
30 #include <imsg.h>
31 #include <poll.h>
32 #include <pthread.h>
33 #include <pthread_np.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <util.h>
39
40 #include "atomicio.h"
41 #include "pci.h"
42 #include "virtio.h"
43 #include "vmd.h"
44
45 #define MMIO_NOTYET 0
46
47 static int run_vm(struct vmop_create_params *, struct vcpu_reg_state *);
48 static void vm_dispatch_vmm(int, short, void *);
49 static void *event_thread(void *);
50 static void *vcpu_run_loop(void *);
51 static int vmm_create_vm(struct vmd_vm *);
52 static int alloc_guest_mem(struct vmd_vm *);
53 static int send_vm(int, struct vmd_vm *);
54 static int dump_vmr(int, struct vm_mem_range *);
55 static int dump_mem(int, struct vmd_vm *);
56 static void restore_vmr(int, struct vm_mem_range *);
57 static void restore_mem(int, struct vm_create_params *);
58 static int restore_vm_params(int, struct vm_create_params *);
59 static void pause_vm(struct vmd_vm *);
60 static void unpause_vm(struct vmd_vm *);
61 static int start_vm(struct vmd_vm *, int);
62
63 int con_fd;
64 struct vmd_vm *current_vm;
65
66 extern struct vmd *env;
67
68 extern char *__progname;
69
70 pthread_mutex_t threadmutex;
71 pthread_cond_t threadcond;
72
73 pthread_cond_t vcpu_run_cond[VMM_MAX_VCPUS_PER_VM];
74 pthread_mutex_t vcpu_run_mtx[VMM_MAX_VCPUS_PER_VM];
75 pthread_barrier_t vm_pause_barrier;
76 pthread_cond_t vcpu_unpause_cond[VMM_MAX_VCPUS_PER_VM];
77 pthread_mutex_t vcpu_unpause_mtx[VMM_MAX_VCPUS_PER_VM];
78
79 pthread_mutex_t vm_mtx;
80 uint8_t vcpu_hlt[VMM_MAX_VCPUS_PER_VM];
81 uint8_t vcpu_done[VMM_MAX_VCPUS_PER_VM];
82
83 /*
84 * vm_main
85 *
86 * Primary entrypoint for launching a vm. Does not return.
87 *
88 * fd: file descriptor for communicating with vmm process.
89 * fd_vmm: file descriptor for communicating with vmm(4) device
90 */
91 void
92 vm_main(int fd, int fd_vmm)
93 {
94 struct vm_create_params *vcp = NULL;
95 struct vmd_vm vm;
96 size_t sz = 0;
97 int ret = 0;
98
99 /*
100 * The vm process relies on global state. Set the fd for /dev/vmm.
101 */
102 env->vmd_fd = fd_vmm;
103
104 /*
105 * We aren't root, so we can't chroot(2). Use unveil(2) instead.
106 */
107 if (unveil(env->argv0, "x") == -1)
108 fatal("unveil %s", env->argv0);
109 if (unveil(NULL, NULL) == -1)
110 fatal("unveil lock");
111
112 /*
113 * pledge in the vm processes:
114 * stdio - for malloc and basic I/O including events.
115 * vmm - for the vmm ioctls and operations.
116 * proc exec - fork/exec for launching devices.
117 * recvfd - for vm send/recv and sending fd to devices.
118 */
119 if (pledge("stdio vmm proc exec recvfd", NULL) == -1)
120 fatal("pledge");
121
122 /* Receive our vm configuration. */
123 memset(&vm, 0, sizeof(vm));
124 sz = atomicio(read, fd, &vm, sizeof(vm));
125 if (sz != sizeof(vm)) {
126 log_warnx("failed to receive start message");
127 _exit(EIO);
128 }
129
130 /* Update process with the vm name. */
131 vcp = &vm.vm_params.vmc_params;
132 setproctitle("%s", vcp->vcp_name);
133 log_procinit("vm/%s", vcp->vcp_name);
134
135 /* Receive the local prefix settings. */
136 sz = atomicio(read, fd, &env->vmd_cfg.cfg_localprefix,
137 sizeof(env->vmd_cfg.cfg_localprefix));
138 if (sz != sizeof(env->vmd_cfg.cfg_localprefix)) {
139 log_warnx("failed to receive local prefix");
140 _exit(EIO);
141 }
142
143 /*
144 * We need, at minimum, a vm_kernel fd to boot a vm. This is either a
145 * kernel or a BIOS image.
146 */
147 if (!(vm.vm_state & VM_STATE_RECEIVED)) {
148 if (vm.vm_kernel == -1) {
149 log_warnx("%s: failed to receive boot fd",
150 vcp->vcp_name);
151 _exit(EINVAL);
152 }
153 }
154
155 if (vcp->vcp_sev && env->vmd_psp_fd < 0) {
156 log_warnx("%s not available", PSP_NODE);
157 _exit(EINVAL);
158 }
159
160 ret = start_vm(&vm, fd);
161 _exit(ret);
162 }
163
164 /*
165 * start_vm
166 *
167 * After forking a new VM process, starts the new VM with the creation
168 * parameters supplied (in the incoming vm->vm_params field). This
169 * function performs a basic sanity check on the incoming parameters
170 * and then performs the following steps to complete the creation of the VM:
171 *
172 * 1. validates and creates the new VM
173 * 2. opens the imsg control channel to the parent and drops more privilege
174 * 3. drops additional privileges by calling pledge(2)
175 * 4. loads the kernel from the disk image or file descriptor
176 * 5. runs the VM's VCPU loops.
177 *
178 * Parameters:
179 * vm: The VM data structure, including the VM create parameters.
180 * fd: The imsg socket that is connected to the parent process.
181 *
182 * Return values:
183 * 0: success
184 * !0 : failure - typically an errno indicating the source of the failure
185 */
186 int
187 start_vm(struct vmd_vm *vm, int fd)
188 {
189 struct vmop_create_params *vmc = &vm->vm_params;
190 struct vm_create_params *vcp = &vmc->vmc_params;
191 struct vcpu_reg_state vrs;
192 int nicfds[VM_MAX_NICS_PER_VM];
193 int ret;
194 size_t i;
195 struct vm_rwregs_params vrp;
196
197 /*
198 * We first try to initialize and allocate memory before bothering
199 * vmm(4) with a request to create a new vm.
200 */
201 if (!(vm->vm_state & VM_STATE_RECEIVED))
202 create_memory_map(vcp);
203
204 ret = alloc_guest_mem(vm);
205 if (ret) {
206 struct rlimit lim;
207 char buf[FMT_SCALED_STRSIZE];
208 if (ret == ENOMEM && getrlimit(RLIMIT_DATA, &lim) == 0) {
209 if (fmt_scaled(lim.rlim_cur, buf) == 0)
210 fatalx("could not allocate guest memory (data "
211 "limit is %s)", buf);
212 }
213 errno = ret;
214 log_warn("could not allocate guest memory");
215 return (ret);
216 }
217
218 /* We've allocated guest memory, so now create the vm in vmm(4). */
219 ret = vmm_create_vm(vm);
220 if (ret) {
221 /* Let the vmm process know we failed by sending a 0 vm id. */
222 vcp->vcp_id = 0;
223 atomicio(vwrite, fd, &vcp->vcp_id, sizeof(vcp->vcp_id));
224 return (ret);
225 }
226
227 /* Setup SEV. */
228 ret = sev_init(vm);
229 if (ret) {
230 log_warnx("could not initialize SEV");
231 return (ret);
232 }
233
234 /*
235 * Parts of vmd currently rely on global state (current_vm, con_fd).
236 */
237 current_vm = vm;
238 con_fd = vm->vm_tty;
239 if (fcntl(con_fd, F_SETFL, O_NONBLOCK) == -1) {
240 log_warn("failed to set nonblocking mode on console");
241 return (1);
242 }
243
244 /*
245 * We now let the vmm process know we were successful by sending it our
246 * vmm(4) assigned vm id.
247 */
248 if (atomicio(vwrite, fd, &vcp->vcp_id, sizeof(vcp->vcp_id)) !=
249 sizeof(vcp->vcp_id)) {
250 log_warn("failed to send created vm id to vmm process");
251 return (1);
252 }
253
254 /* Prepare either our boot image or receive an existing vm to launch. */
255 if (vm->vm_state & VM_STATE_RECEIVED) {
256 ret = atomicio(read, vm->vm_receive_fd, &vrp, sizeof(vrp));
257 if (ret != sizeof(vrp))
258 fatal("received incomplete vrp - exiting");
259 vrs = vrp.vrwp_regs;
260 } else if (load_firmware(vm, &vrs))
261 fatalx("failed to load kernel or firmware image");
262
263 if (vm->vm_kernel != -1)
264 close_fd(vm->vm_kernel);
265
266 /* Initialize our mutexes. */
267 ret = pthread_mutex_init(&threadmutex, NULL);
268 if (ret) {
269 log_warn("%s: could not initialize thread state mutex",
270 __func__);
271 return (ret);
272 }
273 ret = pthread_cond_init(&threadcond, NULL);
274 if (ret) {
275 log_warn("%s: could not initialize thread state "
276 "condition variable", __func__);
277 return (ret);
278 }
279 ret = pthread_mutex_init(&vm_mtx, NULL);
280 if (ret) {
281 log_warn("%s: could not initialize vm state mutex",
282 __func__);
283 return (ret);
284 }
285
286 /* Lock thread mutex now. It's unlocked when waiting on threadcond. */
287 mutex_lock(&threadmutex);
288
289 /*
290 * Finalize our communication socket with the vmm process. From here
291 * onwards, communication with the vmm process is event-based.
292 */
293 event_init();
294 if (vmm_pipe(vm, fd, vm_dispatch_vmm) == -1)
295 fatal("setup vm pipe");
296
297 /*
298 * Initialize or restore our emulated hardware.
299 */
300 for (i = 0; i < VMM_MAX_NICS_PER_VM; i++)
301 nicfds[i] = vm->vm_ifs[i].vif_fd;
302
303 if (vm->vm_state & VM_STATE_RECEIVED) {
304 restore_mem(vm->vm_receive_fd, vcp);
305 restore_emulated_hw(vcp, vm->vm_receive_fd, nicfds,
306 vm->vm_disks, vm->vm_cdrom);
307 if (restore_vm_params(vm->vm_receive_fd, vcp))
308 fatal("restore vm params failed");
309 unpause_vm(vm);
310 } else
311 init_emulated_hw(vmc, vm->vm_cdrom, vm->vm_disks, nicfds);
312
313 /* Drop privileges further before starting the vcpu run loop(s). */
314 if (pledge("stdio vmm recvfd", NULL) == -1)
315 fatal("pledge");
316
317 /*
318 * Execute the vcpu run loop(s) for this VM.
319 */
320 ret = run_vm(&vm->vm_params, &vrs);
321
322 /* Shutdown SEV. */
323 if (sev_shutdown(vm))
324 log_warnx("%s: could not shutdown SEV", __func__);
325
326 /* Ensure that any in-flight data is written back */
327 virtio_shutdown(vm);
328
329 return (ret);
330 }
331
332 /*
333 * vm_dispatch_vmm
334 *
335 * imsg callback for messages that are received from the vmm parent process.
336 */
337 void
338 vm_dispatch_vmm(int fd, short event, void *arg)
339 {
340 struct vmd_vm *vm = arg;
341 struct vmop_result vmr;
342 struct vmop_addr_result var;
343 struct imsgev *iev = &vm->vm_iev;
344 struct imsgbuf *ibuf = &iev->ibuf;
345 struct imsg imsg;
346 ssize_t n;
347 int verbose;
348
349 if (event & EV_READ) {
350 if ((n = imsgbuf_read(ibuf)) == -1)
351 fatal("%s: imsgbuf_read", __func__);
352 if (n == 0)
353 _exit(0);
354 }
355
356 if (event & EV_WRITE) {
357 if (imsgbuf_write(ibuf) == -1) {
358 if (errno == EPIPE)
359 _exit(0);
360 fatal("%s: imsgbuf_write fd %d", __func__, ibuf->fd);
361 }
362 }
363
364 for (;;) {
365 if ((n = imsg_get(ibuf, &imsg)) == -1)
366 fatal("%s: imsg_get", __func__);
367 if (n == 0)
368 break;
369
370 #if DEBUG > 1
371 log_debug("%s: got imsg %d from %s",
372 __func__, imsg.hdr.type,
373 vm->vm_params.vmc_params.vcp_name);
374 #endif
375
376 switch (imsg.hdr.type) {
377 case IMSG_CTL_VERBOSE:
378 IMSG_SIZE_CHECK(&imsg, &verbose);
379 memcpy(&verbose, imsg.data, sizeof(verbose));
380 log_setverbose(verbose);
381 virtio_broadcast_imsg(vm, IMSG_CTL_VERBOSE, &verbose,
382 sizeof(verbose));
383 break;
384 case IMSG_VMDOP_VM_SHUTDOWN:
385 if (vmmci_ctl(VMMCI_SHUTDOWN) == -1)
386 _exit(0);
387 break;
388 case IMSG_VMDOP_VM_REBOOT:
389 if (vmmci_ctl(VMMCI_REBOOT) == -1)
390 _exit(0);
391 break;
392 case IMSG_VMDOP_PAUSE_VM:
393 vmr.vmr_result = 0;
394 vmr.vmr_id = vm->vm_vmid;
395 pause_vm(vm);
396 imsg_compose_event(&vm->vm_iev,
397 IMSG_VMDOP_PAUSE_VM_RESPONSE,
398 imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
399 sizeof(vmr));
400 break;
401 case IMSG_VMDOP_UNPAUSE_VM:
402 vmr.vmr_result = 0;
403 vmr.vmr_id = vm->vm_vmid;
404 unpause_vm(vm);
405 imsg_compose_event(&vm->vm_iev,
406 IMSG_VMDOP_UNPAUSE_VM_RESPONSE,
407 imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
408 sizeof(vmr));
409 break;
410 case IMSG_VMDOP_SEND_VM_REQUEST:
411 vmr.vmr_id = vm->vm_vmid;
412 vmr.vmr_result = send_vm(imsg_get_fd(&imsg), vm);
413 imsg_compose_event(&vm->vm_iev,
414 IMSG_VMDOP_SEND_VM_RESPONSE,
415 imsg.hdr.peerid, imsg.hdr.pid, -1, &vmr,
416 sizeof(vmr));
417 if (!vmr.vmr_result) {
418 imsgbuf_flush(&current_vm->vm_iev.ibuf);
419 _exit(0);
420 }
421 break;
422 case IMSG_VMDOP_PRIV_GET_ADDR_RESPONSE:
423 IMSG_SIZE_CHECK(&imsg, &var);
424 memcpy(&var, imsg.data, sizeof(var));
425
426 log_debug("%s: received tap addr %s for nic %d",
427 vm->vm_params.vmc_params.vcp_name,
428 ether_ntoa((void *)var.var_addr), var.var_nic_idx);
429
430 vionet_set_hostmac(vm, var.var_nic_idx, var.var_addr);
431 break;
432 default:
433 fatalx("%s: got invalid imsg %d from %s",
434 __func__, imsg.hdr.type,
435 vm->vm_params.vmc_params.vcp_name);
436 }
437 imsg_free(&imsg);
438 }
439 imsg_event_add(iev);
440 }
441
442 /*
443 * vm_shutdown
444 *
445 * Tell the vmm parent process to shutdown or reboot the VM and exit.
446 */
447 __dead void
448 vm_shutdown(unsigned int cmd)
449 {
450 switch (cmd) {
451 case VMMCI_NONE:
452 case VMMCI_SHUTDOWN:
453 (void)imsg_compose_event(&current_vm->vm_iev,
454 IMSG_VMDOP_VM_SHUTDOWN, 0, 0, -1, NULL, 0);
455 break;
456 case VMMCI_REBOOT:
457 (void)imsg_compose_event(&current_vm->vm_iev,
458 IMSG_VMDOP_VM_REBOOT, 0, 0, -1, NULL, 0);
459 break;
460 default:
461 fatalx("invalid vm ctl command: %d", cmd);
462 }
463 imsgbuf_flush(&current_vm->vm_iev.ibuf);
464
465 if (sev_shutdown(current_vm))
466 log_warnx("%s: could not shutdown SEV", __func__);
467
468 _exit(0);
469 }
470
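/*
 * send_vm
 *
 * Dumps the current vm state to the given fd for a vm send/receive
 * operation: the create parameters, per-vcpu register state, guest
 * memory, and emulated device state are written out, then the vm is
 * terminated in vmm(4). The vm is paused for the duration of the dump
 * and is only unpaused again if the dump fails.
 *
 * Parameters:
 *  fd: file descriptor to write the vm state to
 *  vm: the vm being sent
 *
 * Return values:
 *  0: success
 *  !0: failure
 */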
471 int
472 send_vm(int fd, struct vmd_vm *vm)
473 {
474 struct vm_rwregs_params vrp;
475 struct vm_rwvmparams_params vpp;
476 struct vmop_create_params *vmc;
477 struct vm_terminate_params vtp;
478 unsigned int flags = 0;
479 unsigned int i;
480 int ret = 0;
481 size_t sz;
482
483 if (dump_send_header(fd)) {
484 log_warnx("%s: failed to send vm dump header", __func__);
485 goto err;
486 }
487
488 pause_vm(vm);
489
490 vmc = calloc(1, sizeof(struct vmop_create_params));
491 if (vmc == NULL) {
492 log_warn("%s: calloc error getting vmc", __func__);
493 ret = -1;
494 goto err;
495 }
496
497 flags |= VMOP_CREATE_MEMORY;
498 memcpy(&vmc->vmc_params, &current_vm->vm_params, sizeof(struct
499 vmop_create_params));
500 vmc->vmc_flags = flags;
501 vrp.vrwp_vm_id = vm->vm_params.vmc_params.vcp_id;
502 vrp.vrwp_mask = VM_RWREGS_ALL;
503 vpp.vpp_mask = VM_RWVMPARAMS_ALL;
504 vpp.vpp_vm_id = vm->vm_params.vmc_params.vcp_id;
505
506 sz = atomicio(vwrite, fd, vmc, sizeof(struct vmop_create_params));
507 if (sz != sizeof(struct vmop_create_params)) {
508 ret = -1;
509 goto err;
510 }
511
512 for (i = 0; i < vm->vm_params.vmc_params.vcp_ncpus; i++) {
513 vrp.vrwp_vcpu_id = i;
514 if ((ret = ioctl(env->vmd_fd, VMM_IOC_READREGS, &vrp))) {
515 log_warn("%s: readregs failed", __func__);
516 goto err;
517 }
518
519 sz = atomicio(vwrite, fd, &vrp,
520 sizeof(struct vm_rwregs_params));
521 if (sz != sizeof(struct vm_rwregs_params)) {
522 log_warn("%s: dumping registers failed", __func__);
523 ret = -1;
524 goto err;
525 }
526 }
527
528 /* Dump memory before devices to aid in restoration. */
529 if ((ret = dump_mem(fd, vm)))
530 goto err;
531 if ((ret = dump_devs(fd)))
532 goto err;
533 if ((ret = pci_dump(fd)))
534 goto err;
535 if ((ret = virtio_dump(fd)))
536 goto err;
537
538 for (i = 0; i < vm->vm_params.vmc_params.vcp_ncpus; i++) {
539 vpp.vpp_vcpu_id = i;
540 if ((ret = ioctl(env->vmd_fd, VMM_IOC_READVMPARAMS, &vpp))) {
541 log_warn("%s: readvmparams failed", __func__);
542 goto err;
543 }
544
545 sz = atomicio(vwrite, fd, &vpp,
546 sizeof(struct vm_rwvmparams_params));
547 if (sz != sizeof(struct vm_rwvmparams_params)) {
548 log_warn("%s: dumping vm params failed", __func__);
549 ret = -1;
550 goto err;
551 }
552 }
553
554 vtp.vtp_vm_id = vm->vm_params.vmc_params.vcp_id;
555 if (ioctl(env->vmd_fd, VMM_IOC_TERM, &vtp) == -1) {
556 log_warnx("%s: term IOC error: %d, %d", __func__,
557 errno, ENOENT);
558 }
559 err:
560 close(fd);
561 if (ret)
562 unpause_vm(vm);
563 return ret;
564 }
565
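/*
 * dump_mem
 *
 * Writes every guest memory range of the vm to the given fd by dumping
 * each vm_mem_range in turn via dump_vmr().
 *
 * Return values:
 *  0: success
 *  !0: failure
 */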
566 int
567 dump_mem(int fd, struct vmd_vm *vm)
568 {
569 unsigned int i;
570 int ret;
571 struct vm_mem_range *vmr;
572
573 for (i = 0; i < vm->vm_params.vmc_params.vcp_nmemranges; i++) {
574 vmr = &vm->vm_params.vmc_params.vcp_memranges[i];
575 ret = dump_vmr(fd, vmr);
576 if (ret)
577 return ret;
578 }
579 return (0);
580 }
581
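/*
 * restore_vm_params
 *
 * Reads the saved per-vcpu vm parameters from the given fd and writes
 * them back into vmm(4) with the VMM_IOC_WRITEVMPARAMS ioctl.
 *
 * Return values:
 *  0: success
 *  -1: failure
 */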
582 int
583 restore_vm_params(int fd, struct vm_create_params *vcp) {
584 unsigned int i;
585 struct vm_rwvmparams_params vpp;
586
587 for (i = 0; i < vcp->vcp_ncpus; i++) {
588 if (atomicio(read, fd, &vpp, sizeof(vpp)) != sizeof(vpp)) {
589 log_warn("%s: error restoring vm params", __func__);
590 return (-1);
591 }
592 vpp.vpp_vm_id = vcp->vcp_id;
593 vpp.vpp_vcpu_id = i;
594 if (ioctl(env->vmd_fd, VMM_IOC_WRITEVMPARAMS, &vpp) < 0) {
595 log_debug("%s: writing vm params failed", __func__);
596 return (-1);
597 }
598 }
599 return (0);
600 }
601
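/*
 * restore_mem
 *
 * Restores the contents of every guest memory range from the given fd
 * via restore_vmr(). Errors are fatal and terminate the process.
 */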
602 void
603 restore_mem(int fd, struct vm_create_params *vcp)
604 {
605 unsigned int i;
606 struct vm_mem_range *vmr;
607
608 for (i = 0; i < vcp->vcp_nmemranges; i++) {
609 vmr = &vcp->vcp_memranges[i];
610 restore_vmr(fd, vmr);
611 }
612 }
613
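/*
 * dump_vmr
 *
 * Writes a single guest memory range to the given fd one PAGE_SIZE
 * chunk at a time, reading the guest memory with read_mem().
 *
 * Return values:
 *  0: success
 *  -1: failure
 */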
614 int
615 dump_vmr(int fd, struct vm_mem_range *vmr)
616 {
617 size_t rem = vmr->vmr_size, read = 0;
618 char buf[PAGE_SIZE];
619
620 while (rem > 0) {
621 if (read_mem(vmr->vmr_gpa + read, buf, PAGE_SIZE)) {
622 log_warn("failed to read vmr");
623 return (-1);
624 }
625 if (atomicio(vwrite, fd, buf, sizeof(buf)) != sizeof(buf)) {
626 log_warn("failed to dump vmr");
627 return (-1);
628 }
629 rem = rem - PAGE_SIZE;
630 read = read + PAGE_SIZE;
631 }
632 return (0);
633 }
634
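/*
 * restore_vmr
 *
 * Reads a single guest memory range from the given fd one PAGE_SIZE
 * chunk at a time and writes it into guest memory with write_mem().
 * Exits via fatal() on error.
 */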
635 void
636 restore_vmr(int fd, struct vm_mem_range *vmr)
637 {
638 size_t rem = vmr->vmr_size, wrote = 0;
639 char buf[PAGE_SIZE];
640
641 while (rem > 0) {
642 if (atomicio(read, fd, buf, sizeof(buf)) != sizeof(buf))
643 fatal("failed to restore vmr");
644 if (write_mem(vmr->vmr_gpa + wrote, buf, PAGE_SIZE))
645 fatal("failed to write vmr");
646 rem = rem - PAGE_SIZE;
647 wrote = wrote + PAGE_SIZE;
648 }
649 }
650
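/*
 * pause_vm
 *
 * Marks the vm as paused, wakes all vcpu run threads, and waits on the
 * pause barrier until every vcpu thread (plus this thread) has reached
 * it, then invokes the machine-dependent pause code.
 */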
651 static void
652 pause_vm(struct vmd_vm *vm)
653 {
654 unsigned int n;
655 int ret;
656
657 mutex_lock(&vm_mtx);
658 if (vm->vm_state & VM_STATE_PAUSED) {
659 mutex_unlock(&vm_mtx);
660 return;
661 }
662 current_vm->vm_state |= VM_STATE_PAUSED;
663 mutex_unlock(&vm_mtx);
664
665 ret = pthread_barrier_init(&vm_pause_barrier, NULL,
666 vm->vm_params.vmc_params.vcp_ncpus + 1);
667 if (ret) {
668 log_warnx("%s: cannot initialize pause barrier (%d)",
669 __progname, ret);
670 return;
671 }
672
673 for (n = 0; n < vm->vm_params.vmc_params.vcp_ncpus; n++) {
674 ret = pthread_cond_broadcast(&vcpu_run_cond[n]);
675 if (ret) {
676 log_warnx("%s: can't broadcast vcpu run cond (%d)",
677 __func__, (int)ret);
678 return;
679 }
680 }
681 ret = pthread_barrier_wait(&vm_pause_barrier);
682 if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) {
683 log_warnx("%s: could not wait on pause barrier (%d)",
684 __func__, (int)ret);
685 return;
686 }
687
688 ret = pthread_barrier_destroy(&vm_pause_barrier);
689 if (ret) {
690 log_warnx("%s: could not destroy pause barrier (%d)",
691 __progname, ret);
692 return;
693 }
694
695 pause_vm_md(vm);
696 }
697
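/*
 * unpause_vm
 *
 * Clears the paused state and wakes every vcpu thread waiting on its
 * unpause condition variable, then invokes the machine-dependent
 * unpause code.
 */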
698 static void
699 unpause_vm(struct vmd_vm *vm)
700 {
701 unsigned int n;
702 int ret;
703
704 mutex_lock(&vm_mtx);
705 if (!(vm->vm_state & VM_STATE_PAUSED)) {
706 mutex_unlock(&vm_mtx);
707 return;
708 }
709 current_vm->vm_state &= ~VM_STATE_PAUSED;
710 mutex_unlock(&vm_mtx);
711
712 for (n = 0; n < vm->vm_params.vmc_params.vcp_ncpus; n++) {
713 ret = pthread_cond_broadcast(&vcpu_unpause_cond[n]);
714 if (ret) {
715 log_warnx("%s: can't broadcast vcpu unpause cond (%d)",
716 __func__, (int)ret);
717 return;
718 }
719 }
720
721 unpause_vm_md(vm);
722 }
723
724 /*
725 * vcpu_reset
726 *
727 * Requests vmm(4) to reset the specified VCPU in the indicated VM to
728 * the register state provided
729 *
730 * Parameters
731 * vmid: VM ID to reset
732 * vcpu_id: VCPU ID to reset
733 * vrs: the register state to initialize
734 *
735 * Return values:
736 * 0: success
737 * !0 : ioctl to vmm(4) failed (eg, ENOENT if the supplied VM ID is not
738 * valid)
739 */
740 int
741 vcpu_reset(uint32_t vmid, uint32_t vcpu_id, struct vcpu_reg_state *vrs)
742 {
743 struct vm_resetcpu_params vrp;
744
745 memset(&vrp, 0, sizeof(vrp));
746 vrp.vrp_vm_id = vmid;
747 vrp.vrp_vcpu_id = vcpu_id;
748 memcpy(&vrp.vrp_init_state, vrs, sizeof(struct vcpu_reg_state));
749
750 log_debug("%s: resetting vcpu %d for vm %d", __func__, vcpu_id, vmid);
751
752 if (ioctl(env->vmd_fd, VMM_IOC_RESETCPU, &vrp) == -1)
753 return (errno);
754
755 return (0);
756 }
757
758 /*
759 * alloc_guest_mem
760 *
761 * Allocates memory for the guest.
762 * Instead of doing a single allocation with one mmap(), we allocate memory
763 * separately for every range for the following reasons:
764 * - ASLR for the individual ranges
765 * - to reduce memory consumption in the UVM subsystem: if vmm(4) had to
766 * map the single mmap'd userspace memory to the individual guest physical
767 * memory ranges, the underlying amap of the single mmap'd range would have
768 * to allocate per-page reference counters. The reason is that the
769 * individual guest physical ranges would reference the single mmap'd region
770 * only partially. However, if every guest physical range has its own
771 * corresponding mmap'd userspace allocation, there are no partial
772 * references: every guest physical range fully references an mmap'd
773 * range => no per-page reference counters have to be allocated.
774 *
775 * Return values:
776 * 0: success
777 * !0: failure - errno indicating the source of the failure
778 */
779 int
780 alloc_guest_mem(struct vmd_vm *vm)
781 {
782 void *p;
783 int ret = 0;
784 size_t i, j;
785 struct vm_create_params *vcp = &vm->vm_params.vmc_params;
786 struct vm_mem_range *vmr;
787
788 for (i = 0; i < vcp->vcp_nmemranges; i++) {
789 vmr = &vcp->vcp_memranges[i];
790
791 /*
792 * We only need R/W as userland. vmm(4) will use R/W/X in its
793 * mapping.
794 *
795 * We must use MAP_SHARED so emulated devices will be able
796 * to generate shared mappings.
797 */
798 p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
799 MAP_ANON | MAP_CONCEAL | MAP_SHARED, -1, 0);
800 if (p == MAP_FAILED) {
801 ret = errno;
802 for (j = 0; j < i; j++) {
803 vmr = &vcp->vcp_memranges[j];
804 munmap((void *)vmr->vmr_va, vmr->vmr_size);
805 }
806 return (ret);
807 }
808 vmr->vmr_va = (vaddr_t)p;
809 }
810
811 return (ret);
812 }
813
814 /*
815 * vmm_create_vm
816 *
817 * Requests vmm(4) to create a new VM using the supplied creation
818 * parameters. This operation results in the creation of the in-kernel
819 * structures for the VM, but does not start the VM's vcpu(s).
820 *
821 * Parameters:
822 * vm: pointer to the vm object
823 *
824 * Return values:
825 * 0: success
826 * !0 : ioctl to vmm(4) failed
827 */
828 static int
829 vmm_create_vm(struct vmd_vm *vm)
830 {
831 struct vm_create_params *vcp = &vm->vm_params.vmc_params;
832 size_t i;
833
834 /* Sanity check arguments */
835 if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
836 return (EINVAL);
837
838 if (vcp->vcp_nmemranges == 0 ||
839 vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
840 return (EINVAL);
841
842 if (vm->vm_params.vmc_ndisks > VM_MAX_DISKS_PER_VM)
843 return (EINVAL);
844
845 if (vm->vm_params.vmc_nnics > VM_MAX_NICS_PER_VM)
846 return (EINVAL);
847
848 if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) == -1)
849 return (errno);
850
851 for (i = 0; i < vcp->vcp_ncpus; i++)
852 vm->vm_sev_asid[i] = vcp->vcp_asid[i];
853
854 return (0);
855 }
856
857
858 /*
859 * run_vm
860 *
861 * Runs the VM whose creation parameters are specified in vcp
862 *
863 * Parameters:
864 * vmc: vmop_create_params struct containing the VM's desired creation
865 * configuration
866 * vrs: VCPU register state to initialize
870 *
871 * Return values:
872 * 0: the VM exited normally
873 * !0 : the VM exited abnormally or failed to start
874 */
875 static int
876 run_vm(struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
877 {
878 struct vm_create_params *vcp = &vmc->vmc_params;
879 struct vm_rwregs_params vregsp;
880 uint8_t evdone = 0;
881 size_t i;
882 int ret;
883 pthread_t *tid, evtid;
884 char tname[MAXCOMLEN + 1];
885 struct vm_run_params **vrp;
886 void *exit_status;
887
888 if (vcp == NULL)
889 return (EINVAL);
890
891 if (vcp->vcp_nmemranges == 0 ||
892 vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
893 return (EINVAL);
894
895 tid = calloc(vcp->vcp_ncpus, sizeof(pthread_t));
896 vrp = calloc(vcp->vcp_ncpus, sizeof(struct vm_run_params *));
897 if (tid == NULL || vrp == NULL) {
898 log_warn("%s: memory allocation error - exiting.",
899 __progname);
900 return (ENOMEM);
901 }
902
903 log_debug("%s: starting %zu vcpu thread(s) for vm %s", __func__,
904 vcp->vcp_ncpus, vcp->vcp_name);
905
906 /*
907 * Create and launch one thread for each VCPU. These threads may
908 * migrate between PCPUs over time; the need to reload CPU state
909 * in such situations is detected and performed by vmm(4) in the
910 * kernel.
911 */
912 for (i = 0 ; i < vcp->vcp_ncpus; i++) {
913 vrp[i] = malloc(sizeof(struct vm_run_params));
914 if (vrp[i] == NULL) {
915 log_warn("%s: memory allocation error - "
916 "exiting.", __progname);
917 /* caller will exit, so skip freeing */
918 return (ENOMEM);
919 }
920 vrp[i]->vrp_exit = malloc(sizeof(struct vm_exit));
921 if (vrp[i]->vrp_exit == NULL) {
922 log_warn("%s: memory allocation error - "
923 "exiting.", __progname);
924 /* caller will exit, so skip freeing */
925 return (ENOMEM);
926 }
927 vrp[i]->vrp_vm_id = vcp->vcp_id;
928 vrp[i]->vrp_vcpu_id = i;
929
930 if (vcpu_reset(vcp->vcp_id, i, vrs)) {
931 log_warnx("%s: cannot reset VCPU %zu - exiting.",
932 __progname, i);
933 return (EIO);
934 }
935
936 if (sev_activate(current_vm, i)) {
937 log_warnx("%s: SEV activatation failed for VCPU "
938 "%zu failed - exiting.", __progname, i);
939 return (EIO);
940 }
941
942 if (sev_encrypt_memory(current_vm)) {
943 log_warnx("%s: memory encryption failed for VCPU "
944 "%zu failed - exiting.", __progname, i);
945 return (EIO);
946 }
947
948 /* once more because vcpu_reset changes regs */
949 if (current_vm->vm_state & VM_STATE_RECEIVED) {
950 vregsp.vrwp_vm_id = vcp->vcp_id;
951 vregsp.vrwp_vcpu_id = i;
952 vregsp.vrwp_regs = *vrs;
953 vregsp.vrwp_mask = VM_RWREGS_ALL;
954 if ((ret = ioctl(env->vmd_fd, VMM_IOC_WRITEREGS,
955 &vregsp)) == -1) {
956 log_warn("%s: writeregs failed", __func__);
957 return (ret);
958 }
959 }
960
961 ret = pthread_cond_init(&vcpu_run_cond[i], NULL);
962 if (ret) {
963 log_warnx("%s: cannot initialize cond var (%d)",
964 __progname, ret);
965 return (ret);
966 }
967
968 ret = pthread_mutex_init(&vcpu_run_mtx[i], NULL);
969 if (ret) {
970 log_warnx("%s: cannot initialize mtx (%d)",
971 __progname, ret);
972 return (ret);
973 }
974
975 ret = pthread_cond_init(&vcpu_unpause_cond[i], NULL);
976 if (ret) {
977 log_warnx("%s: cannot initialize unpause var (%d)",
978 __progname, ret);
979 return (ret);
980 }
981
982 ret = pthread_mutex_init(&vcpu_unpause_mtx[i], NULL);
983 if (ret) {
984 log_warnx("%s: cannot initialize unpause mtx (%d)",
985 __progname, ret);
986 return (ret);
987 }
988
989 vcpu_hlt[i] = 0;
990
991 /* Start each VCPU run thread at vcpu_run_loop */
992 ret = pthread_create(&tid[i], NULL, vcpu_run_loop, vrp[i]);
993 if (ret) {
994 /* caller will _exit after this return */
995 ret = errno;
996 log_warn("%s: could not create vcpu thread %zu",
997 __func__, i);
998 return (ret);
999 }
1000
1001 snprintf(tname, sizeof(tname), "vcpu-%zu", i);
1002 pthread_set_name_np(tid[i], tname);
1003 }
1004
1005 log_debug("%s: waiting on events for VM %s", __func__, vcp->vcp_name);
1006 ret = pthread_create(&evtid, NULL, event_thread, &evdone);
1007 if (ret) {
1008 errno = ret;
1009 log_warn("%s: could not create event thread", __func__);
1010 return (ret);
1011 }
1012 pthread_set_name_np(evtid, "event");
1013
1014 for (;;) {
1015 ret = pthread_cond_wait(&threadcond, &threadmutex);
1016 if (ret) {
1017 log_warn("%s: waiting on thread state condition "
1018 "variable failed", __func__);
1019 return (ret);
1020 }
1021
1022 /*
1023 * Did a VCPU thread exit with an error? => return the first one
1024 */
1025 mutex_lock(&vm_mtx);
1026 for (i = 0; i < vcp->vcp_ncpus; i++) {
1027 if (vcpu_done[i] == 0)
1028 continue;
1029
1030 if (pthread_join(tid[i], &exit_status)) {
1031 log_warn("%s: failed to join thread %zd - "
1032 "exiting", __progname, i);
1033 mutex_unlock(&vm_mtx);
1034 return (EIO);
1035 }
1036
1037 ret = (intptr_t)exit_status;
1038 }
1039 mutex_unlock(&vm_mtx);
1040
1041 /* Did the event thread exit? => return with an error */
1042 if (evdone) {
1043 if (pthread_join(evtid, &exit_status)) {
1044 log_warn("%s: failed to join event thread - "
1045 "exiting", __progname);
1046 return (EIO);
1047 }
1048
1049 log_warnx("%s: vm %d event thread exited "
1050 "unexpectedly", __progname, vcp->vcp_id);
1051 return (EIO);
1052 }
1053
1054 /* Did all VCPU threads exit successfully? => return */
1055 mutex_lock(&vm_mtx);
1056 for (i = 0; i < vcp->vcp_ncpus; i++) {
1057 if (vcpu_done[i] == 0)
1058 break;
1059 }
1060 mutex_unlock(&vm_mtx);
1061 if (i == vcp->vcp_ncpus)
1062 return (ret);
1063
1064 /* Some more threads to wait for, start over */
1065 }
1066
1067 return (ret);
1068 }
1069
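/*
 * event_thread
 *
 * Body of the event thread: runs the libevent dispatch loop, then sets
 * the done flag provided by run_vm and signals threadcond so run_vm can
 * notice the exit. The event_dispatch() result becomes the thread's
 * exit status.
 */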
1070 static void *
1071 event_thread(void *arg)
1072 {
1073 uint8_t *donep = arg;
1074 intptr_t ret;
1075
1076 ret = event_dispatch();
1077
1078 *donep = 1;
1079
1080 mutex_lock(&threadmutex);
1081 pthread_cond_signal(&threadcond);
1082 mutex_unlock(&threadmutex);
1083
1084 return (void *)ret;
1085 }
1086
1087 /*
1088 * vcpu_run_loop
1089 *
1090 * Runs a single VCPU until vmm(4) requires help handling an exit,
1091 * or the VM terminates.
1092 *
1093 * Parameters:
1094 * arg: vcpu_run_params for the VCPU being run by this thread
1095 *
1096 * Return values:
1097 * NULL: the VCPU shutdown properly
1098 * !NULL: error processing VCPU run, or the VCPU shutdown abnormally
1099 */
1100 static void *
1101 vcpu_run_loop(void *arg)
1102 {
1103 struct vm_run_params *vrp = (struct vm_run_params *)arg;
1104 intptr_t ret = 0;
1105 uint32_t n = vrp->vrp_vcpu_id;
1106 int paused = 0, halted = 0;
1107
1108 for (;;) {
1109 ret = pthread_mutex_lock(&vcpu_run_mtx[n]);
1110
1111 if (ret) {
1112 log_warnx("%s: can't lock vcpu run mtx (%d)",
1113 __func__, (int)ret);
1114 return ((void *)ret);
1115 }
1116
1117 mutex_lock(&vm_mtx);
1118 paused = (current_vm->vm_state & VM_STATE_PAUSED) != 0;
1119 halted = vcpu_hlt[n];
1120 mutex_unlock(&vm_mtx);
1121
1122 /* If we are halted and need to pause, pause */
1123 if (halted && paused) {
1124 ret = pthread_barrier_wait(&vm_pause_barrier);
1125 if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) {
1126 log_warnx("%s: could not wait on pause barrier (%d)",
1127 __func__, (int)ret);
1128 return ((void *)ret);
1129 }
1130
1131 ret = pthread_mutex_lock(&vcpu_unpause_mtx[n]);
1132 if (ret) {
1133 log_warnx("%s: can't lock vcpu unpause mtx (%d)",
1134 __func__, (int)ret);
1135 return ((void *)ret);
1136 }
1137
1138 /* Interrupt may be firing, release run mtx. */
1139 mutex_unlock(&vcpu_run_mtx[n]);
1140 ret = pthread_cond_wait(&vcpu_unpause_cond[n],
1141 &vcpu_unpause_mtx[n]);
1142 if (ret) {
1143 log_warnx(
1144 "%s: can't wait on unpause cond (%d)",
1145 __func__, (int)ret);
1146 break;
1147 }
1148 mutex_lock(&vcpu_run_mtx[n]);
1149
1150 ret = pthread_mutex_unlock(&vcpu_unpause_mtx[n]);
1151 if (ret) {
1152 log_warnx("%s: can't unlock unpause mtx (%d)",
1153 __func__, (int)ret);
1154 break;
1155 }
1156 }
1157
1158 /* If we are halted and not paused, wait */
1159 if (halted) {
1160 ret = pthread_cond_wait(&vcpu_run_cond[n],
1161 &vcpu_run_mtx[n]);
1162
1163 if (ret) {
1164 log_warnx(
1165 "%s: can't wait on cond (%d)",
1166 __func__, (int)ret);
1167 (void)pthread_mutex_unlock(
1168 &vcpu_run_mtx[n]);
1169 break;
1170 }
1171 }
1172
1173 ret = pthread_mutex_unlock(&vcpu_run_mtx[n]);
1174
1175 if (ret) {
1176 log_warnx("%s: can't unlock mutex on cond (%d)",
1177 __func__, (int)ret);
1178 break;
1179 }
1180
1181 if (vrp->vrp_irqready && intr_pending(current_vm)) {
1182 vrp->vrp_inject.vie_vector = intr_ack(current_vm);
1183 vrp->vrp_inject.vie_type = VCPU_INJECT_INTR;
1184 } else
1185 vrp->vrp_inject.vie_type = VCPU_INJECT_NONE;
1186
1187 /* Still more interrupts pending? */
1188 vrp->vrp_intr_pending = intr_pending(current_vm);
1189
1190 if (ioctl(env->vmd_fd, VMM_IOC_RUN, vrp) == -1) {
1191 /* If run ioctl failed, exit */
1192 ret = errno;
1193 log_warn("%s: vm %d / vcpu %d run ioctl failed",
1194 __func__, current_vm->vm_vmid, n);
1195 break;
1196 }
1197
1198 /* If the VM is terminating, exit normally */
1199 if (vrp->vrp_exit_reason == VM_EXIT_TERMINATED) {
1200 ret = (intptr_t)NULL;
1201 break;
1202 }
1203
1204 if (vrp->vrp_exit_reason != VM_EXIT_NONE) {
1205 /*
1206 * vmm(4) needs help handling an exit, handle in
1207 * vcpu_exit.
1208 */
1209 ret = vcpu_exit(vrp);
1210 if (ret)
1211 break;
1212 }
1213 }
1214
1215 mutex_lock(&vm_mtx);
1216 vcpu_done[n] = 1;
1217 mutex_unlock(&vm_mtx);
1218
1219 mutex_lock(&threadmutex);
1220 pthread_cond_signal(&threadcond);
1221 mutex_unlock(&threadmutex);
1222
1223 return ((void *)ret);
1224 }
1225
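/*
 * vcpu_intr
 *
 * Passes the given interrupt line state for the given vcpu to vmm(4)
 * via the VMM_IOC_INTR ioctl.
 *
 * Return values:
 *  0: success
 *  !0: errno from the failed ioctl
 */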
1226 int
1227 vcpu_intr(uint32_t vm_id, uint32_t vcpu_id, uint8_t intr)
1228 {
1229 struct vm_intr_params vip;
1230
1231 memset(&vip, 0, sizeof(vip));
1232
1233 vip.vip_vm_id = vm_id;
1234 vip.vip_vcpu_id = vcpu_id; /* XXX always 0? */
1235 vip.vip_intr = intr;
1236
1237 if (ioctl(env->vmd_fd, VMM_IOC_INTR, &vip) == -1)
1238 return (errno);
1239
1240 return (0);
1241 }
1242
1243 /*
1244 * fd_hasdata
1245 *
1246 * Determines if data can be read from a file descriptor.
1247 *
1248 * Parameters:
1249 * fd: the fd to check
1250 *
1251 * Return values:
1252 * 1 if data can be read from an fd, or 0 otherwise.
1253 */
1254 int
1255 fd_hasdata(int fd)
1256 {
1257 struct pollfd pfd[1];
1258 int nready, hasdata = 0;
1259
1260 pfd[0].fd = fd;
1261 pfd[0].events = POLLIN;
1262 nready = poll(pfd, 1, 0);
1263 if (nready == -1)
1264 log_warn("checking file descriptor for data failed");
1265 else if (nready == 1 && pfd[0].revents & POLLIN)
1266 hasdata = 1;
1267 return (hasdata);
1268 }
1269
1270 /*
1271 * mutex_lock
1272 *
1273 * Wrapper function for pthread_mutex_lock that does error checking and that
1274 * exits on failure
1275 */
1276 void
1277 mutex_lock(pthread_mutex_t *m)
1278 {
1279 int ret;
1280
1281 ret = pthread_mutex_lock(m);
1282 if (ret) {
1283 errno = ret;
1284 fatal("could not acquire mutex");
1285 }
1286 }
1287
1288 /*
1289 * mutex_unlock
1290 *
1291 * Wrapper function for pthread_mutex_unlock that does error checking and that
1292 * exits on failure
1293 */
1294 void
1295 mutex_unlock(pthread_mutex_t *m)
1296 {
1297 int ret;
1298
1299 ret = pthread_mutex_unlock(m);
1300 if (ret) {
1301 errno = ret;
1302 fatal("could not release mutex");
1303 }
1304 }
1305
1306
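/*
 * vm_pipe_init
 *
 * Initialize a vm_dev_pipe with the given callback and a NULL callback
 * argument. Thin wrapper around vm_pipe_init2().
 */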
1307 void
1308 vm_pipe_init(struct vm_dev_pipe *p, void (*cb)(int, short, void *))
1309 {
1310 vm_pipe_init2(p, cb, NULL);
1311 }
1312
1313 /*
1314 * vm_pipe_init2
1315 *
1316 * Initialize a vm_dev_pipe, setting up its file descriptors and its
1317 * event structure with the given callback and argument.
1318 *
1319 * Parameters:
1320 * p: pointer to vm_dev_pipe struct to initialize
1321 * cb: callback to use for READ events on the read end of the pipe
1322 * arg: pointer to pass to the callback on event trigger
1323 */
1324 void
1325 vm_pipe_init2(struct vm_dev_pipe *p, void (*cb)(int, short, void *), void *arg)
1326 {
1327 int ret;
1328 int fds[2];
1329
1330 memset(p, 0, sizeof(struct vm_dev_pipe));
1331
1332 ret = pipe2(fds, O_CLOEXEC);
1333 if (ret)
1334 fatal("failed to create vm_dev_pipe pipe");
1335
1336 p->read = fds[0];
1337 p->write = fds[1];
1338
1339 event_set(&p->read_ev, p->read, EV_READ | EV_PERSIST, cb, arg);
1340 }
1341
1342 /*
1343 * vm_pipe_send
1344 *
1345 * Send a message to an emulated device via the provided vm_dev_pipe. This
1346 * relies on the fact that sizeof(msg) < PIPE_BUF to ensure atomic writes.
1347 *
1348 * Parameters:
1349 * p: pointer to initialized vm_dev_pipe
1350 * msg: message to send in the channel
1351 */
1352 void
1353 vm_pipe_send(struct vm_dev_pipe *p, enum pipe_msg_type msg)
1354 {
1355 size_t n;
1356 n = write(p->write, &msg, sizeof(msg));
1357 if (n != sizeof(msg))
1358 fatal("failed to write to device pipe");
1359 }
1360
1361 /*
1362 * vm_pipe_recv
1363 *
1364 * Receive a message for an emulated device via the provided vm_dev_pipe.
1365 * Returns the message value, otherwise will exit on failure. This relies on
1366 * the fact that sizeof(enum pipe_msg_type) < PIPE_BUF for atomic reads.
1367 *
1368 * Parameters:
1369 * p: pointer to initialized vm_dev_pipe
1370 *
1371 * Return values:
1372 * a value of enum pipe_msg_type or fatal exit on read(2) error
1373 */
1374 enum pipe_msg_type
1375 vm_pipe_recv(struct vm_dev_pipe *p)
1376 {
1377 size_t n;
1378 enum pipe_msg_type msg;
1379 n = read(p->read, &msg, sizeof(msg));
1380 if (n != sizeof(msg))
1381 fatal("failed to read from device pipe");
1382
1383 return msg;
1384 }
1385
1386 /*
1387 * Re-map the guest address space using vmm(4)'s VMM_IOC_SHARE
1388 *
1389 * Returns 0 on success, non-zero in event of failure.
1390 */
1391 int
1392 remap_guest_mem(struct vmd_vm *vm, int vmm_fd)
1393 {
1394 struct vm_create_params *vcp;
1395 struct vm_mem_range *vmr;
1396 struct vm_sharemem_params vsp;
1397 size_t i, j;
1398 void *p = NULL;
1399 int ret;
1400
1401 if (vm == NULL)
1402 return (1);
1403
1404 vcp = &vm->vm_params.vmc_params;
1405
1406 /*
1407 * Initialize our VM shared memory request using our original
1408 * creation parameters. We'll overwrite the va's after mmap(2).
1409 */
1410 memset(&vsp, 0, sizeof(vsp));
1411 vsp.vsp_nmemranges = vcp->vcp_nmemranges;
1412 vsp.vsp_vm_id = vcp->vcp_id;
1413 memcpy(&vsp.vsp_memranges, &vcp->vcp_memranges,
1414 sizeof(vsp.vsp_memranges));
1415
1416 /*
1417 * Use mmap(2) to identify virtual address space for our mappings.
1418 */
1419 for (i = 0; i < VMM_MAX_MEM_RANGES; i++) {
1420 if (i < vsp.vsp_nmemranges) {
1421 vmr = &vsp.vsp_memranges[i];
1422
1423 /* Ignore any MMIO ranges. */
1424 if (vmr->vmr_type == VM_MEM_MMIO) {
1425 vmr->vmr_va = 0;
1426 vcp->vcp_memranges[i].vmr_va = 0;
1427 continue;
1428 }
1429
1430 /* Make initial mappings for the memrange. */
1431 p = mmap(NULL, vmr->vmr_size, PROT_READ, MAP_ANON, -1,
1432 0);
1433 if (p == MAP_FAILED) {
1434 ret = errno;
1435 log_warn("%s: mmap", __func__);
1436 for (j = 0; j < i; j++) {
1437 vmr = &vcp->vcp_memranges[j];
1438 munmap((void *)vmr->vmr_va,
1439 vmr->vmr_size);
1440 }
1441 return (ret);
1442 }
1443 vmr->vmr_va = (vaddr_t)p;
1444 vcp->vcp_memranges[i].vmr_va = vmr->vmr_va;
1445 }
1446 }
1447
1448 /*
1449 * munmap(2) now that we have va's and ranges that don't overlap. vmm
1450 * will use the va's and sizes to recreate the mappings for us.
1451 */
1452 for (i = 0; i < vsp.vsp_nmemranges; i++) {
1453 vmr = &vsp.vsp_memranges[i];
1454 if (vmr->vmr_type == VM_MEM_MMIO)
1455 continue;
1456 if (munmap((void*)vmr->vmr_va, vmr->vmr_size) == -1)
1457 fatal("%s: munmap", __func__);
1458 }
1459
1460 /*
1461 * Ask vmm to enter the shared mappings for us. They'll point
1462 * to the same host physical memory, but will have a randomized
1463 * virtual address for the calling process.
1464 */
1465 if (ioctl(vmm_fd, VMM_IOC_SHAREMEM, &vsp) == -1)
1466 return (errno);
1467
1468 return (0);
1469 }
1470
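/*
 * vcpu_halt
 *
 * Marks the given vcpu as halted under the vm state mutex; its run
 * thread will then wait on the vcpu run condition variable in
 * vcpu_run_loop().
 */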
1471 void
1472 vcpu_halt(uint32_t vcpu_id)
1473 {
1474 mutex_lock(&vm_mtx);
1475 vcpu_hlt[vcpu_id] = 1;
1476 mutex_unlock(&vm_mtx);
1477 }
1478
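/*
 * vcpu_unhalt
 *
 * Clears the halted flag for the given vcpu under the vm state mutex.
 */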
1479 void
1480 vcpu_unhalt(uint32_t vcpu_id)
1481 {
1482 mutex_lock(&vm_mtx);
1483 vcpu_hlt[vcpu_id] = 0;
1484 mutex_unlock(&vm_mtx);
1485 }
1486
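/*
 * vcpu_signal_run
 *
 * Signals the run condition variable of the given vcpu while holding
 * its run mutex, waking the vcpu's run thread.
 */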
1487 void
1488 vcpu_signal_run(uint32_t vcpu_id)
1489 {
1490 int ret;
1491
1492 mutex_lock(&vcpu_run_mtx[vcpu_id]);
1493 ret = pthread_cond_signal(&vcpu_run_cond[vcpu_id]);
1494 if (ret)
1495 fatalx("%s: can't signal (%d)", __func__, ret);
1496 mutex_unlock(&vcpu_run_mtx[vcpu_id]);
1497 }
1498