xref: /openbsd/usr.sbin/vmd/virtio.c (revision 7ccb23dd)
1 /*	$OpenBSD: virtio.c,v 1.123 2025/01/08 15:46:10 dv Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>	/* PAGE_SIZE */
20 #include <sys/socket.h>
21 #include <sys/wait.h>
22 
23 #include <dev/pci/pcireg.h>
24 #include <dev/pci/pcidevs.h>
25 #include <dev/pv/virtioreg.h>
26 #include <dev/pci/virtio_pcireg.h>
27 #include <dev/pv/vioblkreg.h>
28 #include <dev/vmm/vmm.h>
29 
30 #include <net/if.h>
31 #include <netinet/in.h>
32 #include <netinet/if_ether.h>
33 
34 #include <errno.h>
35 #include <event.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 
40 #include "atomicio.h"
41 #include "pci.h"
42 #include "vioscsi.h"
43 #include "virtio.h"
44 #include "vmd.h"
45 
46 extern struct vmd *env;
47 extern char *__progname;
48 
49 struct viornd_dev viornd;
50 struct vioscsi_dev *vioscsi;
51 struct vmmci_dev vmmci;
52 
53 /* Devices emulated in subprocesses are inserted into this list. */
54 SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs;
55 
56 #define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */
57 
58 #define VIRTIO_NET_F_MAC	(1<<5)
59 
60 #define VMMCI_F_TIMESYNC	(1<<0)
61 #define VMMCI_F_ACK		(1<<1)
62 #define VMMCI_F_SYNCRTC		(1<<2)
63 
64 #define RXQ	0
65 #define TXQ	1
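/*
 * Virtqueue indices for the vionet device: vq[RXQ] holds buffers the
 * host fills with packets for the guest, vq[TXQ] holds packets the
 * guest is transmitting.
 */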
66 
67 static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *);
68 static void virtio_dispatch_dev(int, short, void *);
69 static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *);
70 static int virtio_dev_closefds(struct virtio_dev *);
71 static void vmmci_pipe_dispatch(int, short, void *);
72 
73 const char *
74 virtio_reg_name(uint8_t reg)
75 {
76 	switch (reg) {
77 	case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
78 	case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
79 	case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
80 	case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
81 	case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
82 	case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
83 	case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
84 	case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
85 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI...VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
86 		return "device config 0";
87 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
88 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
89 		return "device config 1";
90 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
91 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
92 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
93 	default: return "unknown";
94 	}
95 }
96 
97 uint32_t
98 vring_size(uint32_t vq_size)
99 {
100 	uint32_t allocsize1, allocsize2;
101 
102 	/* allocsize1: descriptor table + avail ring + pad */
103 	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
104 	    + sizeof(uint16_t) * (2 + vq_size));
105 	/* allocsize2: used ring + pad */
106 	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
107 	    + sizeof(struct vring_used_elem) * vq_size);
108 
109 	return allocsize1 + allocsize2;
110 }
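/*
 * Rough worked example (not part of the original source), assuming the
 * legacy 16-byte struct vring_desc, the 8-byte struct vring_used_elem
 * and a VIRTQUEUE_ALIGN that rounds up to the 4096-byte virtio page:
 * for a 128-entry queue, the descriptor table plus avail ring need
 * 128 * 16 + 2 * (2 + 128) = 2308 bytes (padded to 4096) and the used
 * ring needs 2 * 2 + 8 * 128 = 1028 bytes (padded to 4096), so
 * vring_size(128) == 8192.
 */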
111 
112 /* Update queue select */
113 void
114 viornd_update_qs(void)
115 {
116 	struct virtio_vq_info *vq_info;
117 
118 	/* Invalid queue? */
119 	if (viornd.cfg.queue_select > 0) {
120 		viornd.cfg.queue_size = 0;
121 		return;
122 	}
123 
124 	vq_info = &viornd.vq[viornd.cfg.queue_select];
125 
126 	/* Update queue pfn/size based on queue select */
127 	viornd.cfg.queue_pfn = vq_info->q_gpa >> 12;
128 	viornd.cfg.queue_size = vq_info->qs;
129 }
130 
131 /* Update queue address */
132 void
133 viornd_update_qa(void)
134 {
135 	struct virtio_vq_info *vq_info;
136 	void *hva = NULL;
137 
138 	/* Invalid queue? */
139 	if (viornd.cfg.queue_select > 0)
140 		return;
141 
142 	vq_info = &viornd.vq[viornd.cfg.queue_select];
143 	vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE;
144 
145 	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE));
146 	if (hva == NULL)
147 		fatalx("viornd_update_qa");
148 	vq_info->q_hva = hva;
149 }
150 
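/*
 * Process a queue notification from the guest: take the descriptor
 * published on the avail ring, fill it with arc4random_buf(3) data,
 * post it on the used ring and return 1 if the caller should raise
 * an interrupt.
 */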
151 int
152 viornd_notifyq(void)
153 {
154 	size_t sz;
155 	int dxx, ret;
156 	uint16_t aidx, uidx;
157 	char *vr, *rnd_data;
158 	struct vring_desc *desc;
159 	struct vring_avail *avail;
160 	struct vring_used *used;
161 	struct virtio_vq_info *vq_info;
162 
163 	ret = 0;
164 
165 	/* Invalid queue? */
166 	if (viornd.cfg.queue_notify > 0)
167 		return (0);
168 
169 	vq_info = &viornd.vq[viornd.cfg.queue_notify];
170 	vr = vq_info->q_hva;
171 	if (vr == NULL)
172 		fatalx("%s: null vring", __func__);
173 
174 	desc = (struct vring_desc *)(vr);
175 	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
176 	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
177 
178 	aidx = avail->idx & VIORND_QUEUE_MASK;
179 	uidx = used->idx & VIORND_QUEUE_MASK;
180 
181 	dxx = avail->ring[aidx] & VIORND_QUEUE_MASK;
182 
183 	sz = desc[dxx].len;
184 	if (sz > MAXPHYS)
185 		fatalx("viornd descriptor size too large (%zu)", sz);
186 
187 	rnd_data = malloc(sz);
188 
189 	if (rnd_data != NULL) {
190 		arc4random_buf(rnd_data, sz);
191 		if (write_mem(desc[dxx].addr, rnd_data, sz)) {
192 			log_warnx("viornd: can't write random data @ "
193 			    "0x%llx",
194 			    desc[dxx].addr);
195 		} else {
196 			/* ret == 1 -> interrupt needed */
197 			/* XXX check VIRTIO_F_NO_INTR */
198 			ret = 1;
199 			viornd.cfg.isr_status = 1;
200 			used->ring[uidx].id = dxx;
201 			used->ring[uidx].len = sz;
202 			__sync_synchronize();
203 			used->idx++;
204 		}
205 		free(rnd_data);
206 	} else
207 		fatal("memory allocation error for viornd data");
208 
209 	return (ret);
210 }
211 
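/*
 * PCI BAR i/o handler for the entropy device: dir == 0 is a register
 * write from the guest, anything else is a register read.
 */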
212 int
213 virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
214     void *unused, uint8_t sz)
215 {
216 	*intr = 0xFF;
217 
218 	if (dir == 0) {
219 		switch (reg) {
220 		case VIRTIO_CONFIG_DEVICE_FEATURES:
221 		case VIRTIO_CONFIG_QUEUE_SIZE:
222 		case VIRTIO_CONFIG_ISR_STATUS:
223 			log_warnx("%s: illegal write %x to %s",
224 			    __progname, *data, virtio_reg_name(reg));
225 			break;
226 		case VIRTIO_CONFIG_GUEST_FEATURES:
227 			viornd.cfg.guest_feature = *data;
228 			break;
229 		case VIRTIO_CONFIG_QUEUE_PFN:
230 			viornd.cfg.queue_pfn = *data;
231 			viornd_update_qa();
232 			break;
233 		case VIRTIO_CONFIG_QUEUE_SELECT:
234 			viornd.cfg.queue_select = *data;
235 			viornd_update_qs();
236 			break;
237 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
238 			viornd.cfg.queue_notify = *data;
239 			if (viornd_notifyq())
240 				*intr = 1;
241 			break;
242 		case VIRTIO_CONFIG_DEVICE_STATUS:
243 			viornd.cfg.device_status = *data;
244 			break;
245 		}
246 	} else {
247 		switch (reg) {
248 		case VIRTIO_CONFIG_DEVICE_FEATURES:
249 			*data = viornd.cfg.device_feature;
250 			break;
251 		case VIRTIO_CONFIG_GUEST_FEATURES:
252 			*data = viornd.cfg.guest_feature;
253 			break;
254 		case VIRTIO_CONFIG_QUEUE_PFN:
255 			*data = viornd.cfg.queue_pfn;
256 			break;
257 		case VIRTIO_CONFIG_QUEUE_SIZE:
258 			*data = viornd.cfg.queue_size;
259 			break;
260 		case VIRTIO_CONFIG_QUEUE_SELECT:
261 			*data = viornd.cfg.queue_select;
262 			break;
263 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
264 			*data = viornd.cfg.queue_notify;
265 			break;
266 		case VIRTIO_CONFIG_DEVICE_STATUS:
267 			*data = viornd.cfg.device_status;
268 			break;
269 		case VIRTIO_CONFIG_ISR_STATUS:
270 			*data = viornd.cfg.isr_status;
271 			viornd.cfg.isr_status = 0;
272 			vcpu_deassert_irq(viornd.vm_id, 0, viornd.irq);
273 			break;
274 		}
275 	}
276 	return (0);
277 }
278 
279 /*
280  * vmmci_ctl
281  *
282  * Inject a command into the vmmci device, potentially delivering an interrupt.
283  *
284  * Called by the vm process's event(3) loop.
285  */
286 int
287 vmmci_ctl(unsigned int cmd)
288 {
289 	int ret = 0;
290 	struct timeval tv = { 0, 0 };
291 
292 	mutex_lock(&vmmci.mutex);
293 
294 	if ((vmmci.cfg.device_status &
295 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) {
296 		ret = -1;
297 		goto unlock;
298 	}
299 
300 	if (cmd == vmmci.cmd)
301 		goto unlock;
302 
303 	switch (cmd) {
304 	case VMMCI_NONE:
305 		break;
306 	case VMMCI_SHUTDOWN:
307 	case VMMCI_REBOOT:
308 		/* Update command */
309 		vmmci.cmd = cmd;
310 
311 		/*
312 		 * vmm(4) VMs do not support powerdown; send a reboot request
313 		 * instead and power the VM off after the triple fault.
314 		 */
315 		if (cmd == VMMCI_SHUTDOWN)
316 			cmd = VMMCI_REBOOT;
317 
318 		/* Trigger interrupt */
319 		vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
320 		vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq);
321 
322 		/* Add ACK timeout */
323 		tv.tv_sec = VMMCI_TIMEOUT_SHORT;
324 		evtimer_add(&vmmci.timeout, &tv);
325 		break;
326 	case VMMCI_SYNCRTC:
327 		if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
328 			/* RTC updated, request guest VM resync of its RTC */
329 			vmmci.cmd = cmd;
330 
331 			vmmci.cfg.isr_status = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
332 			vcpu_assert_irq(vmmci.vm_id, 0, vmmci.irq);
333 		} else {
334 			log_debug("%s: RTC sync skipped (guest does not "
335 			    "support RTC sync)\n", __func__);
336 		}
337 		break;
338 	default:
339 		fatalx("invalid vmmci command: %d", cmd);
340 	}
341 
342 unlock:
343 	mutex_unlock(&vmmci.mutex);
344 
345 	return (ret);
346 }
347 
348 /*
349  * vmmci_ack
350  *
351  * Process a write to the command register.
352  *
353  * Called by the vcpu thread. Must be called with the mutex held.
354  */
355 void
356 vmmci_ack(unsigned int cmd)
357 {
358 	switch (cmd) {
359 	case VMMCI_NONE:
360 		break;
361 	case VMMCI_SHUTDOWN:
362 		/*
363 		 * If we don't have a pending shutdown request, the shutdown
364 		 * was requested by the VM itself.  In this case add a short
365 		 * timeout to give the VM a chance to reboot before the
366 		 * timer expires.
367 		 */
368 		if (vmmci.cmd == 0) {
369 			log_debug("%s: vm %u requested shutdown", __func__,
370 			    vmmci.vm_id);
371 			vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_SHORT);
372 			return;
373 		}
374 		/* FALLTHROUGH */
375 	case VMMCI_REBOOT:
376 		/*
377 		 * If the VM acknowledged our shutdown request, give it
378 		 * enough time to shutdown or reboot gracefully.  This
379 		 * might take a considerable amount of time (running
380 		 * rc.shutdown on the VM), so increase the timeout before
381 		 * killing it forcefully.
382 		 */
383 		if (cmd == vmmci.cmd) {
384 			log_debug("%s: vm %u acknowledged shutdown request",
385 			    __func__, vmmci.vm_id);
386 			vm_pipe_send(&vmmci.dev_pipe, VMMCI_SET_TIMEOUT_LONG);
387 		}
388 		break;
389 	case VMMCI_SYNCRTC:
390 		log_debug("%s: vm %u acknowledged RTC sync request",
391 		    __func__, vmmci.vm_id);
392 		vmmci.cmd = VMMCI_NONE;
393 		break;
394 	default:
395 		log_warnx("%s: illegal request %u", __func__, cmd);
396 		break;
397 	}
398 }
399 
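/*
 * ACK timeout handler: the guest either never acknowledged the shutdown
 * request or did not finish shutting down in time, so force it down.
 */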
400 void
401 vmmci_timeout(int fd, short type, void *arg)
402 {
403 	log_debug("%s: vm %u shutdown", __progname, vmmci.vm_id);
404 	vm_shutdown(vmmci.cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
405 }
406 
407 int
408 vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
409     void *unused, uint8_t sz)
410 {
411 	*intr = 0xFF;
412 
413 	mutex_lock(&vmmci.mutex);
414 	if (dir == 0) {
415 		switch (reg) {
416 		case VIRTIO_CONFIG_DEVICE_FEATURES:
417 		case VIRTIO_CONFIG_QUEUE_SIZE:
418 		case VIRTIO_CONFIG_ISR_STATUS:
419 			log_warnx("%s: illegal write %x to %s",
420 			    __progname, *data, virtio_reg_name(reg));
421 			break;
422 		case VIRTIO_CONFIG_GUEST_FEATURES:
423 			vmmci.cfg.guest_feature = *data;
424 			break;
425 		case VIRTIO_CONFIG_QUEUE_PFN:
426 			vmmci.cfg.queue_pfn = *data;
427 			break;
428 		case VIRTIO_CONFIG_QUEUE_SELECT:
429 			vmmci.cfg.queue_select = *data;
430 			break;
431 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
432 			vmmci.cfg.queue_notify = *data;
433 			break;
434 		case VIRTIO_CONFIG_DEVICE_STATUS:
435 			vmmci.cfg.device_status = *data;
436 			break;
437 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
438 			vmmci_ack(*data);
439 			break;
440 		}
441 	} else {
442 		switch (reg) {
443 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
444 			*data = vmmci.cmd;
445 			break;
446 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
447 			/* Update time once when reading the first register */
448 			gettimeofday(&vmmci.time, NULL);
449 			*data = (uint64_t)vmmci.time.tv_sec;
450 			break;
451 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
452 			*data = (uint64_t)vmmci.time.tv_sec >> 32;
453 			break;
454 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
455 			*data = (uint64_t)vmmci.time.tv_usec;
456 			break;
457 		case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
458 			*data = (uint64_t)vmmci.time.tv_usec >> 32;
459 			break;
460 		case VIRTIO_CONFIG_DEVICE_FEATURES:
461 			*data = vmmci.cfg.device_feature;
462 			break;
463 		case VIRTIO_CONFIG_GUEST_FEATURES:
464 			*data = vmmci.cfg.guest_feature;
465 			break;
466 		case VIRTIO_CONFIG_QUEUE_PFN:
467 			*data = vmmci.cfg.queue_pfn;
468 			break;
469 		case VIRTIO_CONFIG_QUEUE_SIZE:
470 			*data = vmmci.cfg.queue_size;
471 			break;
472 		case VIRTIO_CONFIG_QUEUE_SELECT:
473 			*data = vmmci.cfg.queue_select;
474 			break;
475 		case VIRTIO_CONFIG_QUEUE_NOTIFY:
476 			*data = vmmci.cfg.queue_notify;
477 			break;
478 		case VIRTIO_CONFIG_DEVICE_STATUS:
479 			*data = vmmci.cfg.device_status;
480 			break;
481 		case VIRTIO_CONFIG_ISR_STATUS:
482 			*data = vmmci.cfg.isr_status;
483 			vmmci.cfg.isr_status = 0;
484 			vcpu_deassert_irq(vmmci.vm_id, 0, vmmci.irq);
485 			break;
486 		}
487 	}
488 	mutex_unlock(&vmmci.mutex);
489 
490 	return (0);
491 }
492 
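/*
 * Resolve the base (backing) image of a disk: raw images have none,
 * qcow2 images may name one in their header.
 */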
493 int
494 virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath)
495 {
496 	switch (type) {
497 	case VMDF_RAW:
498 		return 0;
499 	case VMDF_QCOW2:
500 		return virtio_qcow2_get_base(fd, path, npath, dpath);
501 	}
502 	log_warnx("%s: invalid disk format", __func__);
503 	return -1;
504 }
505 
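/*
 * Runs on the event loop when the vcpu thread writes to the device
 * pipe; (re)arms the vmmci ACK timeout with the requested duration.
 */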
506 static void
507 vmmci_pipe_dispatch(int fd, short event, void *arg)
508 {
509 	enum pipe_msg_type msg;
510 	struct timeval tv = { 0, 0 };
511 
512 	msg = vm_pipe_recv(&vmmci.dev_pipe);
513 	switch (msg) {
514 	case VMMCI_SET_TIMEOUT_SHORT:
515 		tv.tv_sec = VMMCI_TIMEOUT_SHORT;
516 		evtimer_add(&vmmci.timeout, &tv);
517 		break;
518 	case VMMCI_SET_TIMEOUT_LONG:
519 		tv.tv_sec = VMMCI_TIMEOUT_LONG;
520 		evtimer_add(&vmmci.timeout, &tv);
521 		break;
522 	default:
523 		log_warnx("%s: invalid pipe message type %d", __func__, msg);
524 	}
525 }
526 
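/*
 * Create the PCI functions and i/o BARs for all emulated virtio
 * devices (entropy, network, block, scsi cdrom and the vmm control
 * device) and fork+exec the vionet/vioblk subprocess devices.
 */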
527 void
528 virtio_init(struct vmd_vm *vm, int child_cdrom,
529     int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
530 {
531 	struct vmop_create_params *vmc = &vm->vm_params;
532 	struct vm_create_params *vcp = &vmc->vmc_params;
533 	struct virtio_dev *dev;
534 	uint8_t id;
535 	uint8_t i, j;
536 	int ret = 0;
537 
538 	/* Virtio entropy device */
539 	if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
540 	    PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
541 	    PCI_SUBCLASS_SYSTEM_MISC,
542 	    PCI_VENDOR_OPENBSD,
543 	    PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
544 		log_warnx("%s: can't add PCI virtio rng device",
545 		    __progname);
546 		return;
547 	}
548 
549 	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
550 		log_warnx("%s: can't add bar for virtio rng device",
551 		    __progname);
552 		return;
553 	}
554 
555 	memset(&viornd, 0, sizeof(viornd));
556 	viornd.vq[0].qs = VIORND_QUEUE_SIZE;
557 	viornd.vq[0].vq_availoffset = sizeof(struct vring_desc) *
558 	    VIORND_QUEUE_SIZE;
559 	viornd.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
560 	    sizeof(struct vring_desc) * VIORND_QUEUE_SIZE
561 	    + sizeof(uint16_t) * (2 + VIORND_QUEUE_SIZE));
562 	viornd.pci_id = id;
563 	viornd.irq = pci_get_dev_irq(id);
564 	viornd.vm_id = vcp->vcp_id;
565 
566 	SLIST_INIT(&virtio_devs);
567 
568 	if (vmc->vmc_nnics > 0) {
569 		for (i = 0; i < vmc->vmc_nnics; i++) {
570 			dev = calloc(1, sizeof(struct virtio_dev));
571 			if (dev == NULL) {
572 				log_warn("%s: calloc failure allocating vionet",
573 				    __progname);
574 				return;
575 			}
576 			/* Virtio network */
577 			dev->dev_type = VMD_DEVTYPE_NET;
578 
579 			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
580 				PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
581 				PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD,
582 				PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
583 				log_warnx("%s: can't add PCI virtio net device",
584 				    __progname);
585 				return;
586 			}
587 			dev->pci_id = id;
588 			dev->sync_fd = -1;
589 			dev->async_fd = -1;
590 			dev->vm_id = vcp->vcp_id;
591 			dev->vm_vmid = vm->vm_vmid;
592 			dev->irq = pci_get_dev_irq(id);
593 
594 			/* The vionet pci bar function is called by the vcpu. */
595 			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
596 			    dev)) {
597 				log_warnx("%s: can't add bar for virtio net "
598 				    "device", __progname);
599 				return;
600 			}
601 
602 			dev->vionet.vq[RXQ].qs = VIONET_QUEUE_SIZE;
603 			dev->vionet.vq[RXQ].vq_availoffset =
604 			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
605 			dev->vionet.vq[RXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
606 				sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
607 				+ sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
608 			dev->vionet.vq[RXQ].last_avail = 0;
609 			dev->vionet.vq[RXQ].notified_avail = 0;
610 
611 			dev->vionet.vq[TXQ].qs = VIONET_QUEUE_SIZE;
612 			dev->vionet.vq[TXQ].vq_availoffset =
613 			    sizeof(struct vring_desc) * VIONET_QUEUE_SIZE;
614 			dev->vionet.vq[TXQ].vq_usedoffset = VIRTQUEUE_ALIGN(
615 				sizeof(struct vring_desc) * VIONET_QUEUE_SIZE
616 				+ sizeof(uint16_t) * (2 + VIONET_QUEUE_SIZE));
617 			dev->vionet.vq[TXQ].last_avail = 0;
618 			dev->vionet.vq[TXQ].notified_avail = 0;
619 
620 			dev->vionet.data_fd = child_taps[i];
621 
622 			/* MAC address has been assigned by the parent */
623 			memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6);
624 			dev->vionet.cfg.device_feature = VIRTIO_NET_F_MAC;
625 
626 			dev->vionet.lockedmac =
627 			    vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
628 			dev->vionet.local =
629 			    vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
630 			if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET)
631 				dev->vionet.pxeboot = 1;
632 			memcpy(&dev->vionet.local_prefix,
633 			    &env->vmd_cfg.cfg_localprefix,
634 			    sizeof(dev->vionet.local_prefix));
635 			log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s",
636 			    __func__, vcp->vcp_name, i,
637 			    ether_ntoa((void *)dev->vionet.mac),
638 			    dev->vionet.lockedmac ? ", locked" : "",
639 			    dev->vionet.local ? ", local" : "",
640 			    dev->vionet.pxeboot ? ", pxeboot" : "");
641 
642 			/* Add the vionet to our device list. */
643 			dev->vionet.idx = i;
644 			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
645 		}
646 	}
647 
648 	if (vmc->vmc_ndisks > 0) {
649 		for (i = 0; i < vmc->vmc_ndisks; i++) {
650 			dev = calloc(1, sizeof(struct virtio_dev));
651 			if (dev == NULL) {
652 				log_warn("%s: calloc failure allocating vioblk",
653 				    __progname);
654 				return;
655 			}
656 
657 			/* One vioblk device for each disk defined in vcp */
658 			dev->dev_type = VMD_DEVTYPE_DISK;
659 
660 			if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
661 			    PCI_PRODUCT_QUMRANET_VIO_BLOCK,
662 			    PCI_CLASS_MASS_STORAGE,
663 			    PCI_SUBCLASS_MASS_STORAGE_SCSI,
664 			    PCI_VENDOR_OPENBSD,
665 			    PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
666 				log_warnx("%s: can't add PCI virtio block "
667 				    "device", __progname);
668 				return;
669 			}
670 			dev->pci_id = id;
671 			dev->sync_fd = -1;
672 			dev->async_fd = -1;
673 			dev->vm_id = vcp->vcp_id;
674 			dev->vm_vmid = vm->vm_vmid;
675 			dev->irq = pci_get_dev_irq(id);
676 
677 			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
678 			    &dev->vioblk)) {
679 				log_warnx("%s: can't add bar for virtio block "
680 				    "device", __progname);
681 				return;
682 			}
683 			dev->vioblk.vq[0].qs = VIOBLK_QUEUE_SIZE;
684 			dev->vioblk.vq[0].vq_availoffset =
685 			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE;
686 			dev->vioblk.vq[0].vq_usedoffset = VIRTQUEUE_ALIGN(
687 			    sizeof(struct vring_desc) * VIOBLK_QUEUE_SIZE
688 			    + sizeof(uint16_t) * (2 + VIOBLK_QUEUE_SIZE));
689 			dev->vioblk.vq[0].last_avail = 0;
690 			dev->vioblk.cfg.device_feature =
691 			    VIRTIO_BLK_F_SEG_MAX;
692 			dev->vioblk.seg_max = VIOBLK_SEG_MAX;
693 
694 			/*
695 			 * Initialize disk fds to an invalid fd (-1), then
696 			 * set any child disk fds.
697 			 */
698 			memset(&dev->vioblk.disk_fd, -1,
699 			    sizeof(dev->vioblk.disk_fd));
700 			dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
701 			for (j = 0; j < dev->vioblk.ndisk_fd; j++)
702 				dev->vioblk.disk_fd[j] = child_disks[i][j];
703 
704 			dev->vioblk.idx = i;
705 			SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
706 		}
707 	}
708 
709 	/*
710 	 * Launch virtio devices that support subprocess execution.
711 	 */
712 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
713 		if (virtio_dev_launch(vm, dev) != 0)
714 			fatalx("failed to launch virtio device");
715 	}
716 
717 	/* vioscsi cdrom */
718 	if (strlen(vmc->vmc_cdrom)) {
719 		vioscsi = calloc(1, sizeof(struct vioscsi_dev));
720 		if (vioscsi == NULL) {
721 			log_warn("%s: calloc failure allocating vioscsi",
722 			    __progname);
723 			return;
724 		}
725 
726 		if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
727 		    PCI_PRODUCT_QUMRANET_VIO_SCSI,
728 		    PCI_CLASS_MASS_STORAGE,
729 		    PCI_SUBCLASS_MASS_STORAGE_SCSI,
730 		    PCI_VENDOR_OPENBSD,
731 		    PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
732 			log_warnx("%s: can't add PCI vioscsi device",
733 			    __progname);
734 			return;
735 		}
736 
737 		if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
738 			log_warnx("%s: can't add bar for vioscsi device",
739 			    __progname);
740 			return;
741 		}
742 
743 		for (i = 0; i < VIRTIO_MAX_QUEUES; i++) {
744 			vioscsi->vq[i].qs = VIOSCSI_QUEUE_SIZE;
745 			vioscsi->vq[i].vq_availoffset =
746 			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE;
747 			vioscsi->vq[i].vq_usedoffset = VIRTQUEUE_ALIGN(
748 			    sizeof(struct vring_desc) * VIOSCSI_QUEUE_SIZE
749 			    + sizeof(uint16_t) * (2 + VIOSCSI_QUEUE_SIZE));
750 			vioscsi->vq[i].last_avail = 0;
751 		}
752 		if (virtio_raw_init(&vioscsi->file, &vioscsi->sz, &child_cdrom,
753 		    1) == -1) {
754 			log_warnx("%s: unable to determine iso format",
755 			    __func__);
756 			return;
757 		}
758 		vioscsi->locked = 0;
759 		vioscsi->lba = 0;
760 		vioscsi->n_blocks = vioscsi->sz / VIOSCSI_BLOCK_SIZE_CDROM;
761 		vioscsi->max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
762 		vioscsi->pci_id = id;
763 		vioscsi->vm_id = vcp->vcp_id;
764 		vioscsi->irq = pci_get_dev_irq(id);
765 	}
766 
767 	/* virtio control device */
768 	if (pci_add_device(&id, PCI_VENDOR_OPENBSD,
769 	    PCI_PRODUCT_OPENBSD_CONTROL,
770 	    PCI_CLASS_COMMUNICATIONS,
771 	    PCI_SUBCLASS_COMMUNICATIONS_MISC,
772 	    PCI_VENDOR_OPENBSD,
773 	    PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
774 		log_warnx("%s: can't add PCI vmm control device",
775 		    __progname);
776 		return;
777 	}
778 
779 	if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
780 		log_warnx("%s: can't add bar for vmm control device",
781 		    __progname);
782 		return;
783 	}
784 
785 	memset(&vmmci, 0, sizeof(vmmci));
786 	vmmci.cfg.device_feature = VMMCI_F_TIMESYNC | VMMCI_F_ACK |
787 	    VMMCI_F_SYNCRTC;
788 	vmmci.vm_id = vcp->vcp_id;
789 	vmmci.irq = pci_get_dev_irq(id);
790 	vmmci.pci_id = id;
791 	ret = pthread_mutex_init(&vmmci.mutex, NULL);
792 	if (ret) {
793 		errno = ret;
794 		fatal("could not initialize vmmci mutex");
795 	}
796 
797 	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
798 	vm_pipe_init(&vmmci.dev_pipe, vmmci_pipe_dispatch);
799 	event_add(&vmmci.dev_pipe.read_ev, NULL);
800 }
801 
802 /*
803  * vionet_set_hostmac
804  *
805  * Sets the hardware address for the host-side tap(4) on a vionet_dev.
806  *
807  * This should only be called from the event-loop thread.
808  *
809  * vm: pointer to the current vmd_vm instance
810  * idx: index into the array of vionet_dev's for the target vionet_dev
811  * addr: ethernet address to set
812  */
813 void
814 vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr)
815 {
816 	struct vmop_create_params	*vmc = &vm->vm_params;
817 	struct virtio_dev		*dev;
818 	struct vionet_dev		*vionet = NULL;
819 	int ret;
820 
821 	if (idx > vmc->vmc_nnics)
822 		fatalx("%s: invalid vionet index: %u", __func__, idx);
823 
824 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
825 		if (dev->dev_type == VMD_DEVTYPE_NET
826 		    && dev->vionet.idx == idx) {
827 			vionet = &dev->vionet;
828 			break;
829 		}
830 	}
831 	if (vionet == NULL)
832 		fatalx("%s: dev == NULL, idx = %u", __func__, idx);
833 
834 	/* Set the local vm process copy. */
835 	memcpy(vionet->hostmac, addr, sizeof(vionet->hostmac));
836 
837 	/* Send the information to the device process. */
838 	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_HOSTMAC, 0, 0, -1,
839 	    vionet->hostmac, sizeof(vionet->hostmac));
840 	if (ret == -1) {
841 		log_warnx("%s: failed to queue hostmac to vionet dev %u",
842 		    __func__, idx);
843 		return;
844 	}
845 }
846 
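/*
 * Flush the cdrom backing file, synchronously broadcast a shutdown
 * message to every subprocess device and reap the children.
 */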
847 void
848 virtio_shutdown(struct vmd_vm *vm)
849 {
850 	int ret, status;
851 	pid_t pid = 0;
852 	struct virtio_dev *dev, *tmp;
853 	struct viodev_msg msg;
854 	struct imsgbuf *ibuf;
855 
856 	/* Ensure that our disks are synced. */
857 	if (vioscsi != NULL)
858 		vioscsi->file.close(vioscsi->file.p, 0);
859 
860 	/*
861 	 * Broadcast shutdown to child devices. We need to do this
862 	 * synchronously as we have already stopped the async event thread.
863 	 */
864 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
865 		memset(&msg, 0, sizeof(msg));
866 		msg.type = VIODEV_MSG_SHUTDOWN;
867 		ibuf = &dev->sync_iev.ibuf;
868 		ret = imsg_compose(ibuf, VIODEV_MSG_SHUTDOWN, 0, 0, -1,
869 		    &msg, sizeof(msg));
870 		if (ret == -1)
871 			fatalx("%s: failed to send shutdown to device",
872 			    __func__);
873 		if (imsgbuf_flush(ibuf) == -1)
874 			fatalx("%s: imsgbuf_flush", __func__);
875 	}
876 
877 	/*
878 	 * Wait for all children to shutdown using a simple approach of
879 	 * iterating over known child devices and waiting for them to die.
880 	 */
881 	SLIST_FOREACH_SAFE(dev, &virtio_devs, dev_next, tmp) {
882 		log_debug("%s: waiting on device pid %d", __func__,
883 		    dev->dev_pid);
884 		do {
885 			pid = waitpid(dev->dev_pid, &status, WNOHANG);
886 		} while (pid == 0 || (pid == -1 && errno == EINTR));
887 		if (pid == dev->dev_pid)
888 			log_debug("%s: device for pid %d is stopped",
889 			    __func__, pid);
890 		else
891 			log_warnx("%s: unexpected pid %d", __func__, pid);
892 		free(dev);
893 	}
894 }
895 
896 int
897 vmmci_restore(int fd, uint32_t vm_id)
898 {
899 	log_debug("%s: receiving vmmci", __func__);
900 	if (atomicio(read, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
901 		log_warnx("%s: error reading vmmci from fd", __func__);
902 		return (-1);
903 	}
904 
905 	if (pci_set_bar_fn(vmmci.pci_id, 0, vmmci_io, NULL)) {
906 		log_warnx("%s: can't set bar fn for vmm control device",
907 		    __progname);
908 		return (-1);
909 	}
910 	vmmci.vm_id = vm_id;
911 	vmmci.irq = pci_get_dev_irq(vmmci.pci_id);
912 	memset(&vmmci.timeout, 0, sizeof(struct event));
913 	evtimer_set(&vmmci.timeout, vmmci_timeout, NULL);
914 	return (0);
915 }
916 
917 int
918 viornd_restore(int fd, struct vmd_vm *vm)
919 {
920 	void *hva = NULL;
921 
922 	log_debug("%s: receiving viornd", __func__);
923 	if (atomicio(read, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
924 		log_warnx("%s: error reading viornd from fd", __func__);
925 		return (-1);
926 	}
927 	if (pci_set_bar_fn(viornd.pci_id, 0, virtio_rnd_io, NULL)) {
928 		log_warnx("%s: can't set bar fn for virtio rng device",
929 		    __progname);
930 		return (-1);
931 	}
932 	viornd.vm_id = vm->vm_params.vmc_params.vcp_id;
933 	viornd.irq = pci_get_dev_irq(viornd.pci_id);
934 
935 	hva = hvaddr_mem(viornd.vq[0].q_gpa, vring_size(VIORND_QUEUE_SIZE));
936 	if (hva == NULL)
937 		fatal("failed to restore viornd virtqueue");
938 	viornd.vq[0].q_hva = hva;
939 
940 	return (0);
941 }
942 
943 int
944 vionet_restore(int fd, struct vmd_vm *vm, int *child_taps)
945 {
946 	struct vmop_create_params *vmc = &vm->vm_params;
947 	struct vm_create_params *vcp = &vmc->vmc_params;
948 	struct virtio_dev *dev;
949 	uint8_t i;
950 
951 	if (vmc->vmc_nnics == 0)
952 		return (0);
953 
954 	for (i = 0; i < vmc->vmc_nnics; i++) {
955 		dev = calloc(1, sizeof(struct virtio_dev));
956 		if (dev == NULL) {
957 			log_warn("%s: calloc failure allocating vionet",
958 			    __progname);
959 			return (-1);
960 		}
961 
962 		log_debug("%s: receiving virtio network device", __func__);
963 		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
964 		    != sizeof(struct virtio_dev)) {
965 			log_warnx("%s: error reading vionet from fd",
966 			    __func__);
967 			return (-1);
968 		}
969 
970 		/* Virtio network */
971 		if (dev->dev_type != VMD_DEVTYPE_NET) {
972 			log_warnx("%s: invalid device type", __func__);
973 			return (-1);
974 		}
975 
976 		dev->sync_fd = -1;
977 		dev->async_fd = -1;
978 		dev->vm_id = vcp->vcp_id;
979 		dev->vm_vmid = vm->vm_vmid;
980 		dev->irq = pci_get_dev_irq(dev->pci_id);
981 
982 		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
983 			log_warnx("%s: can't set bar fn for virtio net "
984 			    "device", __progname);
985 			return (-1);
986 		}
987 
988 		dev->vionet.data_fd = child_taps[i];
989 		dev->vionet.idx = i;
990 
991 		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
992 	}
993 
994 	return (0);
995 }
996 
997 int
998 vioblk_restore(int fd, struct vmd_vm *vm,
999     int child_disks[][VM_MAX_BASE_PER_DISK])
1000 {
1001 	struct vmop_create_params *vmc = &vm->vm_params;
1002 	struct virtio_dev *dev;
1003 	uint8_t i, j;
1004 
1005 	if (vmc->vmc_ndisks == 0)
1006 		return (0);
1007 
1008 	for (i = 0; i < vmc->vmc_ndisks; i++) {
1009 		dev = calloc(1, sizeof(struct virtio_dev));
1010 		if (dev == NULL) {
1011 			log_warn("%s: calloc failure allocating vioblks",
1012 			    __progname);
1013 			return (-1);
1014 		}
1015 
1016 		log_debug("%s: receiving vioblk", __func__);
1017 		if (atomicio(read, fd, dev, sizeof(struct virtio_dev))
1018 		    != sizeof(struct virtio_dev)) {
1019 			log_warnx("%s: error reading vioblk from fd", __func__);
1020 			return (-1);
1021 		}
1022 		if (dev->dev_type != VMD_DEVTYPE_DISK) {
1023 			log_warnx("%s: invalid device type", __func__);
1024 			return (-1);
1025 		}
1026 
1027 		dev->sync_fd = -1;
1028 		dev->async_fd = -1;
1029 
1030 		if (pci_set_bar_fn(dev->pci_id, 0, virtio_pci_io, dev)) {
1031 			log_warnx("%s: can't set bar fn for virtio block "
1032 			    "device", __progname);
1033 			return (-1);
1034 		}
1035 		dev->vm_id = vmc->vmc_params.vcp_id;
1036 		dev->irq = pci_get_dev_irq(dev->pci_id);
1037 
1038 		memset(&dev->vioblk.disk_fd, -1, sizeof(dev->vioblk.disk_fd));
1039 		dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
1040 		for (j = 0; j < dev->vioblk.ndisk_fd; j++)
1041 			dev->vioblk.disk_fd[j] = child_disks[i][j];
1042 
1043 		dev->vioblk.idx = i;
1044 		SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
1045 	}
1046 	return (0);
1047 }
1048 
1049 int
1050 vioscsi_restore(int fd, struct vmd_vm *vm, int child_cdrom)
1051 {
1052 	void *hva = NULL;
1053 	unsigned int i;
1054 
1055 	if (!strlen(vm->vm_params.vmc_cdrom))
1056 		return (0);
1057 
1058 	vioscsi = calloc(1, sizeof(struct vioscsi_dev));
1059 	if (vioscsi == NULL) {
1060 		log_warn("%s: calloc failure allocating vioscsi", __progname);
1061 		return (-1);
1062 	}
1063 
1064 	log_debug("%s: receiving vioscsi", __func__);
1065 
1066 	if (atomicio(read, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
1067 	    sizeof(struct vioscsi_dev)) {
1068 		log_warnx("%s: error reading vioscsi from fd", __func__);
1069 		return (-1);
1070 	}
1071 
1072 	if (pci_set_bar_fn(vioscsi->pci_id, 0, vioscsi_io, vioscsi)) {
1073 		log_warnx("%s: can't set bar fn for vmm control device",
1074 		    __progname);
1075 		return (-1);
1076 	}
1077 
1078 	vioscsi->vm_id = vm->vm_params.vmc_params.vcp_id;
1079 	vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);
1080 
1081 	/* vioscsi uses 3 virtqueues. */
1082 	for (i = 0; i < 3; i++) {
1083 		hva = hvaddr_mem(vioscsi->vq[i].q_gpa,
1084 		    vring_size(VIOSCSI_QUEUE_SIZE));
1085 		if (hva == NULL)
1086 			fatal("failed to restore vioscsi virtqueue");
1087 		vioscsi->vq[i].q_hva = hva;
1088 	}
1089 
1090 	return (0);
1091 }
1092 
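/*
 * Devices are restored from the fd in the same order virtio_dump()
 * wrote them: viornd, vioblk, vioscsi, vionet and finally vmmci.
 */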
1093 int
1094 virtio_restore(int fd, struct vmd_vm *vm, int child_cdrom,
1095     int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
1096 {
1097 	struct virtio_dev *dev;
1098 	int ret;
1099 
1100 	SLIST_INIT(&virtio_devs);
1101 
1102 	if ((ret = viornd_restore(fd, vm)) == -1)
1103 		return (ret);
1104 
1105 	if ((ret = vioblk_restore(fd, vm, child_disks)) == -1)
1106 		return (ret);
1107 
1108 	if ((ret = vioscsi_restore(fd, vm, child_cdrom)) == -1)
1109 		return (ret);
1110 
1111 	if ((ret = vionet_restore(fd, vm, child_taps)) == -1)
1112 		return (ret);
1113 
1114 	if ((ret = vmmci_restore(fd, vm->vm_params.vmc_params.vcp_id)) == -1)
1115 		return (ret);
1116 
1117 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1118 		if (virtio_dev_launch(vm, dev) != 0)
1119 			fatalx("%s: failed to restore virtio dev", __func__);
1120 	}
1121 
1122 	return (0);
1123 }
1124 
1125 int
1126 viornd_dump(int fd)
1127 {
1128 	log_debug("%s: sending viornd", __func__);
1129 
1130 	viornd.vq[0].q_hva = NULL;
1131 
1132 	if (atomicio(vwrite, fd, &viornd, sizeof(viornd)) != sizeof(viornd)) {
1133 		log_warnx("%s: error writing viornd to fd", __func__);
1134 		return (-1);
1135 	}
1136 	return (0);
1137 }
1138 
1139 int
1140 vmmci_dump(int fd)
1141 {
1142 	log_debug("%s: sending vmmci", __func__);
1143 
1144 	if (atomicio(vwrite, fd, &vmmci, sizeof(vmmci)) != sizeof(vmmci)) {
1145 		log_warnx("%s: error writing vmmci to fd", __func__);
1146 		return (-1);
1147 	}
1148 	return (0);
1149 }
1150 
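/*
 * Ask each vionet subprocess to dump its state over the sync channel,
 * scrub the volatile fields and write the result to the dump fd.
 */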
1151 int
1152 vionet_dump(int fd)
1153 {
1154 	struct virtio_dev	*dev, temp;
1155 	struct viodev_msg	 msg;
1156 	struct imsg		 imsg;
1157 	struct imsgbuf		*ibuf = NULL;
1158 	size_t			 sz;
1159 	int			 ret;
1160 
1161 	log_debug("%s: dumping vionet", __func__);
1162 
1163 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1164 		if (dev->dev_type != VMD_DEVTYPE_NET)
1165 			continue;
1166 
1167 		memset(&msg, 0, sizeof(msg));
1168 		memset(&imsg, 0, sizeof(imsg));
1169 
1170 		ibuf = &dev->sync_iev.ibuf;
1171 		msg.type = VIODEV_MSG_DUMP;
1172 
1173 		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1174 		    sizeof(msg));
1175 		if (ret == -1) {
1176 			log_warnx("%s: failed requesting dump of vionet[%d]",
1177 			    __func__, dev->vionet.idx);
1178 			return (-1);
1179 		}
1180 		if (imsgbuf_flush(ibuf) == -1) {
1181 			log_warnx("%s: imsgbuf_flush", __func__);
1182 			return (-1);
1183 		}
1184 
1185 		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
1186 		if (sz != sizeof(temp)) {
1187 			log_warnx("%s: failed to dump vionet[%d]", __func__,
1188 			    dev->vionet.idx);
1189 			return (-1);
1190 		}
1191 
1192 		/* Clear volatile state. Will reinitialize on restore. */
1193 		temp.vionet.vq[RXQ].q_hva = NULL;
1194 		temp.vionet.vq[TXQ].q_hva = NULL;
1195 		temp.async_fd = -1;
1196 		temp.sync_fd = -1;
1197 		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
1198 		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));
1199 
1200 		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
1201 			log_warnx("%s: error writing vionet to fd", __func__);
1202 			return (-1);
1203 		}
1204 	}
1205 
1206 	return (0);
1207 }
1208 
1209 int
1210 vioblk_dump(int fd)
1211 {
1212 	struct virtio_dev	*dev, temp;
1213 	struct viodev_msg	 msg;
1214 	struct imsg		 imsg;
1215 	struct imsgbuf		*ibuf = NULL;
1216 	size_t			 sz;
1217 	int			 ret;
1218 
1219 	log_debug("%s: dumping vioblk", __func__);
1220 
1221 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1222 		if (dev->dev_type != VMD_DEVTYPE_DISK)
1223 			continue;
1224 
1225 		memset(&msg, 0, sizeof(msg));
1226 		memset(&imsg, 0, sizeof(imsg));
1227 
1228 		ibuf = &dev->sync_iev.ibuf;
1229 		msg.type = VIODEV_MSG_DUMP;
1230 
1231 		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1232 		    sizeof(msg));
1233 		if (ret == -1) {
1234 			log_warnx("%s: failed requesting dump of vioblk[%d]",
1235 			    __func__, dev->vioblk.idx);
1236 			return (-1);
1237 		}
1238 		if (imsgbuf_flush(ibuf) == -1) {
1239 			log_warnx("%s: imsgbuf_flush", __func__);
1240 			return (-1);
1241 		}
1242 
1243 
1244 		sz = atomicio(read, dev->sync_fd, &temp, sizeof(temp));
1245 		if (sz != sizeof(temp)) {
1246 			log_warnx("%s: failed to dump vioblk[%d]", __func__,
1247 			    dev->vioblk.idx);
1248 			return (-1);
1249 		}
1250 
1251 		/* Clear volatile state. Will reinitialize on restore. */
1252 		temp.vioblk.vq[0].q_hva = NULL;
1253 		temp.async_fd = -1;
1254 		temp.sync_fd = -1;
1255 		memset(&temp.async_iev, 0, sizeof(temp.async_iev));
1256 		memset(&temp.sync_iev, 0, sizeof(temp.sync_iev));
1257 
1258 		if (atomicio(vwrite, fd, &temp, sizeof(temp)) != sizeof(temp)) {
1259 			log_warnx("%s: error writing vioblk to fd", __func__);
1260 			return (-1);
1261 		}
1262 	}
1263 
1264 	return (0);
1265 }
1266 
1267 int
1268 vioscsi_dump(int fd)
1269 {
1270 	unsigned int i;
1271 
1272 	if (vioscsi == NULL)
1273 		return (0);
1274 
1275 	log_debug("%s: sending vioscsi", __func__);
1276 
1277 	for (i = 0; i < 3; i++)
1278 		vioscsi->vq[i].q_hva = NULL;
1279 
1280 	if (atomicio(vwrite, fd, vioscsi, sizeof(struct vioscsi_dev)) !=
1281 	    sizeof(struct vioscsi_dev)) {
1282 		log_warnx("%s: error writing vioscsi to fd", __func__);
1283 		return (-1);
1284 	}
1285 	return (0);
1286 }
1287 
1288 int
1289 virtio_dump(int fd)
1290 {
1291 	int ret;
1292 
1293 	if ((ret = viornd_dump(fd)) == -1)
1294 		return ret;
1295 
1296 	if ((ret = vioblk_dump(fd)) == -1)
1297 		return ret;
1298 
1299 	if ((ret = vioscsi_dump(fd)) == -1)
1300 		return ret;
1301 
1302 	if ((ret = vionet_dump(fd)) == -1)
1303 		return ret;
1304 
1305 	if ((ret = vmmci_dump(fd)) == -1)
1306 		return ret;
1307 
1308 	return (0);
1309 }
1310 
1311 void virtio_broadcast_imsg(struct vmd_vm *vm, uint16_t type, void *data,
1312     uint16_t datalen)
1313 {
1314 	struct virtio_dev *dev;
1315 	int ret;
1316 
1317 	SLIST_FOREACH(dev, &virtio_devs, dev_next) {
1318 		ret = imsg_compose_event(&dev->async_iev, type, 0, 0, -1, data,
1319 		    datalen);
1320 		if (ret == -1) {
1321 			log_warnx("%s: failed to broadcast imsg type %u",
1322 			    __func__, type);
1323 		}
1324 	}
1325 
1326 }
1327 
1328 void
1329 virtio_stop(struct vmd_vm *vm)
1330 {
1331 	return virtio_broadcast_imsg(vm, IMSG_VMDOP_PAUSE_VM, NULL, 0);
1332 }
1333 
1334 void
1335 virtio_start(struct vmd_vm *vm)
1336 {
1337 	return virtio_broadcast_imsg(vm, IMSG_VMDOP_UNPAUSE_VM, NULL, 0);
1338 }
1339 
1340 /*
1341  * Fork+exec a child virtio device. Returns 0 on success.
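 *
 * The parent writes the configured struct virtio_dev followed by the
 * struct vmd_vm over the sync channel, then waits for the child to
 * report VIODEV_MSG_READY before wiring up the async imsg event.  The
 * child closes the fds it does not need and re-execs vmd in device
 * mode ("-X").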
1342  */
1343 static int
1344 virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev)
1345 {
1346 	char *nargv[12], num[32], vmm_fd[32], vm_name[VM_NAME_MAX], t[2];
1347 	pid_t dev_pid;
1348 	int sync_fds[2], async_fds[2], ret = 0;
1349 	size_t i, sz = 0;
1350 	struct viodev_msg msg;
1351 	struct virtio_dev *dev_entry;
1352 	struct imsg imsg;
1353 	struct imsgev *iev = &dev->sync_iev;
1354 
1355 	switch (dev->dev_type) {
1356 	case VMD_DEVTYPE_NET:
1357 		log_debug("%s: launching vionet%d",
1358 		    vm->vm_params.vmc_params.vcp_name, dev->vionet.idx);
1359 		break;
1360 	case VMD_DEVTYPE_DISK:
1361 		log_debug("%s: launching vioblk%d",
1362 		    vm->vm_params.vmc_params.vcp_name, dev->vioblk.idx);
1363 		break;
1364 		/* NOTREACHED */
1365 	default:
1366 		log_warn("%s: invalid device type", __func__);
1367 		return (EINVAL);
1368 	}
1369 
1370 	/* We need two channels: one synchronous (IO reads) and one async. */
1371 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
1372 	    sync_fds) == -1) {
1373 		log_warn("failed to create socketpair");
1374 		return (errno);
1375 	}
1376 	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
1377 	    async_fds) == -1) {
1378 		log_warn("failed to create async socketpair");
1379 		return (errno);
1380 	}
1381 
1382 	/* Fork... */
1383 	dev_pid = fork();
1384 	if (dev_pid == -1) {
1385 		ret = errno;
1386 		log_warn("%s: fork failed", __func__);
1387 		goto err;
1388 	}
1389 
1390 	if (dev_pid > 0) {
1391 		/* Parent */
1392 		close_fd(sync_fds[1]);
1393 		close_fd(async_fds[1]);
1394 
1395 		/* Save the child's pid to help with cleanup. */
1396 		dev->dev_pid = dev_pid;
1397 
1398 		/* Set the channel fds to the child's before sending. */
1399 		dev->sync_fd = sync_fds[1];
1400 		dev->async_fd = async_fds[1];
1401 
1402 		/* 1. Send over our configured device. */
1403 		log_debug("%s: sending '%c' type device struct", __func__,
1404 			dev->dev_type);
1405 		sz = atomicio(vwrite, sync_fds[0], dev, sizeof(*dev));
1406 		if (sz != sizeof(*dev)) {
1407 			log_warnx("%s: failed to send device", __func__);
1408 			ret = EIO;
1409 			goto err;
1410 		}
1411 
1412 		/* Close data fds. Only the child device needs them now. */
1413 		if (virtio_dev_closefds(dev) == -1) {
1414 			log_warnx("%s: failed to close device data fds",
1415 			    __func__);
1416 			goto err;
1417 		}
1418 
1419 		/* 2. Send over details on the VM (including memory fds). */
1420 		log_debug("%s: sending vm message for '%s'", __func__,
1421 			vm->vm_params.vmc_params.vcp_name);
1422 		sz = atomicio(vwrite, sync_fds[0], vm, sizeof(*vm));
1423 		if (sz != sizeof(*vm)) {
1424 			log_warnx("%s: failed to send vm details", __func__);
1425 			ret = EIO;
1426 			goto err;
1427 		}
1428 
1429 		/*
1430 		 * Initialize our imsg channel to the child device. The initial
1431 		 * communication will be synchronous. We expect the child to
1432 		 * report itself "ready" to confirm the launch was a success.
1433 		 */
1434 		if (imsgbuf_init(&iev->ibuf, sync_fds[0]) == -1) {
1435 			log_warn("%s: failed to init imsgbuf", __func__);
1436 			goto err;
1437 		}
1438 		imsgbuf_allow_fdpass(&iev->ibuf);
1439 		ret = imsgbuf_read_one(&iev->ibuf, &imsg);
1440 		if (ret == 0 || ret == -1) {
1441 			log_warnx("%s: failed to receive ready message from "
1442 			    "'%c' type device", __func__, dev->dev_type);
1443 			ret = EIO;
1444 			goto err;
1445 		}
1446 		ret = 0;
1447 
1448 		IMSG_SIZE_CHECK(&imsg, &msg);
1449 		memcpy(&msg, imsg.data, sizeof(msg));
1450 		imsg_free(&imsg);
1451 
1452 		if (msg.type != VIODEV_MSG_READY) {
1453 			log_warnx("%s: expected ready message, got type %d",
1454 			    __func__, msg.type);
1455 			ret = EINVAL;
1456 			goto err;
1457 		}
1458 		log_debug("%s: device reports ready via sync channel",
1459 		    __func__);
1460 
1461 		/*
1462 		 * Wire in the async event handling, but only after reverting
1463 		 * to the parent's fds.
1464 		 */
1465 		dev->sync_fd = sync_fds[0];
1466 		dev->async_fd = async_fds[0];
1467 		vm_device_pipe(dev, virtio_dispatch_dev, NULL);
1468 	} else {
1469 		/* Child */
1470 		close_fd(async_fds[0]);
1471 		close_fd(sync_fds[0]);
1472 
1473 		/* Close pty. Virtio devices do not need it. */
1474 		close_fd(vm->vm_tty);
1475 		vm->vm_tty = -1;
1476 
1477 		if (vm->vm_cdrom != -1) {
1478 			close_fd(vm->vm_cdrom);
1479 			vm->vm_cdrom = -1;
1480 		}
1481 
1482 		/* Keep only this device's data fds open across the exec. */
1483 		SLIST_FOREACH(dev_entry, &virtio_devs, dev_next) {
1484 			if (dev_entry == dev)
1485 				continue;
1486 			if (virtio_dev_closefds(dev_entry) == -1)
1487 				fatalx("unable to close other virtio devs");
1488 		}
1489 
1490 		memset(num, 0, sizeof(num));
1491 		snprintf(num, sizeof(num), "%d", sync_fds[1]);
1492 		memset(vmm_fd, 0, sizeof(vmm_fd));
1493 		snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
1494 		memset(vm_name, 0, sizeof(vm_name));
1495 		snprintf(vm_name, sizeof(vm_name), "%s",
1496 		    vm->vm_params.vmc_params.vcp_name);
1497 
1498 		t[0] = dev->dev_type;
1499 		t[1] = '\0';
1500 
1501 		i = 0;
1502 		nargv[i++] = env->argv0;
1503 		nargv[i++] = "-X";
1504 		nargv[i++] = num;
1505 		nargv[i++] = "-t";
1506 		nargv[i++] = t;
1507 		nargv[i++] = "-i";
1508 		nargv[i++] = vmm_fd;
1509 		nargv[i++] = "-p";
1510 		nargv[i++] = vm_name;
1511 		if (env->vmd_debug)
1512 			nargv[i++] = "-d";
1513 		if (env->vmd_verbose == 1)
1514 			nargv[i++] = "-v";
1515 		else if (env->vmd_verbose > 1)
1516 			nargv[i++] = "-vv";
1517 		nargv[i++] = NULL;
1518 		if (i > sizeof(nargv) / sizeof(nargv[0]))
1519 			fatalx("%s: nargv overflow", __func__);
1520 
1521 		/* Control resumes in vmd.c:main(). */
1522 		execvp(nargv[0], nargv);
1523 
1524 		ret = errno;
1525 		log_warn("%s: failed to exec device", __func__);
1526 		_exit(ret);
1527 		/* NOTREACHED */
1528 	}
1529 
1530 	return (ret);
1531 
1532 err:
1533 	close_fd(sync_fds[0]);
1534 	close_fd(sync_fds[1]);
1535 	close_fd(async_fds[0]);
1536 	close_fd(async_fds[1]);
1537 	return (ret);
1538 }
1539 
1540 /*
1541  * Initialize an async imsg channel for a virtio device.
1542  */
1543 int
1544 vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *),
1545     struct event_base *ev_base)
1546 {
1547 	struct imsgev *iev = &dev->async_iev;
1548 	int fd = dev->async_fd;
1549 
1550 	log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__,
1551 	    dev->dev_type, fd);
1552 
1553 	if (imsgbuf_init(&iev->ibuf, fd) == -1)
1554 		fatal("imsgbuf_init");
1555 	imsgbuf_allow_fdpass(&iev->ibuf);
1556 	iev->handler = cb;
1557 	iev->data = dev;
1558 	iev->events = EV_READ;
1559 	imsg_event_add2(iev, ev_base);
1560 
1561 	return (0);
1562 }
1563 
1564 void
1565 virtio_dispatch_dev(int fd, short event, void *arg)
1566 {
1567 	struct virtio_dev	*dev = (struct virtio_dev*)arg;
1568 	struct imsgev		*iev = &dev->async_iev;
1569 	struct imsgbuf		*ibuf = &iev->ibuf;
1570 	struct imsg		 imsg;
1571 	struct viodev_msg	 msg;
1572 	ssize_t			 n = 0;
1573 
1574 	if (event & EV_READ) {
1575 		if ((n = imsgbuf_read(ibuf)) == -1)
1576 			fatal("%s: imsgbuf_read", __func__);
1577 		if (n == 0) {
1578 			/* this pipe is dead, so remove the event handler */
1579 			log_debug("%s: pipe dead (EV_READ)", __func__);
1580 			event_del(&iev->ev);
1581 			event_loopexit(NULL);
1582 			return;
1583 		}
1584 	}
1585 
1586 	if (event & EV_WRITE) {
1587 		if (imsgbuf_write(ibuf) == -1) {
1588 			if (errno == EPIPE) {
1589 				/* this pipe is dead, remove the handler */
1590 				log_debug("%s: pipe dead (EV_WRITE)", __func__);
1591 				event_del(&iev->ev);
1592 				event_loopexit(NULL);
1593 				return;
1594 			}
1595 			fatal("%s: imsgbuf_write", __func__);
1596 		}
1597 	}
1598 
1599 	for (;;) {
1600 		if ((n = imsg_get(ibuf, &imsg)) == -1)
1601 			fatal("%s: imsg_get", __func__);
1602 		if (n == 0)
1603 			break;
1604 
1605 		switch (imsg.hdr.type) {
1606 		case IMSG_DEVOP_MSG:
1607 			IMSG_SIZE_CHECK(&imsg, &msg);
1608 			memcpy(&msg, imsg.data, sizeof(msg));
1609 			handle_dev_msg(&msg, dev);
1610 			break;
1611 		default:
1612 			log_warnx("%s: got non devop imsg %d", __func__,
1613 			    imsg.hdr.type);
1614 			break;
1615 		}
1616 		imsg_free(&imsg);
1617 	}
1618 	imsg_event_add(iev);
1619 }
1620 
1621 
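/*
 * Handle an asynchronous message from a subprocess device: interrupt
 * (de)assertion kicks plus ready/error notifications.
 */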
1622 static int
1623 handle_dev_msg(struct viodev_msg *msg, struct virtio_dev *gdev)
1624 {
1625 	uint32_t vm_id = gdev->vm_id;
1626 	int irq = gdev->irq;
1627 
1628 	switch (msg->type) {
1629 	case VIODEV_MSG_KICK:
1630 		if (msg->state == INTR_STATE_ASSERT)
1631 			vcpu_assert_irq(vm_id, msg->vcpu, irq);
1632 		else if (msg->state == INTR_STATE_DEASSERT)
1633 			vcpu_deassert_irq(vm_id, msg->vcpu, irq);
1634 		break;
1635 	case VIODEV_MSG_READY:
1636 		log_debug("%s: device reports ready", __func__);
1637 		break;
1638 	case VIODEV_MSG_ERROR:
1639 		log_warnx("%s: device reported error", __func__);
1640 		break;
1641 	case VIODEV_MSG_INVALID:
1642 	case VIODEV_MSG_IO_READ:
1643 	case VIODEV_MSG_IO_WRITE:
1644 		/* FALLTHROUGH */
1645 	default:
1646 		log_warnx("%s: unsupported device message type %d", __func__,
1647 		    msg->type);
1648 		return (1);
1649 	}
1650 
1651 	return (0);
1652 }
1653 
1654 /*
1655  * Called by the VM process while processing IO from the VCPU thread.
1656  *
1657  * N.b. Since the VCPU thread calls this function, we cannot mutate the event
1658  * system. All ipc messages must be sent manually and cannot be queued for
1659  * the event loop to push them. (We need to perform a synchronous read, so
1660  * this isn't really a big deal.)
1661  */
1662 int
1663 virtio_pci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
1664     void *cookie, uint8_t sz)
1665 {
1666 	struct virtio_dev *dev = (struct virtio_dev *)cookie;
1667 	struct imsgbuf *ibuf = &dev->sync_iev.ibuf;
1668 	struct imsg imsg;
1669 	struct viodev_msg msg;
1670 	int ret = 0;
1671 
1672 	memset(&msg, 0, sizeof(msg));
1673 	msg.reg = reg;
1674 	msg.io_sz = sz;
1675 
1676 	if (dir == 0) {
1677 		msg.type = VIODEV_MSG_IO_WRITE;
1678 		msg.data = *data;
1679 		msg.data_valid = 1;
1680 	} else
1681 		msg.type = VIODEV_MSG_IO_READ;
1682 
1683 	if (msg.type == VIODEV_MSG_IO_WRITE) {
1684 		/*
1685 		 * Write request. No reply expected.
1686 		 */
1687 		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1688 		    sizeof(msg));
1689 		if (ret == -1) {
1690 			log_warn("%s: failed to send async io event to virtio"
1691 			    " device", __func__);
1692 			return (ret);
1693 		}
1694 		if (imsgbuf_flush(ibuf) == -1) {
1695 			log_warnx("%s: imsgbuf_flush (write)", __func__);
1696 			return (-1);
1697 		}
1698 	} else {
1699 		/*
1700 		 * Read request. Requires waiting for a reply.
1701 		 */
1702 		ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1703 		    sizeof(msg));
1704 		if (ret == -1) {
1705 			log_warnx("%s: failed to send sync io event to virtio"
1706 			    " device", __func__);
1707 			return (ret);
1708 		}
1709 		if (imsgbuf_flush(ibuf) == -1) {
1710 			log_warnx("%s: imsgbuf_flush (read)", __func__);
1711 			return (-1);
1712 		}
1713 
1714 		/* Read our reply. */
1715 		ret = imsgbuf_read_one(ibuf, &imsg);
1716 		if (ret == 0 || ret == -1) {
1717 			log_warn("%s: imsgbuf_read (n=%d)", __func__, ret);
1718 			return (-1);
1719 		}
1720 		IMSG_SIZE_CHECK(&imsg, &msg);
1721 		memcpy(&msg, imsg.data, sizeof(msg));
1722 		imsg_free(&imsg);
1723 
1724 		if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) {
1725 #if DEBUG
1726 			log_debug("%s: got sync read response (reg=%s)",
1727 			    __func__, virtio_reg_name(msg.reg));
1728 #endif /* DEBUG */
1729 			*data = msg.data;
1730 			/*
1731 			 * It's possible we're asked to {de,}assert after the
1732 			 * device performs a register read.
1733 			 */
1734 			if (msg.state == INTR_STATE_ASSERT)
1735 				vcpu_assert_irq(dev->vm_id, msg.vcpu, msg.irq);
1736 			else if (msg.state == INTR_STATE_DEASSERT)
1737 				vcpu_deassert_irq(dev->vm_id, msg.vcpu, msg.irq);
1738 		} else {
1739 			log_warnx("%s: expected IO_READ, got %d", __func__,
1740 			    msg.type);
1741 			return (-1);
1742 		}
1743 	}
1744 
1745 	return (0);
1746 }
1747 
1748 void
1749 virtio_assert_irq(struct virtio_dev *dev, int vcpu)
1750 {
1751 	struct viodev_msg msg;
1752 	int ret;
1753 
1754 	memset(&msg, 0, sizeof(msg));
1755 	msg.irq = dev->irq;
1756 	msg.vcpu = vcpu;
1757 	msg.type = VIODEV_MSG_KICK;
1758 	msg.state = INTR_STATE_ASSERT;
1759 
1760 	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
1761 	    &msg, sizeof(msg));
1762 	if (ret == -1)
1763 		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
1764 }
1765 
1766 void
1767 virtio_deassert_irq(struct virtio_dev *dev, int vcpu)
1768 {
1769 	struct viodev_msg msg;
1770 	int ret;
1771 
1772 	memset(&msg, 0, sizeof(msg));
1773 	msg.irq = dev->irq;
1774 	msg.vcpu = vcpu;
1775 	msg.type = VIODEV_MSG_KICK;
1776 	msg.state = INTR_STATE_DEASSERT;
1777 
1778 	ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
1779 	    &msg, sizeof(msg));
1780 	if (ret == -1)
1781 		log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
1782 }
1783 
1784 /*
1785  * Close all underlying file descriptors for a given virtio device.
1786  */
1787 static int
1788 virtio_dev_closefds(struct virtio_dev *dev)
1789 {
1790 	size_t i;
1791 
1792 	switch (dev->dev_type) {
1793 	case VMD_DEVTYPE_DISK:
1794 		for (i = 0; i < dev->vioblk.ndisk_fd; i++) {
1795 			close_fd(dev->vioblk.disk_fd[i]);
1796 			dev->vioblk.disk_fd[i] = -1;
1797 		}
1798 		break;
1799 	case VMD_DEVTYPE_NET:
1800 		close_fd(dev->vionet.data_fd);
1801 		dev->vionet.data_fd = -1;
1802 		break;
1803 	default:
1804 		log_warnx("%s: invalid device type", __func__);
1805 		return (-1);
1806 	}
1807 
1808 	close_fd(dev->async_fd);
1809 	dev->async_fd = -1;
1810 	close_fd(dev->sync_fd);
1811 	dev->sync_fd = -1;
1812 
1813 	return (0);
1814 }
1815