xref: /openbsd/usr.sbin/vmd/vionet.c (revision 0a9d031f)
1 /*	$OpenBSD: vionet.c,v 1.22 2024/11/21 13:39:34 claudio Exp $	*/
2 
3 /*
4  * Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
5  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 
21 #include <dev/pci/virtio_pcireg.h>
22 #include <dev/pv/virtioreg.h>
23 
24 #include <net/if.h>
25 #include <netinet/in.h>
26 #include <netinet/if_ether.h>
27 
28 #include <errno.h>
29 #include <event.h>
30 #include <fcntl.h>
31 #include <pthread.h>
32 #include <pthread_np.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include "atomicio.h"
38 #include "virtio.h"
39 #include "vmd.h"
40 
41 #define VIRTIO_NET_F_MAC	(1 << 5)
42 #define RXQ	0
43 #define TXQ	1
44 
45 extern char *__progname;
46 extern struct vmd_vm *current_vm;
47 
48 struct packet {
49 	uint8_t	*buf;
50 	size_t	 len;
51 };
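
/*
 * A minimal sketch of the injection contract implied by struct packet:
 * callers hand the rx thread a descriptor, not the frame itself, by
 * writing one struct packet into pipe_inject.  The buffer must be
 * heap-allocated because the rx side frees it after delivery (see
 * vionet_rx_copy() and the dhcp reply path in vionet_tx() below).
 * replybuf/replylen are placeholders for a malloc'd reply:
 *
 *	struct packet pkt = { .buf = replybuf, .len = replylen };
 *
 *	if (write(pipe_inject[WRITE], &pkt, sizeof(pkt)) != sizeof(pkt))
 *		free(pkt.buf);		(not delivered; caller drops it)
 */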
52 
53 static void *rx_run_loop(void *);
54 static void *tx_run_loop(void *);
55 static int vionet_rx(struct vionet_dev *, int);
56 static ssize_t vionet_rx_copy(struct vionet_dev *, int, const struct iovec *,
57     int, size_t);
58 static ssize_t vionet_rx_zerocopy(struct vionet_dev *, int,
59     const struct iovec *, int);
60 static void vionet_rx_event(int, short, void *);
61 static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
62     int8_t *);
63 static void handle_io_write(struct viodev_msg *, struct virtio_dev *);
64 static int vionet_tx(struct virtio_dev *);
65 static void vionet_notifyq(struct virtio_dev *);
66 static void dev_dispatch_vm(int, short, void *);
67 static void handle_sync_io(int, short, void *);
68 static void read_pipe_main(int, short, void *);
69 static void read_pipe_rx(int, short, void *);
70 static void read_pipe_tx(int, short, void *);
71 static void vionet_assert_pic_irq(struct virtio_dev *);
72 static void vionet_deassert_pic_irq(struct virtio_dev *);
73 
74 /* Device Globals */
75 struct event ev_tap;
76 struct event ev_inject;
77 struct event_base *ev_base_main;
78 struct event_base *ev_base_rx;
79 struct event_base *ev_base_tx;
80 pthread_t rx_thread;
81 pthread_t tx_thread;
82 struct vm_dev_pipe pipe_main;
83 struct vm_dev_pipe pipe_rx;
84 struct vm_dev_pipe pipe_tx;
85 int pipe_inject[2];
86 #define READ	0
87 #define WRITE	1
88 struct iovec iov_rx[VIONET_QUEUE_SIZE];
89 struct iovec iov_tx[VIONET_QUEUE_SIZE];
90 pthread_rwlock_t lock = NULL;		/* Guards device config state. */
91 int resetting = 0;	/* Transient reset state used to coordinate reset. */
92 int rx_enabled = 0;	/* 1: we expect to read the tap, 0: wait for notify. */
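
/*
 * Rough picture of the threading model used below: the main thread runs
 * ev_base_main and owns the imsg channels to the vm process, the rx thread
 * runs ev_base_rx and owns the tap fd plus the read side of pipe_inject,
 * and the tx thread runs ev_base_tx.  The threads only talk to each other
 * over the vm_dev_pipe channels, e.g.
 *
 *	vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY);		(main -> rx, see
 *							 vionet_notifyq())
 *	vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);	(rx/tx -> main, see
 *							 vionet_rx_event())
 */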
93 
94 __dead void
95 vionet_main(int fd, int fd_vmm)
96 {
97 	struct virtio_dev	 dev;
98 	struct vionet_dev	*vionet = NULL;
99 	struct viodev_msg 	 msg;
100 	struct vmd_vm	 	 vm;
101 	struct vm_create_params	*vcp;
102 	ssize_t			 sz;
103 	int			 ret;
104 
105 	/*
106 	 * stdio - needed for read/write to the tap fd and channels to the vm.
107 	 * vmm + proc - needed to create shared vm mappings.
108 	 */
109 	if (pledge("stdio vmm proc", NULL) == -1)
110 		fatal("pledge");
111 
112 	/* Initialize iovec arrays. */
113 	memset(iov_rx, 0, sizeof(iov_rx));
114 	memset(iov_tx, 0, sizeof(iov_tx));
115 
116 	/* Receive our vionet_dev, mostly preconfigured. */
117 	sz = atomicio(read, fd, &dev, sizeof(dev));
118 	if (sz != sizeof(dev)) {
119 		ret = errno;
120 		log_warn("failed to receive vionet");
121 		goto fail;
122 	}
123 	if (dev.dev_type != VMD_DEVTYPE_NET) {
124 		ret = EINVAL;
125 		log_warn("received invalid device type");
126 		goto fail;
127 	}
128 	dev.sync_fd = fd;
129 	vionet = &dev.vionet;
130 
131 	log_debug("%s: got vionet dev. tap fd = %d, syncfd = %d, asyncfd = %d"
132 	    ", vmm fd = %d", __func__, vionet->data_fd, dev.sync_fd,
133 	    dev.async_fd, fd_vmm);
134 
135 	/* Receive our vm information from the vm process. */
136 	memset(&vm, 0, sizeof(vm));
137 	sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm));
138 	if (sz != sizeof(vm)) {
139 		ret = EIO;
140 		log_warnx("failed to receive vm details");
141 		goto fail;
142 	}
143 	vcp = &vm.vm_params.vmc_params;
144 	current_vm = &vm;
145 	setproctitle("%s/vionet%d", vcp->vcp_name, vionet->idx);
146 	log_procinit("vm/%s/vionet%d", vcp->vcp_name, vionet->idx);
147 
148 	/* Now that we have our vm information, we can remap memory. */
149 	ret = remap_guest_mem(&vm, fd_vmm);
150 	if (ret) {
151 		fatal("%s: failed to remap", __func__);
152 		goto fail;
153 	}
154 
155 	/*
156 	 * We no longer need /dev/vmm access.
157 	 */
158 	close_fd(fd_vmm);
159 	if (pledge("stdio", NULL) == -1)
160 		fatal("pledge2");
161 
162 	/* If we're restoring hardware, re-initialize virtqueue hva's. */
163 	if (vm.vm_state & VM_STATE_RECEIVED) {
164 		struct virtio_vq_info *vq_info;
165 		void *hva = NULL;
166 
167 		vq_info = &dev.vionet.vq[TXQ];
168 		if (vq_info->q_gpa != 0) {
169 			log_debug("%s: restoring TX virtqueue for gpa 0x%llx",
170 			    __func__, vq_info->q_gpa);
171 			hva = hvaddr_mem(vq_info->q_gpa,
172 			    vring_size(VIONET_QUEUE_SIZE));
173 			if (hva == NULL)
174 				fatalx("%s: hva == NULL", __func__);
175 			vq_info->q_hva = hva;
176 		}
177 
178 		vq_info = &dev.vionet.vq[RXQ];
179 		if (vq_info->q_gpa != 0) {
180 			log_debug("%s: restoring RX virtqueue for gpa 0x%llx",
181 			    __func__, vq_info->q_gpa);
182 			hva = hvaddr_mem(vq_info->q_gpa,
183 			    vring_size(VIONET_QUEUE_SIZE));
184 			if (hva == NULL)
185 				fatalx("%s: hva == NULL", __func__);
186 			vq_info->q_hva = hva;
187 		}
188 	}
189 
190 	/* Initialize our packet injection pipe. */
191 	if (pipe2(pipe_inject, O_NONBLOCK) == -1) {
192 		log_warn("%s: injection pipe", __func__);
193 		goto fail;
194 	}
195 
196 	/* Initialize inter-thread communication channels. */
197 	vm_pipe_init2(&pipe_main, read_pipe_main, &dev);
198 	vm_pipe_init2(&pipe_rx, read_pipe_rx, &dev);
199 	vm_pipe_init2(&pipe_tx, read_pipe_tx, &dev);
200 
201 	/* Initialize RX and TX threads. */
202 	ret = pthread_create(&rx_thread, NULL, rx_run_loop, &dev);
203 	if (ret) {
204 		errno = ret;
205 		log_warn("%s: failed to initialize rx thread", __func__);
206 		goto fail;
207 	}
208 	pthread_set_name_np(rx_thread, "rx");
209 	ret = pthread_create(&tx_thread, NULL, tx_run_loop, &dev);
210 	if (ret) {
211 		errno = ret;
212 		log_warn("%s: failed to initialize tx thread", __func__);
213 		goto fail;
214 	}
215 	pthread_set_name_np(tx_thread, "tx");
216 
217 	/* Initialize our rwlock for guarding shared device state. */
218 	ret = pthread_rwlock_init(&lock, NULL);
219 	if (ret) {
220 		errno = ret;
221 		log_warn("%s: failed to initialize rwlock", __func__);
222 		goto fail;
223 	}
224 
225 	/* Initialize libevent so we can start wiring event handlers. */
226 	ev_base_main = event_base_new();
227 
228 	/* Add our handler for receiving messages from the RX/TX threads. */
229 	event_base_set(ev_base_main, &pipe_main.read_ev);
230 	event_add(&pipe_main.read_ev, NULL);
231 
232 	/* Wire up an async imsg channel. */
233 	log_debug("%s: wiring in async vm event handler (fd=%d)", __func__,
234 		dev.async_fd);
235 	if (vm_device_pipe(&dev, dev_dispatch_vm, ev_base_main)) {
236 		ret = EIO;
237 		log_warnx("vm_device_pipe");
238 		goto fail;
239 	}
240 
241 	/* Configure our sync channel event handler. */
242 	log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
243 		dev.sync_fd);
244 	if (imsgbuf_init(&dev.sync_iev.ibuf, dev.sync_fd) == -1) {
245 		log_warnx("imsgbuf_init");
246 		goto fail;
247 	}
248 	imsgbuf_allow_fdpass(&dev.sync_iev.ibuf);
249 	dev.sync_iev.handler = handle_sync_io;
250 	dev.sync_iev.data = &dev;
251 	dev.sync_iev.events = EV_READ;
252 	imsg_event_add2(&dev.sync_iev, ev_base_main);
253 
254 	/* Send a ready message over the sync channel. */
255 	log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name);
256 	memset(&msg, 0, sizeof(msg));
257 	msg.type = VIODEV_MSG_READY;
258 	imsg_compose_event2(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
259 	    sizeof(msg), ev_base_main);
260 
261 	/* Send a ready message over the async channel. */
262 	log_debug("%s: sending async ready message", __func__);
263 	ret = imsg_compose_event2(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
264 	    &msg, sizeof(msg), ev_base_main);
265 	if (ret == -1) {
266 		log_warnx("%s: failed to send async ready message!", __func__);
267 		goto fail;
268 	}
269 
270 	/* Engage the event loop! */
271 	ret = event_base_dispatch(ev_base_main);
272 	event_base_free(ev_base_main);
273 
274 	/* Try stopping the rx & tx threads cleanly by messaging them. */
275 	vm_pipe_send(&pipe_rx, VIRTIO_THREAD_STOP);
276 	vm_pipe_send(&pipe_tx, VIRTIO_THREAD_STOP);
277 
278 	/* Wait for threads to stop. */
279 	pthread_join(rx_thread, NULL);
280 	pthread_join(tx_thread, NULL);
281 	pthread_rwlock_destroy(&lock);
282 
283 	/* Cleanup */
284 	if (ret == 0) {
285 		close_fd(dev.sync_fd);
286 		close_fd(dev.async_fd);
287 		close_fd(vionet->data_fd);
288 		close_fd(pipe_main.read);
289 		close_fd(pipe_main.write);
290 		close_fd(pipe_rx.write);
291 		close_fd(pipe_tx.write);
292 		close_fd(pipe_inject[READ]);
293 		close_fd(pipe_inject[WRITE]);
294 		_exit(ret);
295 		/* NOTREACHED */
296 	}
297 fail:
298 	/* Try firing off a message to the vm saying we're dying. */
299 	memset(&msg, 0, sizeof(msg));
300 	msg.type = VIODEV_MSG_ERROR;
301 	msg.data = ret;
302 	imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
303 	    sizeof(msg));
304 	imsgbuf_flush(&dev.sync_iev.ibuf);
305 
306 	close_fd(dev.sync_fd);
307 	close_fd(dev.async_fd);
308 	close_fd(pipe_inject[READ]);
309 	close_fd(pipe_inject[WRITE]);
310 	if (vionet != NULL)
311 		close_fd(vionet->data_fd);
312 	if (lock != NULL)
313 		pthread_rwlock_destroy(&lock);
314 	_exit(ret);
315 }
316 
317 /*
318  * Update the gpa and hva of the virtqueue.
319  */
320 static void
321 vionet_update_qa(struct vionet_dev *dev)
322 {
323 	struct virtio_vq_info *vq_info;
324 	void *hva = NULL;
325 
326 	/* Invalid queue? */
327 	if (dev->cfg.queue_select > 1)
328 		return;
329 
330 	vq_info = &dev->vq[dev->cfg.queue_select];
331 	vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
332 	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
333 
334 	if (vq_info->q_gpa == 0)
335 		vq_info->q_hva = NULL;
336 
337 	hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE));
338 	if (hva == NULL)
339 		fatalx("%s: hva == NULL", __func__);
340 
341 	vq_info->q_hva = hva;
342 }
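
/*
 * Worked example of the pfn arithmetic above, assuming the legacy virtio
 * page size of 4096 bytes (VIRTIO_PAGE_SIZE): a guest write of queue_pfn
 * 0x1234 yields q_gpa = 0x1234 * 4096 = 0x1234000, and reading the
 * register back returns 0x1234000 >> 12 = 0x1234 again.
 */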
343 
344 /*
345  * Update the queue size.
346  */
347 static void
348 vionet_update_qs(struct vionet_dev *dev)
349 {
350 	struct virtio_vq_info *vq_info;
351 
352 	/* Invalid queue? */
353 	if (dev->cfg.queue_select > 1) {
354 		log_warnx("%s: !!! invalid queue selector %d", __func__,
355 		    dev->cfg.queue_select);
356 		dev->cfg.queue_size = 0;
357 		return;
358 	}
359 
360 	vq_info = &dev->vq[dev->cfg.queue_select];
361 
362 	/* Update queue pfn/size based on queue select */
363 	dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
364 	dev->cfg.queue_size = vq_info->qs;
365 }
366 
367 /*
368  * vionet_rx
369  *
370  * Pull packet from the provided fd and fill the receive-side virtqueue. We
371  * selectively use zero-copy approaches when possible.
372  *
373  * Returns 1 if guest notification is needed. Otherwise, returns -1 on failure
374  * or 0 if no notification is needed.
375  */
376 static int
377 vionet_rx(struct vionet_dev *dev, int fd)
378 {
379 	uint16_t idx, hdr_idx;
380 	char *vr = NULL;
381 	size_t chain_len = 0, iov_cnt;
382 	struct vring_desc *desc, *table;
383 	struct vring_avail *avail;
384 	struct vring_used *used;
385 	struct virtio_vq_info *vq_info;
386 	struct iovec *iov;
387 	int notify = 0;
388 	ssize_t sz;
389 	uint8_t status = 0;
390 
391 	status = dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
392 	if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
393 		log_warnx("%s: driver not ready", __func__);
394 		return (0);
395 	}
396 
397 	vq_info = &dev->vq[RXQ];
398 	idx = vq_info->last_avail;
399 	vr = vq_info->q_hva;
400 	if (vr == NULL)
401 		fatalx("%s: vr == NULL", __func__);
402 
403 	/* Compute offsets in ring of descriptors, avail ring, and used ring */
404 	table = (struct vring_desc *)(vr);
405 	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
406 	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
407 	used->flags |= VRING_USED_F_NO_NOTIFY;
408 
409 	while (idx != avail->idx) {
410 		hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
411 		desc = &table[hdr_idx & VIONET_QUEUE_MASK];
412 		if (!DESC_WRITABLE(desc)) {
413 			log_warnx("%s: invalid descriptor state", __func__);
414 			goto reset;
415 		}
416 
417 		iov = &iov_rx[0];
418 		iov_cnt = 1;
419 
420 		/*
421 		 * First descriptor should be at least as large as the
422 		 * virtio_net_hdr. It's not technically required, but in
423 		 * legacy devices it should be safe to assume.
424 		 */
425 		iov->iov_len = desc->len;
426 		if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
427 			log_warnx("%s: invalid descriptor length", __func__);
428 			goto reset;
429 		}
430 
431 		/*
432 		 * Insert the virtio_net_hdr and adjust len/base. We do the
433 		 * pointer math here before it's a void*.
434 		 */
435 		iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
436 		if (iov->iov_base == NULL)
437 			goto reset;
438 		memset(iov->iov_base, 0, sizeof(struct virtio_net_hdr));
439 
440 		/* Tweak the iovec to account for the virtio_net_hdr. */
441 		iov->iov_len -= sizeof(struct virtio_net_hdr);
442 		iov->iov_base = hvaddr_mem(desc->addr +
443 		    sizeof(struct virtio_net_hdr), iov->iov_len);
444 		if (iov->iov_base == NULL)
445 			goto reset;
446 		chain_len = iov->iov_len;
447 
448 		/*
449 		 * Walk the remaining chain and collect remaining addresses
450 		 * and lengths.
451 		 */
452 		while (desc->flags & VRING_DESC_F_NEXT) {
453 			desc = &table[desc->next & VIONET_QUEUE_MASK];
454 			if (!DESC_WRITABLE(desc)) {
455 				log_warnx("%s: invalid descriptor state",
456 				    __func__);
457 				goto reset;
458 			}
459 
460 			/* Collect our IO information. Translate gpa's. */
461 			iov = &iov_rx[iov_cnt];
462 			iov->iov_len = desc->len;
463 			iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
464 			if (iov->iov_base == NULL)
465 				goto reset;
466 			chain_len += iov->iov_len;
467 
468 			/* Guard against infinitely looping chains. */
469 			if (++iov_cnt >= nitems(iov_rx)) {
470 				log_warnx("%s: infinite chain detected",
471 				    __func__);
472 				goto reset;
473 			}
474 		}
475 
476 		/* Make sure the driver gave us the bare minimum buffers. */
477 		if (chain_len < VIONET_MIN_TXLEN) {
478 			log_warnx("%s: insufficient buffers provided",
479 			    __func__);
480 			goto reset;
481 		}
482 
483 		/*
484 		 * If we're enforcing hardware address or handling an injected
485 		 * packet, we need to use a copy-based approach.
486 		 */
487 		if (dev->lockedmac || fd != dev->data_fd)
488 			sz = vionet_rx_copy(dev, fd, iov_rx, iov_cnt,
489 			    chain_len);
490 		else
491 			sz = vionet_rx_zerocopy(dev, fd, iov_rx, iov_cnt);
492 		if (sz == -1)
493 			goto reset;
494 		if (sz == 0)	/* No packets, so bail out for now. */
495 			break;
496 
497 		/*
498 		 * Account for the prefixed header since it wasn't included
499 		 * in the copy or zerocopy operations.
500 		 */
501 		sz += sizeof(struct virtio_net_hdr);
502 
503 		/* Mark our buffers as used. */
504 		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
505 		used->ring[used->idx & VIONET_QUEUE_MASK].len = sz;
506 		__sync_synchronize();
507 		used->idx++;
508 		idx++;
509 	}
510 
511 	if (idx != vq_info->last_avail &&
512 	    !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
513 		notify = 1;
514 	}
515 
516 	vq_info->last_avail = idx;
517 	return (notify);
518 reset:
519 	return (-1);
520 }
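
/*
 * The 16-bit avail/used indices used above are free-running and are only
 * reduced modulo the ring size when indexing.  Assuming a queue size of
 * 256 (so VIONET_QUEUE_MASK == 0xff):
 *
 *	idx = 255:	255 & VIONET_QUEUE_MASK == 255
 *	idx = 256:	256 & VIONET_QUEUE_MASK == 0	(wrapped)
 *
 * which is why the loop compares "idx != avail->idx" rather than using an
 * ordered comparison.
 */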
521 
522 /*
523  * vionet_rx_copy
524  *
525  * Read a packet off the provided file descriptor, validating packet
526  * characteristics, and copy into the provided buffers in the iovec array.
527  *
528  * It's assumed that the provided iovec array contains validated host virtual
529  * address translations and not guest physical addresses.
530  *
531  * Returns number of bytes copied on success, 0 if packet is dropped, and
532  * -1 on an error.
533  */
534 ssize_t
535 vionet_rx_copy(struct vionet_dev *dev, int fd, const struct iovec *iov,
536     int iov_cnt, size_t chain_len)
537 {
538 	static uint8_t		 buf[VIONET_HARD_MTU];
539 	struct packet		*pkt = NULL;
540 	struct ether_header	*eh = NULL;
541 	uint8_t			*payload = buf;
542 	size_t			 i, chunk, nbytes, copied = 0;
543 	ssize_t			 sz;
544 
545 	/* If reading from the tap(4), try to right-size the read. */
546 	if (fd == dev->data_fd)
547 		nbytes = MIN(chain_len, VIONET_HARD_MTU);
548 	else if (fd == pipe_inject[READ])
549 		nbytes = sizeof(struct packet);
550 	else {
551 		log_warnx("%s: invalid fd: %d", __func__, fd);
552 		return (-1);
553 	}
554 
555 	/*
556 	 * Try to pull a packet. The fd should be non-blocking and we don't
557 	 * care if we under-read (i.e. sz != nbytes) as we may not have a
558 	 * packet large enough to fill the buffer.
559 	 */
560 	sz = read(fd, buf, nbytes);
561 	if (sz == -1) {
562 		if (errno != EAGAIN) {
563 			log_warn("%s: error reading packet", __func__);
564 			return (-1);
565 		}
566 		return (0);
567 	} else if (fd == dev->data_fd && sz < VIONET_MIN_TXLEN) {
568 		/* If reading the tap(4), we should get valid ethernet. */
569 		log_warnx("%s: invalid packet size", __func__);
570 		return (0);
571 	} else if (fd == pipe_inject[READ] && sz != sizeof(struct packet)) {
572 		log_warnx("%s: invalid injected packet object (sz=%ld)",
573 		    __func__, sz);
574 		return (0);
575 	}
576 
577 	/* Decompose an injected packet, if that's what we're working with. */
578 	if (fd == pipe_inject[READ]) {
579 		pkt = (struct packet *)buf;
580 		if (pkt->buf == NULL) {
581 			log_warnx("%s: invalid injected packet, no buffer",
582 			    __func__);
583 			return (0);
584 		}
585 		if (pkt->len < VIONET_MIN_TXLEN || pkt->len > VIONET_MAX_TXLEN) {
586 			log_warnx("%s: invalid injected packet size", __func__);
587 			goto drop;
588 		}
589 		payload = pkt->buf;
590 		sz = (ssize_t)pkt->len;
591 	}
592 
593 	/* Validate the ethernet header, if required. */
594 	if (dev->lockedmac) {
595 		eh = (struct ether_header *)(payload);
596 		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
597 		    memcmp(eh->ether_dhost, dev->mac,
598 		    sizeof(eh->ether_dhost)) != 0)
599 			goto drop;
600 	}
601 
602 	/* Truncate one last time to the chain length, if shorter. */
603 	sz = MIN(chain_len, (size_t)sz);
604 
605 	/*
606 	 * Copy the packet into the provided buffers. We can use memcpy(3)
607 	 * here as the gpa was validated and translated to an hva previously.
608 	 */
609 	for (i = 0; (int)i < iov_cnt && (size_t)sz > copied; i++) {
610 		chunk = MIN(iov[i].iov_len, (size_t)(sz - copied));
611 		memcpy(iov[i].iov_base, payload + copied, chunk);
612 		copied += chunk;
613 	}
614 
615 drop:
616 	/* Free any injected packet buffer. */
617 	if (pkt != NULL)
618 		free(pkt->buf);
619 
620 	return (copied);
621 }
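
/*
 * Worked example of the scatter loop in vionet_rx_copy(): a 1514-byte
 * frame copied into a chain whose data buffers are 1000 and 1000 bytes
 * lands as 1000 bytes in iov[0] and 514 bytes in iov[1], so copied ==
 * 1514.  The caller then adds sizeof(struct virtio_net_hdr) before
 * filling in the used ring entry.
 */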
622 
623 /*
624  * vionet_rx_zerocopy
625  *
626  * Perform a vectorized read from the given fd into the guest physical memory
627  * pointed to by iovecs.
628  *
629  * Returns number of bytes read on success, -1 on error, or 0 if EAGAIN was
630  * returned by readv.
631  *
632  */
633 static ssize_t
634 vionet_rx_zerocopy(struct vionet_dev *dev, int fd, const struct iovec *iov,
635     int iov_cnt)
636 {
637 	ssize_t		sz;
638 
639 	if (dev->lockedmac) {
640 		log_warnx("%s: zerocopy not available for locked lladdr",
641 		    __func__);
642 		return (-1);
643 	}
644 
645 	sz = readv(fd, iov, iov_cnt);
646 	if (sz == -1 && errno == EAGAIN)
647 		return (0);
648 	return (sz);
649 }
650 
651 
652 /*
653  * vionet_rx_event
654  *
655  * Called when new data can be received on the tap fd of a vionet device.
656  */
657 static void
658 vionet_rx_event(int fd, short event, void *arg)
659 {
660 	struct virtio_dev	*dev = (struct virtio_dev *)arg;
661 	struct vionet_dev	*vionet = &dev->vionet;
662 	int			 ret = 0;
663 
664 	if (!(event & EV_READ))
665 		fatalx("%s: invalid event type", __func__);
666 
667 	pthread_rwlock_rdlock(&lock);
668 	ret = vionet_rx(vionet, fd);
669 	pthread_rwlock_unlock(&lock);
670 
671 	if (ret == 0) {
672 		/* Nothing to do. */
673 		return;
674 	}
675 
676 	pthread_rwlock_wrlock(&lock);
677 	if (ret == 1) {
678 		/* Notify the driver. */
679 		vionet->cfg.isr_status |= 1;
680 	} else {
681 		/* Need a reset. Something went wrong. */
682 		log_warnx("%s: requesting device reset", __func__);
683 		vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
684 		vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
685 	}
686 	pthread_rwlock_unlock(&lock);
687 
688 	vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
689 }
690 
691 static void
692 vionet_notifyq(struct virtio_dev *dev)
693 {
694 	struct vionet_dev	*vionet = &dev->vionet;
695 
696 	switch (vionet->cfg.queue_notify) {
697 	case RXQ:
698 		rx_enabled = 1;
699 		vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY);
700 		break;
701 	case TXQ:
702 		vm_pipe_send(&pipe_tx, VIRTIO_NOTIFY);
703 		break;
704 	default:
705 		/*
706 		 * Catch the unimplemented queue ID 2 (control queue) as
707 		 * well as any bogus queue IDs.
708 		 */
709 		log_debug("%s: notify for unimplemented queue ID %d",
710 		    __func__, vionet->cfg.queue_notify);
711 		break;
712 	}
713 }
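
/*
 * The notify path in outline: the guest driver writes a queue index to
 * the VIRTIO_CONFIG_QUEUE_NOTIFY register, handle_io_write() stores it in
 * cfg.queue_notify and calls vionet_notifyq(), which wakes the matching
 * worker thread over its pipe:
 *
 *	RXQ: rx_enabled = 1; vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY)
 *	TXQ: vm_pipe_send(&pipe_tx, VIRTIO_NOTIFY) -> read_pipe_tx()
 *	     -> vionet_tx()
 */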
714 
715 static int
716 vionet_tx(struct virtio_dev *dev)
717 {
718 	uint16_t idx, hdr_idx;
719 	size_t chain_len, iov_cnt;
720 	ssize_t dhcpsz = 0, sz;
721 	int notify = 0;
722 	char *vr = NULL, *dhcppkt = NULL;
723 	struct vionet_dev *vionet = &dev->vionet;
724 	struct vring_desc *desc, *table;
725 	struct vring_avail *avail;
726 	struct vring_used *used;
727 	struct virtio_vq_info *vq_info;
728 	struct ether_header *eh;
729 	struct iovec *iov;
730 	struct packet pkt;
731 	uint8_t status = 0;
732 
733 	status = vionet->cfg.device_status
734 	    & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
735 	if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
736 		log_warnx("%s: driver not ready", __func__);
737 		return (0);
738 	}
739 
740 	vq_info = &vionet->vq[TXQ];
741 	idx = vq_info->last_avail;
742 	vr = vq_info->q_hva;
743 	if (vr == NULL)
744 		fatalx("%s: vr == NULL", __func__);
745 
746 	/* Compute offsets in ring of descriptors, avail ring, and used ring */
747 	table = (struct vring_desc *)(vr);
748 	avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
749 	used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
750 
751 	while (idx != avail->idx) {
752 		hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
753 		desc = &table[hdr_idx & VIONET_QUEUE_MASK];
754 		if (DESC_WRITABLE(desc)) {
755 			log_warnx("%s: invalid descriptor state", __func__);
756 			goto reset;
757 		}
758 
759 		iov = &iov_tx[0];
760 		iov_cnt = 0;
761 		chain_len = 0;
762 
763 		/*
764 		 * As a legacy device, we most likely will receive a lead
765 		 * descriptor sized to the virtio_net_hdr. However, the framing
766 		 * is not guaranteed, so check for packet data.
767 		 */
768 		iov->iov_len = desc->len;
769 		if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
770 			log_warnx("%s: invalid descriptor length", __func__);
771 			goto reset;
772 		} else if (iov->iov_len > sizeof(struct virtio_net_hdr)) {
773 			/* Chop off the virtio header, leaving packet data. */
774 			iov->iov_len -= sizeof(struct virtio_net_hdr);
775 			chain_len += iov->iov_len;
776 			iov->iov_base = hvaddr_mem(desc->addr +
777 			    sizeof(struct virtio_net_hdr), iov->iov_len);
778 			if (iov->iov_base == NULL)
779 				goto reset;
780 			iov_cnt++;
781 		}
782 
783 		/*
784 		 * Walk the chain and collect remaining addresses and lengths.
785 		 */
786 		while (desc->flags & VRING_DESC_F_NEXT) {
787 			desc = &table[desc->next & VIONET_QUEUE_MASK];
788 			if (DESC_WRITABLE(desc)) {
789 				log_warnx("%s: invalid descriptor state",
790 				    __func__);
791 				goto reset;
792 			}
793 
794 			/* Collect our IO information, translating gpa's. */
795 			iov = &iov_tx[iov_cnt];
796 			iov->iov_len = desc->len;
797 			iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
798 			if (iov->iov_base == NULL)
799 				goto reset;
800 			chain_len += iov->iov_len;
801 
802 			/* Guard against infinitely looping chains. */
803 			if (++iov_cnt >= nitems(iov_tx)) {
804 				log_warnx("%s: infinite chain detected",
805 				    __func__);
806 				goto reset;
807 			}
808 		}
809 
810 		/* Check if we've got a minimum viable amount of data. */
811 		if (chain_len < VIONET_MIN_TXLEN)
812 			goto drop;
813 
814 		/*
815 		 * Inspect the ethernet header: on a "local" interface, check
816 		 * for a DHCP request; with a locked lladdr, validate the
817 		 * source address.
818 		 *
819 		 * To help preserve zero-copy semantics, we require the first
820 		 * descriptor with packet data contains a large enough buffer
821 		 * for this inspection.
822 		 */
823 		iov = &iov_tx[0];
824 		if (vionet->lockedmac) {
825 			if (iov->iov_len < ETHER_HDR_LEN) {
826 				log_warnx("%s: insufficient header data",
827 				    __func__);
828 				goto drop;
829 			}
830 			eh = (struct ether_header *)iov->iov_base;
831 			if (memcmp(eh->ether_shost, vionet->mac,
832 			    sizeof(eh->ether_shost)) != 0) {
833 				log_warnx("%s: bad source address %s",
834 				    __func__, ether_ntoa((struct ether_addr *)
835 					eh->ether_shost));
836 				goto drop;
837 			}
838 		}
839 		if (vionet->local) {
840 			dhcpsz = dhcp_request(dev, iov->iov_base, iov->iov_len,
841 			    &dhcppkt);
842 			if (dhcpsz > 0) {
843 				log_debug("%s: detected dhcp request of %zd bytes",
844 				    __func__, dhcpsz);
845 				goto drop;
846 			}
847 		}
848 
849 		/* Write our packet to the tap(4). */
850 		sz = writev(vionet->data_fd, iov_tx, iov_cnt);
851 		if (sz == -1 && errno != ENOBUFS) {
852 			log_warn("%s", __func__);
853 			goto reset;
854 		}
855 		chain_len += sizeof(struct virtio_net_hdr);
856 drop:
857 		used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
858 		used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len;
859 		__sync_synchronize();
860 		used->idx++;
861 		idx++;
862 
863 		/* Facilitate DHCP reply injection, if needed. */
864 		if (dhcpsz > 0) {
865 			pkt.buf = dhcppkt;
866 			pkt.len = dhcpsz;
867 			sz = write(pipe_inject[WRITE], &pkt, sizeof(pkt));
868 			if (sz == -1 && errno != EAGAIN) {
869 				log_warn("%s: packet injection", __func__);
870 				free(pkt.buf);
871 			} else if (sz == -1 && errno == EAGAIN) {
872 				log_debug("%s: dropping dhcp reply", __func__);
873 				free(pkt.buf);
874 			} else if (sz != sizeof(pkt)) {
875 				log_warnx("%s: failed packet injection",
876 				    __func__);
877 				free(pkt.buf);
878 			}
879 			log_debug("%s: injected dhcp reply with %ld bytes",
880 			    __func__, sz);
881 		}
882 	}
883 
884 	if (idx != vq_info->last_avail &&
885 	    !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
886 		notify = 1;
887 
888 
889 	vq_info->last_avail = idx;
890 	return (notify);
891 reset:
892 	return (-1);
893 }
894 
895 static void
896 dev_dispatch_vm(int fd, short event, void *arg)
897 {
898 	struct virtio_dev	*dev = arg;
899 	struct vionet_dev	*vionet = &dev->vionet;
900 	struct imsgev		*iev = &dev->async_iev;
901 	struct imsgbuf		*ibuf = &iev->ibuf;
902 	struct imsg	 	 imsg;
903 	ssize_t			 n = 0;
904 	int			 verbose;
905 
906 	if (dev == NULL)
907 		fatalx("%s: missing vionet pointer", __func__);
908 
909 	if (event & EV_READ) {
910 		if ((n = imsgbuf_read(ibuf)) == -1)
911 			fatal("%s: imsgbuf_read", __func__);
912 		if (n == 0) {
913 			/* this pipe is dead, so remove the event handler */
914 			log_debug("%s: pipe dead (EV_READ)", __func__);
915 			event_del(&iev->ev);
916 			event_base_loopexit(ev_base_main, NULL);
917 			return;
918 		}
919 	}
920 
921 	if (event & EV_WRITE) {
922 		if (imsgbuf_write(ibuf) == -1) {
923 			if (errno == EPIPE) {
924 				/* this pipe is dead, remove the handler */
925 				log_debug("%s: pipe dead (EV_WRITE)", __func__);
926 				event_del(&iev->ev);
927 				event_loopexit(NULL);
928 				return;
929 			}
930 			fatal("%s: imsgbuf_write", __func__);
931 		}
932 	}
933 
934 	for (;;) {
935 		if ((n = imsg_get(ibuf, &imsg)) == -1)
936 			fatal("%s: imsg_get", __func__);
937 		if (n == 0)
938 			break;
939 
940 		switch (imsg.hdr.type) {
941 		case IMSG_DEVOP_HOSTMAC:
942 			IMSG_SIZE_CHECK(&imsg, vionet->hostmac);
943 			memcpy(vionet->hostmac, imsg.data,
944 			    sizeof(vionet->hostmac));
945 			log_debug("%s: set hostmac", __func__);
946 			break;
947 		case IMSG_VMDOP_PAUSE_VM:
948 			log_debug("%s: pausing", __func__);
949 			vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
950 			break;
951 		case IMSG_VMDOP_UNPAUSE_VM:
952 			log_debug("%s: unpausing", __func__);
953 			if (rx_enabled)
954 				vm_pipe_send(&pipe_rx, VIRTIO_THREAD_START);
955 			break;
956 		case IMSG_CTL_VERBOSE:
957 			IMSG_SIZE_CHECK(&imsg, &verbose);
958 			memcpy(&verbose, imsg.data, sizeof(verbose));
959 			log_setverbose(verbose);
960 			break;
961 		}
962 		imsg_free(&imsg);
963 	}
964 	imsg_event_add2(iev, ev_base_main);
965 }
966 
967 /*
968  * Synchronous IO handler.
969  *
970  */
971 static void
972 handle_sync_io(int fd, short event, void *arg)
973 {
974 	struct virtio_dev *dev = (struct virtio_dev *)arg;
975 	struct imsgev *iev = &dev->sync_iev;
976 	struct imsgbuf *ibuf = &iev->ibuf;
977 	struct viodev_msg msg;
978 	struct imsg imsg;
979 	ssize_t n;
980 	int8_t intr = INTR_STATE_NOOP;
981 
982 	if (event & EV_READ) {
983 		if ((n = imsgbuf_read(ibuf)) == -1)
984 			fatal("%s: imsgbuf_read", __func__);
985 		if (n == 0) {
986 			/* this pipe is dead, so remove the event handler */
987 			log_debug("%s: pipe dead (EV_READ)", __func__);
988 			event_del(&iev->ev);
989 			event_base_loopexit(ev_base_main, NULL);
990 			return;
991 		}
992 	}
993 
994 	if (event & EV_WRITE) {
995 		if (imsgbuf_write(ibuf) == -1) {
996 			if (errno == EPIPE) {
997 				/* this pipe is dead, remove the handler */
998 				log_debug("%s: pipe dead (EV_WRITE)", __func__);
999 				event_del(&iev->ev);
1000 				event_loopexit(NULL);
1001 				return;
1002 			}
1003 			fatal("%s: imsgbuf_write", __func__);
1004 		}
1005 	}
1006 
1007 	for (;;) {
1008 		if ((n = imsg_get(ibuf, &imsg)) == -1)
1009 			fatalx("%s: imsg_get (n=%ld)", __func__, n);
1010 		if (n == 0)
1011 			break;
1012 
1013 		/* Unpack our message. They ALL should be dev messages! */
1014 		IMSG_SIZE_CHECK(&imsg, &msg);
1015 		memcpy(&msg, imsg.data, sizeof(msg));
1016 		imsg_free(&imsg);
1017 
1018 		switch (msg.type) {
1019 		case VIODEV_MSG_DUMP:
1020 			/* Dump device */
1021 			n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev));
1022 			if (n != sizeof(*dev)) {
1023 				log_warnx("%s: failed to dump vionet device",
1024 				    __func__);
1025 				break;
1026 			}
1027 		case VIODEV_MSG_IO_READ:
1028 			/* Read IO: make sure to send a reply */
1029 			msg.data = handle_io_read(&msg, dev, &intr);
1030 			msg.data_valid = 1;
1031 			msg.state = intr;
1032 			imsg_compose_event2(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1033 			    sizeof(msg), ev_base_main);
1034 			break;
1035 		case VIODEV_MSG_IO_WRITE:
1036 			/* Write IO: no reply needed */
1037 			handle_io_write(&msg, dev);
1038 			break;
1039 		case VIODEV_MSG_SHUTDOWN:
1040 			event_del(&dev->sync_iev.ev);
1041 			event_base_loopbreak(ev_base_main);
1042 			return;
1043 		default:
1044 			fatalx("%s: invalid msg type %d", __func__, msg.type);
1045 		}
1046 	}
1047 	imsg_event_add2(iev, ev_base_main);
1048 }
1049 
1050 static void
1051 handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev)
1052 {
1053 	struct vionet_dev	*vionet = &dev->vionet;
1054 	uint32_t		 data = msg->data;
1055 	int			 pause_devices = 0;
1056 
1057 	pthread_rwlock_wrlock(&lock);
1058 
1059 	switch (msg->reg) {
1060 	case VIRTIO_CONFIG_DEVICE_FEATURES:
1061 	case VIRTIO_CONFIG_QUEUE_SIZE:
1062 	case VIRTIO_CONFIG_ISR_STATUS:
1063 		log_warnx("%s: illegal write %x to %s", __progname, data,
1064 		    virtio_reg_name(msg->reg));
1065 		break;
1066 	case VIRTIO_CONFIG_GUEST_FEATURES:
1067 		vionet->cfg.guest_feature = data;
1068 		break;
1069 	case VIRTIO_CONFIG_QUEUE_PFN:
1070 		vionet->cfg.queue_pfn = data;
1071 		vionet_update_qa(vionet);
1072 		break;
1073 	case VIRTIO_CONFIG_QUEUE_SELECT:
1074 		vionet->cfg.queue_select = data;
1075 		vionet_update_qs(vionet);
1076 		break;
1077 	case VIRTIO_CONFIG_QUEUE_NOTIFY:
1078 		vionet->cfg.queue_notify = data;
1079 		vionet_notifyq(dev);
1080 		break;
1081 	case VIRTIO_CONFIG_DEVICE_STATUS:
1082 		if (data == 0) {
1083 			resetting = 2;	/* Wait on two acks: rx & tx */
1084 			pause_devices = 1;
1085 		} else {
1086 			// XXX is this correct?
1087 			vionet->cfg.device_status = data;
1088 		}
1089 		break;
1090 	}
1091 
1092 	pthread_rwlock_unlock(&lock);
1093 	if (pause_devices) {
1094 		rx_enabled = 0;
1095 		vionet_deassert_pic_irq(dev);
1096 		vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
1097 		vm_pipe_send(&pipe_tx, VIRTIO_THREAD_PAUSE);
1098 	}
1099 }
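
/*
 * The reset handshake triggered by a zero write to DEVICE_STATUS, in
 * outline: handle_io_write() sets resetting = 2, deasserts the irq and
 * sends VIRTIO_THREAD_PAUSE to both worker threads; each thread answers
 * with VIRTIO_THREAD_ACK on pipe_main; read_pipe_main() decrements
 * resetting for every ack and clears the device configuration once it
 * reaches zero.
 */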
1100 
1101 static uint32_t
1102 handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
1103 {
1104 	struct vionet_dev *vionet = &dev->vionet;
1105 	uint32_t data;
1106 
1107 	pthread_rwlock_rdlock(&lock);
1108 
1109 	switch (msg->reg) {
1110 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
1111 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
1112 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
1113 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
1114 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
1115 	case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
1116 		data = vionet->mac[msg->reg -
1117 		    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
1118 		break;
1119 	case VIRTIO_CONFIG_DEVICE_FEATURES:
1120 		data = vionet->cfg.device_feature;
1121 		break;
1122 	case VIRTIO_CONFIG_GUEST_FEATURES:
1123 		data = vionet->cfg.guest_feature;
1124 		break;
1125 	case VIRTIO_CONFIG_QUEUE_PFN:
1126 		data = vionet->cfg.queue_pfn;
1127 		break;
1128 	case VIRTIO_CONFIG_QUEUE_SIZE:
1129 		data = vionet->cfg.queue_size;
1130 		break;
1131 	case VIRTIO_CONFIG_QUEUE_SELECT:
1132 		data = vionet->cfg.queue_select;
1133 		break;
1134 	case VIRTIO_CONFIG_QUEUE_NOTIFY:
1135 		data = vionet->cfg.queue_notify;
1136 		break;
1137 	case VIRTIO_CONFIG_DEVICE_STATUS:
1138 		data = vionet->cfg.device_status;
1139 		break;
1140 	case VIRTIO_CONFIG_ISR_STATUS:
1141 		pthread_rwlock_unlock(&lock);
1142 		pthread_rwlock_wrlock(&lock);
1143 		data = vionet->cfg.isr_status;
1144 		vionet->cfg.isr_status = 0;
1145 		if (intr != NULL)
1146 			*intr = INTR_STATE_DEASSERT;
1147 		break;
1148 	default:
1149 		data = 0xFFFFFFFF;
1150 	}
1151 
1152 	pthread_rwlock_unlock(&lock);
1153 	return (data);
1154 }
1155 
1156 /*
1157  * Handle the rx side processing, communicating to the main thread via pipe.
1158  */
1159 static void *
1160 rx_run_loop(void *arg)
1161 {
1162 	struct virtio_dev	*dev = (struct virtio_dev *)arg;
1163 	struct vionet_dev	*vionet = &dev->vionet;
1164 	int			 ret;
1165 
1166 	ev_base_rx = event_base_new();
1167 
1168 	/* Wire up event handling for the tap fd. */
1169 	event_set(&ev_tap, vionet->data_fd, EV_READ | EV_PERSIST,
1170 	    vionet_rx_event, dev);
1171 	event_base_set(ev_base_rx, &ev_tap);
1172 
1173 	/* Wire up event handling for the packet injection pipe. */
1174 	event_set(&ev_inject, pipe_inject[READ], EV_READ | EV_PERSIST,
1175 	    vionet_rx_event, dev);
1176 	event_base_set(ev_base_rx, &ev_inject);
1177 
1178 	/* Wire up event handling for our inter-thread communication channel. */
1179 	event_base_set(ev_base_rx, &pipe_rx.read_ev);
1180 	event_add(&pipe_rx.read_ev, NULL);
1181 
1182 	/* Begin our event loop with our channel event active. */
1183 	ret = event_base_dispatch(ev_base_rx);
1184 	event_base_free(ev_base_rx);
1185 
1186 	log_debug("%s: exiting (%d)", __func__, ret);
1187 
1188 	close_fd(pipe_rx.read);
1189 	close_fd(pipe_inject[READ]);
1190 
1191 	return (NULL);
1192 }
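
/*
 * The rx thread sits idle with only pipe_rx.read_ev armed until the main
 * thread wakes it, e.g. from vionet_notifyq():
 *
 *	vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY);
 *
 * read_pipe_rx() then event_add()s ev_tap and ev_inject so tap traffic
 * and injected packets start flowing through vionet_rx_event().
 */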
1193 
1194 /*
1195  * Handle the tx side processing, communicating to the main thread via pipe.
1196  */
1197 static void *
1198 tx_run_loop(void *arg)
1199 {
1200 	int			 ret;
1201 
1202 	ev_base_tx = event_base_new();
1203 
1204 	/* Wire up event handling for our inter-thread communication channel. */
1205 	event_base_set(ev_base_tx, &pipe_tx.read_ev);
1206 	event_add(&pipe_tx.read_ev, NULL);
1207 
1208 	/* Begin our event loop with our channel event active. */
1209 	ret = event_base_dispatch(ev_base_tx);
1210 	event_base_free(ev_base_tx);
1211 
1212 	log_debug("%s: exiting (%d)", __func__, ret);
1213 
1214 	close_fd(pipe_tx.read);
1215 
1216 	return (NULL);
1217 }
1218 
1219 /*
1220  * Read events sent by the main thread to the rx thread.
1221  */
1222 static void
1223 read_pipe_rx(int fd, short event, void *arg)
1224 {
1225 	enum pipe_msg_type	msg;
1226 
1227 	if (!(event & EV_READ))
1228 		fatalx("%s: invalid event type", __func__);
1229 
1230 	msg = vm_pipe_recv(&pipe_rx);
1231 
1232 	switch (msg) {
1233 	case VIRTIO_NOTIFY:
1234 	case VIRTIO_THREAD_START:
1235 		event_add(&ev_tap, NULL);
1236 		event_add(&ev_inject, NULL);
1237 		break;
1238 	case VIRTIO_THREAD_PAUSE:
1239 		event_del(&ev_tap);
1240 		event_del(&ev_inject);
1241 		vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
1242 		break;
1243 	case VIRTIO_THREAD_STOP:
1244 		event_del(&ev_tap);
1245 		event_del(&ev_inject);
1246 		event_base_loopexit(ev_base_rx, NULL);
1247 		break;
1248 	default:
1249 		fatalx("%s: invalid channel message: %d", __func__, msg);
1250 	}
1251 }
1252 
1253 /*
1254  * Read events sent by the main thread to the tx thread.
1255  */
1256 static void
1257 read_pipe_tx(int fd, short event, void *arg)
1258 {
1259 	struct virtio_dev	*dev = (struct virtio_dev*)arg;
1260 	struct vionet_dev	*vionet = &dev->vionet;
1261 	enum pipe_msg_type	 msg;
1262 	int			 ret = 0;
1263 
1264 	if (!(event & EV_READ))
1265 		fatalx("%s: invalid event type", __func__);
1266 
1267 	msg = vm_pipe_recv(&pipe_tx);
1268 
1269 	switch (msg) {
1270 	case VIRTIO_NOTIFY:
1271 		pthread_rwlock_rdlock(&lock);
1272 		ret = vionet_tx(dev);
1273 		pthread_rwlock_unlock(&lock);
1274 		break;
1275 	case VIRTIO_THREAD_START:
1276 		/* Ignore Start messages. */
1277 		break;
1278 	case VIRTIO_THREAD_PAUSE:
1279 		/*
1280 		 * Nothing to do when pausing on the tx side, but ACK so main
1281 		 * thread knows we're not transmitting.
1282 		 */
1283 		vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
1284 		break;
1285 	case VIRTIO_THREAD_STOP:
1286 		event_base_loopexit(ev_base_tx, NULL);
1287 		break;
1288 	default:
1289 		fatalx("%s: invalid channel message: %d", __func__, msg);
1290 	}
1291 
1292 	if (ret == 0) {
1293 		/* No notification needed. Return early. */
1294 		return;
1295 	}
1296 
1297 	pthread_rwlock_wrlock(&lock);
1298 	if (ret == 1) {
1299 		/* Notify the driver. */
1300 		vionet->cfg.isr_status |= 1;
1301 	} else {
1302 		/* Need a reset. Something went wrong. */
1303 		log_warnx("%s: requesting device reset", __func__);
1304 		vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
1305 		vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
1306 	}
1307 	pthread_rwlock_unlock(&lock);
1308 
1309 	vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
1310 }
1311 
1312 /*
1313  * Read events sent by the rx/tx threads to the main thread.
1314  */
1315 static void
1316 read_pipe_main(int fd, short event, void *arg)
1317 {
1318 	struct virtio_dev	*dev = (struct virtio_dev*)arg;
1319 	struct vionet_dev	*vionet = &dev->vionet;
1320 	enum pipe_msg_type	 msg;
1321 
1322 	if (!(event & EV_READ))
1323 		fatalx("%s: invalid event type", __func__);
1324 
1325 	msg = vm_pipe_recv(&pipe_main);
1326 	switch (msg) {
1327 	case VIRTIO_RAISE_IRQ:
1328 		vionet_assert_pic_irq(dev);
1329 		break;
1330 	case VIRTIO_THREAD_ACK:
1331 		resetting--;
1332 		if (resetting == 0) {
1333 			log_debug("%s: resetting virtio network device %d",
1334 			    __func__, vionet->idx);
1335 
1336 			pthread_rwlock_wrlock(&lock);
1337 			vionet->cfg.device_status = 0;
1338 			vionet->cfg.guest_feature = 0;
1339 			vionet->cfg.queue_pfn = 0;
1340 			vionet_update_qa(vionet);
1341 			vionet->cfg.queue_size = 0;
1342 			vionet_update_qs(vionet);
1343 			vionet->cfg.queue_select = 0;
1344 			vionet->cfg.queue_notify = 0;
1345 			vionet->cfg.isr_status = 0;
1346 			vionet->vq[RXQ].last_avail = 0;
1347 			vionet->vq[RXQ].notified_avail = 0;
1348 			vionet->vq[TXQ].last_avail = 0;
1349 			vionet->vq[TXQ].notified_avail = 0;
1350 			pthread_rwlock_unlock(&lock);
1351 		}
1352 		break;
1353 	default:
1354 		fatalx("%s: invalid channel msg: %d", __func__, msg);
1355 	}
1356 }
1357 
1358 /*
1359  * Message the vm process asking to raise the irq. Must be called from the main
1360  * thread.
1361  */
1362 static void
1363 vionet_assert_pic_irq(struct virtio_dev *dev)
1364 {
1365 	struct viodev_msg	msg;
1366 	int			ret;
1367 
1368 	memset(&msg, 0, sizeof(msg));
1369 	msg.irq = dev->irq;
1370 	msg.vcpu = 0; // XXX
1371 	msg.type = VIODEV_MSG_KICK;
1372 	msg.state = INTR_STATE_ASSERT;
1373 
1374 	ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
1375 	    &msg, sizeof(msg), ev_base_main);
1376 	if (ret == -1)
1377 		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
1378 }
1379 
1380 /*
1381  * Message the vm process asking to lower the irq. Must be called from the main
1382  * thread.
1383  */
1384 static void
1385 vionet_deassert_pic_irq(struct virtio_dev *dev)
1386 {
1387 	struct viodev_msg	msg;
1388 	int			ret;
1389 
1390 	memset(&msg, 0, sizeof(msg));
1391 	msg.irq = dev->irq;
1392 	msg.vcpu = 0; // XXX
1393 	msg.type = VIODEV_MSG_KICK;
1394 	msg.state = INTR_STATE_DEASSERT;
1395 
1396 	ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
1397 	    &msg, sizeof(msg), ev_base_main);
1398 	if (ret == -1)
1399 		log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
1400 }
1401