1 /* $OpenBSD: vionet.c,v 1.22 2024/11/21 13:39:34 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
5 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <sys/types.h>
20
21 #include <dev/pci/virtio_pcireg.h>
22 #include <dev/pv/virtioreg.h>
23
24 #include <net/if.h>
25 #include <netinet/in.h>
26 #include <netinet/if_ether.h>
27
28 #include <errno.h>
29 #include <event.h>
30 #include <fcntl.h>
31 #include <pthread.h>
32 #include <pthread_np.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36
37 #include "atomicio.h"
38 #include "virtio.h"
39 #include "vmd.h"
40
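/* Legacy virtio-net feature bit: device provides its MAC in config space. */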
41 #define VIRTIO_NET_F_MAC (1 << 5)
42 #define RXQ 0
43 #define TXQ 1
44
45 extern char *__progname;
46 extern struct vmd_vm *current_vm;
47
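/*
 * Packets handed to the rx thread over the injection pipe (e.g. locally
 * generated DHCP replies) are described by a struct packet. The buffer is
 * heap-allocated by the sender and freed by the receiver once it has been
 * copied into the guest.
 */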
48 struct packet {
49 uint8_t *buf;
50 size_t len;
51 };
52
53 static void *rx_run_loop(void *);
54 static void *tx_run_loop(void *);
55 static int vionet_rx(struct vionet_dev *, int);
56 static ssize_t vionet_rx_copy(struct vionet_dev *, int, const struct iovec *,
57 int, size_t);
58 static ssize_t vionet_rx_zerocopy(struct vionet_dev *, int,
59 const struct iovec *, int);
60 static void vionet_rx_event(int, short, void *);
61 static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
62 int8_t *);
63 static void handle_io_write(struct viodev_msg *, struct virtio_dev *);
64 static int vionet_tx(struct virtio_dev *);
65 static void vionet_notifyq(struct virtio_dev *);
66 static void dev_dispatch_vm(int, short, void *);
67 static void handle_sync_io(int, short, void *);
68 static void read_pipe_main(int, short, void *);
69 static void read_pipe_rx(int, short, void *);
70 static void read_pipe_tx(int, short, void *);
71 static void vionet_assert_pic_irq(struct virtio_dev *);
72 static void vionet_deassert_pic_irq(struct virtio_dev *);
73
74 /* Device Globals */
75 struct event ev_tap;
76 struct event ev_inject;
77 struct event_base *ev_base_main;
78 struct event_base *ev_base_rx;
79 struct event_base *ev_base_tx;
80 pthread_t rx_thread;
81 pthread_t tx_thread;
82 struct vm_dev_pipe pipe_main;
83 struct vm_dev_pipe pipe_rx;
84 struct vm_dev_pipe pipe_tx;
85 int pipe_inject[2];
86 #define READ 0
87 #define WRITE 1
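/*
 * Scratch iovec arrays for walking descriptor chains, sized to the queue.
 * iov_rx is only used by the rx thread and iov_tx only by the tx thread,
 * so no additional locking is needed for them.
 */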
88 struct iovec iov_rx[VIONET_QUEUE_SIZE];
89 struct iovec iov_tx[VIONET_QUEUE_SIZE];
90 pthread_rwlock_t lock = NULL; /* Guards device config state. */
91 int resetting = 0; /* Transient reset state used to coordinate reset. */
92 int rx_enabled = 0; /* 1: we expect to read the tap, 0: wait for notify. */
93
94 __dead void
vionet_main(int fd, int fd_vmm)
96 {
97 struct virtio_dev dev;
98 struct vionet_dev *vionet = NULL;
99 struct viodev_msg msg;
100 struct vmd_vm vm;
101 struct vm_create_params *vcp;
102 ssize_t sz;
103 int ret;
104
105 /*
     * stdio - needed for read/write to the tap fd and channels to the vm.
107 * vmm + proc - needed to create shared vm mappings.
108 */
109 if (pledge("stdio vmm proc", NULL) == -1)
110 fatal("pledge");
111
112 /* Initialize iovec arrays. */
113 memset(iov_rx, 0, sizeof(iov_rx));
114 memset(iov_tx, 0, sizeof(iov_tx));
115
116 /* Receive our vionet_dev, mostly preconfigured. */
117 sz = atomicio(read, fd, &dev, sizeof(dev));
118 if (sz != sizeof(dev)) {
119 ret = errno;
120 log_warn("failed to receive vionet");
121 goto fail;
122 }
123 if (dev.dev_type != VMD_DEVTYPE_NET) {
124 ret = EINVAL;
125 log_warn("received invalid device type");
126 goto fail;
127 }
128 dev.sync_fd = fd;
129 vionet = &dev.vionet;
130
131 log_debug("%s: got vionet dev. tap fd = %d, syncfd = %d, asyncfd = %d"
132 ", vmm fd = %d", __func__, vionet->data_fd, dev.sync_fd,
133 dev.async_fd, fd_vmm);
134
135 /* Receive our vm information from the vm process. */
136 memset(&vm, 0, sizeof(vm));
137 sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm));
138 if (sz != sizeof(vm)) {
139 ret = EIO;
140 log_warnx("failed to receive vm details");
141 goto fail;
142 }
143 vcp = &vm.vm_params.vmc_params;
144 current_vm = &vm;
145 setproctitle("%s/vionet%d", vcp->vcp_name, vionet->idx);
146 log_procinit("vm/%s/vionet%d", vcp->vcp_name, vionet->idx);
147
148 /* Now that we have our vm information, we can remap memory. */
149 ret = remap_guest_mem(&vm, fd_vmm);
150 if (ret) {
151 fatal("%s: failed to remap", __func__);
152 goto fail;
153 }
154
155 /*
156 * We no longer need /dev/vmm access.
157 */
158 close_fd(fd_vmm);
159 if (pledge("stdio", NULL) == -1)
160 fatal("pledge2");
161
162 /* If we're restoring hardware, re-initialize virtqueue hva's. */
163 if (vm.vm_state & VM_STATE_RECEIVED) {
164 struct virtio_vq_info *vq_info;
165 void *hva = NULL;
166
167 vq_info = &dev.vionet.vq[TXQ];
168 if (vq_info->q_gpa != 0) {
169 log_debug("%s: restoring TX virtqueue for gpa 0x%llx",
170 __func__, vq_info->q_gpa);
171 hva = hvaddr_mem(vq_info->q_gpa,
172 vring_size(VIONET_QUEUE_SIZE));
173 if (hva == NULL)
174 fatalx("%s: hva == NULL", __func__);
175 vq_info->q_hva = hva;
176 }
177
178 vq_info = &dev.vionet.vq[RXQ];
179 if (vq_info->q_gpa != 0) {
180 log_debug("%s: restoring RX virtqueue for gpa 0x%llx",
181 __func__, vq_info->q_gpa);
182 hva = hvaddr_mem(vq_info->q_gpa,
183 vring_size(VIONET_QUEUE_SIZE));
184 if (hva == NULL)
185 fatalx("%s: hva == NULL", __func__);
186 vq_info->q_hva = hva;
187 }
188 }
189
190 /* Initialize our packet injection pipe. */
191 if (pipe2(pipe_inject, O_NONBLOCK) == -1) {
192 log_warn("%s: injection pipe", __func__);
193 goto fail;
194 }
195
196 /* Initialize inter-thread communication channels. */
197 vm_pipe_init2(&pipe_main, read_pipe_main, &dev);
198 vm_pipe_init2(&pipe_rx, read_pipe_rx, &dev);
199 vm_pipe_init2(&pipe_tx, read_pipe_tx, &dev);
200
    /* Initialize RX and TX threads. */
202 ret = pthread_create(&rx_thread, NULL, rx_run_loop, &dev);
203 if (ret) {
204 errno = ret;
205 log_warn("%s: failed to initialize rx thread", __func__);
206 goto fail;
207 }
208 pthread_set_name_np(rx_thread, "rx");
209 ret = pthread_create(&tx_thread, NULL, tx_run_loop, &dev);
210 if (ret) {
211 errno = ret;
212 log_warn("%s: failed to initialize tx thread", __func__);
213 goto fail;
214 }
215 pthread_set_name_np(tx_thread, "tx");
216
217 /* Initialize our rwlock for guarding shared device state. */
218 ret = pthread_rwlock_init(&lock, NULL);
219 if (ret) {
220 errno = ret;
221 log_warn("%s: failed to initialize rwlock", __func__);
222 goto fail;
223 }
224
225 /* Initialize libevent so we can start wiring event handlers. */
226 ev_base_main = event_base_new();
227
228 /* Add our handler for receiving messages from the RX/TX threads. */
229 event_base_set(ev_base_main, &pipe_main.read_ev);
230 event_add(&pipe_main.read_ev, NULL);
231
232 /* Wire up an async imsg channel. */
233 log_debug("%s: wiring in async vm event handler (fd=%d)", __func__,
234 dev.async_fd);
235 if (vm_device_pipe(&dev, dev_dispatch_vm, ev_base_main)) {
236 ret = EIO;
237 log_warnx("vm_device_pipe");
238 goto fail;
239 }
240
241 /* Configure our sync channel event handler. */
242 log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
243 dev.sync_fd);
244 if (imsgbuf_init(&dev.sync_iev.ibuf, dev.sync_fd) == -1) {
245 log_warnx("imsgbuf_init");
246 goto fail;
247 }
248 imsgbuf_allow_fdpass(&dev.sync_iev.ibuf);
249 dev.sync_iev.handler = handle_sync_io;
250 dev.sync_iev.data = &dev;
251 dev.sync_iev.events = EV_READ;
252 imsg_event_add2(&dev.sync_iev, ev_base_main);
253
254 /* Send a ready message over the sync channel. */
255 log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name);
256 memset(&msg, 0, sizeof(msg));
257 msg.type = VIODEV_MSG_READY;
258 imsg_compose_event2(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
259 sizeof(msg), ev_base_main);
260
261 /* Send a ready message over the async channel. */
262 log_debug("%s: sending async ready message", __func__);
263 ret = imsg_compose_event2(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
264 &msg, sizeof(msg), ev_base_main);
265 if (ret == -1) {
266 log_warnx("%s: failed to send async ready message!", __func__);
267 goto fail;
268 }
269
270 /* Engage the event loop! */
271 ret = event_base_dispatch(ev_base_main);
272 event_base_free(ev_base_main);
273
274 /* Try stopping the rx & tx threads cleanly by messaging them. */
275 vm_pipe_send(&pipe_rx, VIRTIO_THREAD_STOP);
276 vm_pipe_send(&pipe_tx, VIRTIO_THREAD_STOP);
277
278 /* Wait for threads to stop. */
279 pthread_join(rx_thread, NULL);
280 pthread_join(tx_thread, NULL);
281 pthread_rwlock_destroy(&lock);
282
283 /* Cleanup */
284 if (ret == 0) {
285 close_fd(dev.sync_fd);
286 close_fd(dev.async_fd);
287 close_fd(vionet->data_fd);
288 close_fd(pipe_main.read);
289 close_fd(pipe_main.write);
290 close_fd(pipe_rx.write);
291 close_fd(pipe_tx.write);
292 close_fd(pipe_inject[READ]);
293 close_fd(pipe_inject[WRITE]);
294 _exit(ret);
295 /* NOTREACHED */
296 }
297 fail:
298 /* Try firing off a message to the vm saying we're dying. */
299 memset(&msg, 0, sizeof(msg));
300 msg.type = VIODEV_MSG_ERROR;
301 msg.data = ret;
302 imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
303 sizeof(msg));
304 imsgbuf_flush(&dev.sync_iev.ibuf);
305
306 close_fd(dev.sync_fd);
307 close_fd(dev.async_fd);
308 close_fd(pipe_inject[READ]);
309 close_fd(pipe_inject[WRITE]);
310 if (vionet != NULL)
311 close_fd(vionet->data_fd);
312 if (lock != NULL)
313 pthread_rwlock_destroy(&lock);
314 _exit(ret);
315 }
316
317 /*
318 * Update the gpa and hva of the virtqueue.
319 */
320 static void
vionet_update_qa(struct vionet_dev *dev)
322 {
323 struct virtio_vq_info *vq_info;
324 void *hva = NULL;
325
326 /* Invalid queue? */
327 if (dev->cfg.queue_select > 1)
328 return;
329
330 vq_info = &dev->vq[dev->cfg.queue_select];
331 vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
332 dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
333
334 if (vq_info->q_gpa == 0)
335 vq_info->q_hva = NULL;
336
337 hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE));
338 if (hva == NULL)
339 fatalx("%s: hva == NULL", __func__);
340
341 vq_info->q_hva = hva;
342 }
343
344 /*
345 * Update the queue size.
346 */
347 static void
vionet_update_qs(struct vionet_dev *dev)
349 {
350 struct virtio_vq_info *vq_info;
351
352 /* Invalid queue? */
353 if (dev->cfg.queue_select > 1) {
354 log_warnx("%s: !!! invalid queue selector %d", __func__,
355 dev->cfg.queue_select);
356 dev->cfg.queue_size = 0;
357 return;
358 }
359
360 vq_info = &dev->vq[dev->cfg.queue_select];
361
362 /* Update queue pfn/size based on queue select */
363 dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
364 dev->cfg.queue_size = vq_info->qs;
365 }
366
367 /*
368 * vionet_rx
369 *
370 * Pull packet from the provided fd and fill the receive-side virtqueue. We
371 * selectively use zero-copy approaches when possible.
372 *
373 * Returns 1 if guest notification is needed. Otherwise, returns -1 on failure
374 * or 0 if no notification is needed.
375 */
376 static int
vionet_rx(struct vionet_dev *dev, int fd)
378 {
379 uint16_t idx, hdr_idx;
380 char *vr = NULL;
381 size_t chain_len = 0, iov_cnt;
382 struct vring_desc *desc, *table;
383 struct vring_avail *avail;
384 struct vring_used *used;
385 struct virtio_vq_info *vq_info;
386 struct iovec *iov;
387 int notify = 0;
388 ssize_t sz;
389 uint8_t status = 0;
390
391 status = dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
392 if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
393 log_warnx("%s: driver not ready", __func__);
394 return (0);
395 }
396
397 vq_info = &dev->vq[RXQ];
398 idx = vq_info->last_avail;
399 vr = vq_info->q_hva;
400 if (vr == NULL)
401 fatalx("%s: vr == NULL", __func__);
402
403 /* Compute offsets in ring of descriptors, avail ring, and used ring */
404 table = (struct vring_desc *)(vr);
405 avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
406 used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
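    /*
     * Hint to the driver that it does not need to notify us when it adds
     * rx buffers; this path is driven by tap readability events.
     */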
407 used->flags |= VRING_USED_F_NO_NOTIFY;
408
409 while (idx != avail->idx) {
410 hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
411 desc = &table[hdr_idx & VIONET_QUEUE_MASK];
412 if (!DESC_WRITABLE(desc)) {
413 log_warnx("%s: invalid descriptor state", __func__);
414 goto reset;
415 }
416
417 iov = &iov_rx[0];
418 iov_cnt = 1;
419
420 /*
421 * First descriptor should be at least as large as the
422 * virtio_net_hdr. It's not technically required, but in
423 * legacy devices it should be safe to assume.
424 */
425 iov->iov_len = desc->len;
426 if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
427 log_warnx("%s: invalid descriptor length", __func__);
428 goto reset;
429 }
430
431 /*
432 * Insert the virtio_net_hdr and adjust len/base. We do the
433 * pointer math here before it's a void*.
434 */
435 iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
436 if (iov->iov_base == NULL)
437 goto reset;
438 memset(iov->iov_base, 0, sizeof(struct virtio_net_hdr));
439
440 /* Tweak the iovec to account for the virtio_net_hdr. */
441 iov->iov_len -= sizeof(struct virtio_net_hdr);
442 iov->iov_base = hvaddr_mem(desc->addr +
443 sizeof(struct virtio_net_hdr), iov->iov_len);
444 if (iov->iov_base == NULL)
445 goto reset;
446 chain_len = iov->iov_len;
447
448 /*
449 * Walk the remaining chain and collect remaining addresses
450 * and lengths.
451 */
452 while (desc->flags & VRING_DESC_F_NEXT) {
453 desc = &table[desc->next & VIONET_QUEUE_MASK];
454 if (!DESC_WRITABLE(desc)) {
455 log_warnx("%s: invalid descriptor state",
456 __func__);
457 goto reset;
458 }
459
460 /* Collect our IO information. Translate gpa's. */
461 iov = &iov_rx[iov_cnt];
462 iov->iov_len = desc->len;
463 iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
464 if (iov->iov_base == NULL)
465 goto reset;
466 chain_len += iov->iov_len;
467
468 /* Guard against infinitely looping chains. */
469 if (++iov_cnt >= nitems(iov_rx)) {
470 log_warnx("%s: infinite chain detected",
471 __func__);
472 goto reset;
473 }
474 }
475
476 /* Make sure the driver gave us the bare minimum buffers. */
477 if (chain_len < VIONET_MIN_TXLEN) {
478 log_warnx("%s: insufficient buffers provided",
479 __func__);
480 goto reset;
481 }
482
483 /*
484 * If we're enforcing hardware address or handling an injected
485 * packet, we need to use a copy-based approach.
486 */
487 if (dev->lockedmac || fd != dev->data_fd)
488 sz = vionet_rx_copy(dev, fd, iov_rx, iov_cnt,
489 chain_len);
490 else
491 sz = vionet_rx_zerocopy(dev, fd, iov_rx, iov_cnt);
492 if (sz == -1)
493 goto reset;
494 if (sz == 0) /* No packets, so bail out for now. */
495 break;
496
497 /*
498 * Account for the prefixed header since it wasn't included
499 * in the copy or zerocopy operations.
500 */
501 sz += sizeof(struct virtio_net_hdr);
502
503 /* Mark our buffers as used. */
504 used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
505 used->ring[used->idx & VIONET_QUEUE_MASK].len = sz;
506 __sync_synchronize();
507 used->idx++;
508 idx++;
509 }
510
511 if (idx != vq_info->last_avail &&
512 !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
513 notify = 1;
514 }
515
516 vq_info->last_avail = idx;
517 return (notify);
518 reset:
519 return (-1);
520 }
521
522 /*
523 * vionet_rx_copy
524 *
 * Read a packet off the provided file descriptor, validate its
 * characteristics, and copy it into the provided buffers in the iovec
 * array.
 *
 * It's assumed that the provided iovec array contains validated host
 * virtual address translations and not guest physical addresses.
530 *
531 * Returns number of bytes copied on success, 0 if packet is dropped, and
532 * -1 on an error.
533 */
534 ssize_t
vionet_rx_copy(struct vionet_dev *dev, int fd, const struct iovec *iov,
536 int iov_cnt, size_t chain_len)
537 {
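    /*
     * Bounce buffer for the copy path. A single static buffer suffices
     * since only the rx thread calls this function.
     */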
538 static uint8_t buf[VIONET_HARD_MTU];
539 struct packet *pkt = NULL;
540 struct ether_header *eh = NULL;
541 uint8_t *payload = buf;
542 size_t i, chunk, nbytes, copied = 0;
543 ssize_t sz;
544
545 /* If reading from the tap(4), try to right-size the read. */
546 if (fd == dev->data_fd)
547 nbytes = MIN(chain_len, VIONET_HARD_MTU);
548 else if (fd == pipe_inject[READ])
549 nbytes = sizeof(struct packet);
550 else {
551 log_warnx("%s: invalid fd: %d", __func__, fd);
552 return (-1);
553 }
554
555 /*
556 * Try to pull a packet. The fd should be non-blocking and we don't
557 * care if we under-read (i.e. sz != nbytes) as we may not have a
558 * packet large enough to fill the buffer.
559 */
560 sz = read(fd, buf, nbytes);
561 if (sz == -1) {
562 if (errno != EAGAIN) {
563 log_warn("%s: error reading packet", __func__);
564 return (-1);
565 }
566 return (0);
567 } else if (fd == dev->data_fd && sz < VIONET_MIN_TXLEN) {
568 /* If reading the tap(4), we should get valid ethernet. */
569 log_warnx("%s: invalid packet size", __func__);
570 return (0);
571 } else if (fd == pipe_inject[READ] && sz != sizeof(struct packet)) {
572 log_warnx("%s: invalid injected packet object (sz=%ld)",
573 __func__, sz);
574 return (0);
575 }
576
577 /* Decompose an injected packet, if that's what we're working with. */
578 if (fd == pipe_inject[READ]) {
579 pkt = (struct packet *)buf;
580 if (pkt->buf == NULL) {
581 log_warnx("%s: invalid injected packet, no buffer",
582 __func__);
583 return (0);
584 }
        if (pkt->len < VIONET_MIN_TXLEN || pkt->len > VIONET_MAX_TXLEN) {
586 log_warnx("%s: invalid injected packet size", __func__);
587 goto drop;
588 }
589 payload = pkt->buf;
590 sz = (ssize_t)pkt->len;
591 }
592
593 /* Validate the ethernet header, if required. */
594 if (dev->lockedmac) {
595 eh = (struct ether_header *)(payload);
596 if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
597 memcmp(eh->ether_dhost, dev->mac,
598 sizeof(eh->ether_dhost)) != 0)
599 goto drop;
600 }
601
602 /* Truncate one last time to the chain length, if shorter. */
603 sz = MIN(chain_len, (size_t)sz);
604
605 /*
606 * Copy the packet into the provided buffers. We can use memcpy(3)
607 * here as the gpa was validated and translated to an hva previously.
608 */
609 for (i = 0; (int)i < iov_cnt && (size_t)sz > copied; i++) {
610 chunk = MIN(iov[i].iov_len, (size_t)(sz - copied));
611 memcpy(iov[i].iov_base, payload + copied, chunk);
612 copied += chunk;
613 }
614
615 drop:
616 /* Free any injected packet buffer. */
617 if (pkt != NULL)
618 free(pkt->buf);
619
620 return (copied);
621 }
622
623 /*
624 * vionet_rx_zerocopy
625 *
626 * Perform a vectorized read from the given fd into the guest physical memory
627 * pointed to by iovecs.
628 *
629 * Returns number of bytes read on success, -1 on error, or 0 if EAGAIN was
630 * returned by readv.
631 *
632 */
633 static ssize_t
vionet_rx_zerocopy(struct vionet_dev *dev, int fd, const struct iovec *iov,
635 int iov_cnt)
636 {
637 ssize_t sz;
638
639 if (dev->lockedmac) {
640 log_warnx("%s: zerocopy not available for locked lladdr",
641 __func__);
642 return (-1);
643 }
644
645 sz = readv(fd, iov, iov_cnt);
646 if (sz == -1 && errno == EAGAIN)
647 return (0);
648 return (sz);
649 }
650
651
652 /*
653 * vionet_rx_event
654 *
655 * Called when new data can be received on the tap fd of a vionet device.
656 */
657 static void
vionet_rx_event(int fd, short event, void *arg)
659 {
660 struct virtio_dev *dev = (struct virtio_dev *)arg;
661 struct vionet_dev *vionet = &dev->vionet;
662 int ret = 0;
663
664 if (!(event & EV_READ))
665 fatalx("%s: invalid event type", __func__);
666
667 pthread_rwlock_rdlock(&lock);
668 ret = vionet_rx(vionet, fd);
669 pthread_rwlock_unlock(&lock);
670
671 if (ret == 0) {
672 /* Nothing to do. */
673 return;
674 }
675
676 pthread_rwlock_wrlock(&lock);
677 if (ret == 1) {
678 /* Notify the driver. */
679 vionet->cfg.isr_status |= 1;
680 } else {
681 /* Need a reset. Something went wrong. */
682 log_warnx("%s: requesting device reset", __func__);
683 vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
684 vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
685 }
686 pthread_rwlock_unlock(&lock);
687
688 vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
689 }
690
691 static void
vionet_notifyq(struct virtio_dev *dev)
693 {
694 struct vionet_dev *vionet = &dev->vionet;
695
696 switch (vionet->cfg.queue_notify) {
697 case RXQ:
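        /*
         * The driver has made rx buffers available; have the rx thread
         * start (or resume) servicing the tap fd.
         */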
698 rx_enabled = 1;
699 vm_pipe_send(&pipe_rx, VIRTIO_NOTIFY);
700 break;
701 case TXQ:
702 vm_pipe_send(&pipe_tx, VIRTIO_NOTIFY);
703 break;
704 default:
705 /*
706 * Catch the unimplemented queue ID 2 (control queue) as
707 * well as any bogus queue IDs.
708 */
709 log_debug("%s: notify for unimplemented queue ID %d",
710 __func__, vionet->cfg.queue_notify);
711 break;
712 }
713 }
714
715 static int
vionet_tx(struct virtio_dev *dev)
717 {
718 uint16_t idx, hdr_idx;
719 size_t chain_len, iov_cnt;
720 ssize_t dhcpsz = 0, sz;
721 int notify = 0;
722 char *vr = NULL, *dhcppkt = NULL;
723 struct vionet_dev *vionet = &dev->vionet;
724 struct vring_desc *desc, *table;
725 struct vring_avail *avail;
726 struct vring_used *used;
727 struct virtio_vq_info *vq_info;
728 struct ether_header *eh;
729 struct iovec *iov;
730 struct packet pkt;
731 uint8_t status = 0;
732
733 status = vionet->cfg.device_status
734 & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
735 if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
736 log_warnx("%s: driver not ready", __func__);
737 return (0);
738 }
739
740 vq_info = &vionet->vq[TXQ];
741 idx = vq_info->last_avail;
742 vr = vq_info->q_hva;
743 if (vr == NULL)
744 fatalx("%s: vr == NULL", __func__);
745
746 /* Compute offsets in ring of descriptors, avail ring, and used ring */
747 table = (struct vring_desc *)(vr);
748 avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
749 used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
750
751 while (idx != avail->idx) {
752 hdr_idx = avail->ring[idx & VIONET_QUEUE_MASK];
753 desc = &table[hdr_idx & VIONET_QUEUE_MASK];
754 if (DESC_WRITABLE(desc)) {
755 log_warnx("%s: invalid descriptor state", __func__);
756 goto reset;
757 }
758
759 iov = &iov_tx[0];
760 iov_cnt = 0;
761 chain_len = 0;
762
763 /*
764 * As a legacy device, we most likely will receive a lead
765 * descriptor sized to the virtio_net_hdr. However, the framing
766 * is not guaranteed, so check for packet data.
767 */
768 iov->iov_len = desc->len;
769 if (iov->iov_len < sizeof(struct virtio_net_hdr)) {
770 log_warnx("%s: invalid descriptor length", __func__);
771 goto reset;
772 } else if (iov->iov_len > sizeof(struct virtio_net_hdr)) {
773 /* Chop off the virtio header, leaving packet data. */
774 iov->iov_len -= sizeof(struct virtio_net_hdr);
775 chain_len += iov->iov_len;
776 iov->iov_base = hvaddr_mem(desc->addr +
777 sizeof(struct virtio_net_hdr), iov->iov_len);
778 if (iov->iov_base == NULL)
779 goto reset;
780 iov_cnt++;
781 }
782
783 /*
784 * Walk the chain and collect remaining addresses and lengths.
785 */
786 while (desc->flags & VRING_DESC_F_NEXT) {
787 desc = &table[desc->next & VIONET_QUEUE_MASK];
788 if (DESC_WRITABLE(desc)) {
789 log_warnx("%s: invalid descriptor state",
790 __func__);
791 goto reset;
792 }
793
794 /* Collect our IO information, translating gpa's. */
795 iov = &iov_tx[iov_cnt];
796 iov->iov_len = desc->len;
797 iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len);
798 if (iov->iov_base == NULL)
799 goto reset;
800 chain_len += iov->iov_len;
801
802 /* Guard against infinitely looping chains. */
803 if (++iov_cnt >= nitems(iov_tx)) {
804 log_warnx("%s: infinite chain detected",
805 __func__);
806 goto reset;
807 }
808 }
809
810 /* Check if we've got a minimum viable amount of data. */
811 if (chain_len < VIONET_MIN_TXLEN)
812 goto drop;
813
814 /*
815 * Packet inspection for ethernet header (if using a "local"
816 * interface) for possibility of a DHCP packet or (if using
817 * locked lladdr) for validating ethernet header.
818 *
819 * To help preserve zero-copy semantics, we require the first
820 * descriptor with packet data contains a large enough buffer
821 * for this inspection.
822 */
823 iov = &iov_tx[0];
824 if (vionet->lockedmac) {
825 if (iov->iov_len < ETHER_HDR_LEN) {
826 log_warnx("%s: insufficient header data",
827 __func__);
828 goto drop;
829 }
830 eh = (struct ether_header *)iov->iov_base;
831 if (memcmp(eh->ether_shost, vionet->mac,
832 sizeof(eh->ether_shost)) != 0) {
833 log_warnx("%s: bad source address %s",
834 __func__, ether_ntoa((struct ether_addr *)
835 eh->ether_shost));
836 goto drop;
837 }
838 }
839 if (vionet->local) {
840 dhcpsz = dhcp_request(dev, iov->iov_base, iov->iov_len,
841 &dhcppkt);
842 if (dhcpsz > 0) {
843 log_debug("%s: detected dhcp request of %zu bytes",
844 __func__, dhcpsz);
845 goto drop;
846 }
847 }
848
849 /* Write our packet to the tap(4). */
850 sz = writev(vionet->data_fd, iov_tx, iov_cnt);
851 if (sz == -1 && errno != ENOBUFS) {
852 log_warn("%s", __func__);
853 goto reset;
854 }
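        /*
         * The length reported back in the used ring covers the whole
         * chain, including the virtio_net_hdr skipped over above.
         */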
855 chain_len += sizeof(struct virtio_net_hdr);
856 drop:
857 used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx;
858 used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len;
859 __sync_synchronize();
860 used->idx++;
861 idx++;
862
863 /* Facilitate DHCP reply injection, if needed. */
864 if (dhcpsz > 0) {
865 pkt.buf = dhcppkt;
866 pkt.len = dhcpsz;
867 sz = write(pipe_inject[WRITE], &pkt, sizeof(pkt));
868 if (sz == -1 && errno != EAGAIN) {
869 log_warn("%s: packet injection", __func__);
870 free(pkt.buf);
871 } else if (sz == -1 && errno == EAGAIN) {
872 log_debug("%s: dropping dhcp reply", __func__);
873 free(pkt.buf);
874 } else if (sz != sizeof(pkt)) {
875 log_warnx("%s: failed packet injection",
876 __func__);
877 free(pkt.buf);
878 }
879 log_debug("%s: injected dhcp reply with %ld bytes",
880 __func__, sz);
881 }
882 }
883
884 if (idx != vq_info->last_avail &&
885 !(avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
886 notify = 1;
887
888
889 vq_info->last_avail = idx;
890 return (notify);
891 reset:
892 return (-1);
893 }
894
895 static void
dev_dispatch_vm(int fd, short event, void *arg)
897 {
898 struct virtio_dev *dev = arg;
899 struct vionet_dev *vionet = &dev->vionet;
900 struct imsgev *iev = &dev->async_iev;
901 struct imsgbuf *ibuf = &iev->ibuf;
902 struct imsg imsg;
903 ssize_t n = 0;
904 int verbose;
905
906 if (dev == NULL)
907 fatalx("%s: missing vionet pointer", __func__);
908
909 if (event & EV_READ) {
910 if ((n = imsgbuf_read(ibuf)) == -1)
911 fatal("%s: imsgbuf_read", __func__);
912 if (n == 0) {
913 /* this pipe is dead, so remove the event handler */
914 log_debug("%s: pipe dead (EV_READ)", __func__);
915 event_del(&iev->ev);
916 event_base_loopexit(ev_base_main, NULL);
917 return;
918 }
919 }
920
921 if (event & EV_WRITE) {
922 if (imsgbuf_write(ibuf) == -1) {
923 if (errno == EPIPE) {
924 /* this pipe is dead, remove the handler */
925 log_debug("%s: pipe dead (EV_WRITE)", __func__);
926 event_del(&iev->ev);
927 event_loopexit(NULL);
928 return;
929 }
930 fatal("%s: imsgbuf_write", __func__);
931 }
932 }
933
934 for (;;) {
935 if ((n = imsg_get(ibuf, &imsg)) == -1)
936 fatal("%s: imsg_get", __func__);
937 if (n == 0)
938 break;
939
940 switch (imsg.hdr.type) {
941 case IMSG_DEVOP_HOSTMAC:
942 IMSG_SIZE_CHECK(&imsg, vionet->hostmac);
943 memcpy(vionet->hostmac, imsg.data,
944 sizeof(vionet->hostmac));
945 log_debug("%s: set hostmac", __func__);
946 break;
947 case IMSG_VMDOP_PAUSE_VM:
948 log_debug("%s: pausing", __func__);
949 vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
950 break;
951 case IMSG_VMDOP_UNPAUSE_VM:
952 log_debug("%s: unpausing", __func__);
953 if (rx_enabled)
954 vm_pipe_send(&pipe_rx, VIRTIO_THREAD_START);
955 break;
956 case IMSG_CTL_VERBOSE:
957 IMSG_SIZE_CHECK(&imsg, &verbose);
958 memcpy(&verbose, imsg.data, sizeof(verbose));
959 log_setverbose(verbose);
960 break;
961 }
962 imsg_free(&imsg);
963 }
964 imsg_event_add2(iev, ev_base_main);
965 }
966
967 /*
968 * Synchronous IO handler.
969 *
970 */
971 static void
handle_sync_io(int fd, short event, void *arg)
973 {
974 struct virtio_dev *dev = (struct virtio_dev *)arg;
975 struct imsgev *iev = &dev->sync_iev;
976 struct imsgbuf *ibuf = &iev->ibuf;
977 struct viodev_msg msg;
978 struct imsg imsg;
979 ssize_t n;
980 int8_t intr = INTR_STATE_NOOP;
981
982 if (event & EV_READ) {
983 if ((n = imsgbuf_read(ibuf)) == -1)
984 fatal("%s: imsgbuf_read", __func__);
985 if (n == 0) {
986 /* this pipe is dead, so remove the event handler */
987 log_debug("%s: pipe dead (EV_READ)", __func__);
988 event_del(&iev->ev);
989 event_base_loopexit(ev_base_main, NULL);
990 return;
991 }
992 }
993
994 if (event & EV_WRITE) {
995 if (imsgbuf_write(ibuf) == -1) {
996 if (errno == EPIPE) {
997 /* this pipe is dead, remove the handler */
998 log_debug("%s: pipe dead (EV_WRITE)", __func__);
999 event_del(&iev->ev);
1000 event_loopexit(NULL);
1001 return;
1002 }
1003 fatal("%s: imsgbuf_write", __func__);
1004 }
1005 }
1006
1007 for (;;) {
1008 if ((n = imsg_get(ibuf, &imsg)) == -1)
1009 fatalx("%s: imsg_get (n=%ld)", __func__, n);
1010 if (n == 0)
1011 break;
1012
        /* Unpack our message. They ALL should be dev messages! */
1014 IMSG_SIZE_CHECK(&imsg, &msg);
1015 memcpy(&msg, imsg.data, sizeof(msg));
1016 imsg_free(&imsg);
1017
1018 switch (msg.type) {
1019 case VIODEV_MSG_DUMP:
1020 /* Dump device */
1021 n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev));
1022 if (n != sizeof(*dev)) {
1023 log_warnx("%s: failed to dump vionet device",
1024 __func__);
1025 break;
1026 }
1027 case VIODEV_MSG_IO_READ:
1028 /* Read IO: make sure to send a reply */
1029 msg.data = handle_io_read(&msg, dev, &intr);
1030 msg.data_valid = 1;
1031 msg.state = intr;
1032 imsg_compose_event2(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
1033 sizeof(msg), ev_base_main);
1034 break;
1035 case VIODEV_MSG_IO_WRITE:
1036 /* Write IO: no reply needed */
1037 handle_io_write(&msg, dev);
1038 break;
1039 case VIODEV_MSG_SHUTDOWN:
1040 event_del(&dev->sync_iev.ev);
1041 event_base_loopbreak(ev_base_main);
1042 return;
1043 default:
1044 fatalx("%s: invalid msg type %d", __func__, msg.type);
1045 }
1046 }
1047 imsg_event_add2(iev, ev_base_main);
1048 }
1049
1050 static void
handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev)
1052 {
1053 struct vionet_dev *vionet = &dev->vionet;
1054 uint32_t data = msg->data;
1055 int pause_devices = 0;
1056
1057 pthread_rwlock_wrlock(&lock);
1058
1059 switch (msg->reg) {
1060 case VIRTIO_CONFIG_DEVICE_FEATURES:
1061 case VIRTIO_CONFIG_QUEUE_SIZE:
1062 case VIRTIO_CONFIG_ISR_STATUS:
1063 log_warnx("%s: illegal write %x to %s", __progname, data,
1064 virtio_reg_name(msg->reg));
1065 break;
1066 case VIRTIO_CONFIG_GUEST_FEATURES:
1067 vionet->cfg.guest_feature = data;
1068 break;
1069 case VIRTIO_CONFIG_QUEUE_PFN:
1070 vionet->cfg.queue_pfn = data;
1071 vionet_update_qa(vionet);
1072 break;
1073 case VIRTIO_CONFIG_QUEUE_SELECT:
1074 vionet->cfg.queue_select = data;
1075 vionet_update_qs(vionet);
1076 break;
1077 case VIRTIO_CONFIG_QUEUE_NOTIFY:
1078 vionet->cfg.queue_notify = data;
1079 vionet_notifyq(dev);
1080 break;
1081 case VIRTIO_CONFIG_DEVICE_STATUS:
1082 if (data == 0) {
1083 resetting = 2; /* Wait on two acks: rx & tx */
1084 pause_devices = 1;
1085 } else {
1086 // XXX is this correct?
1087 vionet->cfg.device_status = data;
1088 }
1089 break;
1090 }
1091
1092 pthread_rwlock_unlock(&lock);
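    /*
     * Pause the rx/tx threads outside of the lock. Each thread ACKs over
     * pipe_main and read_pipe_main() completes the reset once both ACKs
     * have arrived.
     */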
1093 if (pause_devices) {
1094 rx_enabled = 0;
1095 vionet_deassert_pic_irq(dev);
1096 vm_pipe_send(&pipe_rx, VIRTIO_THREAD_PAUSE);
1097 vm_pipe_send(&pipe_tx, VIRTIO_THREAD_PAUSE);
1098 }
1099 }
1100
1101 static uint32_t
handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
1103 {
1104 struct vionet_dev *vionet = &dev->vionet;
1105 uint32_t data;
1106
1107 pthread_rwlock_rdlock(&lock);
1108
1109 switch (msg->reg) {
1110 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
1111 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
1112 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
1113 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
1114 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
1115 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
1116 data = vionet->mac[msg->reg -
1117 VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI];
1118 break;
1119 case VIRTIO_CONFIG_DEVICE_FEATURES:
1120 data = vionet->cfg.device_feature;
1121 break;
1122 case VIRTIO_CONFIG_GUEST_FEATURES:
1123 data = vionet->cfg.guest_feature;
1124 break;
1125 case VIRTIO_CONFIG_QUEUE_PFN:
1126 data = vionet->cfg.queue_pfn;
1127 break;
1128 case VIRTIO_CONFIG_QUEUE_SIZE:
1129 data = vionet->cfg.queue_size;
1130 break;
1131 case VIRTIO_CONFIG_QUEUE_SELECT:
1132 data = vionet->cfg.queue_select;
1133 break;
1134 case VIRTIO_CONFIG_QUEUE_NOTIFY:
1135 data = vionet->cfg.queue_notify;
1136 break;
1137 case VIRTIO_CONFIG_DEVICE_STATUS:
1138 data = vionet->cfg.device_status;
1139 break;
1140 case VIRTIO_CONFIG_ISR_STATUS:
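        /*
         * Reading the ISR clears it, which needs write access; the rwlock
         * cannot be upgraded in place, so drop the read lock and reacquire
         * it for writing.
         */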
1141 pthread_rwlock_unlock(&lock);
1142 pthread_rwlock_wrlock(&lock);
1143 data = vionet->cfg.isr_status;
1144 vionet->cfg.isr_status = 0;
1145 if (intr != NULL)
1146 *intr = INTR_STATE_DEASSERT;
1147 break;
1148 default:
1149 data = 0xFFFFFFFF;
1150 }
1151
1152 pthread_rwlock_unlock(&lock);
1153 return (data);
1154 }
1155
1156 /*
1157 * Handle the rx side processing, communicating to the main thread via pipe.
1158 */
1159 static void *
rx_run_loop(void *arg)
1161 {
1162 struct virtio_dev *dev = (struct virtio_dev *)arg;
1163 struct vionet_dev *vionet = &dev->vionet;
1164 int ret;
1165
1166 ev_base_rx = event_base_new();
1167
1168 /* Wire up event handling for the tap fd. */
1169 event_set(&ev_tap, vionet->data_fd, EV_READ | EV_PERSIST,
1170 vionet_rx_event, dev);
1171 event_base_set(ev_base_rx, &ev_tap);
1172
1173 /* Wire up event handling for the packet injection pipe. */
1174 event_set(&ev_inject, pipe_inject[READ], EV_READ | EV_PERSIST,
1175 vionet_rx_event, dev);
1176 event_base_set(ev_base_rx, &ev_inject);
1177
1178 /* Wire up event handling for our inter-thread communication channel. */
1179 event_base_set(ev_base_rx, &pipe_rx.read_ev);
1180 event_add(&pipe_rx.read_ev, NULL);
1181
1182 /* Begin our event loop with our channel event active. */
1183 ret = event_base_dispatch(ev_base_rx);
1184 event_base_free(ev_base_rx);
1185
1186 log_debug("%s: exiting (%d)", __func__, ret);
1187
1188 close_fd(pipe_rx.read);
1189 close_fd(pipe_inject[READ]);
1190
1191 return (NULL);
1192 }
1193
1194 /*
1195 * Handle the tx side processing, communicating to the main thread via pipe.
1196 */
1197 static void *
tx_run_loop(void *arg)
1199 {
1200 int ret;
1201
1202 ev_base_tx = event_base_new();
1203
1204 /* Wire up event handling for our inter-thread communication channel. */
1205 event_base_set(ev_base_tx, &pipe_tx.read_ev);
1206 event_add(&pipe_tx.read_ev, NULL);
1207
1208 /* Begin our event loop with our channel event active. */
1209 ret = event_base_dispatch(ev_base_tx);
1210 event_base_free(ev_base_tx);
1211
1212 log_debug("%s: exiting (%d)", __func__, ret);
1213
1214 close_fd(pipe_tx.read);
1215
1216 return (NULL);
1217 }
1218
1219 /*
1220 * Read events sent by the main thread to the rx thread.
1221 */
1222 static void
read_pipe_rx(int fd, short event, void *arg)
1224 {
1225 enum pipe_msg_type msg;
1226
1227 if (!(event & EV_READ))
1228 fatalx("%s: invalid event type", __func__);
1229
1230 msg = vm_pipe_recv(&pipe_rx);
1231
1232 switch (msg) {
1233 case VIRTIO_NOTIFY:
1234 case VIRTIO_THREAD_START:
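        /* (Re)arm the tap and injection events to resume rx processing. */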
1235 event_add(&ev_tap, NULL);
1236 event_add(&ev_inject, NULL);
1237 break;
1238 case VIRTIO_THREAD_PAUSE:
1239 event_del(&ev_tap);
1240 event_del(&ev_inject);
1241 vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
1242 break;
1243 case VIRTIO_THREAD_STOP:
1244 event_del(&ev_tap);
1245 event_del(&ev_inject);
1246 event_base_loopexit(ev_base_rx, NULL);
1247 break;
1248 default:
1249 fatalx("%s: invalid channel message: %d", __func__, msg);
1250 }
1251 }
1252
1253 /*
1254 * Read events sent by the main thread to the tx thread.
1255 */
1256 static void
read_pipe_tx(int fd, short event, void *arg)
1258 {
1259 struct virtio_dev *dev = (struct virtio_dev*)arg;
1260 struct vionet_dev *vionet = &dev->vionet;
1261 enum pipe_msg_type msg;
1262 int ret = 0;
1263
1264 if (!(event & EV_READ))
1265 fatalx("%s: invalid event type", __func__);
1266
1267 msg = vm_pipe_recv(&pipe_tx);
1268
1269 switch (msg) {
1270 case VIRTIO_NOTIFY:
1271 pthread_rwlock_rdlock(&lock);
1272 ret = vionet_tx(dev);
1273 pthread_rwlock_unlock(&lock);
1274 break;
1275 case VIRTIO_THREAD_START:
1276 /* Ignore Start messages. */
1277 break;
1278 case VIRTIO_THREAD_PAUSE:
1279 /*
1280 * Nothing to do when pausing on the tx side, but ACK so main
1281 * thread knows we're not transmitting.
1282 */
1283 vm_pipe_send(&pipe_main, VIRTIO_THREAD_ACK);
1284 break;
1285 case VIRTIO_THREAD_STOP:
1286 event_base_loopexit(ev_base_tx, NULL);
1287 break;
1288 default:
1289 fatalx("%s: invalid channel message: %d", __func__, msg);
1290 }
1291
1292 if (ret == 0) {
1293 /* No notification needed. Return early. */
1294 return;
1295 }
1296
1297 pthread_rwlock_wrlock(&lock);
1298 if (ret == 1) {
1299 /* Notify the driver. */
1300 vionet->cfg.isr_status |= 1;
1301 } else {
1302 /* Need a reset. Something went wrong. */
1303 log_warnx("%s: requesting device reset", __func__);
1304 vionet->cfg.device_status |= DEVICE_NEEDS_RESET;
1305 vionet->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
1306 }
1307 pthread_rwlock_unlock(&lock);
1308
1309 vm_pipe_send(&pipe_main, VIRTIO_RAISE_IRQ);
1310 }
1311
1312 /*
1313 * Read events sent by the rx/tx threads to the main thread.
1314 */
1315 static void
read_pipe_main(int fd, short event, void *arg)
1317 {
1318 struct virtio_dev *dev = (struct virtio_dev*)arg;
1319 struct vionet_dev *vionet = &dev->vionet;
1320 enum pipe_msg_type msg;
1321
1322 if (!(event & EV_READ))
1323 fatalx("%s: invalid event type", __func__);
1324
1325 msg = vm_pipe_recv(&pipe_main);
1326 switch (msg) {
1327 case VIRTIO_RAISE_IRQ:
1328 vionet_assert_pic_irq(dev);
1329 break;
1330 case VIRTIO_THREAD_ACK:
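        /*
         * Both the rx and tx threads must ACK a pause before we finish the
         * reset triggered by the driver writing 0 to the status register.
         */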
1331 resetting--;
1332 if (resetting == 0) {
1333 log_debug("%s: resetting virtio network device %d",
1334 __func__, vionet->idx);
1335
1336 pthread_rwlock_wrlock(&lock);
1337 vionet->cfg.device_status = 0;
1338 vionet->cfg.guest_feature = 0;
1339 vionet->cfg.queue_pfn = 0;
1340 vionet_update_qa(vionet);
1341 vionet->cfg.queue_size = 0;
1342 vionet_update_qs(vionet);
1343 vionet->cfg.queue_select = 0;
1344 vionet->cfg.queue_notify = 0;
1345 vionet->cfg.isr_status = 0;
1346 vionet->vq[RXQ].last_avail = 0;
1347 vionet->vq[RXQ].notified_avail = 0;
1348 vionet->vq[TXQ].last_avail = 0;
1349 vionet->vq[TXQ].notified_avail = 0;
1350 pthread_rwlock_unlock(&lock);
1351 }
1352 break;
1353 default:
1354 fatalx("%s: invalid channel msg: %d", __func__, msg);
1355 }
1356 }
1357
1358 /*
1359 * Message the vm process asking to raise the irq. Must be called from the main
1360 * thread.
1361 */
1362 static void
vionet_assert_pic_irq(struct virtio_dev *dev)
1364 {
1365 struct viodev_msg msg;
1366 int ret;
1367
1368 memset(&msg, 0, sizeof(msg));
1369 msg.irq = dev->irq;
1370 msg.vcpu = 0; // XXX
1371 msg.type = VIODEV_MSG_KICK;
1372 msg.state = INTR_STATE_ASSERT;
1373
1374 ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
1375 &msg, sizeof(msg), ev_base_main);
1376 if (ret == -1)
1377 log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
1378 }
1379
1380 /*
1381 * Message the vm process asking to lower the irq. Must be called from the main
1382 * thread.
1383 */
1384 static void
vionet_deassert_pic_irq(struct virtio_dev *dev)
1386 {
1387 struct viodev_msg msg;
1388 int ret;
1389
1390 memset(&msg, 0, sizeof(msg));
1391 msg.irq = dev->irq;
1392 msg.vcpu = 0; // XXX
1393 msg.type = VIODEV_MSG_KICK;
1394 msg.state = INTR_STATE_DEASSERT;
1395
1396 ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
1397 &msg, sizeof(msg), ev_base_main);
1398 if (ret == -1)
1399 log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
1400 }
1401