10df750e9SMarc-André Lureau /*
20df750e9SMarc-André Lureau  * Vhost User library
30df750e9SMarc-André Lureau  *
40df750e9SMarc-André Lureau  * Copyright IBM, Corp. 2007
50df750e9SMarc-André Lureau  * Copyright (c) 2016 Red Hat, Inc.
60df750e9SMarc-André Lureau  *
70df750e9SMarc-André Lureau  * Authors:
80df750e9SMarc-André Lureau  *  Anthony Liguori <aliguori@us.ibm.com>
90df750e9SMarc-André Lureau  *  Marc-André Lureau <mlureau@redhat.com>
100df750e9SMarc-André Lureau  *  Victor Kaplansky <victork@redhat.com>
110df750e9SMarc-André Lureau  *
120df750e9SMarc-André Lureau  * This work is licensed under the terms of the GNU GPL, version 2 or
130df750e9SMarc-André Lureau  * later.  See the COPYING file in the top-level directory.
140df750e9SMarc-André Lureau  */
150df750e9SMarc-André Lureau 
16dadc3d01SMarcel Holtmann #ifndef _GNU_SOURCE
17dadc3d01SMarcel Holtmann #define _GNU_SOURCE
18dadc3d01SMarcel Holtmann #endif
19dadc3d01SMarcel Holtmann 
200df750e9SMarc-André Lureau /* this code avoids GLib dependency */
210df750e9SMarc-André Lureau #include <stdlib.h>
220df750e9SMarc-André Lureau #include <stdio.h>
230df750e9SMarc-André Lureau #include <unistd.h>
240df750e9SMarc-André Lureau #include <stdarg.h>
250df750e9SMarc-André Lureau #include <errno.h>
260df750e9SMarc-André Lureau #include <string.h>
270df750e9SMarc-André Lureau #include <assert.h>
280df750e9SMarc-André Lureau #include <inttypes.h>
290df750e9SMarc-André Lureau #include <sys/types.h>
300df750e9SMarc-André Lureau #include <sys/socket.h>
310df750e9SMarc-André Lureau #include <sys/eventfd.h>
320df750e9SMarc-André Lureau #include <sys/mman.h>
330df750e9SMarc-André Lureau #include <endian.h>
340df750e9SMarc-André Lureau 
35193ba660SDavid 'Digit' Turner /* Necessary to provide VIRTIO_F_VERSION_1 on system
36193ba660SDavid 'Digit' Turner  * with older linux headers. Must appear before
37193ba660SDavid 'Digit' Turner  * <linux/vhost.h> below.
38193ba660SDavid 'Digit' Turner  */
39193ba660SDavid 'Digit' Turner #include "standard-headers/linux/virtio_config.h"
40193ba660SDavid 'Digit' Turner 
410df750e9SMarc-André Lureau #if defined(__linux__)
420df750e9SMarc-André Lureau #include <sys/syscall.h>
430df750e9SMarc-André Lureau #include <fcntl.h>
440df750e9SMarc-André Lureau #include <sys/ioctl.h>
450df750e9SMarc-André Lureau #include <linux/vhost.h>
46b2b63008SDavid Hildenbrand #include <sys/vfs.h>
47b2b63008SDavid Hildenbrand #include <linux/magic.h>
480df750e9SMarc-André Lureau 
490df750e9SMarc-André Lureau #ifdef __NR_userfaultfd
500df750e9SMarc-André Lureau #include <linux/userfaultfd.h>
510df750e9SMarc-André Lureau #endif
520df750e9SMarc-André Lureau 
530df750e9SMarc-André Lureau #endif
540df750e9SMarc-André Lureau 
553f55f97bSMarc-André Lureau #include "include/atomic.h"
560df750e9SMarc-André Lureau 
570df750e9SMarc-André Lureau #include "libvhost-user.h"
580df750e9SMarc-André Lureau 
/*
 * usually provided by GLib; redefined here because this code avoids a
 * GLib dependency.
 *
 * Mirrors GLib's definition: on GCC >= 4.4 (and not clang) use the
 * gnu_printf archetype so glibc-specific conversions such as %m are
 * accepted; otherwise fall back to the generic printf archetype.
 * (The previous condition matched GCC 4.4 *exactly*, so newer GCC
 * silently lost gnu_printf checking.)
 */
#if     __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)
#if !defined(__clang__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
#define G_GNUC_PRINTF(format_idx, arg_idx) \
  __attribute__((__format__(gnu_printf, format_idx, arg_idx)))
#else
#define G_GNUC_PRINTF(format_idx, arg_idx) \
  __attribute__((__format__(__printf__, format_idx, arg_idx)))
#endif
#else   /* !__GNUC__ */
#define G_GNUC_PRINTF(format_idx, arg_idx)
#endif  /* !__GNUC__ */
#ifndef MIN
/*
 * Type-generic minimum; each argument is evaluated exactly once
 * (GCC/Clang statement-expression). The (void)(&_min1 == &_min2)
 * line only exists to make the compiler warn when x and y have
 * incompatible types.
 */
#define MIN(x, y) ({                            \
            __typeof__(x) _min1 = (x);          \
            __typeof__(y) _min2 = (y);          \
            (void) (&_min1 == &_min2);          \
            _min1 < _min2 ? _min1 : _min2; })
#endif

/* Round number down to multiple */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))

/* Round number up to multiple */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))

#ifndef unlikely
/* Branch-prediction hint: x is expected to be false. */
#define unlikely(x)   __builtin_expect(!!(x), 0)
#endif

/* Align each region to cache line size in inflight buffer */
#define INFLIGHT_ALIGNMENT 64

/* The version of inflight buffer */
#define INFLIGHT_VERSION 1

/* The version of the protocol we support */
#define VHOST_USER_VERSION 1
#define LIBVHOST_USER_DEBUG 0

/*
 * Debug logging to stderr; compiled out (constant-folded away) unless
 * LIBVHOST_USER_DEBUG is set to a non-zero value above.
 */
#define DPRINT(...)                             \
    do {                                        \
        if (LIBVHOST_USER_DEBUG) {              \
            fprintf(stderr, __VA_ARGS__);        \
        }                                       \
    } while (0)
1050df750e9SMarc-André Lureau 
static inline
bool has_feature(uint64_t features, unsigned int fbit)
{
    /* Test a single feature bit in a 64-bit feature mask. */
    assert(fbit < 64);
    return (features >> fbit) & 1;
}
1120df750e9SMarc-André Lureau 
/* Return true if the negotiated device feature bit fbit is set. */
static inline
bool vu_has_feature(VuDev *dev,
                    unsigned int fbit)
{
    return has_feature(dev->features, fbit);
}
1190df750e9SMarc-André Lureau 
/* Return true if the negotiated vhost-user protocol feature fbit is set. */
static inline bool vu_has_protocol_feature(VuDev *dev, unsigned int fbit)
{
    return has_feature(dev->protocol_features, fbit);
}
1240df750e9SMarc-André Lureau 
125467eeb0fSAlex Bennée const char *
1260df750e9SMarc-André Lureau vu_request_to_string(unsigned int req)
1270df750e9SMarc-André Lureau {
1280df750e9SMarc-André Lureau #define REQ(req) [req] = #req
1290df750e9SMarc-André Lureau     static const char *vu_request_str[] = {
1300df750e9SMarc-André Lureau         REQ(VHOST_USER_NONE),
1310df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_FEATURES),
1320df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_FEATURES),
1330df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_OWNER),
1340df750e9SMarc-André Lureau         REQ(VHOST_USER_RESET_OWNER),
1350df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_MEM_TABLE),
1360df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_LOG_BASE),
1370df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_LOG_FD),
1380df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_NUM),
1390df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ADDR),
1400df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_BASE),
1410df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_VRING_BASE),
1420df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_KICK),
1430df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_CALL),
1440df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ERR),
1450df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
1460df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
1470df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_QUEUE_NUM),
1480df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ENABLE),
1490df750e9SMarc-André Lureau         REQ(VHOST_USER_SEND_RARP),
1500df750e9SMarc-André Lureau         REQ(VHOST_USER_NET_SET_MTU),
151e608feedSMaxime Coquelin         REQ(VHOST_USER_SET_BACKEND_REQ_FD),
1520df750e9SMarc-André Lureau         REQ(VHOST_USER_IOTLB_MSG),
1530df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ENDIAN),
1540df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_CONFIG),
1550df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_CONFIG),
1560df750e9SMarc-André Lureau         REQ(VHOST_USER_POSTCOPY_ADVISE),
1570df750e9SMarc-André Lureau         REQ(VHOST_USER_POSTCOPY_LISTEN),
1580df750e9SMarc-André Lureau         REQ(VHOST_USER_POSTCOPY_END),
1590df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_INFLIGHT_FD),
1600df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_INFLIGHT_FD),
1610df750e9SMarc-André Lureau         REQ(VHOST_USER_GPU_SET_SOCKET),
1620df750e9SMarc-André Lureau         REQ(VHOST_USER_VRING_KICK),
1630df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_MAX_MEM_SLOTS),
1640df750e9SMarc-André Lureau         REQ(VHOST_USER_ADD_MEM_REG),
1650df750e9SMarc-André Lureau         REQ(VHOST_USER_REM_MEM_REG),
166ce0f3b03SAlbert Esteve         REQ(VHOST_USER_GET_SHARED_OBJECT),
1670df750e9SMarc-André Lureau         REQ(VHOST_USER_MAX),
1680df750e9SMarc-André Lureau     };
1690df750e9SMarc-André Lureau #undef REQ
1700df750e9SMarc-André Lureau 
1710df750e9SMarc-André Lureau     if (req < VHOST_USER_MAX) {
1720df750e9SMarc-André Lureau         return vu_request_str[req];
1730df750e9SMarc-André Lureau     } else {
1740df750e9SMarc-André Lureau         return "unknown";
1750df750e9SMarc-André Lureau     }
1760df750e9SMarc-André Lureau }
1770df750e9SMarc-André Lureau 
/*
 * Format a printf-style message, mark the device broken and hand the
 * message to the user-supplied panic callback. Once broken, request
 * processing stops until the connection is torn down.
 */
static void G_GNUC_PRINTF(2, 3)
vu_panic(VuDev *dev, const char *msg, ...)
{
    char *buf = NULL;
    va_list ap;

    va_start(ap, msg);
    /* vasprintf() allocates buf; on failure pass NULL to the callback. */
    if (vasprintf(&buf, msg, ap) < 0) {
        buf = NULL;
    }
    va_end(ap);

    /* Flag the device before invoking the callback, which may not return. */
    dev->broken = true;
    dev->panic(dev, buf);
    free(buf);

    /*
     * FIXME:
     * find a way to call virtio_error, or perhaps close the connection?
     */
}
1990df750e9SMarc-André Lureau 
20060ccdca4SDavid Hildenbrand /* Search for a memory region that covers this guest physical address. */
20160ccdca4SDavid Hildenbrand static VuDevRegion *
20260ccdca4SDavid Hildenbrand vu_gpa_to_mem_region(VuDev *dev, uint64_t guest_addr)
20360ccdca4SDavid Hildenbrand {
204a3c0118cSDavid Hildenbrand     int low = 0;
205a3c0118cSDavid Hildenbrand     int high = dev->nregions - 1;
20660ccdca4SDavid Hildenbrand 
20760ccdca4SDavid Hildenbrand     /*
20860ccdca4SDavid Hildenbrand      * Memory regions cannot overlap in guest physical address space. Each
20960ccdca4SDavid Hildenbrand      * GPA belongs to exactly one memory region, so there can only be one
21060ccdca4SDavid Hildenbrand      * match.
211a3c0118cSDavid Hildenbrand      *
212a3c0118cSDavid Hildenbrand      * We store our memory regions ordered by GPA and can simply perform a
213a3c0118cSDavid Hildenbrand      * binary search.
21460ccdca4SDavid Hildenbrand      */
215a3c0118cSDavid Hildenbrand     while (low <= high) {
216a3c0118cSDavid Hildenbrand         unsigned int mid = low + (high - low) / 2;
217a3c0118cSDavid Hildenbrand         VuDevRegion *cur = &dev->regions[mid];
21860ccdca4SDavid Hildenbrand 
21960ccdca4SDavid Hildenbrand         if (guest_addr >= cur->gpa && guest_addr < cur->gpa + cur->size) {
22060ccdca4SDavid Hildenbrand             return cur;
22160ccdca4SDavid Hildenbrand         }
222a3c0118cSDavid Hildenbrand         if (guest_addr >= cur->gpa + cur->size) {
223a3c0118cSDavid Hildenbrand             low = mid + 1;
224a3c0118cSDavid Hildenbrand         }
225a3c0118cSDavid Hildenbrand         if (guest_addr < cur->gpa) {
226a3c0118cSDavid Hildenbrand             high = mid - 1;
227a3c0118cSDavid Hildenbrand         }
22860ccdca4SDavid Hildenbrand     }
22960ccdca4SDavid Hildenbrand     return NULL;
23060ccdca4SDavid Hildenbrand }
23160ccdca4SDavid Hildenbrand 
/*
 * Translate guest physical address to our virtual address.
 *
 * On success *plen is clamped so that [guest_addr, guest_addr + *plen)
 * does not extend past the end of the containing memory region.
 * Returns NULL when *plen is 0 or no region covers guest_addr.
 */
void *
vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr)
{
    VuDevRegion *r;

    if (*plen == 0) {
        return NULL;
    }

    r = vu_gpa_to_mem_region(dev, guest_addr);
    if (!r) {
        return NULL;
    }

    /* Trim the length at the region boundary. */
    if ((guest_addr + *plen) > (r->gpa + r->size)) {
        *plen = r->gpa + r->size - guest_addr;
    }
    /* GPA -> offset into the region -> address within our mmap'ed window. */
    return (void *)(uintptr_t)guest_addr - r->gpa + r->mmap_addr +
           r->mmap_offset;
}
2530df750e9SMarc-André Lureau 
2540df750e9SMarc-André Lureau /* Translate qemu virtual address to our virtual address.  */
2550df750e9SMarc-André Lureau static void *
2560df750e9SMarc-André Lureau qva_to_va(VuDev *dev, uint64_t qemu_addr)
2570df750e9SMarc-André Lureau {
25892bf2461SMarcel Holtmann     unsigned int i;
2590df750e9SMarc-André Lureau 
2600df750e9SMarc-André Lureau     /* Find matching memory region.  */
2610df750e9SMarc-André Lureau     for (i = 0; i < dev->nregions; i++) {
2620df750e9SMarc-André Lureau         VuDevRegion *r = &dev->regions[i];
2630df750e9SMarc-André Lureau 
2640df750e9SMarc-André Lureau         if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
2650df750e9SMarc-André Lureau             return (void *)(uintptr_t)
2660df750e9SMarc-André Lureau                 qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
2670df750e9SMarc-André Lureau         }
2680df750e9SMarc-André Lureau     }
2690df750e9SMarc-André Lureau 
2700df750e9SMarc-André Lureau     return NULL;
2710df750e9SMarc-André Lureau }
2720df750e9SMarc-André Lureau 
2730df750e9SMarc-André Lureau static void
274bec58209SDavid Hildenbrand vu_remove_all_mem_regs(VuDev *dev)
275bec58209SDavid Hildenbrand {
276bec58209SDavid Hildenbrand     unsigned int i;
277bec58209SDavid Hildenbrand 
278bec58209SDavid Hildenbrand     for (i = 0; i < dev->nregions; i++) {
279bec58209SDavid Hildenbrand         VuDevRegion *r = &dev->regions[i];
280bec58209SDavid Hildenbrand 
2814f865c3bSDavid Hildenbrand         munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
282bec58209SDavid Hildenbrand     }
283bec58209SDavid Hildenbrand     dev->nregions = 0;
284bec58209SDavid Hildenbrand }
285bec58209SDavid Hildenbrand 
2862a290227SDavid Hildenbrand static bool
28767f4f663SDavid Hildenbrand map_ring(VuDev *dev, VuVirtq *vq)
28867f4f663SDavid Hildenbrand {
28967f4f663SDavid Hildenbrand     vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
29067f4f663SDavid Hildenbrand     vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
29167f4f663SDavid Hildenbrand     vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
29267f4f663SDavid Hildenbrand 
29367f4f663SDavid Hildenbrand     DPRINT("Setting virtq addresses:\n");
29467f4f663SDavid Hildenbrand     DPRINT("    vring_desc  at %p\n", vq->vring.desc);
29567f4f663SDavid Hildenbrand     DPRINT("    vring_used  at %p\n", vq->vring.used);
29667f4f663SDavid Hildenbrand     DPRINT("    vring_avail at %p\n", vq->vring.avail);
29767f4f663SDavid Hildenbrand 
29867f4f663SDavid Hildenbrand     return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
29967f4f663SDavid Hildenbrand }
30067f4f663SDavid Hildenbrand 
/*
 * Return true if vq's rings are mapped and the device is not broken,
 * remapping the rings on demand when possible.
 */
static bool
vu_is_vq_usable(VuDev *dev, VuVirtq *vq)
{
    if (unlikely(dev->broken)) {
        return false;
    }

    /* Fast path: the avail ring being mapped implies all rings are. */
    if (likely(vq->vring.avail)) {
        return true;
    }

    /*
     * In corner cases, we might temporarily remove a memory region that
     * mapped a ring. When removing a memory region we make sure to
     * unmap any rings that would be impacted. Let's try to remap if we
     * already succeeded mapping this ring once.
     */
    if (!vq->vra.desc_user_addr || !vq->vra.used_user_addr ||
        !vq->vra.avail_user_addr) {
        /* The ring was never set up; nothing to remap. */
        return false;
    }
    if (map_ring(dev, vq)) {
        vu_panic(dev, "remapping queue on access");
        return false;
    }
    return true;
}
32867f4f663SDavid Hildenbrand 
32967f4f663SDavid Hildenbrand static void
33067f4f663SDavid Hildenbrand unmap_rings(VuDev *dev, VuDevRegion *r)
33167f4f663SDavid Hildenbrand {
33267f4f663SDavid Hildenbrand     int i;
33367f4f663SDavid Hildenbrand 
33467f4f663SDavid Hildenbrand     for (i = 0; i < dev->max_queues; i++) {
33567f4f663SDavid Hildenbrand         VuVirtq *vq = &dev->vq[i];
33667f4f663SDavid Hildenbrand         const uintptr_t desc = (uintptr_t)vq->vring.desc;
33767f4f663SDavid Hildenbrand         const uintptr_t used = (uintptr_t)vq->vring.used;
33867f4f663SDavid Hildenbrand         const uintptr_t avail = (uintptr_t)vq->vring.avail;
33967f4f663SDavid Hildenbrand 
34067f4f663SDavid Hildenbrand         if (desc < r->mmap_addr || desc >= r->mmap_addr + r->size) {
34167f4f663SDavid Hildenbrand             continue;
34267f4f663SDavid Hildenbrand         }
34367f4f663SDavid Hildenbrand         if (used < r->mmap_addr || used >= r->mmap_addr + r->size) {
34467f4f663SDavid Hildenbrand             continue;
34567f4f663SDavid Hildenbrand         }
34667f4f663SDavid Hildenbrand         if (avail < r->mmap_addr || avail >= r->mmap_addr + r->size) {
34767f4f663SDavid Hildenbrand             continue;
34867f4f663SDavid Hildenbrand         }
34967f4f663SDavid Hildenbrand 
35067f4f663SDavid Hildenbrand         DPRINT("Unmapping rings of queue %d\n", i);
35167f4f663SDavid Hildenbrand         vq->vring.desc = NULL;
35267f4f663SDavid Hildenbrand         vq->vring.used = NULL;
35367f4f663SDavid Hildenbrand         vq->vring.avail = NULL;
35467f4f663SDavid Hildenbrand     }
3552a290227SDavid Hildenbrand }
3562a290227SDavid Hildenbrand 
/*
 * If fd refers to a file on hugetlbfs, return the huge page size
 * backing it (f_bsize); otherwise (including on fstatfs() failure or
 * non-Linux builds) return 0.
 */
static size_t
get_fd_hugepagesize(int fd)
{
#if defined(__linux__)
    struct statfs fs;
    int ret;

    /* Retry fstatfs() if interrupted by a signal. */
    while ((ret = fstatfs(fd, &fs)) != 0 && errno == EINTR) {
        /* try again */
    }

    if (ret == 0 && (unsigned int)fs.f_type == HUGETLBFS_MAGIC) {
        return fs.f_bsize;
    }
#endif
    return 0;
}
374b2b63008SDavid Hildenbrand 
/*
 * mmap() the region described by msg_region (backed by fd) and insert
 * it into dev->regions, keeping the array sorted by guest physical
 * address. On overlap or mmap failure the device is marked broken via
 * vu_panic() and the region is not added. When postcopy is listening,
 * msg_region->userspace_addr is rewritten in place so the caller can
 * report our mapping address back to QEMU.
 */
static void
_vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
{
    const uint64_t start_gpa = msg_region->guest_phys_addr;
    const uint64_t end_gpa = start_gpa + msg_region->memory_size;
    int prot = PROT_READ | PROT_WRITE;
    uint64_t mmap_offset, fd_offset;
    size_t hugepagesize;
    VuDevRegion *r;
    void *mmap_addr;
    int low = 0;
    int high = dev->nregions - 1;
    unsigned int idx;

    DPRINT("Adding region %d\n", dev->nregions);
    DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
           msg_region->guest_phys_addr);
    DPRINT("    memory_size:     0x%016"PRIx64"\n",
           msg_region->memory_size);
    DPRINT("    userspace_addr:  0x%016"PRIx64"\n",
           msg_region->userspace_addr);
    DPRINT("    old mmap_offset: 0x%016"PRIx64"\n",
           msg_region->mmap_offset);

    if (dev->postcopy_listening) {
        /*
         * In postcopy we're using PROT_NONE here to catch anyone
         * accessing it before we userfault
         */
        prot = PROT_NONE;
    }

    /*
     * We will add memory regions into the array sorted by GPA. Perform a
     * binary search to locate the insertion point: it will be at the low
     * index.
     */
    while (low <= high) {
        unsigned int mid = low + (high - low)  / 2;
        VuDevRegion *cur = &dev->regions[mid];

        /* Overlap of GPA addresses. */
        if (start_gpa < cur->gpa + cur->size && cur->gpa < end_gpa) {
            vu_panic(dev, "regions with overlapping guest physical addresses");
            return;
        }
        if (start_gpa >= cur->gpa + cur->size) {
            low = mid + 1;
        }
        if (start_gpa < cur->gpa) {
            high = mid - 1;
        }
    }
    idx = low;

    /*
     * Convert most of msg_region->mmap_offset to fd_offset. In almost all
     * cases, this will leave us with mmap_offset == 0, mmap()'ing only
     * what we really need. Only if a memory region would partially cover
     * hugetlb pages, we'd get mmap_offset != 0, which usually doesn't happen
     * anymore (i.e., modern QEMU).
     *
     * Note that mmap() with hugetlb would fail if the offset into the file
     * is not aligned to the huge page size.
     */
    hugepagesize = get_fd_hugepagesize(fd);
    if (hugepagesize) {
        /* Keep the fd offset huge-page aligned; carry the rest in-mapping. */
        fd_offset = ALIGN_DOWN(msg_region->mmap_offset, hugepagesize);
        mmap_offset = msg_region->mmap_offset - fd_offset;
    } else {
        fd_offset = msg_region->mmap_offset;
        mmap_offset = 0;
    }

    DPRINT("    fd_offset:       0x%016"PRIx64"\n",
           fd_offset);
    DPRINT("    new mmap_offset: 0x%016"PRIx64"\n",
           mmap_offset);

    mmap_addr = mmap(0, msg_region->memory_size + mmap_offset,
                     prot, MAP_SHARED | MAP_NORESERVE, fd, fd_offset);
    if (mmap_addr == MAP_FAILED) {
        vu_panic(dev, "region mmap error: %s", strerror(errno));
        return;
    }
    DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
           (uint64_t)(uintptr_t)mmap_addr);

    /* Shift all affected entries by 1 to open a hole at idx. */
    r = &dev->regions[idx];
    memmove(r + 1, r, sizeof(VuDevRegion) * (dev->nregions - idx));
    r->gpa = msg_region->guest_phys_addr;
    r->size = msg_region->memory_size;
    r->qva = msg_region->userspace_addr;
    r->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
    r->mmap_offset = mmap_offset;
    dev->nregions++;

    if (dev->postcopy_listening) {
        /*
         * Return the address to QEMU so that it can translate the ufd
         * fault addresses back.
         */
        msg_region->userspace_addr = r->mmap_addr + r->mmap_offset;
    }
}
48193fec23dSDavid Hildenbrand 
48293fec23dSDavid Hildenbrand static void
4830df750e9SMarc-André Lureau vmsg_close_fds(VhostUserMsg *vmsg)
4840df750e9SMarc-André Lureau {
4850df750e9SMarc-André Lureau     int i;
4860df750e9SMarc-André Lureau 
4870df750e9SMarc-André Lureau     for (i = 0; i < vmsg->fd_num; i++) {
4880df750e9SMarc-André Lureau         close(vmsg->fds[i]);
4890df750e9SMarc-André Lureau     }
4900df750e9SMarc-André Lureau }
4910df750e9SMarc-André Lureau 
4920df750e9SMarc-André Lureau /* Set reply payload.u64 and clear request flags and fd_num */
4930df750e9SMarc-André Lureau static void vmsg_set_reply_u64(VhostUserMsg *vmsg, uint64_t val)
4940df750e9SMarc-André Lureau {
4950df750e9SMarc-André Lureau     vmsg->flags = 0; /* defaults will be set by vu_send_reply() */
4960df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.u64);
4970df750e9SMarc-André Lureau     vmsg->payload.u64 = val;
4980df750e9SMarc-André Lureau     vmsg->fd_num = 0;
4990df750e9SMarc-André Lureau }
5000df750e9SMarc-André Lureau 
/*
 * A test to see if we have userfault available.
 *
 * Compile-time: requires Linux, the userfaultfd syscall number and the
 * MISSING_SHMEM/MISSING_HUGETLBFS feature flags in the headers.
 * Run-time: probes the running kernel by creating a userfaultfd and
 * negotiating those features via UFFDIO_API. The probe fd is always
 * closed before returning.
 */
static bool
have_userfault(void)
{
#if defined(__linux__) && defined(__NR_userfaultfd) &&\
        defined(UFFD_FEATURE_MISSING_SHMEM) &&\
        defined(UFFD_FEATURE_MISSING_HUGETLBFS)
    /* Now test the kernel we're running on really has the features */
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    struct uffdio_api api_struct;
    if (ufd < 0) {
        /* Kernel without userfaultfd support (or seccomp-blocked). */
        return false;
    }

    api_struct.api = UFFD_API;
    api_struct.features = UFFD_FEATURE_MISSING_SHMEM |
                          UFFD_FEATURE_MISSING_HUGETLBFS;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        /* Kernel rejected the requested feature set. */
        close(ufd);
        return false;
    }
    close(ufd);
    return true;

#else
    return false;
#endif
}
5290df750e9SMarc-André Lureau 
5300df750e9SMarc-André Lureau static bool
5310df750e9SMarc-André Lureau vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
5320df750e9SMarc-André Lureau {
5330df750e9SMarc-André Lureau     char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
5340df750e9SMarc-André Lureau     struct iovec iov = {
5350df750e9SMarc-André Lureau         .iov_base = (char *)vmsg,
5360df750e9SMarc-André Lureau         .iov_len = VHOST_USER_HDR_SIZE,
5370df750e9SMarc-André Lureau     };
5380df750e9SMarc-André Lureau     struct msghdr msg = {
5390df750e9SMarc-André Lureau         .msg_iov = &iov,
5400df750e9SMarc-André Lureau         .msg_iovlen = 1,
5410df750e9SMarc-André Lureau         .msg_control = control,
5420df750e9SMarc-André Lureau         .msg_controllen = sizeof(control),
5430df750e9SMarc-André Lureau     };
5440df750e9SMarc-André Lureau     size_t fd_size;
5450df750e9SMarc-André Lureau     struct cmsghdr *cmsg;
5460df750e9SMarc-André Lureau     int rc;
5470df750e9SMarc-André Lureau 
5480df750e9SMarc-André Lureau     do {
5490df750e9SMarc-André Lureau         rc = recvmsg(conn_fd, &msg, 0);
5500df750e9SMarc-André Lureau     } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
5510df750e9SMarc-André Lureau 
5520df750e9SMarc-André Lureau     if (rc < 0) {
5530df750e9SMarc-André Lureau         vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
5540df750e9SMarc-André Lureau         return false;
5550df750e9SMarc-André Lureau     }
5560df750e9SMarc-André Lureau 
5570df750e9SMarc-André Lureau     vmsg->fd_num = 0;
5580df750e9SMarc-André Lureau     for (cmsg = CMSG_FIRSTHDR(&msg);
5590df750e9SMarc-André Lureau          cmsg != NULL;
5600df750e9SMarc-André Lureau          cmsg = CMSG_NXTHDR(&msg, cmsg))
5610df750e9SMarc-André Lureau     {
5620df750e9SMarc-André Lureau         if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
5630df750e9SMarc-André Lureau             fd_size = cmsg->cmsg_len - CMSG_LEN(0);
5640df750e9SMarc-André Lureau             vmsg->fd_num = fd_size / sizeof(int);
565a6f4d2ecSVladimir Sementsov-Ogievskiy             assert(fd_size < VHOST_MEMORY_BASELINE_NREGIONS);
5660df750e9SMarc-André Lureau             memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
5670df750e9SMarc-André Lureau             break;
5680df750e9SMarc-André Lureau         }
5690df750e9SMarc-André Lureau     }
5700df750e9SMarc-André Lureau 
5710df750e9SMarc-André Lureau     if (vmsg->size > sizeof(vmsg->payload)) {
5720df750e9SMarc-André Lureau         vu_panic(dev,
5730df750e9SMarc-André Lureau                  "Error: too big message request: %d, size: vmsg->size: %u, "
5740df750e9SMarc-André Lureau                  "while sizeof(vmsg->payload) = %zu\n",
5750df750e9SMarc-André Lureau                  vmsg->request, vmsg->size, sizeof(vmsg->payload));
5760df750e9SMarc-André Lureau         goto fail;
5770df750e9SMarc-André Lureau     }
5780df750e9SMarc-André Lureau 
5790df750e9SMarc-André Lureau     if (vmsg->size) {
5800df750e9SMarc-André Lureau         do {
5810df750e9SMarc-André Lureau             rc = read(conn_fd, &vmsg->payload, vmsg->size);
5820df750e9SMarc-André Lureau         } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
5830df750e9SMarc-André Lureau 
5840df750e9SMarc-André Lureau         if (rc <= 0) {
5850df750e9SMarc-André Lureau             vu_panic(dev, "Error while reading: %s", strerror(errno));
5860df750e9SMarc-André Lureau             goto fail;
5870df750e9SMarc-André Lureau         }
5880df750e9SMarc-André Lureau 
58918fa7f1eSMarcel Holtmann         assert((uint32_t)rc == vmsg->size);
5900df750e9SMarc-André Lureau     }
5910df750e9SMarc-André Lureau 
5920df750e9SMarc-André Lureau     return true;
5930df750e9SMarc-André Lureau 
5940df750e9SMarc-André Lureau fail:
5950df750e9SMarc-André Lureau     vmsg_close_fds(vmsg);
5960df750e9SMarc-André Lureau 
5970df750e9SMarc-André Lureau     return false;
5980df750e9SMarc-André Lureau }
5990df750e9SMarc-André Lureau 
6000df750e9SMarc-André Lureau static bool
6010df750e9SMarc-André Lureau vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
6020df750e9SMarc-André Lureau {
6030df750e9SMarc-André Lureau     int rc;
6040df750e9SMarc-André Lureau     uint8_t *p = (uint8_t *)vmsg;
6050df750e9SMarc-André Lureau     char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
6060df750e9SMarc-André Lureau     struct iovec iov = {
6070df750e9SMarc-André Lureau         .iov_base = (char *)vmsg,
6080df750e9SMarc-André Lureau         .iov_len = VHOST_USER_HDR_SIZE,
6090df750e9SMarc-André Lureau     };
6100df750e9SMarc-André Lureau     struct msghdr msg = {
6110df750e9SMarc-André Lureau         .msg_iov = &iov,
6120df750e9SMarc-André Lureau         .msg_iovlen = 1,
6130df750e9SMarc-André Lureau         .msg_control = control,
6140df750e9SMarc-André Lureau     };
6150df750e9SMarc-André Lureau     struct cmsghdr *cmsg;
6160df750e9SMarc-André Lureau 
6170df750e9SMarc-André Lureau     memset(control, 0, sizeof(control));
6180df750e9SMarc-André Lureau     assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
6190df750e9SMarc-André Lureau     if (vmsg->fd_num > 0) {
6200df750e9SMarc-André Lureau         size_t fdsize = vmsg->fd_num * sizeof(int);
6210df750e9SMarc-André Lureau         msg.msg_controllen = CMSG_SPACE(fdsize);
6220df750e9SMarc-André Lureau         cmsg = CMSG_FIRSTHDR(&msg);
6230df750e9SMarc-André Lureau         cmsg->cmsg_len = CMSG_LEN(fdsize);
6240df750e9SMarc-André Lureau         cmsg->cmsg_level = SOL_SOCKET;
6250df750e9SMarc-André Lureau         cmsg->cmsg_type = SCM_RIGHTS;
6260df750e9SMarc-André Lureau         memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize);
6270df750e9SMarc-André Lureau     } else {
6280df750e9SMarc-André Lureau         msg.msg_controllen = 0;
6290df750e9SMarc-André Lureau     }
6300df750e9SMarc-André Lureau 
6310df750e9SMarc-André Lureau     do {
6320df750e9SMarc-André Lureau         rc = sendmsg(conn_fd, &msg, 0);
6330df750e9SMarc-André Lureau     } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
6340df750e9SMarc-André Lureau 
6350df750e9SMarc-André Lureau     if (vmsg->size) {
6360df750e9SMarc-André Lureau         do {
6370df750e9SMarc-André Lureau             if (vmsg->data) {
6380df750e9SMarc-André Lureau                 rc = write(conn_fd, vmsg->data, vmsg->size);
6390df750e9SMarc-André Lureau             } else {
6400df750e9SMarc-André Lureau                 rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
6410df750e9SMarc-André Lureau             }
6420df750e9SMarc-André Lureau         } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
6430df750e9SMarc-André Lureau     }
6440df750e9SMarc-André Lureau 
6450df750e9SMarc-André Lureau     if (rc <= 0) {
6460df750e9SMarc-André Lureau         vu_panic(dev, "Error while writing: %s", strerror(errno));
6470df750e9SMarc-André Lureau         return false;
6480df750e9SMarc-André Lureau     }
6490df750e9SMarc-André Lureau 
6500df750e9SMarc-André Lureau     return true;
6510df750e9SMarc-André Lureau }
6520df750e9SMarc-André Lureau 
6530df750e9SMarc-André Lureau static bool
6540df750e9SMarc-André Lureau vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
6550df750e9SMarc-André Lureau {
6560df750e9SMarc-André Lureau     /* Set the version in the flags when sending the reply */
6570df750e9SMarc-André Lureau     vmsg->flags &= ~VHOST_USER_VERSION_MASK;
6580df750e9SMarc-André Lureau     vmsg->flags |= VHOST_USER_VERSION;
6590df750e9SMarc-André Lureau     vmsg->flags |= VHOST_USER_REPLY_MASK;
6600df750e9SMarc-André Lureau 
6610df750e9SMarc-André Lureau     return vu_message_write(dev, conn_fd, vmsg);
6620df750e9SMarc-André Lureau }
6630df750e9SMarc-André Lureau 
6640df750e9SMarc-André Lureau /*
665f8ed3648SManos Pitsidianakis  * Processes a reply on the backend channel.
666f8ed3648SManos Pitsidianakis  * Entered with backend_mutex held and releases it before exit.
6670df750e9SMarc-André Lureau  * Returns true on success.
6680df750e9SMarc-André Lureau  */
6690df750e9SMarc-André Lureau static bool
6700df750e9SMarc-André Lureau vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
6710df750e9SMarc-André Lureau {
6720df750e9SMarc-André Lureau     VhostUserMsg msg_reply;
6730df750e9SMarc-André Lureau     bool result = false;
6740df750e9SMarc-André Lureau 
6750df750e9SMarc-André Lureau     if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
6760df750e9SMarc-André Lureau         result = true;
6770df750e9SMarc-André Lureau         goto out;
6780df750e9SMarc-André Lureau     }
6790df750e9SMarc-André Lureau 
680f8ed3648SManos Pitsidianakis     if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
6810df750e9SMarc-André Lureau         goto out;
6820df750e9SMarc-André Lureau     }
6830df750e9SMarc-André Lureau 
6840df750e9SMarc-André Lureau     if (msg_reply.request != vmsg->request) {
6850df750e9SMarc-André Lureau         DPRINT("Received unexpected msg type. Expected %d received %d",
6860df750e9SMarc-André Lureau                vmsg->request, msg_reply.request);
6870df750e9SMarc-André Lureau         goto out;
6880df750e9SMarc-André Lureau     }
6890df750e9SMarc-André Lureau 
6900df750e9SMarc-André Lureau     result = msg_reply.payload.u64 == 0;
6910df750e9SMarc-André Lureau 
6920df750e9SMarc-André Lureau out:
693f8ed3648SManos Pitsidianakis     pthread_mutex_unlock(&dev->backend_mutex);
6940df750e9SMarc-André Lureau     return result;
6950df750e9SMarc-André Lureau }
6960df750e9SMarc-André Lureau 
6970df750e9SMarc-André Lureau /* Kick the log_call_fd if required. */
6980df750e9SMarc-André Lureau static void
6990df750e9SMarc-André Lureau vu_log_kick(VuDev *dev)
7000df750e9SMarc-André Lureau {
7010df750e9SMarc-André Lureau     if (dev->log_call_fd != -1) {
7020df750e9SMarc-André Lureau         DPRINT("Kicking the QEMU's log...\n");
7030df750e9SMarc-André Lureau         if (eventfd_write(dev->log_call_fd, 1) < 0) {
7040df750e9SMarc-André Lureau             vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
7050df750e9SMarc-André Lureau         }
7060df750e9SMarc-André Lureau     }
7070df750e9SMarc-André Lureau }
7080df750e9SMarc-André Lureau 
/*
 * Atomically set the dirty bit for one guest page in the shared log
 * bitmap (one bit per VHOST_LOG_PAGE-sized page).
 */
static void
vu_log_page(uint8_t *log_table, uint64_t page)
{
    /* page is unsigned (uint64_t); PRId64 mismatched the argument type. */
    DPRINT("Logged dirty guest page: %"PRIu64"\n", page);
    qatomic_or(&log_table[page / 8], 1 << (page % 8));
}
7150df750e9SMarc-André Lureau 
7160df750e9SMarc-André Lureau static void
7170df750e9SMarc-André Lureau vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
7180df750e9SMarc-André Lureau {
7190df750e9SMarc-André Lureau     uint64_t page;
7200df750e9SMarc-André Lureau 
7210df750e9SMarc-André Lureau     if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
7220df750e9SMarc-André Lureau         !dev->log_table || !length) {
7230df750e9SMarc-André Lureau         return;
7240df750e9SMarc-André Lureau     }
7250df750e9SMarc-André Lureau 
7260df750e9SMarc-André Lureau     assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
7270df750e9SMarc-André Lureau 
7280df750e9SMarc-André Lureau     page = address / VHOST_LOG_PAGE;
7290df750e9SMarc-André Lureau     while (page * VHOST_LOG_PAGE < address + length) {
7300df750e9SMarc-André Lureau         vu_log_page(dev->log_table, page);
7310df750e9SMarc-André Lureau         page += 1;
7320df750e9SMarc-André Lureau     }
7330df750e9SMarc-André Lureau 
7340df750e9SMarc-André Lureau     vu_log_kick(dev);
7350df750e9SMarc-André Lureau }
7360df750e9SMarc-André Lureau 
7370df750e9SMarc-André Lureau static void
7380df750e9SMarc-André Lureau vu_kick_cb(VuDev *dev, int condition, void *data)
7390df750e9SMarc-André Lureau {
7400df750e9SMarc-André Lureau     int index = (intptr_t)data;
7410df750e9SMarc-André Lureau     VuVirtq *vq = &dev->vq[index];
7420df750e9SMarc-André Lureau     int sock = vq->kick_fd;
7430df750e9SMarc-André Lureau     eventfd_t kick_data;
7440df750e9SMarc-André Lureau     ssize_t rc;
7450df750e9SMarc-André Lureau 
7460df750e9SMarc-André Lureau     rc = eventfd_read(sock, &kick_data);
7470df750e9SMarc-André Lureau     if (rc == -1) {
7480df750e9SMarc-André Lureau         vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
7490df750e9SMarc-André Lureau         dev->remove_watch(dev, dev->vq[index].kick_fd);
7500df750e9SMarc-André Lureau     } else {
7510df750e9SMarc-André Lureau         DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
7520df750e9SMarc-André Lureau                kick_data, vq->handler, index);
7530df750e9SMarc-André Lureau         if (vq->handler) {
7540df750e9SMarc-André Lureau             vq->handler(dev, index);
7550df750e9SMarc-André Lureau         }
7560df750e9SMarc-André Lureau     }
7570df750e9SMarc-André Lureau }
7580df750e9SMarc-André Lureau 
7590df750e9SMarc-André Lureau static bool
7600df750e9SMarc-André Lureau vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
7610df750e9SMarc-André Lureau {
7620df750e9SMarc-André Lureau     vmsg->payload.u64 =
7630df750e9SMarc-André Lureau         /*
7640df750e9SMarc-André Lureau          * The following VIRTIO feature bits are supported by our virtqueue
7650df750e9SMarc-André Lureau          * implementation:
7660df750e9SMarc-André Lureau          */
7670df750e9SMarc-André Lureau         1ULL << VIRTIO_F_NOTIFY_ON_EMPTY |
7680df750e9SMarc-André Lureau         1ULL << VIRTIO_RING_F_INDIRECT_DESC |
7690df750e9SMarc-André Lureau         1ULL << VIRTIO_RING_F_EVENT_IDX |
7700df750e9SMarc-André Lureau         1ULL << VIRTIO_F_VERSION_1 |
7710df750e9SMarc-André Lureau 
7720df750e9SMarc-André Lureau         /* vhost-user feature bits */
7730df750e9SMarc-André Lureau         1ULL << VHOST_F_LOG_ALL |
7740df750e9SMarc-André Lureau         1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
7750df750e9SMarc-André Lureau 
7760df750e9SMarc-André Lureau     if (dev->iface->get_features) {
7770df750e9SMarc-André Lureau         vmsg->payload.u64 |= dev->iface->get_features(dev);
7780df750e9SMarc-André Lureau     }
7790df750e9SMarc-André Lureau 
7800df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.u64);
7810df750e9SMarc-André Lureau     vmsg->fd_num = 0;
7820df750e9SMarc-André Lureau 
7830df750e9SMarc-André Lureau     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
7840df750e9SMarc-André Lureau 
7850df750e9SMarc-André Lureau     return true;
7860df750e9SMarc-André Lureau }
7870df750e9SMarc-André Lureau 
7880df750e9SMarc-André Lureau static void
7890df750e9SMarc-André Lureau vu_set_enable_all_rings(VuDev *dev, bool enabled)
7900df750e9SMarc-André Lureau {
7910df750e9SMarc-André Lureau     uint16_t i;
7920df750e9SMarc-André Lureau 
7930df750e9SMarc-André Lureau     for (i = 0; i < dev->max_queues; i++) {
7940df750e9SMarc-André Lureau         dev->vq[i].enable = enabled;
7950df750e9SMarc-André Lureau     }
7960df750e9SMarc-André Lureau }
7970df750e9SMarc-André Lureau 
7980df750e9SMarc-André Lureau static bool
7990df750e9SMarc-André Lureau vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
8000df750e9SMarc-André Lureau {
8010df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
8020df750e9SMarc-André Lureau 
8030df750e9SMarc-André Lureau     dev->features = vmsg->payload.u64;
8040df750e9SMarc-André Lureau     if (!vu_has_feature(dev, VIRTIO_F_VERSION_1)) {
8050df750e9SMarc-André Lureau         /*
8060df750e9SMarc-André Lureau          * We only support devices conforming to VIRTIO 1.0 or
8070df750e9SMarc-André Lureau          * later
8080df750e9SMarc-André Lureau          */
8090df750e9SMarc-André Lureau         vu_panic(dev, "virtio legacy devices aren't supported by libvhost-user");
8100df750e9SMarc-André Lureau         return false;
8110df750e9SMarc-André Lureau     }
8120df750e9SMarc-André Lureau 
8130df750e9SMarc-André Lureau     if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
8140df750e9SMarc-André Lureau         vu_set_enable_all_rings(dev, true);
8150df750e9SMarc-André Lureau     }
8160df750e9SMarc-André Lureau 
8170df750e9SMarc-André Lureau     if (dev->iface->set_features) {
8180df750e9SMarc-André Lureau         dev->iface->set_features(dev, dev->features);
8190df750e9SMarc-André Lureau     }
8200df750e9SMarc-André Lureau 
8210df750e9SMarc-André Lureau     return false;
8220df750e9SMarc-André Lureau }
8230df750e9SMarc-André Lureau 
/*
 * VHOST_USER_SET_OWNER handler: the library keeps no per-owner state,
 * so this is a no-op.  Returning false indicates no reply payload is
 * produced (cf. handlers that fill vmsg and return true).
 */
static bool
vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    return false;
}
8290df750e9SMarc-André Lureau 
8300df750e9SMarc-André Lureau static void
8310df750e9SMarc-André Lureau vu_close_log(VuDev *dev)
8320df750e9SMarc-André Lureau {
8330df750e9SMarc-André Lureau     if (dev->log_table) {
8340df750e9SMarc-André Lureau         if (munmap(dev->log_table, dev->log_size) != 0) {
8350df750e9SMarc-André Lureau             perror("close log munmap() error");
8360df750e9SMarc-André Lureau         }
8370df750e9SMarc-André Lureau 
8380df750e9SMarc-André Lureau         dev->log_table = NULL;
8390df750e9SMarc-André Lureau     }
8400df750e9SMarc-André Lureau     if (dev->log_call_fd != -1) {
8410df750e9SMarc-André Lureau         close(dev->log_call_fd);
8420df750e9SMarc-André Lureau         dev->log_call_fd = -1;
8430df750e9SMarc-André Lureau     }
8440df750e9SMarc-André Lureau }
8450df750e9SMarc-André Lureau 
/*
 * Device-reset handler: disable every virtqueue.  Returns false
 * (no reply payload).
 */
static bool
vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    vu_set_enable_all_rings(dev, false);

    return false;
}
8530df750e9SMarc-André Lureau 
/*
 * Register every mapped memory region with the userfaultfd stored in
 * dev->postcopy_ufd, so missing-page faults are delivered to us
 * (postcopy support).  Returns true on success; on a registration
 * failure the device is panicked and false is returned.  Compiled to a
 * no-op per region when UFFDIO_REGISTER is unavailable at build time.
 */
static bool
generate_faults(VuDev *dev) {
    unsigned int i;
    for (i = 0; i < dev->nregions; i++) {
#ifdef UFFDIO_REGISTER
        VuDevRegion *dev_region = &dev->regions[i];
        int ret;
        struct uffdio_register reg_struct;

        /*
         * We should already have an open ufd. Mark each memory
         * range as ufd.
         * Discard any mapping we have here; note I can't use MADV_REMOVE
         * or fallocate to make the hole since I don't want to lose
         * data that's already arrived in the shared process.
         * TODO: How to do hugepage
         */
        ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
                      dev_region->size + dev_region->mmap_offset,
                      MADV_DONTNEED);
        if (ret) {
            /* Non-fatal: log and continue with registration. */
            fprintf(stderr,
                    "%s: Failed to madvise(DONTNEED) region %d: %s\n",
                    __func__, i, strerror(errno));
        }
        /*
         * Turn off transparent hugepages so we don't lose wakeups
         * in neighbouring pages.
         * TODO: Turn this back on later.
         */
        ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
                      dev_region->size + dev_region->mmap_offset,
                      MADV_NOHUGEPAGE);
        if (ret) {
            /*
             * Note: This can happen legally on kernels that are configured
             * without madvise'able hugepages
             */
            fprintf(stderr,
                    "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n",
                    __func__, i, strerror(errno));
        }

        /* Register the whole mapping (including the mmap offset slack). */
        reg_struct.range.start = (uintptr_t)dev_region->mmap_addr;
        reg_struct.range.len = dev_region->size + dev_region->mmap_offset;
        reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

        if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, &reg_struct)) {
            vu_panic(dev, "%s: Failed to userfault region %d "
                          "@%" PRIx64 " + size:%" PRIx64 " offset: %" PRIx64
                          ": (ufd=%d)%s\n",
                     __func__, i,
                     dev_region->mmap_addr,
                     dev_region->size, dev_region->mmap_offset,
                     dev->postcopy_ufd, strerror(errno));
            return false;
        }
        /* The kernel must let us resolve faults with UFFDIO_COPY. */
        if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) {
            vu_panic(dev, "%s Region (%d) doesn't support COPY",
                     __func__, i);
            return false;
        }
        DPRINT("%s: region %d: Registered userfault for %"
               PRIx64 " + %" PRIx64 "\n", __func__, i,
               (uint64_t)reg_struct.range.start,
               (uint64_t)reg_struct.range.len);
        /* Now it's registered we can let the client at it */
        if (mprotect((void *)(uintptr_t)dev_region->mmap_addr,
                     dev_region->size + dev_region->mmap_offset,
                     PROT_READ | PROT_WRITE)) {
            vu_panic(dev, "failed to mprotect region %d for postcopy (%s)",
                     i, strerror(errno));
            return false;
        }
        /* TODO: Stash 'zero' support flags somewhere */
#endif
    }

    return true;
}
9340df750e9SMarc-André Lureau 
/*
 * VHOST_USER_ADD_MEM_REG handler: hot-add one guest memory region.
 * Expects exactly one fd carrying the region's backing memory; the fd is
 * closed on every path once consumed.  Returns true only in postcopy
 * mode, where the message (with addresses filled in by _vu_add_mem_reg)
 * is sent back as the reply; otherwise returns false.
 */
static bool
vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
    VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;

    if (vmsg->fd_num != 1) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_ADD_MEM_REG received %d fds - only 1 fd "
                      "should be sent for this message type", vmsg->fd_num);
        return false;
    }

    if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
        close(vmsg->fds[0]);
        vu_panic(dev, "VHOST_USER_ADD_MEM_REG requires a message size of at "
                      "least %zu bytes and only %d bytes were received",
                      VHOST_USER_MEM_REG_SIZE, vmsg->size);
        return false;
    }

    if (dev->nregions == VHOST_USER_MAX_RAM_SLOTS) {
        close(vmsg->fds[0]);
        vu_panic(dev, "failing attempt to hot add memory via "
                      "VHOST_USER_ADD_MEM_REG message because the backend has "
                      "no free ram slots available");
        return false;
    }

    /*
     * If we are in postcopy mode and we receive a u64 payload with a 0 value
     * we know all the postcopy client bases have been received, and we
     * should start generating faults.
     *
     * NOTE(review): the size < VHOST_USER_MEM_REG_SIZE check above would
     * reject an 8-byte u64 message before this test runs, assuming
     * VHOST_USER_MEM_REG_SIZE > sizeof(u64) — confirm the intended
     * ordering for the postcopy end-marker.
     */
    if (dev->postcopy_listening &&
        vmsg->size == sizeof(vmsg->payload.u64) &&
        vmsg->payload.u64 == 0) {
        (void)generate_faults(dev);
        return false;
    }

    _vu_add_mem_reg(dev, msg_region, vmsg->fds[0]);
    /* The fd is only needed while setting the region up; drop our copy. */
    close(vmsg->fds[0]);

    if (dev->postcopy_listening) {
        /* Send the message back to qemu with the addresses filled in. */
        vmsg->fd_num = 0;
        DPRINT("Successfully added new region in postcopy\n");
        return true;
    }
    DPRINT("Successfully added new region\n");
    return false;
}
9860df750e9SMarc-André Lureau 
9870df750e9SMarc-André Lureau static inline bool reg_equal(VuDevRegion *vudev_reg,
9880df750e9SMarc-André Lureau                              VhostUserMemoryRegion *msg_reg)
9890df750e9SMarc-André Lureau {
9900df750e9SMarc-André Lureau     if (vudev_reg->gpa == msg_reg->guest_phys_addr &&
9910df750e9SMarc-André Lureau         vudev_reg->qva == msg_reg->userspace_addr &&
9920df750e9SMarc-André Lureau         vudev_reg->size == msg_reg->memory_size) {
9930df750e9SMarc-André Lureau         return true;
9940df750e9SMarc-André Lureau     }
9950df750e9SMarc-André Lureau 
9960df750e9SMarc-André Lureau     return false;
9970df750e9SMarc-André Lureau }
9980df750e9SMarc-André Lureau 
/*
 * VHOST_USER_REM_MEM_REG handler: unmap and forget one previously added
 * memory region, matched by (gpa, qva, size) against the tracked set.
 * Any fds attached to the message are closed on every path.  Always
 * returns false (no reply payload).
 */
static bool
vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
    VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;
    unsigned int idx;
    VuDevRegion *r;

    if (vmsg->fd_num > 1) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_REM_MEM_REG received %d fds - at most 1 fd "
                      "should be sent for this message type", vmsg->fd_num);
        return false;
    }

    if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_REM_MEM_REG requires a message size of at "
                      "least %zu bytes and only %d bytes were received",
                      VHOST_USER_MEM_REG_SIZE, vmsg->size);
        return false;
    }

    DPRINT("Removing region:\n");
    DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
           msg_region->guest_phys_addr);
    DPRINT("    memory_size:     0x%016"PRIx64"\n",
           msg_region->memory_size);
    DPRINT("    userspace_addr   0x%016"PRIx64"\n",
           msg_region->userspace_addr);
    DPRINT("    mmap_offset      0x%016"PRIx64"\n",
           msg_region->mmap_offset);

    r = vu_gpa_to_mem_region(dev, msg_region->guest_phys_addr);
    if (!r || !reg_equal(r, msg_region)) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Specified region not found\n");
        return false;
    }

    /*
     * There might be valid cases where we temporarily remove memory regions
     * to readd them again, or remove memory regions and don't use the rings
     * anymore before we set the ring addresses and restart the device.
     *
     * Unmap all affected rings, remapping them on demand later. This should
     * be a corner case.
     */
    unmap_rings(dev, r);

    /* Unmap the whole mapping, including the offset slack before the data. */
    munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);

    idx = r - dev->regions;
    assert(idx < dev->nregions);
    /* Shift all affected entries by 1 to close the hole. */
    memmove(r, r + 1, sizeof(VuDevRegion) * (dev->nregions - idx - 1));
    DPRINT("Successfully removed a region\n");
    dev->nregions--;

    vmsg_close_fds(vmsg);

    return false;
}
10600df750e9SMarc-André Lureau 
10610df750e9SMarc-André Lureau static bool
1062ce0f3b03SAlbert Esteve vu_get_shared_object(VuDev *dev, VhostUserMsg *vmsg)
1063ce0f3b03SAlbert Esteve {
1064ce0f3b03SAlbert Esteve     int fd_num = 0;
1065ce0f3b03SAlbert Esteve     int dmabuf_fd = -1;
1066ce0f3b03SAlbert Esteve     if (dev->iface->get_shared_object) {
1067ce0f3b03SAlbert Esteve         dmabuf_fd = dev->iface->get_shared_object(
1068ce0f3b03SAlbert Esteve             dev, &vmsg->payload.object.uuid[0]);
1069ce0f3b03SAlbert Esteve     }
1070ce0f3b03SAlbert Esteve     if (dmabuf_fd != -1) {
1071ce0f3b03SAlbert Esteve         DPRINT("dmabuf_fd found for requested UUID\n");
1072ce0f3b03SAlbert Esteve         vmsg->fds[fd_num++] = dmabuf_fd;
1073ce0f3b03SAlbert Esteve     }
1074ce0f3b03SAlbert Esteve     vmsg->fd_num = fd_num;
1075ce0f3b03SAlbert Esteve 
1076ce0f3b03SAlbert Esteve     return true;
1077ce0f3b03SAlbert Esteve }
1078ce0f3b03SAlbert Esteve 
1079ce0f3b03SAlbert Esteve static bool
108005a58ce4SDavid Hildenbrand vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
10810df750e9SMarc-André Lureau {
10820df750e9SMarc-André Lureau     VhostUserMemory m = vmsg->payload.memory, *memory = &m;
108305a58ce4SDavid Hildenbrand     unsigned int i;
10840df750e9SMarc-André Lureau 
1085bec58209SDavid Hildenbrand     vu_remove_all_mem_regs(dev);
10860df750e9SMarc-André Lureau 
10870df750e9SMarc-André Lureau     DPRINT("Nregions: %u\n", memory->nregions);
108893fec23dSDavid Hildenbrand     for (i = 0; i < memory->nregions; i++) {
108993fec23dSDavid Hildenbrand         _vu_add_mem_reg(dev, &memory->regions[i], vmsg->fds[i]);
10900df750e9SMarc-André Lureau         close(vmsg->fds[i]);
10910df750e9SMarc-André Lureau     }
10920df750e9SMarc-André Lureau 
109305a58ce4SDavid Hildenbrand     if (dev->postcopy_listening) {
109405a58ce4SDavid Hildenbrand         /* Send the message back to qemu with the addresses filled in */
109505a58ce4SDavid Hildenbrand         vmsg->fd_num = 0;
109605a58ce4SDavid Hildenbrand         if (!vu_send_reply(dev, dev->sock, vmsg)) {
109705a58ce4SDavid Hildenbrand             vu_panic(dev, "failed to respond to set-mem-table for postcopy");
109805a58ce4SDavid Hildenbrand             return false;
109905a58ce4SDavid Hildenbrand         }
110005a58ce4SDavid Hildenbrand 
110105a58ce4SDavid Hildenbrand         /*
110205a58ce4SDavid Hildenbrand          * Wait for QEMU to confirm that it's registered the handler for the
110305a58ce4SDavid Hildenbrand          * faults.
110405a58ce4SDavid Hildenbrand          */
110505a58ce4SDavid Hildenbrand         if (!dev->read_msg(dev, dev->sock, vmsg) ||
110605a58ce4SDavid Hildenbrand             vmsg->size != sizeof(vmsg->payload.u64) ||
110705a58ce4SDavid Hildenbrand             vmsg->payload.u64 != 0) {
110805a58ce4SDavid Hildenbrand             vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
110905a58ce4SDavid Hildenbrand             return false;
111005a58ce4SDavid Hildenbrand         }
111105a58ce4SDavid Hildenbrand 
111205a58ce4SDavid Hildenbrand         /* OK, now we can go and register the memory and generate faults */
111305a58ce4SDavid Hildenbrand         (void)generate_faults(dev);
111405a58ce4SDavid Hildenbrand         return false;
111505a58ce4SDavid Hildenbrand     }
111605a58ce4SDavid Hildenbrand 
11170df750e9SMarc-André Lureau     for (i = 0; i < dev->max_queues; i++) {
11180df750e9SMarc-André Lureau         if (dev->vq[i].vring.desc) {
11190df750e9SMarc-André Lureau             if (map_ring(dev, &dev->vq[i])) {
11200df750e9SMarc-André Lureau                 vu_panic(dev, "remapping queue %d during setmemtable", i);
11210df750e9SMarc-André Lureau             }
11220df750e9SMarc-André Lureau         }
11230df750e9SMarc-André Lureau     }
11240df750e9SMarc-André Lureau 
11250df750e9SMarc-André Lureau     return false;
11260df750e9SMarc-André Lureau }
11270df750e9SMarc-André Lureau 
11280df750e9SMarc-André Lureau static bool
11290df750e9SMarc-André Lureau vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
11300df750e9SMarc-André Lureau {
11310df750e9SMarc-André Lureau     int fd;
11320df750e9SMarc-André Lureau     uint64_t log_mmap_size, log_mmap_offset;
11330df750e9SMarc-André Lureau     void *rc;
11340df750e9SMarc-André Lureau 
11350df750e9SMarc-André Lureau     if (vmsg->fd_num != 1 ||
11360df750e9SMarc-André Lureau         vmsg->size != sizeof(vmsg->payload.log)) {
11370df750e9SMarc-André Lureau         vu_panic(dev, "Invalid log_base message");
11380df750e9SMarc-André Lureau         return true;
11390df750e9SMarc-André Lureau     }
11400df750e9SMarc-André Lureau 
11410df750e9SMarc-André Lureau     fd = vmsg->fds[0];
11420df750e9SMarc-André Lureau     log_mmap_offset = vmsg->payload.log.mmap_offset;
11430df750e9SMarc-André Lureau     log_mmap_size = vmsg->payload.log.mmap_size;
11440df750e9SMarc-André Lureau     DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
11450df750e9SMarc-André Lureau     DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
11460df750e9SMarc-André Lureau 
11470df750e9SMarc-André Lureau     rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
11480df750e9SMarc-André Lureau               log_mmap_offset);
11490df750e9SMarc-André Lureau     close(fd);
11500df750e9SMarc-André Lureau     if (rc == MAP_FAILED) {
11510df750e9SMarc-André Lureau         perror("log mmap error");
11520df750e9SMarc-André Lureau     }
11530df750e9SMarc-André Lureau 
11540df750e9SMarc-André Lureau     if (dev->log_table) {
11550df750e9SMarc-André Lureau         munmap(dev->log_table, dev->log_size);
11560df750e9SMarc-André Lureau     }
11570df750e9SMarc-André Lureau     dev->log_table = rc;
11580df750e9SMarc-André Lureau     dev->log_size = log_mmap_size;
11590df750e9SMarc-André Lureau 
11600df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.u64);
11610df750e9SMarc-André Lureau     vmsg->fd_num = 0;
11620df750e9SMarc-André Lureau 
11630df750e9SMarc-André Lureau     return true;
11640df750e9SMarc-André Lureau }
11650df750e9SMarc-André Lureau 
11660df750e9SMarc-André Lureau static bool
11670df750e9SMarc-André Lureau vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
11680df750e9SMarc-André Lureau {
11690df750e9SMarc-André Lureau     if (vmsg->fd_num != 1) {
11700df750e9SMarc-André Lureau         vu_panic(dev, "Invalid log_fd message");
11710df750e9SMarc-André Lureau         return false;
11720df750e9SMarc-André Lureau     }
11730df750e9SMarc-André Lureau 
11740df750e9SMarc-André Lureau     if (dev->log_call_fd != -1) {
11750df750e9SMarc-André Lureau         close(dev->log_call_fd);
11760df750e9SMarc-André Lureau     }
11770df750e9SMarc-André Lureau     dev->log_call_fd = vmsg->fds[0];
11780df750e9SMarc-André Lureau     DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
11790df750e9SMarc-André Lureau 
11800df750e9SMarc-André Lureau     return false;
11810df750e9SMarc-André Lureau }
11820df750e9SMarc-André Lureau 
11830df750e9SMarc-André Lureau static bool
11840df750e9SMarc-André Lureau vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
11850df750e9SMarc-André Lureau {
11860df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
11870df750e9SMarc-André Lureau     unsigned int num = vmsg->payload.state.num;
11880df750e9SMarc-André Lureau 
11890df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
11900df750e9SMarc-André Lureau     DPRINT("State.num:   %u\n", num);
11910df750e9SMarc-André Lureau     dev->vq[index].vring.num = num;
11920df750e9SMarc-André Lureau 
11930df750e9SMarc-André Lureau     return false;
11940df750e9SMarc-André Lureau }
11950df750e9SMarc-André Lureau 
11960df750e9SMarc-André Lureau static bool
11970df750e9SMarc-André Lureau vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
11980df750e9SMarc-André Lureau {
11990df750e9SMarc-André Lureau     struct vhost_vring_addr addr = vmsg->payload.addr, *vra = &addr;
12000df750e9SMarc-André Lureau     unsigned int index = vra->index;
12010df750e9SMarc-André Lureau     VuVirtq *vq = &dev->vq[index];
12020df750e9SMarc-André Lureau 
12030df750e9SMarc-André Lureau     DPRINT("vhost_vring_addr:\n");
12040df750e9SMarc-André Lureau     DPRINT("    index:  %d\n", vra->index);
12050df750e9SMarc-André Lureau     DPRINT("    flags:  %d\n", vra->flags);
12064fe29344SMarc-André Lureau     DPRINT("    desc_user_addr:   0x%016" PRIx64 "\n", (uint64_t)vra->desc_user_addr);
12074fe29344SMarc-André Lureau     DPRINT("    used_user_addr:   0x%016" PRIx64 "\n", (uint64_t)vra->used_user_addr);
12084fe29344SMarc-André Lureau     DPRINT("    avail_user_addr:  0x%016" PRIx64 "\n", (uint64_t)vra->avail_user_addr);
12094fe29344SMarc-André Lureau     DPRINT("    log_guest_addr:   0x%016" PRIx64 "\n", (uint64_t)vra->log_guest_addr);
12100df750e9SMarc-André Lureau 
12110df750e9SMarc-André Lureau     vq->vra = *vra;
12120df750e9SMarc-André Lureau     vq->vring.flags = vra->flags;
12130df750e9SMarc-André Lureau     vq->vring.log_guest_addr = vra->log_guest_addr;
12140df750e9SMarc-André Lureau 
12150df750e9SMarc-André Lureau 
12160df750e9SMarc-André Lureau     if (map_ring(dev, vq)) {
12170df750e9SMarc-André Lureau         vu_panic(dev, "Invalid vring_addr message");
12180df750e9SMarc-André Lureau         return false;
12190df750e9SMarc-André Lureau     }
12200df750e9SMarc-André Lureau 
12210df750e9SMarc-André Lureau     vq->used_idx = le16toh(vq->vring.used->idx);
12220df750e9SMarc-André Lureau 
12230df750e9SMarc-André Lureau     if (vq->last_avail_idx != vq->used_idx) {
12240df750e9SMarc-André Lureau         bool resume = dev->iface->queue_is_processed_in_order &&
12250df750e9SMarc-André Lureau             dev->iface->queue_is_processed_in_order(dev, index);
12260df750e9SMarc-André Lureau 
12270df750e9SMarc-André Lureau         DPRINT("Last avail index != used index: %u != %u%s\n",
12280df750e9SMarc-André Lureau                vq->last_avail_idx, vq->used_idx,
12290df750e9SMarc-André Lureau                resume ? ", resuming" : "");
12300df750e9SMarc-André Lureau 
12310df750e9SMarc-André Lureau         if (resume) {
12320df750e9SMarc-André Lureau             vq->shadow_avail_idx = vq->last_avail_idx = vq->used_idx;
12330df750e9SMarc-André Lureau         }
12340df750e9SMarc-André Lureau     }
12350df750e9SMarc-André Lureau 
12360df750e9SMarc-André Lureau     return false;
12370df750e9SMarc-André Lureau }
12380df750e9SMarc-André Lureau 
12390df750e9SMarc-André Lureau static bool
12400df750e9SMarc-André Lureau vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
12410df750e9SMarc-André Lureau {
12420df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
12430df750e9SMarc-André Lureau     unsigned int num = vmsg->payload.state.num;
12440df750e9SMarc-André Lureau 
12450df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
12460df750e9SMarc-André Lureau     DPRINT("State.num:   %u\n", num);
12470df750e9SMarc-André Lureau     dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
12480df750e9SMarc-André Lureau 
12490df750e9SMarc-André Lureau     return false;
12500df750e9SMarc-André Lureau }
12510df750e9SMarc-André Lureau 
12520df750e9SMarc-André Lureau static bool
12530df750e9SMarc-André Lureau vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
12540df750e9SMarc-André Lureau {
12550df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
12560df750e9SMarc-André Lureau 
12570df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
12580df750e9SMarc-André Lureau     vmsg->payload.state.num = dev->vq[index].last_avail_idx;
12590df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.state);
12600df750e9SMarc-André Lureau 
12610df750e9SMarc-André Lureau     dev->vq[index].started = false;
12620df750e9SMarc-André Lureau     if (dev->iface->queue_set_started) {
12630df750e9SMarc-André Lureau         dev->iface->queue_set_started(dev, index, false);
12640df750e9SMarc-André Lureau     }
12650df750e9SMarc-André Lureau 
12660df750e9SMarc-André Lureau     if (dev->vq[index].call_fd != -1) {
12670df750e9SMarc-André Lureau         close(dev->vq[index].call_fd);
12680df750e9SMarc-André Lureau         dev->vq[index].call_fd = -1;
12690df750e9SMarc-André Lureau     }
12700df750e9SMarc-André Lureau     if (dev->vq[index].kick_fd != -1) {
12710df750e9SMarc-André Lureau         dev->remove_watch(dev, dev->vq[index].kick_fd);
12720df750e9SMarc-André Lureau         close(dev->vq[index].kick_fd);
12730df750e9SMarc-André Lureau         dev->vq[index].kick_fd = -1;
12740df750e9SMarc-André Lureau     }
12750df750e9SMarc-André Lureau 
12760df750e9SMarc-André Lureau     return true;
12770df750e9SMarc-André Lureau }
12780df750e9SMarc-André Lureau 
12790df750e9SMarc-André Lureau static bool
12800df750e9SMarc-André Lureau vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
12810df750e9SMarc-André Lureau {
12820df750e9SMarc-André Lureau     int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
12830df750e9SMarc-André Lureau     bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
12840df750e9SMarc-André Lureau 
12850df750e9SMarc-André Lureau     if (index >= dev->max_queues) {
12860df750e9SMarc-André Lureau         vmsg_close_fds(vmsg);
12870df750e9SMarc-André Lureau         vu_panic(dev, "Invalid queue index: %u", index);
12880df750e9SMarc-André Lureau         return false;
12890df750e9SMarc-André Lureau     }
12900df750e9SMarc-André Lureau 
12910df750e9SMarc-André Lureau     if (nofd) {
12920df750e9SMarc-André Lureau         vmsg_close_fds(vmsg);
12930df750e9SMarc-André Lureau         return true;
12940df750e9SMarc-André Lureau     }
12950df750e9SMarc-André Lureau 
12960df750e9SMarc-André Lureau     if (vmsg->fd_num != 1) {
12970df750e9SMarc-André Lureau         vmsg_close_fds(vmsg);
12980df750e9SMarc-André Lureau         vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
12990df750e9SMarc-André Lureau         return false;
13000df750e9SMarc-André Lureau     }
13010df750e9SMarc-André Lureau 
13020df750e9SMarc-André Lureau     return true;
13030df750e9SMarc-André Lureau }
13040df750e9SMarc-André Lureau 
13050df750e9SMarc-André Lureau static int
13060df750e9SMarc-André Lureau inflight_desc_compare(const void *a, const void *b)
13070df750e9SMarc-André Lureau {
13080df750e9SMarc-André Lureau     VuVirtqInflightDesc *desc0 = (VuVirtqInflightDesc *)a,
13090df750e9SMarc-André Lureau                         *desc1 = (VuVirtqInflightDesc *)b;
13100df750e9SMarc-André Lureau 
13110df750e9SMarc-André Lureau     if (desc1->counter > desc0->counter &&
13120df750e9SMarc-André Lureau         (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
13130df750e9SMarc-André Lureau         return 1;
13140df750e9SMarc-André Lureau     }
13150df750e9SMarc-André Lureau 
13160df750e9SMarc-André Lureau     return -1;
13170df750e9SMarc-André Lureau }
13180df750e9SMarc-André Lureau 
13190df750e9SMarc-André Lureau static int
13200df750e9SMarc-André Lureau vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
13210df750e9SMarc-André Lureau {
13220df750e9SMarc-André Lureau     int i = 0;
13230df750e9SMarc-André Lureau 
13240df750e9SMarc-André Lureau     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
13250df750e9SMarc-André Lureau         return 0;
13260df750e9SMarc-André Lureau     }
13270df750e9SMarc-André Lureau 
13280df750e9SMarc-André Lureau     if (unlikely(!vq->inflight)) {
13290df750e9SMarc-André Lureau         return -1;
13300df750e9SMarc-André Lureau     }
13310df750e9SMarc-André Lureau 
13320df750e9SMarc-André Lureau     if (unlikely(!vq->inflight->version)) {
13330df750e9SMarc-André Lureau         /* initialize the buffer */
13340df750e9SMarc-André Lureau         vq->inflight->version = INFLIGHT_VERSION;
13350df750e9SMarc-André Lureau         return 0;
13360df750e9SMarc-André Lureau     }
13370df750e9SMarc-André Lureau 
13380df750e9SMarc-André Lureau     vq->used_idx = le16toh(vq->vring.used->idx);
13390df750e9SMarc-André Lureau     vq->resubmit_num = 0;
13400df750e9SMarc-André Lureau     vq->resubmit_list = NULL;
13410df750e9SMarc-André Lureau     vq->counter = 0;
13420df750e9SMarc-André Lureau 
13430df750e9SMarc-André Lureau     if (unlikely(vq->inflight->used_idx != vq->used_idx)) {
13440df750e9SMarc-André Lureau         vq->inflight->desc[vq->inflight->last_batch_head].inflight = 0;
13450df750e9SMarc-André Lureau 
13460df750e9SMarc-André Lureau         barrier();
13470df750e9SMarc-André Lureau 
13480df750e9SMarc-André Lureau         vq->inflight->used_idx = vq->used_idx;
13490df750e9SMarc-André Lureau     }
13500df750e9SMarc-André Lureau 
13510df750e9SMarc-André Lureau     for (i = 0; i < vq->inflight->desc_num; i++) {
13520df750e9SMarc-André Lureau         if (vq->inflight->desc[i].inflight == 1) {
13530df750e9SMarc-André Lureau             vq->inuse++;
13540df750e9SMarc-André Lureau         }
13550df750e9SMarc-André Lureau     }
13560df750e9SMarc-André Lureau 
13570df750e9SMarc-André Lureau     vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;
13580df750e9SMarc-André Lureau 
13590df750e9SMarc-André Lureau     if (vq->inuse) {
13600df750e9SMarc-André Lureau         vq->resubmit_list = calloc(vq->inuse, sizeof(VuVirtqInflightDesc));
13610df750e9SMarc-André Lureau         if (!vq->resubmit_list) {
13620df750e9SMarc-André Lureau             return -1;
13630df750e9SMarc-André Lureau         }
13640df750e9SMarc-André Lureau 
13650df750e9SMarc-André Lureau         for (i = 0; i < vq->inflight->desc_num; i++) {
13660df750e9SMarc-André Lureau             if (vq->inflight->desc[i].inflight) {
13670df750e9SMarc-André Lureau                 vq->resubmit_list[vq->resubmit_num].index = i;
13680df750e9SMarc-André Lureau                 vq->resubmit_list[vq->resubmit_num].counter =
13690df750e9SMarc-André Lureau                                         vq->inflight->desc[i].counter;
13700df750e9SMarc-André Lureau                 vq->resubmit_num++;
13710df750e9SMarc-André Lureau             }
13720df750e9SMarc-André Lureau         }
13730df750e9SMarc-André Lureau 
13740df750e9SMarc-André Lureau         if (vq->resubmit_num > 1) {
13750df750e9SMarc-André Lureau             qsort(vq->resubmit_list, vq->resubmit_num,
13760df750e9SMarc-André Lureau                   sizeof(VuVirtqInflightDesc), inflight_desc_compare);
13770df750e9SMarc-André Lureau         }
13780df750e9SMarc-André Lureau         vq->counter = vq->resubmit_list[0].counter + 1;
13790df750e9SMarc-André Lureau     }
13800df750e9SMarc-André Lureau 
13810df750e9SMarc-André Lureau     /* in case of I/O hang after reconnecting */
13820df750e9SMarc-André Lureau     if (eventfd_write(vq->kick_fd, 1)) {
13830df750e9SMarc-André Lureau         return -1;
13840df750e9SMarc-André Lureau     }
13850df750e9SMarc-André Lureau 
13860df750e9SMarc-André Lureau     return 0;
13870df750e9SMarc-André Lureau }
13880df750e9SMarc-André Lureau 
13890df750e9SMarc-André Lureau static bool
13900df750e9SMarc-André Lureau vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
13910df750e9SMarc-André Lureau {
13920df750e9SMarc-André Lureau     int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
13930df750e9SMarc-André Lureau     bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
13940df750e9SMarc-André Lureau 
13950df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
13960df750e9SMarc-André Lureau 
13970df750e9SMarc-André Lureau     if (!vu_check_queue_msg_file(dev, vmsg)) {
13980df750e9SMarc-André Lureau         return false;
13990df750e9SMarc-André Lureau     }
14000df750e9SMarc-André Lureau 
14010df750e9SMarc-André Lureau     if (dev->vq[index].kick_fd != -1) {
14020df750e9SMarc-André Lureau         dev->remove_watch(dev, dev->vq[index].kick_fd);
14030df750e9SMarc-André Lureau         close(dev->vq[index].kick_fd);
14040df750e9SMarc-André Lureau         dev->vq[index].kick_fd = -1;
14050df750e9SMarc-André Lureau     }
14060df750e9SMarc-André Lureau 
14070df750e9SMarc-André Lureau     dev->vq[index].kick_fd = nofd ? -1 : vmsg->fds[0];
14080df750e9SMarc-André Lureau     DPRINT("Got kick_fd: %d for vq: %d\n", dev->vq[index].kick_fd, index);
14090df750e9SMarc-André Lureau 
14100df750e9SMarc-André Lureau     dev->vq[index].started = true;
14110df750e9SMarc-André Lureau     if (dev->iface->queue_set_started) {
14120df750e9SMarc-André Lureau         dev->iface->queue_set_started(dev, index, true);
14130df750e9SMarc-André Lureau     }
14140df750e9SMarc-André Lureau 
14150df750e9SMarc-André Lureau     if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
14160df750e9SMarc-André Lureau         dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
14170df750e9SMarc-André Lureau                        vu_kick_cb, (void *)(long)index);
14180df750e9SMarc-André Lureau 
14190df750e9SMarc-André Lureau         DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
14200df750e9SMarc-André Lureau                dev->vq[index].kick_fd, index);
14210df750e9SMarc-André Lureau     }
14220df750e9SMarc-André Lureau 
14230df750e9SMarc-André Lureau     if (vu_check_queue_inflights(dev, &dev->vq[index])) {
14240df750e9SMarc-André Lureau         vu_panic(dev, "Failed to check inflights for vq: %d\n", index);
14250df750e9SMarc-André Lureau     }
14260df750e9SMarc-André Lureau 
14270df750e9SMarc-André Lureau     return false;
14280df750e9SMarc-André Lureau }
14290df750e9SMarc-André Lureau 
14300df750e9SMarc-André Lureau void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
14310df750e9SMarc-André Lureau                           vu_queue_handler_cb handler)
14320df750e9SMarc-André Lureau {
14330df750e9SMarc-André Lureau     int qidx = vq - dev->vq;
14340df750e9SMarc-André Lureau 
14350df750e9SMarc-André Lureau     vq->handler = handler;
14360df750e9SMarc-André Lureau     if (vq->kick_fd >= 0) {
14370df750e9SMarc-André Lureau         if (handler) {
14380df750e9SMarc-André Lureau             dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
14390df750e9SMarc-André Lureau                            vu_kick_cb, (void *)(long)qidx);
14400df750e9SMarc-André Lureau         } else {
14410df750e9SMarc-André Lureau             dev->remove_watch(dev, vq->kick_fd);
14420df750e9SMarc-André Lureau         }
14430df750e9SMarc-André Lureau     }
14440df750e9SMarc-André Lureau }
14450df750e9SMarc-André Lureau 
14460df750e9SMarc-André Lureau bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
14470df750e9SMarc-André Lureau                                 int size, int offset)
14480df750e9SMarc-André Lureau {
14490df750e9SMarc-André Lureau     int qidx = vq - dev->vq;
14500df750e9SMarc-André Lureau     int fd_num = 0;
14510df750e9SMarc-André Lureau     VhostUserMsg vmsg = {
1452e608feedSMaxime Coquelin         .request = VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG,
14530df750e9SMarc-André Lureau         .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
14540df750e9SMarc-André Lureau         .size = sizeof(vmsg.payload.area),
14550df750e9SMarc-André Lureau         .payload.area = {
14560df750e9SMarc-André Lureau             .u64 = qidx & VHOST_USER_VRING_IDX_MASK,
14570df750e9SMarc-André Lureau             .size = size,
14580df750e9SMarc-André Lureau             .offset = offset,
14590df750e9SMarc-André Lureau         },
14600df750e9SMarc-André Lureau     };
14610df750e9SMarc-André Lureau 
14620df750e9SMarc-André Lureau     if (fd == -1) {
14630df750e9SMarc-André Lureau         vmsg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
14640df750e9SMarc-André Lureau     } else {
14650df750e9SMarc-André Lureau         vmsg.fds[fd_num++] = fd;
14660df750e9SMarc-André Lureau     }
14670df750e9SMarc-André Lureau 
14680df750e9SMarc-André Lureau     vmsg.fd_num = fd_num;
14690df750e9SMarc-André Lureau 
1470e608feedSMaxime Coquelin     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD)) {
14710df750e9SMarc-André Lureau         return false;
14720df750e9SMarc-André Lureau     }
14730df750e9SMarc-André Lureau 
1474f8ed3648SManos Pitsidianakis     pthread_mutex_lock(&dev->backend_mutex);
1475f8ed3648SManos Pitsidianakis     if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
1476f8ed3648SManos Pitsidianakis         pthread_mutex_unlock(&dev->backend_mutex);
14770df750e9SMarc-André Lureau         return false;
14780df750e9SMarc-André Lureau     }
14790df750e9SMarc-André Lureau 
1480f8ed3648SManos Pitsidianakis     /* Also unlocks the backend_mutex */
14810df750e9SMarc-André Lureau     return vu_process_message_reply(dev, &vmsg);
14820df750e9SMarc-André Lureau }
14830df750e9SMarc-André Lureau 
1484ce0f3b03SAlbert Esteve bool
1485ce0f3b03SAlbert Esteve vu_lookup_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN],
1486ce0f3b03SAlbert Esteve                         int *dmabuf_fd)
1487ce0f3b03SAlbert Esteve {
1488ce0f3b03SAlbert Esteve     bool result = false;
1489ce0f3b03SAlbert Esteve     VhostUserMsg msg_reply;
1490ce0f3b03SAlbert Esteve     VhostUserMsg msg = {
1491ce0f3b03SAlbert Esteve         .request = VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP,
1492ce0f3b03SAlbert Esteve         .size = sizeof(msg.payload.object),
1493ce0f3b03SAlbert Esteve         .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1494ce0f3b03SAlbert Esteve     };
1495ce0f3b03SAlbert Esteve 
1496ce0f3b03SAlbert Esteve     memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1497ce0f3b03SAlbert Esteve 
1498ce0f3b03SAlbert Esteve     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1499ce0f3b03SAlbert Esteve         return false;
1500ce0f3b03SAlbert Esteve     }
1501ce0f3b03SAlbert Esteve 
1502ce0f3b03SAlbert Esteve     pthread_mutex_lock(&dev->backend_mutex);
1503ce0f3b03SAlbert Esteve     if (!vu_message_write(dev, dev->backend_fd, &msg)) {
1504ce0f3b03SAlbert Esteve         goto out;
1505ce0f3b03SAlbert Esteve     }
1506ce0f3b03SAlbert Esteve 
1507ce0f3b03SAlbert Esteve     if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
1508ce0f3b03SAlbert Esteve         goto out;
1509ce0f3b03SAlbert Esteve     }
1510ce0f3b03SAlbert Esteve 
1511ce0f3b03SAlbert Esteve     if (msg_reply.request != msg.request) {
1512ce0f3b03SAlbert Esteve         DPRINT("Received unexpected msg type. Expected %d, received %d",
1513ce0f3b03SAlbert Esteve                msg.request, msg_reply.request);
1514ce0f3b03SAlbert Esteve         goto out;
1515ce0f3b03SAlbert Esteve     }
1516ce0f3b03SAlbert Esteve 
1517ce0f3b03SAlbert Esteve     if (msg_reply.fd_num != 1) {
1518ce0f3b03SAlbert Esteve         DPRINT("Received unexpected number of fds. Expected 1, received %d",
1519ce0f3b03SAlbert Esteve                msg_reply.fd_num);
1520ce0f3b03SAlbert Esteve         goto out;
1521ce0f3b03SAlbert Esteve     }
1522ce0f3b03SAlbert Esteve 
1523ce0f3b03SAlbert Esteve     *dmabuf_fd = msg_reply.fds[0];
1524ce0f3b03SAlbert Esteve     result = *dmabuf_fd > 0 && msg_reply.payload.u64 == 0;
1525ce0f3b03SAlbert Esteve out:
1526ce0f3b03SAlbert Esteve     pthread_mutex_unlock(&dev->backend_mutex);
1527ce0f3b03SAlbert Esteve 
1528ce0f3b03SAlbert Esteve     return result;
1529ce0f3b03SAlbert Esteve }
1530ce0f3b03SAlbert Esteve 
1531ce0f3b03SAlbert Esteve static bool
1532ce0f3b03SAlbert Esteve vu_send_message(VuDev *dev, VhostUserMsg *vmsg)
1533ce0f3b03SAlbert Esteve {
1534ce0f3b03SAlbert Esteve     bool result = false;
1535ce0f3b03SAlbert Esteve     pthread_mutex_lock(&dev->backend_mutex);
1536ce0f3b03SAlbert Esteve     if (!vu_message_write(dev, dev->backend_fd, vmsg)) {
1537ce0f3b03SAlbert Esteve         goto out;
1538ce0f3b03SAlbert Esteve     }
1539ce0f3b03SAlbert Esteve 
1540ce0f3b03SAlbert Esteve     result = true;
1541ce0f3b03SAlbert Esteve out:
1542ce0f3b03SAlbert Esteve     pthread_mutex_unlock(&dev->backend_mutex);
1543ce0f3b03SAlbert Esteve 
1544ce0f3b03SAlbert Esteve     return result;
1545ce0f3b03SAlbert Esteve }
1546ce0f3b03SAlbert Esteve 
1547ce0f3b03SAlbert Esteve bool
1548ce0f3b03SAlbert Esteve vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN])
1549ce0f3b03SAlbert Esteve {
1550ce0f3b03SAlbert Esteve     VhostUserMsg msg = {
1551ce0f3b03SAlbert Esteve         .request = VHOST_USER_BACKEND_SHARED_OBJECT_ADD,
1552ce0f3b03SAlbert Esteve         .size = sizeof(msg.payload.object),
1553ce0f3b03SAlbert Esteve         .flags = VHOST_USER_VERSION,
1554ce0f3b03SAlbert Esteve     };
1555ce0f3b03SAlbert Esteve 
1556ce0f3b03SAlbert Esteve     memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1557ce0f3b03SAlbert Esteve 
1558ce0f3b03SAlbert Esteve     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1559ce0f3b03SAlbert Esteve         return false;
1560ce0f3b03SAlbert Esteve     }
1561ce0f3b03SAlbert Esteve 
1562ce0f3b03SAlbert Esteve     return vu_send_message(dev, &msg);
1563ce0f3b03SAlbert Esteve }
1564ce0f3b03SAlbert Esteve 
1565ce0f3b03SAlbert Esteve bool
1566ce0f3b03SAlbert Esteve vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN])
1567ce0f3b03SAlbert Esteve {
1568ce0f3b03SAlbert Esteve     VhostUserMsg msg = {
1569ce0f3b03SAlbert Esteve         .request = VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE,
1570ce0f3b03SAlbert Esteve         .size = sizeof(msg.payload.object),
1571ce0f3b03SAlbert Esteve         .flags = VHOST_USER_VERSION,
1572ce0f3b03SAlbert Esteve     };
1573ce0f3b03SAlbert Esteve 
1574ce0f3b03SAlbert Esteve     memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1575ce0f3b03SAlbert Esteve 
1576ce0f3b03SAlbert Esteve     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1577ce0f3b03SAlbert Esteve         return false;
1578ce0f3b03SAlbert Esteve     }
1579ce0f3b03SAlbert Esteve 
1580ce0f3b03SAlbert Esteve     return vu_send_message(dev, &msg);
1581ce0f3b03SAlbert Esteve }
1582ce0f3b03SAlbert Esteve 
15830df750e9SMarc-André Lureau static bool
15840df750e9SMarc-André Lureau vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
15850df750e9SMarc-André Lureau {
15860df750e9SMarc-André Lureau     int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
15870df750e9SMarc-André Lureau     bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
15880df750e9SMarc-André Lureau 
15890df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
15900df750e9SMarc-André Lureau 
15910df750e9SMarc-André Lureau     if (!vu_check_queue_msg_file(dev, vmsg)) {
15920df750e9SMarc-André Lureau         return false;
15930df750e9SMarc-André Lureau     }
15940df750e9SMarc-André Lureau 
15950df750e9SMarc-André Lureau     if (dev->vq[index].call_fd != -1) {
15960df750e9SMarc-André Lureau         close(dev->vq[index].call_fd);
15970df750e9SMarc-André Lureau         dev->vq[index].call_fd = -1;
15980df750e9SMarc-André Lureau     }
15990df750e9SMarc-André Lureau 
16000df750e9SMarc-André Lureau     dev->vq[index].call_fd = nofd ? -1 : vmsg->fds[0];
16010df750e9SMarc-André Lureau 
16020df750e9SMarc-André Lureau     /* in case of I/O hang after reconnecting */
16030df750e9SMarc-André Lureau     if (dev->vq[index].call_fd != -1 && eventfd_write(vmsg->fds[0], 1)) {
16040df750e9SMarc-André Lureau         return -1;
16050df750e9SMarc-André Lureau     }
16060df750e9SMarc-André Lureau 
16070df750e9SMarc-André Lureau     DPRINT("Got call_fd: %d for vq: %d\n", dev->vq[index].call_fd, index);
16080df750e9SMarc-André Lureau 
16090df750e9SMarc-André Lureau     return false;
16100df750e9SMarc-André Lureau }
16110df750e9SMarc-André Lureau 
16120df750e9SMarc-André Lureau static bool
16130df750e9SMarc-André Lureau vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
16140df750e9SMarc-André Lureau {
16150df750e9SMarc-André Lureau     int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
16160df750e9SMarc-André Lureau     bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
16170df750e9SMarc-André Lureau 
16180df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
16190df750e9SMarc-André Lureau 
16200df750e9SMarc-André Lureau     if (!vu_check_queue_msg_file(dev, vmsg)) {
16210df750e9SMarc-André Lureau         return false;
16220df750e9SMarc-André Lureau     }
16230df750e9SMarc-André Lureau 
16240df750e9SMarc-André Lureau     if (dev->vq[index].err_fd != -1) {
16250df750e9SMarc-André Lureau         close(dev->vq[index].err_fd);
16260df750e9SMarc-André Lureau         dev->vq[index].err_fd = -1;
16270df750e9SMarc-André Lureau     }
16280df750e9SMarc-André Lureau 
16290df750e9SMarc-André Lureau     dev->vq[index].err_fd = nofd ? -1 : vmsg->fds[0];
16300df750e9SMarc-André Lureau 
16310df750e9SMarc-André Lureau     return false;
16320df750e9SMarc-André Lureau }
16330df750e9SMarc-André Lureau 
16340df750e9SMarc-André Lureau static bool
16350df750e9SMarc-André Lureau vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
16360df750e9SMarc-André Lureau {
16370df750e9SMarc-André Lureau     /*
16380df750e9SMarc-André Lureau      * Note that we support, but intentionally do not set,
16390df750e9SMarc-André Lureau      * VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. This means that
16400df750e9SMarc-André Lureau      * a device implementation can return it in its callback
16410df750e9SMarc-André Lureau      * (get_protocol_features) if it wants to use this for
16420df750e9SMarc-André Lureau      * simulation, but it is otherwise not desirable (if even
1643f8ed3648SManos Pitsidianakis      * implemented by the frontend.)
16440df750e9SMarc-André Lureau      */
16450df750e9SMarc-André Lureau     uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
16460df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1647e608feedSMaxime Coquelin                         1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ |
16480df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
1649e608feedSMaxime Coquelin                         1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD |
16500df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |
16510df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS;
16520df750e9SMarc-André Lureau 
16530df750e9SMarc-André Lureau     if (have_userfault()) {
16540df750e9SMarc-André Lureau         features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
16550df750e9SMarc-André Lureau     }
16560df750e9SMarc-André Lureau 
16570df750e9SMarc-André Lureau     if (dev->iface->get_config && dev->iface->set_config) {
16580df750e9SMarc-André Lureau         features |= 1ULL << VHOST_USER_PROTOCOL_F_CONFIG;
16590df750e9SMarc-André Lureau     }
16600df750e9SMarc-André Lureau 
16610df750e9SMarc-André Lureau     if (dev->iface->get_protocol_features) {
16620df750e9SMarc-André Lureau         features |= dev->iface->get_protocol_features(dev);
16630df750e9SMarc-André Lureau     }
16640df750e9SMarc-André Lureau 
16650df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, features);
16660df750e9SMarc-André Lureau     return true;
16670df750e9SMarc-André Lureau }
16680df750e9SMarc-André Lureau 
16690df750e9SMarc-André Lureau static bool
16700df750e9SMarc-André Lureau vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
16710df750e9SMarc-André Lureau {
16720df750e9SMarc-André Lureau     uint64_t features = vmsg->payload.u64;
16730df750e9SMarc-André Lureau 
16740df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", features);
16750df750e9SMarc-André Lureau 
16760df750e9SMarc-André Lureau     dev->protocol_features = vmsg->payload.u64;
16770df750e9SMarc-André Lureau 
16780df750e9SMarc-André Lureau     if (vu_has_protocol_feature(dev,
16790df750e9SMarc-André Lureau                                 VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
1680e608feedSMaxime Coquelin         (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ) ||
16810df750e9SMarc-André Lureau          !vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
16820df750e9SMarc-André Lureau         /*
16830df750e9SMarc-André Lureau          * The use case for using messages for kick/call is simulation, to make
16840df750e9SMarc-André Lureau          * the kick and call synchronous. To actually get that behaviour, both
16850df750e9SMarc-André Lureau          * of the other features are required.
16860df750e9SMarc-André Lureau          * Theoretically, one could use only kick messages, or do them without
16870df750e9SMarc-André Lureau          * having F_REPLY_ACK, but too many (possibly pending) messages on the
1688f8ed3648SManos Pitsidianakis          * socket will eventually cause the frontend to hang, to avoid this in
16890df750e9SMarc-André Lureau          * scenarios where not desired enforce that the settings are in a way
16900df750e9SMarc-André Lureau          * that actually enables the simulation case.
16910df750e9SMarc-André Lureau          */
16920df750e9SMarc-André Lureau         vu_panic(dev,
1693e608feedSMaxime Coquelin                  "F_IN_BAND_NOTIFICATIONS requires F_BACKEND_REQ && F_REPLY_ACK");
16940df750e9SMarc-André Lureau         return false;
16950df750e9SMarc-André Lureau     }
16960df750e9SMarc-André Lureau 
16970df750e9SMarc-André Lureau     if (dev->iface->set_protocol_features) {
16980df750e9SMarc-André Lureau         dev->iface->set_protocol_features(dev, features);
16990df750e9SMarc-André Lureau     }
17000df750e9SMarc-André Lureau 
17010df750e9SMarc-André Lureau     return false;
17020df750e9SMarc-André Lureau }
17030df750e9SMarc-André Lureau 
17040df750e9SMarc-André Lureau static bool
17050df750e9SMarc-André Lureau vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
17060df750e9SMarc-André Lureau {
17070df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, dev->max_queues);
17080df750e9SMarc-André Lureau     return true;
17090df750e9SMarc-André Lureau }
17100df750e9SMarc-André Lureau 
17110df750e9SMarc-André Lureau static bool
17120df750e9SMarc-André Lureau vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
17130df750e9SMarc-André Lureau {
17140df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
17150df750e9SMarc-André Lureau     unsigned int enable = vmsg->payload.state.num;
17160df750e9SMarc-André Lureau 
17170df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
17180df750e9SMarc-André Lureau     DPRINT("State.enable:   %u\n", enable);
17190df750e9SMarc-André Lureau 
17200df750e9SMarc-André Lureau     if (index >= dev->max_queues) {
17210df750e9SMarc-André Lureau         vu_panic(dev, "Invalid vring_enable index: %u", index);
17220df750e9SMarc-André Lureau         return false;
17230df750e9SMarc-André Lureau     }
17240df750e9SMarc-André Lureau 
17250df750e9SMarc-André Lureau     dev->vq[index].enable = enable;
17260df750e9SMarc-André Lureau     return false;
17270df750e9SMarc-André Lureau }
17280df750e9SMarc-André Lureau 
17290df750e9SMarc-André Lureau static bool
1730f8ed3648SManos Pitsidianakis vu_set_backend_req_fd(VuDev *dev, VhostUserMsg *vmsg)
17310df750e9SMarc-André Lureau {
17320df750e9SMarc-André Lureau     if (vmsg->fd_num != 1) {
1733f8ed3648SManos Pitsidianakis         vu_panic(dev, "Invalid backend_req_fd message (%d fd's)", vmsg->fd_num);
17340df750e9SMarc-André Lureau         return false;
17350df750e9SMarc-André Lureau     }
17360df750e9SMarc-André Lureau 
1737f8ed3648SManos Pitsidianakis     if (dev->backend_fd != -1) {
1738f8ed3648SManos Pitsidianakis         close(dev->backend_fd);
17390df750e9SMarc-André Lureau     }
1740f8ed3648SManos Pitsidianakis     dev->backend_fd = vmsg->fds[0];
1741f8ed3648SManos Pitsidianakis     DPRINT("Got backend_fd: %d\n", vmsg->fds[0]);
17420df750e9SMarc-André Lureau 
17430df750e9SMarc-André Lureau     return false;
17440df750e9SMarc-André Lureau }
17450df750e9SMarc-André Lureau 
17460df750e9SMarc-André Lureau static bool
17470df750e9SMarc-André Lureau vu_get_config(VuDev *dev, VhostUserMsg *vmsg)
17480df750e9SMarc-André Lureau {
17490df750e9SMarc-André Lureau     int ret = -1;
17500df750e9SMarc-André Lureau 
17510df750e9SMarc-André Lureau     if (dev->iface->get_config) {
17520df750e9SMarc-André Lureau         ret = dev->iface->get_config(dev, vmsg->payload.config.region,
17530df750e9SMarc-André Lureau                                      vmsg->payload.config.size);
17540df750e9SMarc-André Lureau     }
17550df750e9SMarc-André Lureau 
17560df750e9SMarc-André Lureau     if (ret) {
1757f8ed3648SManos Pitsidianakis         /* resize to zero to indicate an error to frontend */
17580df750e9SMarc-André Lureau         vmsg->size = 0;
17590df750e9SMarc-André Lureau     }
17600df750e9SMarc-André Lureau 
17610df750e9SMarc-André Lureau     return true;
17620df750e9SMarc-André Lureau }
17630df750e9SMarc-André Lureau 
17640df750e9SMarc-André Lureau static bool
17650df750e9SMarc-André Lureau vu_set_config(VuDev *dev, VhostUserMsg *vmsg)
17660df750e9SMarc-André Lureau {
17670df750e9SMarc-André Lureau     int ret = -1;
17680df750e9SMarc-André Lureau 
17690df750e9SMarc-André Lureau     if (dev->iface->set_config) {
17700df750e9SMarc-André Lureau         ret = dev->iface->set_config(dev, vmsg->payload.config.region,
17710df750e9SMarc-André Lureau                                      vmsg->payload.config.offset,
17720df750e9SMarc-André Lureau                                      vmsg->payload.config.size,
17730df750e9SMarc-André Lureau                                      vmsg->payload.config.flags);
17740df750e9SMarc-André Lureau         if (ret) {
17750df750e9SMarc-André Lureau             vu_panic(dev, "Set virtio configuration space failed");
17760df750e9SMarc-André Lureau         }
17770df750e9SMarc-André Lureau     }
17780df750e9SMarc-André Lureau 
17790df750e9SMarc-André Lureau     return false;
17800df750e9SMarc-André Lureau }
17810df750e9SMarc-André Lureau 
/*
 * Handle VHOST_USER_POSTCOPY_ADVISE: open a userfaultfd and return it
 * to the frontend so postcopy migration can register page-fault ranges.
 * Always replies; on failure the fd slot carries -1 after vu_panic().
 */
static bool
vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg)
{
#ifdef UFFDIO_API
    struct uffdio_api api_struct;

    /* No libc wrapper is assumed; invoke the raw userfaultfd syscall. */
    dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    vmsg->size = 0;
#else
    /* Built without userfaultfd headers: report unavailability below. */
    dev->postcopy_ufd = -1;
#endif

    if (dev->postcopy_ufd == -1) {
        vu_panic(dev, "Userfaultfd not available: %s", strerror(errno));
        goto out;
    }

#ifdef UFFDIO_API
    /* Handshake with the kernel; no optional features requested yet. */
    api_struct.api = UFFD_API;
    api_struct.features = 0;
    if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
        vu_panic(dev, "Failed UFFDIO_API: %s", strerror(errno));
        close(dev->postcopy_ufd);
        dev->postcopy_ufd = -1;
        goto out;
    }
    /* TODO: Stash feature flags somewhere */
#endif

out:
    /* Return a ufd to the QEMU */
    vmsg->fd_num = 1;
    vmsg->fds[0] = dev->postcopy_ufd;
    return true; /* = send a reply */
}
18170df750e9SMarc-André Lureau 
18180df750e9SMarc-André Lureau static bool
18190df750e9SMarc-André Lureau vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg)
18200df750e9SMarc-André Lureau {
18210df750e9SMarc-André Lureau     if (dev->nregions) {
18220df750e9SMarc-André Lureau         vu_panic(dev, "Regions already registered at postcopy-listen");
18230df750e9SMarc-André Lureau         vmsg_set_reply_u64(vmsg, -1);
18240df750e9SMarc-André Lureau         return true;
18250df750e9SMarc-André Lureau     }
18260df750e9SMarc-André Lureau     dev->postcopy_listening = true;
18270df750e9SMarc-André Lureau 
18280df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, 0);
18290df750e9SMarc-André Lureau     return true;
18300df750e9SMarc-André Lureau }
18310df750e9SMarc-André Lureau 
18320df750e9SMarc-André Lureau static bool
18330df750e9SMarc-André Lureau vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
18340df750e9SMarc-André Lureau {
18350df750e9SMarc-André Lureau     DPRINT("%s: Entry\n", __func__);
18360df750e9SMarc-André Lureau     dev->postcopy_listening = false;
18370df750e9SMarc-André Lureau     if (dev->postcopy_ufd > 0) {
18380df750e9SMarc-André Lureau         close(dev->postcopy_ufd);
18390df750e9SMarc-André Lureau         dev->postcopy_ufd = -1;
18400df750e9SMarc-André Lureau         DPRINT("%s: Done close\n", __func__);
18410df750e9SMarc-André Lureau     }
18420df750e9SMarc-André Lureau 
18430df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, 0);
18440df750e9SMarc-André Lureau     DPRINT("%s: exit\n", __func__);
18450df750e9SMarc-André Lureau     return true;
18460df750e9SMarc-André Lureau }
18470df750e9SMarc-André Lureau 
18480df750e9SMarc-André Lureau static inline uint64_t
18490df750e9SMarc-André Lureau vu_inflight_queue_size(uint16_t queue_size)
18500df750e9SMarc-André Lureau {
18510df750e9SMarc-André Lureau     return ALIGN_UP(sizeof(VuDescStateSplit) * queue_size +
18520df750e9SMarc-André Lureau            sizeof(uint16_t), INFLIGHT_ALIGNMENT);
18530df750e9SMarc-André Lureau }
18540df750e9SMarc-André Lureau 
#ifdef MFD_ALLOW_SEALING
/*
 * Create a sealable memfd of @size bytes, apply the seal @flags
 * (F_SEAL_* bits) and map it shared read/write.
 *
 * On success returns the mapping and stores the descriptor in *@fd;
 * ownership of both passes to the caller.  On failure returns NULL and
 * *@fd is set to -1 -- previously *@fd was left holding the number of a
 * descriptor that had already been closed, which invited a double-close
 * of an unrelated fd in the caller.
 */
static void *
memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
{
    void *ptr;

    *fd = memfd_create(name, MFD_ALLOW_SEALING);
    if (*fd < 0) {
        return NULL;
    }

    if (ftruncate(*fd, size) < 0) {
        goto fail;
    }

    if (fcntl(*fd, F_ADD_SEALS, flags) < 0) {
        goto fail;
    }

    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
    if (ptr == MAP_FAILED) {
        goto fail;
    }

    return ptr;

fail:
    close(*fd);
    *fd = -1; /* don't hand a closed descriptor back to the caller */
    return NULL;
}
#endif
18880df750e9SMarc-André Lureau 
18890df750e9SMarc-André Lureau static bool
18900df750e9SMarc-André Lureau vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
18910df750e9SMarc-André Lureau {
18920df750e9SMarc-André Lureau     int fd = -1;
18930df750e9SMarc-André Lureau     void *addr = NULL;
18940df750e9SMarc-André Lureau     uint64_t mmap_size;
18950df750e9SMarc-André Lureau     uint16_t num_queues, queue_size;
18960df750e9SMarc-André Lureau 
18970df750e9SMarc-André Lureau     if (vmsg->size != sizeof(vmsg->payload.inflight)) {
18980df750e9SMarc-André Lureau         vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size);
18990df750e9SMarc-André Lureau         vmsg->payload.inflight.mmap_size = 0;
19000df750e9SMarc-André Lureau         return true;
19010df750e9SMarc-André Lureau     }
19020df750e9SMarc-André Lureau 
19030df750e9SMarc-André Lureau     num_queues = vmsg->payload.inflight.num_queues;
19040df750e9SMarc-André Lureau     queue_size = vmsg->payload.inflight.queue_size;
19050df750e9SMarc-André Lureau 
19060df750e9SMarc-André Lureau     DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
19070df750e9SMarc-André Lureau     DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
19080df750e9SMarc-André Lureau 
19090df750e9SMarc-André Lureau     mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
19100df750e9SMarc-André Lureau 
19110df750e9SMarc-André Lureau #ifdef MFD_ALLOW_SEALING
19120df750e9SMarc-André Lureau     addr = memfd_alloc("vhost-inflight", mmap_size,
19130df750e9SMarc-André Lureau                        F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
19140df750e9SMarc-André Lureau                        &fd);
19150df750e9SMarc-André Lureau #else
19160df750e9SMarc-André Lureau     vu_panic(dev, "Not implemented: memfd support is missing");
19170df750e9SMarc-André Lureau #endif
19180df750e9SMarc-André Lureau 
19190df750e9SMarc-André Lureau     if (!addr) {
19200df750e9SMarc-André Lureau         vu_panic(dev, "Failed to alloc vhost inflight area");
19210df750e9SMarc-André Lureau         vmsg->payload.inflight.mmap_size = 0;
19220df750e9SMarc-André Lureau         return true;
19230df750e9SMarc-André Lureau     }
19240df750e9SMarc-André Lureau 
19250df750e9SMarc-André Lureau     memset(addr, 0, mmap_size);
19260df750e9SMarc-André Lureau 
19270df750e9SMarc-André Lureau     dev->inflight_info.addr = addr;
19280df750e9SMarc-André Lureau     dev->inflight_info.size = vmsg->payload.inflight.mmap_size = mmap_size;
19290df750e9SMarc-André Lureau     dev->inflight_info.fd = vmsg->fds[0] = fd;
19300df750e9SMarc-André Lureau     vmsg->fd_num = 1;
19310df750e9SMarc-André Lureau     vmsg->payload.inflight.mmap_offset = 0;
19320df750e9SMarc-André Lureau 
19330df750e9SMarc-André Lureau     DPRINT("send inflight mmap_size: %"PRId64"\n",
19340df750e9SMarc-André Lureau            vmsg->payload.inflight.mmap_size);
19350df750e9SMarc-André Lureau     DPRINT("send inflight mmap offset: %"PRId64"\n",
19360df750e9SMarc-André Lureau            vmsg->payload.inflight.mmap_offset);
19370df750e9SMarc-André Lureau 
19380df750e9SMarc-André Lureau     return true;
19390df750e9SMarc-André Lureau }
19400df750e9SMarc-André Lureau 
19410df750e9SMarc-André Lureau static bool
19420df750e9SMarc-André Lureau vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
19430df750e9SMarc-André Lureau {
19440df750e9SMarc-André Lureau     int fd, i;
19450df750e9SMarc-André Lureau     uint64_t mmap_size, mmap_offset;
19460df750e9SMarc-André Lureau     uint16_t num_queues, queue_size;
19470df750e9SMarc-André Lureau     void *rc;
19480df750e9SMarc-André Lureau 
19490df750e9SMarc-André Lureau     if (vmsg->fd_num != 1 ||
19500df750e9SMarc-André Lureau         vmsg->size != sizeof(vmsg->payload.inflight)) {
19510df750e9SMarc-André Lureau         vu_panic(dev, "Invalid set_inflight_fd message size:%d fds:%d",
19520df750e9SMarc-André Lureau                  vmsg->size, vmsg->fd_num);
19530df750e9SMarc-André Lureau         return false;
19540df750e9SMarc-André Lureau     }
19550df750e9SMarc-André Lureau 
19560df750e9SMarc-André Lureau     fd = vmsg->fds[0];
19570df750e9SMarc-André Lureau     mmap_size = vmsg->payload.inflight.mmap_size;
19580df750e9SMarc-André Lureau     mmap_offset = vmsg->payload.inflight.mmap_offset;
19590df750e9SMarc-André Lureau     num_queues = vmsg->payload.inflight.num_queues;
19600df750e9SMarc-André Lureau     queue_size = vmsg->payload.inflight.queue_size;
19610df750e9SMarc-André Lureau 
19620df750e9SMarc-André Lureau     DPRINT("set_inflight_fd mmap_size: %"PRId64"\n", mmap_size);
19630df750e9SMarc-André Lureau     DPRINT("set_inflight_fd mmap_offset: %"PRId64"\n", mmap_offset);
19640df750e9SMarc-André Lureau     DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
19650df750e9SMarc-André Lureau     DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
19660df750e9SMarc-André Lureau 
19670df750e9SMarc-André Lureau     rc = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
19680df750e9SMarc-André Lureau               fd, mmap_offset);
19690df750e9SMarc-André Lureau 
19700df750e9SMarc-André Lureau     if (rc == MAP_FAILED) {
19710df750e9SMarc-André Lureau         vu_panic(dev, "set_inflight_fd mmap error: %s", strerror(errno));
19720df750e9SMarc-André Lureau         return false;
19730df750e9SMarc-André Lureau     }
19740df750e9SMarc-André Lureau 
19750df750e9SMarc-André Lureau     if (dev->inflight_info.fd) {
19760df750e9SMarc-André Lureau         close(dev->inflight_info.fd);
19770df750e9SMarc-André Lureau     }
19780df750e9SMarc-André Lureau 
19790df750e9SMarc-André Lureau     if (dev->inflight_info.addr) {
19800df750e9SMarc-André Lureau         munmap(dev->inflight_info.addr, dev->inflight_info.size);
19810df750e9SMarc-André Lureau     }
19820df750e9SMarc-André Lureau 
19830df750e9SMarc-André Lureau     dev->inflight_info.fd = fd;
19840df750e9SMarc-André Lureau     dev->inflight_info.addr = rc;
19850df750e9SMarc-André Lureau     dev->inflight_info.size = mmap_size;
19860df750e9SMarc-André Lureau 
19870df750e9SMarc-André Lureau     for (i = 0; i < num_queues; i++) {
19880df750e9SMarc-André Lureau         dev->vq[i].inflight = (VuVirtqInflight *)rc;
19890df750e9SMarc-André Lureau         dev->vq[i].inflight->desc_num = queue_size;
19900df750e9SMarc-André Lureau         rc = (void *)((char *)rc + vu_inflight_queue_size(queue_size));
19910df750e9SMarc-André Lureau     }
19920df750e9SMarc-André Lureau 
19930df750e9SMarc-André Lureau     return false;
19940df750e9SMarc-André Lureau }
19950df750e9SMarc-André Lureau 
19960df750e9SMarc-André Lureau static bool
19970df750e9SMarc-André Lureau vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg)
19980df750e9SMarc-André Lureau {
19990df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
20000df750e9SMarc-André Lureau 
20010df750e9SMarc-André Lureau     if (index >= dev->max_queues) {
20020df750e9SMarc-André Lureau         vu_panic(dev, "Invalid queue index: %u", index);
20030df750e9SMarc-André Lureau         return false;
20040df750e9SMarc-André Lureau     }
20050df750e9SMarc-André Lureau 
20060df750e9SMarc-André Lureau     DPRINT("Got kick message: handler:%p idx:%u\n",
20070df750e9SMarc-André Lureau            dev->vq[index].handler, index);
20080df750e9SMarc-André Lureau 
20090df750e9SMarc-André Lureau     if (!dev->vq[index].started) {
20100df750e9SMarc-André Lureau         dev->vq[index].started = true;
20110df750e9SMarc-André Lureau 
20120df750e9SMarc-André Lureau         if (dev->iface->queue_set_started) {
20130df750e9SMarc-André Lureau             dev->iface->queue_set_started(dev, index, true);
20140df750e9SMarc-André Lureau         }
20150df750e9SMarc-André Lureau     }
20160df750e9SMarc-André Lureau 
20170df750e9SMarc-André Lureau     if (dev->vq[index].handler) {
20180df750e9SMarc-André Lureau         dev->vq[index].handler(dev, index);
20190df750e9SMarc-André Lureau     }
20200df750e9SMarc-André Lureau 
20210df750e9SMarc-André Lureau     return false;
20220df750e9SMarc-André Lureau }
20230df750e9SMarc-André Lureau 
20240df750e9SMarc-André Lureau static bool vu_handle_get_max_memslots(VuDev *dev, VhostUserMsg *vmsg)
20250df750e9SMarc-André Lureau {
202669a5daecSKevin Wolf     vmsg_set_reply_u64(vmsg, VHOST_USER_MAX_RAM_SLOTS);
20270df750e9SMarc-André Lureau 
20280df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", (uint64_t) VHOST_USER_MAX_RAM_SLOTS);
20290df750e9SMarc-André Lureau 
203069a5daecSKevin Wolf     return true;
20310df750e9SMarc-André Lureau }
20320df750e9SMarc-André Lureau 
/*
 * Dispatch one decoded vhost-user message to its handler.
 *
 * Returns true when a reply must be sent back to the frontend (the
 * handler has rewritten *vmsg in place as the reply), false otherwise.
 * A device-supplied process_msg callback gets first refusal and can
 * fully override the built-in handling.
 */
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
    int do_reply = 0;

    /* Print out generic part of the request. */
    DPRINT("================ Vhost user message ================\n");
    DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
           vmsg->request);
    DPRINT("Flags:   0x%x\n", vmsg->flags);
    DPRINT("Size:    %u\n", vmsg->size);

    if (vmsg->fd_num) {
        int i;
        DPRINT("Fds:");
        for (i = 0; i < vmsg->fd_num; i++) {
            DPRINT(" %d", vmsg->fds[i]);
        }
        DPRINT("\n");
    }

    /* Device hook may consume the message entirely. */
    if (dev->iface->process_msg &&
        dev->iface->process_msg(dev, vmsg, &do_reply)) {
        return do_reply;
    }

    /* Built-in handling; each handler returns the "send reply?" flag. */
    switch (vmsg->request) {
    case VHOST_USER_GET_FEATURES:
        return vu_get_features_exec(dev, vmsg);
    case VHOST_USER_SET_FEATURES:
        return vu_set_features_exec(dev, vmsg);
    case VHOST_USER_GET_PROTOCOL_FEATURES:
        return vu_get_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_PROTOCOL_FEATURES:
        return vu_set_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_OWNER:
        return vu_set_owner_exec(dev, vmsg);
    case VHOST_USER_RESET_OWNER:
        return vu_reset_device_exec(dev, vmsg);
    case VHOST_USER_SET_MEM_TABLE:
        return vu_set_mem_table_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_BASE:
        return vu_set_log_base_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_FD:
        return vu_set_log_fd_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_NUM:
        return vu_set_vring_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ADDR:
        return vu_set_vring_addr_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_BASE:
        return vu_set_vring_base_exec(dev, vmsg);
    case VHOST_USER_GET_VRING_BASE:
        return vu_get_vring_base_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_KICK:
        return vu_set_vring_kick_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_CALL:
        return vu_set_vring_call_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ERR:
        return vu_set_vring_err_exec(dev, vmsg);
    case VHOST_USER_GET_QUEUE_NUM:
        return vu_get_queue_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ENABLE:
        return vu_set_vring_enable_exec(dev, vmsg);
    case VHOST_USER_SET_BACKEND_REQ_FD:
        return vu_set_backend_req_fd(dev, vmsg);
    case VHOST_USER_GET_CONFIG:
        return vu_get_config(dev, vmsg);
    case VHOST_USER_SET_CONFIG:
        return vu_set_config(dev, vmsg);
    case VHOST_USER_NONE:
        /* if you need processing before exit, override iface->process_msg */
        exit(0);
    case VHOST_USER_POSTCOPY_ADVISE:
        return vu_set_postcopy_advise(dev, vmsg);
    case VHOST_USER_POSTCOPY_LISTEN:
        return vu_set_postcopy_listen(dev, vmsg);
    case VHOST_USER_POSTCOPY_END:
        return vu_set_postcopy_end(dev, vmsg);
    case VHOST_USER_GET_INFLIGHT_FD:
        return vu_get_inflight_fd(dev, vmsg);
    case VHOST_USER_SET_INFLIGHT_FD:
        return vu_set_inflight_fd(dev, vmsg);
    case VHOST_USER_VRING_KICK:
        return vu_handle_vring_kick(dev, vmsg);
    case VHOST_USER_GET_MAX_MEM_SLOTS:
        return vu_handle_get_max_memslots(dev, vmsg);
    case VHOST_USER_ADD_MEM_REG:
        return vu_add_mem_reg(dev, vmsg);
    case VHOST_USER_REM_MEM_REG:
        return vu_rem_mem_reg(dev, vmsg);
    case VHOST_USER_GET_SHARED_OBJECT:
        return vu_get_shared_object(dev, vmsg);
    default:
        /* Unknown request: release any fds it carried before panicking. */
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Unhandled request: %d", vmsg->request);
    }

    return false;
}
21320df750e9SMarc-André Lureau 
21330df750e9SMarc-André Lureau bool
21340df750e9SMarc-André Lureau vu_dispatch(VuDev *dev)
21350df750e9SMarc-André Lureau {
21360df750e9SMarc-André Lureau     VhostUserMsg vmsg = { 0, };
21370df750e9SMarc-André Lureau     int reply_requested;
21380df750e9SMarc-André Lureau     bool need_reply, success = false;
21390df750e9SMarc-André Lureau 
21400df750e9SMarc-André Lureau     if (!dev->read_msg(dev, dev->sock, &vmsg)) {
21410df750e9SMarc-André Lureau         goto end;
21420df750e9SMarc-André Lureau     }
21430df750e9SMarc-André Lureau 
21440df750e9SMarc-André Lureau     need_reply = vmsg.flags & VHOST_USER_NEED_REPLY_MASK;
21450df750e9SMarc-André Lureau 
21460df750e9SMarc-André Lureau     reply_requested = vu_process_message(dev, &vmsg);
21470df750e9SMarc-André Lureau     if (!reply_requested && need_reply) {
21480df750e9SMarc-André Lureau         vmsg_set_reply_u64(&vmsg, 0);
21490df750e9SMarc-André Lureau         reply_requested = 1;
21500df750e9SMarc-André Lureau     }
21510df750e9SMarc-André Lureau 
21520df750e9SMarc-André Lureau     if (!reply_requested) {
21530df750e9SMarc-André Lureau         success = true;
21540df750e9SMarc-André Lureau         goto end;
21550df750e9SMarc-André Lureau     }
21560df750e9SMarc-André Lureau 
21570df750e9SMarc-André Lureau     if (!vu_send_reply(dev, dev->sock, &vmsg)) {
21580df750e9SMarc-André Lureau         goto end;
21590df750e9SMarc-André Lureau     }
21600df750e9SMarc-André Lureau 
21610df750e9SMarc-André Lureau     success = true;
21620df750e9SMarc-André Lureau 
21630df750e9SMarc-André Lureau end:
21640df750e9SMarc-André Lureau     free(vmsg.data);
21650df750e9SMarc-André Lureau     return success;
21660df750e9SMarc-André Lureau }
21670df750e9SMarc-André Lureau 
/*
 * Tear down a VuDev: unmap guest memory, close every per-queue fd and
 * watch, release the inflight area, the log, the backend channel and
 * the frontend socket, then free the queue and region arrays.  The
 * device can afterwards be re-initialized with vu_init().
 */
void
vu_deinit(VuDev *dev)
{
    unsigned int i;

    vu_remove_all_mem_regs(dev);

    for (i = 0; i < dev->max_queues; i++) {
        VuVirtq *vq = &dev->vq[i];

        if (vq->call_fd != -1) {
            close(vq->call_fd);
            vq->call_fd = -1;
        }

        /* The kick fd also has an event-loop watch that must go first. */
        if (vq->kick_fd != -1) {
            dev->remove_watch(dev, vq->kick_fd);
            close(vq->kick_fd);
            vq->kick_fd = -1;
        }

        if (vq->err_fd != -1) {
            close(vq->err_fd);
            vq->err_fd = -1;
        }

        if (vq->resubmit_list) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        /* Points into inflight_info.addr, which is unmapped below. */
        vq->inflight = NULL;
    }

    if (dev->inflight_info.addr) {
        munmap(dev->inflight_info.addr, dev->inflight_info.size);
        dev->inflight_info.addr = NULL;
    }

    /* NOTE(review): `> 0` treats fd 0 as "unset" -- confirm intended. */
    if (dev->inflight_info.fd > 0) {
        close(dev->inflight_info.fd);
        dev->inflight_info.fd = -1;
    }

    vu_close_log(dev);
    if (dev->backend_fd != -1) {
        close(dev->backend_fd);
        dev->backend_fd = -1;
    }
    pthread_mutex_destroy(&dev->backend_mutex);

    if (dev->sock != -1) {
        close(dev->sock);
    }

    free(dev->vq);
    dev->vq = NULL;
    free(dev->regions);
    dev->regions = NULL;
}
22280df750e9SMarc-André Lureau 
22290df750e9SMarc-André Lureau bool
22300df750e9SMarc-André Lureau vu_init(VuDev *dev,
22310df750e9SMarc-André Lureau         uint16_t max_queues,
22320df750e9SMarc-André Lureau         int socket,
22330df750e9SMarc-André Lureau         vu_panic_cb panic,
22340df750e9SMarc-André Lureau         vu_read_msg_cb read_msg,
22350df750e9SMarc-André Lureau         vu_set_watch_cb set_watch,
22360df750e9SMarc-André Lureau         vu_remove_watch_cb remove_watch,
22370df750e9SMarc-André Lureau         const VuDevIface *iface)
22380df750e9SMarc-André Lureau {
22390df750e9SMarc-André Lureau     uint16_t i;
22400df750e9SMarc-André Lureau 
22410df750e9SMarc-André Lureau     assert(max_queues > 0);
22420df750e9SMarc-André Lureau     assert(socket >= 0);
22430df750e9SMarc-André Lureau     assert(set_watch);
22440df750e9SMarc-André Lureau     assert(remove_watch);
22450df750e9SMarc-André Lureau     assert(iface);
22460df750e9SMarc-André Lureau     assert(panic);
22470df750e9SMarc-André Lureau 
22480df750e9SMarc-André Lureau     memset(dev, 0, sizeof(*dev));
22490df750e9SMarc-André Lureau 
22500df750e9SMarc-André Lureau     dev->sock = socket;
22510df750e9SMarc-André Lureau     dev->panic = panic;
22520df750e9SMarc-André Lureau     dev->read_msg = read_msg ? read_msg : vu_message_read_default;
22530df750e9SMarc-André Lureau     dev->set_watch = set_watch;
22540df750e9SMarc-André Lureau     dev->remove_watch = remove_watch;
22550df750e9SMarc-André Lureau     dev->iface = iface;
22560df750e9SMarc-André Lureau     dev->log_call_fd = -1;
2257f8ed3648SManos Pitsidianakis     pthread_mutex_init(&dev->backend_mutex, NULL);
2258f8ed3648SManos Pitsidianakis     dev->backend_fd = -1;
22590df750e9SMarc-André Lureau     dev->max_queues = max_queues;
22600df750e9SMarc-André Lureau 
2261d884e272SDavid Hildenbrand     dev->regions = malloc(VHOST_USER_MAX_RAM_SLOTS * sizeof(dev->regions[0]));
2262d884e272SDavid Hildenbrand     if (!dev->regions) {
2263d884e272SDavid Hildenbrand         DPRINT("%s: failed to malloc mem regions\n", __func__);
2264d884e272SDavid Hildenbrand         return false;
2265d884e272SDavid Hildenbrand     }
2266d884e272SDavid Hildenbrand 
22670df750e9SMarc-André Lureau     dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
22680df750e9SMarc-André Lureau     if (!dev->vq) {
22690df750e9SMarc-André Lureau         DPRINT("%s: failed to malloc virtqueues\n", __func__);
2270d884e272SDavid Hildenbrand         free(dev->regions);
2271d884e272SDavid Hildenbrand         dev->regions = NULL;
22720df750e9SMarc-André Lureau         return false;
22730df750e9SMarc-André Lureau     }
22740df750e9SMarc-André Lureau 
22750df750e9SMarc-André Lureau     for (i = 0; i < max_queues; i++) {
22760df750e9SMarc-André Lureau         dev->vq[i] = (VuVirtq) {
22770df750e9SMarc-André Lureau             .call_fd = -1, .kick_fd = -1, .err_fd = -1,
22780df750e9SMarc-André Lureau             .notification = true,
22790df750e9SMarc-André Lureau         };
22800df750e9SMarc-André Lureau     }
22810df750e9SMarc-André Lureau 
22820df750e9SMarc-André Lureau     return true;
22830df750e9SMarc-André Lureau }
22840df750e9SMarc-André Lureau 
22850df750e9SMarc-André Lureau VuVirtq *
22860df750e9SMarc-André Lureau vu_get_queue(VuDev *dev, int qidx)
22870df750e9SMarc-André Lureau {
22880df750e9SMarc-André Lureau     assert(qidx < dev->max_queues);
22890df750e9SMarc-André Lureau     return &dev->vq[qidx];
22900df750e9SMarc-André Lureau }
22910df750e9SMarc-André Lureau 
22920df750e9SMarc-André Lureau bool
22930df750e9SMarc-André Lureau vu_queue_enabled(VuDev *dev, VuVirtq *vq)
22940df750e9SMarc-André Lureau {
22950df750e9SMarc-André Lureau     return vq->enable;
22960df750e9SMarc-André Lureau }
22970df750e9SMarc-André Lureau 
22980df750e9SMarc-André Lureau bool
22990df750e9SMarc-André Lureau vu_queue_started(const VuDev *dev, const VuVirtq *vq)
23000df750e9SMarc-André Lureau {
23010df750e9SMarc-André Lureau     return vq->started;
23020df750e9SMarc-André Lureau }
23030df750e9SMarc-André Lureau 
23040df750e9SMarc-André Lureau static inline uint16_t
23050df750e9SMarc-André Lureau vring_avail_flags(VuVirtq *vq)
23060df750e9SMarc-André Lureau {
23070df750e9SMarc-André Lureau     return le16toh(vq->vring.avail->flags);
23080df750e9SMarc-André Lureau }
23090df750e9SMarc-André Lureau 
23100df750e9SMarc-André Lureau static inline uint16_t
23110df750e9SMarc-André Lureau vring_avail_idx(VuVirtq *vq)
23120df750e9SMarc-André Lureau {
23130df750e9SMarc-André Lureau     vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
23140df750e9SMarc-André Lureau 
23150df750e9SMarc-André Lureau     return vq->shadow_avail_idx;
23160df750e9SMarc-André Lureau }
23170df750e9SMarc-André Lureau 
23180df750e9SMarc-André Lureau static inline uint16_t
23190df750e9SMarc-André Lureau vring_avail_ring(VuVirtq *vq, int i)
23200df750e9SMarc-André Lureau {
23210df750e9SMarc-André Lureau     return le16toh(vq->vring.avail->ring[i]);
23220df750e9SMarc-André Lureau }
23230df750e9SMarc-André Lureau 
23240df750e9SMarc-André Lureau static inline uint16_t
23250df750e9SMarc-André Lureau vring_get_used_event(VuVirtq *vq)
23260df750e9SMarc-André Lureau {
23270df750e9SMarc-André Lureau     return vring_avail_ring(vq, vq->vring.num);
23280df750e9SMarc-André Lureau }
23290df750e9SMarc-André Lureau 
/*
 * Number of new buffer heads the guest has published since @idx
 * (typically vq->last_avail_idx).  Returns -1 after panicking if the
 * distance exceeds the ring size, which only a misbehaving guest (or a
 * corrupted ring) can produce.
 */
static int
virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
{
    /* uint16_t subtraction wraps correctly across index rollover. */
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        /* NOTE(review): the message says "used index" but this guards
         * the avail index — consider rewording. */
        vu_panic(dev, "Guest moved used index from %u to %u",
                 idx, vq->shadow_avail_idx);
        return -1;
    }
    if (num_heads) {
        /* On success, callers read a descriptor at vq->last_avail_idx.
         * Make sure descriptor read does not bypass avail index read. */
        smp_rmb();
    }

    return num_heads;
}
23490df750e9SMarc-André Lureau 
23500df750e9SMarc-André Lureau static bool
23510df750e9SMarc-André Lureau virtqueue_get_head(VuDev *dev, VuVirtq *vq,
23520df750e9SMarc-André Lureau                    unsigned int idx, unsigned int *head)
23530df750e9SMarc-André Lureau {
23540df750e9SMarc-André Lureau     /* Grab the next descriptor number they're advertising, and increment
23550df750e9SMarc-André Lureau      * the index we've seen. */
23560df750e9SMarc-André Lureau     *head = vring_avail_ring(vq, idx % vq->vring.num);
23570df750e9SMarc-André Lureau 
23580df750e9SMarc-André Lureau     /* If their number is silly, that's a fatal mistake. */
23590df750e9SMarc-André Lureau     if (*head >= vq->vring.num) {
23600df750e9SMarc-André Lureau         vu_panic(dev, "Guest says index %u is available", *head);
23610df750e9SMarc-André Lureau         return false;
23620df750e9SMarc-André Lureau     }
23630df750e9SMarc-André Lureau 
23640df750e9SMarc-André Lureau     return true;
23650df750e9SMarc-André Lureau }
23660df750e9SMarc-André Lureau 
23670df750e9SMarc-André Lureau static int
23680df750e9SMarc-André Lureau virtqueue_read_indirect_desc(VuDev *dev, struct vring_desc *desc,
23690df750e9SMarc-André Lureau                              uint64_t addr, size_t len)
23700df750e9SMarc-André Lureau {
23710df750e9SMarc-André Lureau     struct vring_desc *ori_desc;
23720df750e9SMarc-André Lureau     uint64_t read_len;
23730df750e9SMarc-André Lureau 
23740df750e9SMarc-André Lureau     if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
23750df750e9SMarc-André Lureau         return -1;
23760df750e9SMarc-André Lureau     }
23770df750e9SMarc-André Lureau 
23780df750e9SMarc-André Lureau     if (len == 0) {
23790df750e9SMarc-André Lureau         return -1;
23800df750e9SMarc-André Lureau     }
23810df750e9SMarc-André Lureau 
23820df750e9SMarc-André Lureau     while (len) {
23830df750e9SMarc-André Lureau         read_len = len;
23840df750e9SMarc-André Lureau         ori_desc = vu_gpa_to_va(dev, &read_len, addr);
23850df750e9SMarc-André Lureau         if (!ori_desc) {
23860df750e9SMarc-André Lureau             return -1;
23870df750e9SMarc-André Lureau         }
23880df750e9SMarc-André Lureau 
23890df750e9SMarc-André Lureau         memcpy(desc, ori_desc, read_len);
23900df750e9SMarc-André Lureau         len -= read_len;
23910df750e9SMarc-André Lureau         addr += read_len;
23920df750e9SMarc-André Lureau         desc += read_len;
23930df750e9SMarc-André Lureau     }
23940df750e9SMarc-André Lureau 
23950df750e9SMarc-André Lureau     return 0;
23960df750e9SMarc-André Lureau }
23970df750e9SMarc-André Lureau 
/* Result codes for virtqueue_read_next_desc(). */
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1, /* malformed chain; device panicked */
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
24030df750e9SMarc-André Lureau 
/*
 * Follow the "next" link of descriptor @i in table @desc (of @max
 * entries).  Returns VIRTQUEUE_READ_DESC_DONE at the end of the chain,
 * VIRTQUEUE_READ_DESC_MORE with *next set to the following index, or
 * VIRTQUEUE_READ_DESC_ERROR (after panicking) if the link points
 * outside the table.
 */
static int
virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
                         int i, unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = le16toh(desc[i].next);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    /* NOTE(review): a read barrier (smp_rmb) would be the expected
     * primitive between reading "next" and dereferencing it — confirm
     * why smp_wmb is used here. */
    smp_wmb();

    if (*next >= max) {
        vu_panic(dev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    return VIRTQUEUE_READ_DESC_MORE;
}
24250df750e9SMarc-André Lureau 
/*
 * Walk the avail ring starting at vq->last_avail_idx and report how
 * many bytes of device-writable ("in") and device-readable ("out")
 * buffer space the guest has queued.  Counting stops early once both
 * max_in_bytes and max_out_bytes are satisfied.  On any malformed ring
 * the device is panicked and both counts are reported as 0.  Either
 * output pointer may be NULL.
 */
void
vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
                         unsigned int *out_bytes,
                         unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    /* An unusable (unmapped/disabled) queue reports zero bytes. */
    if (!vu_is_vq_usable(dev, vq)) {
        goto done;
    }

    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
        unsigned int max, desc_len, num_bufs, indirect = 0;
        uint64_t desc_addr, read_len;
        struct vring_desc *desc;
        struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
            goto err;
        }
        desc = vq->vring.desc;

        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
            /* An indirect table must hold a whole number of descriptors. */
            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
                vu_panic(dev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            desc_addr = le64toh(desc[i].addr);
            desc_len = le32toh(desc[i].len);
            max = desc_len / sizeof(struct vring_desc);
            read_len = desc_len;
            desc = vu_gpa_to_va(dev, &read_len, desc_addr);
            if (unlikely(desc && read_len != desc_len)) {
                /* Failed to use zero copy */
                desc = NULL;
                if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                                  desc_addr,
                                                  desc_len)) {
                    desc = desc_buf;
                }
            }
            if (!desc) {
                vu_panic(dev, "Invalid indirect buffer table");
                goto err;
            }
            /* Restart chain-walking inside the indirect table. */
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            /* WRITE descriptors are device-writable ("in"); the rest
             * are device-readable ("out"). */
            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
                in_total += le32toh(desc[i].len);
            } else {
                out_total += le32toh(desc[i].len);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        /* An indirect chain counts as one buffer at the top level. */
        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
    if (rc < 0) {
        goto err;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
25350df750e9SMarc-André Lureau 
25360df750e9SMarc-André Lureau bool
25370df750e9SMarc-André Lureau vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
25380df750e9SMarc-André Lureau                      unsigned int out_bytes)
25390df750e9SMarc-André Lureau {
25400df750e9SMarc-André Lureau     unsigned int in_total, out_total;
25410df750e9SMarc-André Lureau 
25420df750e9SMarc-André Lureau     vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
25430df750e9SMarc-André Lureau                              in_bytes, out_bytes);
25440df750e9SMarc-André Lureau 
25450df750e9SMarc-André Lureau     return in_bytes <= in_total && out_bytes <= out_total;
25460df750e9SMarc-André Lureau }
25470df750e9SMarc-André Lureau 
/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
bool
vu_queue_empty(VuDev *dev, VuVirtq *vq)
{
    /* An unusable (unmapped/disabled) queue reports as empty. */
    if (!vu_is_vq_usable(dev, vq)) {
        return true;
    }

    /* Cheap path: the cached shadow index already shows pending work. */
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return false;
    }

    /* Re-read (and re-cache) the guest's avail index from ring memory. */
    return vring_avail_idx(vq) == vq->last_avail_idx;
}
25630df750e9SMarc-André Lureau 
/*
 * Decide whether the guest should be interrupted for @vq, following the
 * virtio interrupt-suppression rules: always notify an empty queue when
 * NOTIFY_ON_EMPTY was negotiated; honour the NO_INTERRUPT flag without
 * EVENT_IDX; otherwise notify only when used_idx has crossed the
 * guest's used_event value since the last signalled position.
 */
static bool
vring_notify(VuDev *dev, VuVirtq *vq)
{
    uint16_t old, new;
    bool v;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vu_queue_empty(dev, vq)) {
        return true;
    }

    /* Without EVENT_IDX the guest only has a single suppression flag. */
    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* EVENT_IDX: compare progress against the guest's used_event.  An
     * invalid signalled_used (v == false) forces a notification. */
    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
25890df750e9SMarc-André Lureau 
/*
 * Signal the front-end that buffers were used on @vq.  Prefers the call
 * eventfd; when none is set and in-band notifications were negotiated,
 * sends a VHOST_USER_BACKEND_VRING_CALL message over the backend
 * channel instead, waiting for a reply when @sync and REPLY_ACK are
 * both in effect.
 */
static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
{
    if (!vu_is_vq_usable(dev, vq)) {
        return;
    }

    /* Respect the guest's interrupt-suppression hints. */
    if (!vring_notify(dev, vq)) {
        DPRINT("skipped notify...\n");
        return;
    }

    /* No call fd: fall back to an in-band message, if negotiated. */
    if (vq->call_fd < 0 &&
        vu_has_protocol_feature(dev,
                                VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
        vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        VhostUserMsg vmsg = {
            .request = VHOST_USER_BACKEND_VRING_CALL,
            .flags = VHOST_USER_VERSION,
            .size = sizeof(vmsg.payload.state),
            .payload.state = {
                /* Queue index derived from the position in dev->vq[]. */
                .index = vq - dev->vq,
            },
        };
        bool ack = sync &&
                   vu_has_protocol_feature(dev,
                                           VHOST_USER_PROTOCOL_F_REPLY_ACK);

        if (ack) {
            vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
        }

        vu_message_write(dev, dev->backend_fd, &vmsg);
        if (ack) {
            /* Block until the front-end acknowledges the message. */
            vu_message_read_default(dev, dev->backend_fd, &vmsg);
        }
        return;
    }

    if (eventfd_write(vq->call_fd, 1) < 0) {
        vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
    }
}
26320df750e9SMarc-André Lureau 
/* Notify the front-end about used buffers without waiting for a reply. */
void vu_queue_notify(VuDev *dev, VuVirtq *vq)
{
    _vu_queue_notify(dev, vq, false);
}
26370df750e9SMarc-André Lureau 
/* Like vu_queue_notify(), but waits for an acknowledgement when the
 * in-band notification path and REPLY_ACK are negotiated. */
void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
{
    _vu_queue_notify(dev, vq, true);
}
26420df750e9SMarc-André Lureau 
2643ca858a5fSVladimir Sementsov-Ogievskiy void vu_config_change_msg(VuDev *dev)
2644ca858a5fSVladimir Sementsov-Ogievskiy {
2645ca858a5fSVladimir Sementsov-Ogievskiy     VhostUserMsg vmsg = {
2646ca858a5fSVladimir Sementsov-Ogievskiy         .request = VHOST_USER_BACKEND_CONFIG_CHANGE_MSG,
2647ca858a5fSVladimir Sementsov-Ogievskiy         .flags = VHOST_USER_VERSION,
2648ca858a5fSVladimir Sementsov-Ogievskiy     };
2649ca858a5fSVladimir Sementsov-Ogievskiy 
2650f8ed3648SManos Pitsidianakis     vu_message_write(dev, dev->backend_fd, &vmsg);
2651ca858a5fSVladimir Sementsov-Ogievskiy }
2652ca858a5fSVladimir Sementsov-Ogievskiy 
26530df750e9SMarc-André Lureau static inline void
26540df750e9SMarc-André Lureau vring_used_flags_set_bit(VuVirtq *vq, int mask)
26550df750e9SMarc-André Lureau {
26560df750e9SMarc-André Lureau     uint16_t *flags;
26570df750e9SMarc-André Lureau 
26580df750e9SMarc-André Lureau     flags = (uint16_t *)((char*)vq->vring.used +
26590df750e9SMarc-André Lureau                          offsetof(struct vring_used, flags));
26600df750e9SMarc-André Lureau     *flags = htole16(le16toh(*flags) | mask);
26610df750e9SMarc-André Lureau }
26620df750e9SMarc-André Lureau 
26630df750e9SMarc-André Lureau static inline void
26640df750e9SMarc-André Lureau vring_used_flags_unset_bit(VuVirtq *vq, int mask)
26650df750e9SMarc-André Lureau {
26660df750e9SMarc-André Lureau     uint16_t *flags;
26670df750e9SMarc-André Lureau 
26680df750e9SMarc-André Lureau     flags = (uint16_t *)((char*)vq->vring.used +
26690df750e9SMarc-André Lureau                          offsetof(struct vring_used, flags));
26700df750e9SMarc-André Lureau     *flags = htole16(le16toh(*flags) & ~mask);
26710df750e9SMarc-André Lureau }
26720df750e9SMarc-André Lureau 
/*
 * Publish @val as the avail_event field (stored one slot past the end
 * of the used ring under VIRTIO_RING_F_EVENT_IDX), but only while
 * notifications are enabled for this queue.
 */
static inline void
vring_set_avail_event(VuVirtq *vq, uint16_t val)
{
    uint16_t val_le = htole16(val);

    if (!vq->notification) {
        return;
    }

    /* memcpy instead of a direct store — presumably to avoid a
     * type-punned/possibly misaligned write past ring[num]; confirm. */
    memcpy(&vq->vring.used->ring[vq->vring.num], &val_le, sizeof(uint16_t));
}
26840df750e9SMarc-André Lureau 
/*
 * Enable or disable guest->device kick notifications for @vq.  With
 * EVENT_IDX the avail_event field is updated instead of toggling the
 * NO_NOTIFY flag.
 */
void
vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
{
    vq->notification = enable;
    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
27010df750e9SMarc-André Lureau 
/*
 * Translate the guest-physical buffer [pa, pa + sz) into host iovecs,
 * appending to iov[] starting at *p_num_sg (updated on success).  A
 * buffer spanning several memory regions yields several iovec entries.
 * Returns false (after panicking) on a zero-size buffer, a failed
 * translation, or iovec overflow.
 *
 * NOTE(review): "is_write" is accepted but never used in this body.
 */
static bool
virtqueue_map_desc(VuDev *dev,
                   unsigned int *p_num_sg, struct iovec *iov,
                   unsigned int max_num_sg, bool is_write,
                   uint64_t pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        vu_panic(dev, "virtio: zero sized buffers are not allowed");
        return false;
    }

    while (sz) {
        uint64_t len = sz;

        if (num_sg == max_num_sg) {
            vu_panic(dev, "virtio: too many descriptors in indirect table");
            return false;
        }

        /* vu_gpa_to_va() may shrink "len" at a region boundary; the
         * remainder is mapped by the next iteration. */
        iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
        if (iov[num_sg].iov_base == NULL) {
            vu_panic(dev, "virtio: invalid address for buffers");
            return false;
        }
        iov[num_sg].iov_len = len;
        num_sg++;
        sz -= len;
        pa += len;
    }

    *p_num_sg = num_sg;
    return true;
}
27390df750e9SMarc-André Lureau 
27400df750e9SMarc-André Lureau static void *
27410df750e9SMarc-André Lureau virtqueue_alloc_element(size_t sz,
27420df750e9SMarc-André Lureau                                      unsigned out_num, unsigned in_num)
27430df750e9SMarc-André Lureau {
27440df750e9SMarc-André Lureau     VuVirtqElement *elem;
27450df750e9SMarc-André Lureau     size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
27460df750e9SMarc-André Lureau     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
27470df750e9SMarc-André Lureau     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
27480df750e9SMarc-André Lureau 
27490df750e9SMarc-André Lureau     assert(sz >= sizeof(VuVirtqElement));
27500df750e9SMarc-André Lureau     elem = malloc(out_sg_end);
27519c191605SCarlos López     if (!elem) {
27529c191605SCarlos López         DPRINT("%s: failed to malloc virtqueue element\n", __func__);
27539c191605SCarlos López         return NULL;
27549c191605SCarlos López     }
27550df750e9SMarc-André Lureau     elem->out_num = out_num;
27560df750e9SMarc-André Lureau     elem->in_num = in_num;
27570df750e9SMarc-André Lureau     elem->in_sg = (void *)elem + in_sg_ofs;
27580df750e9SMarc-André Lureau     elem->out_sg = (void *)elem + out_sg_ofs;
27590df750e9SMarc-André Lureau     return elem;
27600df750e9SMarc-André Lureau }
27610df750e9SMarc-André Lureau 
27620df750e9SMarc-André Lureau static void *
27630df750e9SMarc-André Lureau vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
27640df750e9SMarc-André Lureau {
27650df750e9SMarc-André Lureau     struct vring_desc *desc = vq->vring.desc;
27660df750e9SMarc-André Lureau     uint64_t desc_addr, read_len;
27670df750e9SMarc-André Lureau     unsigned int desc_len;
27680df750e9SMarc-André Lureau     unsigned int max = vq->vring.num;
27690df750e9SMarc-André Lureau     unsigned int i = idx;
27700df750e9SMarc-André Lureau     VuVirtqElement *elem;
27710df750e9SMarc-André Lureau     unsigned int out_num = 0, in_num = 0;
27720df750e9SMarc-André Lureau     struct iovec iov[VIRTQUEUE_MAX_SIZE];
27730df750e9SMarc-André Lureau     struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
27740df750e9SMarc-André Lureau     int rc;
27750df750e9SMarc-André Lureau 
27760df750e9SMarc-André Lureau     if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
27770df750e9SMarc-André Lureau         if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
27780df750e9SMarc-André Lureau             vu_panic(dev, "Invalid size for indirect buffer table");
27790df750e9SMarc-André Lureau             return NULL;
27800df750e9SMarc-André Lureau         }
27810df750e9SMarc-André Lureau 
27820df750e9SMarc-André Lureau         /* loop over the indirect descriptor table */
27830df750e9SMarc-André Lureau         desc_addr = le64toh(desc[i].addr);
27840df750e9SMarc-André Lureau         desc_len = le32toh(desc[i].len);
27850df750e9SMarc-André Lureau         max = desc_len / sizeof(struct vring_desc);
27860df750e9SMarc-André Lureau         read_len = desc_len;
27870df750e9SMarc-André Lureau         desc = vu_gpa_to_va(dev, &read_len, desc_addr);
27880df750e9SMarc-André Lureau         if (unlikely(desc && read_len != desc_len)) {
27890df750e9SMarc-André Lureau             /* Failed to use zero copy */
27900df750e9SMarc-André Lureau             desc = NULL;
27910df750e9SMarc-André Lureau             if (!virtqueue_read_indirect_desc(dev, desc_buf,
27920df750e9SMarc-André Lureau                                               desc_addr,
27930df750e9SMarc-André Lureau                                               desc_len)) {
27940df750e9SMarc-André Lureau                 desc = desc_buf;
27950df750e9SMarc-André Lureau             }
27960df750e9SMarc-André Lureau         }
27970df750e9SMarc-André Lureau         if (!desc) {
27980df750e9SMarc-André Lureau             vu_panic(dev, "Invalid indirect buffer table");
27990df750e9SMarc-André Lureau             return NULL;
28000df750e9SMarc-André Lureau         }
28010df750e9SMarc-André Lureau         i = 0;
28020df750e9SMarc-André Lureau     }
28030df750e9SMarc-André Lureau 
28040df750e9SMarc-André Lureau     /* Collect all the descriptors */
28050df750e9SMarc-André Lureau     do {
28060df750e9SMarc-André Lureau         if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
28070df750e9SMarc-André Lureau             if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
28080df750e9SMarc-André Lureau                                VIRTQUEUE_MAX_SIZE - out_num, true,
28090df750e9SMarc-André Lureau                                le64toh(desc[i].addr),
28100df750e9SMarc-André Lureau                                le32toh(desc[i].len))) {
28110df750e9SMarc-André Lureau                 return NULL;
28120df750e9SMarc-André Lureau             }
28130df750e9SMarc-André Lureau         } else {
28140df750e9SMarc-André Lureau             if (in_num) {
28150df750e9SMarc-André Lureau                 vu_panic(dev, "Incorrect order for descriptors");
28160df750e9SMarc-André Lureau                 return NULL;
28170df750e9SMarc-André Lureau             }
28180df750e9SMarc-André Lureau             if (!virtqueue_map_desc(dev, &out_num, iov,
28190df750e9SMarc-André Lureau                                VIRTQUEUE_MAX_SIZE, false,
28200df750e9SMarc-André Lureau                                le64toh(desc[i].addr),
28210df750e9SMarc-André Lureau                                le32toh(desc[i].len))) {
28220df750e9SMarc-André Lureau                 return NULL;
28230df750e9SMarc-André Lureau             }
28240df750e9SMarc-André Lureau         }
28250df750e9SMarc-André Lureau 
28260df750e9SMarc-André Lureau         /* If we've got too many, that implies a descriptor loop. */
28270df750e9SMarc-André Lureau         if ((in_num + out_num) > max) {
28280df750e9SMarc-André Lureau             vu_panic(dev, "Looped descriptor");
28290df750e9SMarc-André Lureau             return NULL;
28300df750e9SMarc-André Lureau         }
28310df750e9SMarc-André Lureau         rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
28320df750e9SMarc-André Lureau     } while (rc == VIRTQUEUE_READ_DESC_MORE);
28330df750e9SMarc-André Lureau 
28340df750e9SMarc-André Lureau     if (rc == VIRTQUEUE_READ_DESC_ERROR) {
28350df750e9SMarc-André Lureau         vu_panic(dev, "read descriptor error");
28360df750e9SMarc-André Lureau         return NULL;
28370df750e9SMarc-André Lureau     }
28380df750e9SMarc-André Lureau 
28390df750e9SMarc-André Lureau     /* Now copy what we have collected and mapped */
28400df750e9SMarc-André Lureau     elem = virtqueue_alloc_element(sz, out_num, in_num);
28419c191605SCarlos López     if (!elem) {
28429c191605SCarlos López         return NULL;
28439c191605SCarlos López     }
28440df750e9SMarc-André Lureau     elem->index = idx;
28450df750e9SMarc-André Lureau     for (i = 0; i < out_num; i++) {
28460df750e9SMarc-André Lureau         elem->out_sg[i] = iov[i];
28470df750e9SMarc-André Lureau     }
28480df750e9SMarc-André Lureau     for (i = 0; i < in_num; i++) {
28490df750e9SMarc-André Lureau         elem->in_sg[i] = iov[out_num + i];
28500df750e9SMarc-André Lureau     }
28510df750e9SMarc-André Lureau 
28520df750e9SMarc-André Lureau     return elem;
28530df750e9SMarc-André Lureau }
28540df750e9SMarc-André Lureau 
28550df750e9SMarc-André Lureau static int
28560df750e9SMarc-André Lureau vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
28570df750e9SMarc-André Lureau {
28580df750e9SMarc-André Lureau     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
28590df750e9SMarc-André Lureau         return 0;
28600df750e9SMarc-André Lureau     }
28610df750e9SMarc-André Lureau 
28620df750e9SMarc-André Lureau     if (unlikely(!vq->inflight)) {
28630df750e9SMarc-André Lureau         return -1;
28640df750e9SMarc-André Lureau     }
28650df750e9SMarc-André Lureau 
28660df750e9SMarc-André Lureau     vq->inflight->desc[desc_idx].counter = vq->counter++;
28670df750e9SMarc-André Lureau     vq->inflight->desc[desc_idx].inflight = 1;
28680df750e9SMarc-André Lureau 
28690df750e9SMarc-André Lureau     return 0;
28700df750e9SMarc-André Lureau }
28710df750e9SMarc-André Lureau 
28720df750e9SMarc-André Lureau static int
28730df750e9SMarc-André Lureau vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
28740df750e9SMarc-André Lureau {
28750df750e9SMarc-André Lureau     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
28760df750e9SMarc-André Lureau         return 0;
28770df750e9SMarc-André Lureau     }
28780df750e9SMarc-André Lureau 
28790df750e9SMarc-André Lureau     if (unlikely(!vq->inflight)) {
28800df750e9SMarc-André Lureau         return -1;
28810df750e9SMarc-André Lureau     }
28820df750e9SMarc-André Lureau 
28830df750e9SMarc-André Lureau     vq->inflight->last_batch_head = desc_idx;
28840df750e9SMarc-André Lureau 
28850df750e9SMarc-André Lureau     return 0;
28860df750e9SMarc-André Lureau }
28870df750e9SMarc-André Lureau 
28880df750e9SMarc-André Lureau static int
28890df750e9SMarc-André Lureau vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx)
28900df750e9SMarc-André Lureau {
28910df750e9SMarc-André Lureau     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
28920df750e9SMarc-André Lureau         return 0;
28930df750e9SMarc-André Lureau     }
28940df750e9SMarc-André Lureau 
28950df750e9SMarc-André Lureau     if (unlikely(!vq->inflight)) {
28960df750e9SMarc-André Lureau         return -1;
28970df750e9SMarc-André Lureau     }
28980df750e9SMarc-André Lureau 
28990df750e9SMarc-André Lureau     barrier();
29000df750e9SMarc-André Lureau 
29010df750e9SMarc-André Lureau     vq->inflight->desc[desc_idx].inflight = 0;
29020df750e9SMarc-André Lureau 
29030df750e9SMarc-André Lureau     barrier();
29040df750e9SMarc-André Lureau 
29050df750e9SMarc-André Lureau     vq->inflight->used_idx = vq->used_idx;
29060df750e9SMarc-André Lureau 
29070df750e9SMarc-André Lureau     return 0;
29080df750e9SMarc-André Lureau }
29090df750e9SMarc-André Lureau 
29100df750e9SMarc-André Lureau void *
29110df750e9SMarc-André Lureau vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
29120df750e9SMarc-André Lureau {
29130df750e9SMarc-André Lureau     int i;
29140df750e9SMarc-André Lureau     unsigned int head;
29150df750e9SMarc-André Lureau     VuVirtqElement *elem;
29160df750e9SMarc-André Lureau 
29172a290227SDavid Hildenbrand     if (!vu_is_vq_usable(dev, vq)) {
29180df750e9SMarc-André Lureau         return NULL;
29190df750e9SMarc-André Lureau     }
29200df750e9SMarc-André Lureau 
29210df750e9SMarc-André Lureau     if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
29220df750e9SMarc-André Lureau         i = (--vq->resubmit_num);
29230df750e9SMarc-André Lureau         elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz);
29240df750e9SMarc-André Lureau 
29250df750e9SMarc-André Lureau         if (!vq->resubmit_num) {
29260df750e9SMarc-André Lureau             free(vq->resubmit_list);
29270df750e9SMarc-André Lureau             vq->resubmit_list = NULL;
29280df750e9SMarc-André Lureau         }
29290df750e9SMarc-André Lureau 
29300df750e9SMarc-André Lureau         return elem;
29310df750e9SMarc-André Lureau     }
29320df750e9SMarc-André Lureau 
29330df750e9SMarc-André Lureau     if (vu_queue_empty(dev, vq)) {
29340df750e9SMarc-André Lureau         return NULL;
29350df750e9SMarc-André Lureau     }
29360df750e9SMarc-André Lureau     /*
29370df750e9SMarc-André Lureau      * Needed after virtio_queue_empty(), see comment in
29380df750e9SMarc-André Lureau      * virtqueue_num_heads().
29390df750e9SMarc-André Lureau      */
29400df750e9SMarc-André Lureau     smp_rmb();
29410df750e9SMarc-André Lureau 
29420df750e9SMarc-André Lureau     if (vq->inuse >= vq->vring.num) {
29430df750e9SMarc-André Lureau         vu_panic(dev, "Virtqueue size exceeded");
29440df750e9SMarc-André Lureau         return NULL;
29450df750e9SMarc-André Lureau     }
29460df750e9SMarc-André Lureau 
29470df750e9SMarc-André Lureau     if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
29480df750e9SMarc-André Lureau         return NULL;
29490df750e9SMarc-André Lureau     }
29500df750e9SMarc-André Lureau 
29510df750e9SMarc-André Lureau     if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
29520df750e9SMarc-André Lureau         vring_set_avail_event(vq, vq->last_avail_idx);
29530df750e9SMarc-André Lureau     }
29540df750e9SMarc-André Lureau 
29550df750e9SMarc-André Lureau     elem = vu_queue_map_desc(dev, vq, head, sz);
29560df750e9SMarc-André Lureau 
29570df750e9SMarc-André Lureau     if (!elem) {
29580df750e9SMarc-André Lureau         return NULL;
29590df750e9SMarc-André Lureau     }
29600df750e9SMarc-André Lureau 
29610df750e9SMarc-André Lureau     vq->inuse++;
29620df750e9SMarc-André Lureau 
29630df750e9SMarc-André Lureau     vu_queue_inflight_get(dev, vq, head);
29640df750e9SMarc-André Lureau 
29650df750e9SMarc-André Lureau     return elem;
29660df750e9SMarc-André Lureau }
29670df750e9SMarc-André Lureau 
29680df750e9SMarc-André Lureau static void
29690df750e9SMarc-André Lureau vu_queue_detach_element(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
29700df750e9SMarc-André Lureau                         size_t len)
29710df750e9SMarc-André Lureau {
29720df750e9SMarc-André Lureau     vq->inuse--;
29730df750e9SMarc-André Lureau     /* unmap, when DMA support is added */
29740df750e9SMarc-André Lureau }
29750df750e9SMarc-André Lureau 
29760df750e9SMarc-André Lureau void
29770df750e9SMarc-André Lureau vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
29780df750e9SMarc-André Lureau                size_t len)
29790df750e9SMarc-André Lureau {
29800df750e9SMarc-André Lureau     vq->last_avail_idx--;
29810df750e9SMarc-André Lureau     vu_queue_detach_element(dev, vq, elem, len);
29820df750e9SMarc-André Lureau }
29830df750e9SMarc-André Lureau 
29840df750e9SMarc-André Lureau bool
29850df750e9SMarc-André Lureau vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
29860df750e9SMarc-André Lureau {
29870df750e9SMarc-André Lureau     if (num > vq->inuse) {
29880df750e9SMarc-André Lureau         return false;
29890df750e9SMarc-André Lureau     }
29900df750e9SMarc-André Lureau     vq->last_avail_idx -= num;
29910df750e9SMarc-André Lureau     vq->inuse -= num;
29920df750e9SMarc-André Lureau     return true;
29930df750e9SMarc-André Lureau }
29940df750e9SMarc-André Lureau 
29950df750e9SMarc-André Lureau static inline
29960df750e9SMarc-André Lureau void vring_used_write(VuDev *dev, VuVirtq *vq,
29970df750e9SMarc-André Lureau                       struct vring_used_elem *uelem, int i)
29980df750e9SMarc-André Lureau {
29990df750e9SMarc-André Lureau     struct vring_used *used = vq->vring.used;
30000df750e9SMarc-André Lureau 
30010df750e9SMarc-André Lureau     used->ring[i] = *uelem;
30020df750e9SMarc-André Lureau     vu_log_write(dev, vq->vring.log_guest_addr +
30030df750e9SMarc-André Lureau                  offsetof(struct vring_used, ring[i]),
30040df750e9SMarc-André Lureau                  sizeof(used->ring[i]));
30050df750e9SMarc-André Lureau }
30060df750e9SMarc-André Lureau 
30070df750e9SMarc-André Lureau 
30080df750e9SMarc-André Lureau static void
30090df750e9SMarc-André Lureau vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
30100df750e9SMarc-André Lureau                   const VuVirtqElement *elem,
30110df750e9SMarc-André Lureau                   unsigned int len)
30120df750e9SMarc-André Lureau {
30130df750e9SMarc-André Lureau     struct vring_desc *desc = vq->vring.desc;
30140df750e9SMarc-André Lureau     unsigned int i, max, min, desc_len;
30150df750e9SMarc-André Lureau     uint64_t desc_addr, read_len;
30160df750e9SMarc-André Lureau     struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
30170df750e9SMarc-André Lureau     unsigned num_bufs = 0;
30180df750e9SMarc-André Lureau 
30190df750e9SMarc-André Lureau     max = vq->vring.num;
30200df750e9SMarc-André Lureau     i = elem->index;
30210df750e9SMarc-André Lureau 
30220df750e9SMarc-André Lureau     if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
30230df750e9SMarc-André Lureau         if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
30240df750e9SMarc-André Lureau             vu_panic(dev, "Invalid size for indirect buffer table");
30250df750e9SMarc-André Lureau             return;
30260df750e9SMarc-André Lureau         }
30270df750e9SMarc-André Lureau 
30280df750e9SMarc-André Lureau         /* loop over the indirect descriptor table */
30290df750e9SMarc-André Lureau         desc_addr = le64toh(desc[i].addr);
30300df750e9SMarc-André Lureau         desc_len = le32toh(desc[i].len);
30310df750e9SMarc-André Lureau         max = desc_len / sizeof(struct vring_desc);
30320df750e9SMarc-André Lureau         read_len = desc_len;
30330df750e9SMarc-André Lureau         desc = vu_gpa_to_va(dev, &read_len, desc_addr);
30340df750e9SMarc-André Lureau         if (unlikely(desc && read_len != desc_len)) {
30350df750e9SMarc-André Lureau             /* Failed to use zero copy */
30360df750e9SMarc-André Lureau             desc = NULL;
30370df750e9SMarc-André Lureau             if (!virtqueue_read_indirect_desc(dev, desc_buf,
30380df750e9SMarc-André Lureau                                               desc_addr,
30390df750e9SMarc-André Lureau                                               desc_len)) {
30400df750e9SMarc-André Lureau                 desc = desc_buf;
30410df750e9SMarc-André Lureau             }
30420df750e9SMarc-André Lureau         }
30430df750e9SMarc-André Lureau         if (!desc) {
30440df750e9SMarc-André Lureau             vu_panic(dev, "Invalid indirect buffer table");
30450df750e9SMarc-André Lureau             return;
30460df750e9SMarc-André Lureau         }
30470df750e9SMarc-André Lureau         i = 0;
30480df750e9SMarc-André Lureau     }
30490df750e9SMarc-André Lureau 
30500df750e9SMarc-André Lureau     do {
30510df750e9SMarc-André Lureau         if (++num_bufs > max) {
30520df750e9SMarc-André Lureau             vu_panic(dev, "Looped descriptor");
30530df750e9SMarc-André Lureau             return;
30540df750e9SMarc-André Lureau         }
30550df750e9SMarc-André Lureau 
30560df750e9SMarc-André Lureau         if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
30570df750e9SMarc-André Lureau             min = MIN(le32toh(desc[i].len), len);
30580df750e9SMarc-André Lureau             vu_log_write(dev, le64toh(desc[i].addr), min);
30590df750e9SMarc-André Lureau             len -= min;
30600df750e9SMarc-André Lureau         }
30610df750e9SMarc-André Lureau 
30620df750e9SMarc-André Lureau     } while (len > 0 &&
30630df750e9SMarc-André Lureau              (virtqueue_read_next_desc(dev, desc, i, max, &i)
30640df750e9SMarc-André Lureau               == VIRTQUEUE_READ_DESC_MORE));
30650df750e9SMarc-André Lureau }
30660df750e9SMarc-André Lureau 
30670df750e9SMarc-André Lureau void
30680df750e9SMarc-André Lureau vu_queue_fill(VuDev *dev, VuVirtq *vq,
30690df750e9SMarc-André Lureau               const VuVirtqElement *elem,
30700df750e9SMarc-André Lureau               unsigned int len, unsigned int idx)
30710df750e9SMarc-André Lureau {
30720df750e9SMarc-André Lureau     struct vring_used_elem uelem;
30730df750e9SMarc-André Lureau 
30742a290227SDavid Hildenbrand     if (!vu_is_vq_usable(dev, vq)) {
30750df750e9SMarc-André Lureau         return;
30760df750e9SMarc-André Lureau     }
30770df750e9SMarc-André Lureau 
30780df750e9SMarc-André Lureau     vu_log_queue_fill(dev, vq, elem, len);
30790df750e9SMarc-André Lureau 
30800df750e9SMarc-André Lureau     idx = (idx + vq->used_idx) % vq->vring.num;
30810df750e9SMarc-André Lureau 
30820df750e9SMarc-André Lureau     uelem.id = htole32(elem->index);
30830df750e9SMarc-André Lureau     uelem.len = htole32(len);
30840df750e9SMarc-André Lureau     vring_used_write(dev, vq, &uelem, idx);
30850df750e9SMarc-André Lureau }
30860df750e9SMarc-André Lureau 
30870df750e9SMarc-André Lureau static inline
30880df750e9SMarc-André Lureau void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
30890df750e9SMarc-André Lureau {
30900df750e9SMarc-André Lureau     vq->vring.used->idx = htole16(val);
30910df750e9SMarc-André Lureau     vu_log_write(dev,
30920df750e9SMarc-André Lureau                  vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
30930df750e9SMarc-André Lureau                  sizeof(vq->vring.used->idx));
30940df750e9SMarc-André Lureau 
30950df750e9SMarc-André Lureau     vq->used_idx = val;
30960df750e9SMarc-André Lureau }
30970df750e9SMarc-André Lureau 
30980df750e9SMarc-André Lureau void
30990df750e9SMarc-André Lureau vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
31000df750e9SMarc-André Lureau {
31010df750e9SMarc-André Lureau     uint16_t old, new;
31020df750e9SMarc-André Lureau 
31032a290227SDavid Hildenbrand     if (!vu_is_vq_usable(dev, vq)) {
31040df750e9SMarc-André Lureau         return;
31050df750e9SMarc-André Lureau     }
31060df750e9SMarc-André Lureau 
31070df750e9SMarc-André Lureau     /* Make sure buffer is written before we update index. */
31080df750e9SMarc-André Lureau     smp_wmb();
31090df750e9SMarc-André Lureau 
31100df750e9SMarc-André Lureau     old = vq->used_idx;
31110df750e9SMarc-André Lureau     new = old + count;
31120df750e9SMarc-André Lureau     vring_used_idx_set(dev, vq, new);
31130df750e9SMarc-André Lureau     vq->inuse -= count;
31140df750e9SMarc-André Lureau     if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
31150df750e9SMarc-André Lureau         vq->signalled_used_valid = false;
31160df750e9SMarc-André Lureau     }
31170df750e9SMarc-André Lureau }
31180df750e9SMarc-André Lureau 
31190df750e9SMarc-André Lureau void
31200df750e9SMarc-André Lureau vu_queue_push(VuDev *dev, VuVirtq *vq,
31210df750e9SMarc-André Lureau               const VuVirtqElement *elem, unsigned int len)
31220df750e9SMarc-André Lureau {
31230df750e9SMarc-André Lureau     vu_queue_fill(dev, vq, elem, len, 0);
31240df750e9SMarc-André Lureau     vu_queue_inflight_pre_put(dev, vq, elem->index);
31250df750e9SMarc-André Lureau     vu_queue_flush(dev, vq, 1);
31260df750e9SMarc-André Lureau     vu_queue_inflight_post_put(dev, vq, elem->index);
31270df750e9SMarc-André Lureau }
3128