10df750e9SMarc-André Lureau /*
20df750e9SMarc-André Lureau  * Vhost User library
30df750e9SMarc-André Lureau  *
40df750e9SMarc-André Lureau  * Copyright IBM, Corp. 2007
50df750e9SMarc-André Lureau  * Copyright (c) 2016 Red Hat, Inc.
60df750e9SMarc-André Lureau  *
70df750e9SMarc-André Lureau  * Authors:
80df750e9SMarc-André Lureau  *  Anthony Liguori <aliguori@us.ibm.com>
90df750e9SMarc-André Lureau  *  Marc-André Lureau <mlureau@redhat.com>
100df750e9SMarc-André Lureau  *  Victor Kaplansky <victork@redhat.com>
110df750e9SMarc-André Lureau  *
120df750e9SMarc-André Lureau  * This work is licensed under the terms of the GNU GPL, version 2 or
130df750e9SMarc-André Lureau  * later.  See the COPYING file in the top-level directory.
140df750e9SMarc-André Lureau  */
150df750e9SMarc-André Lureau 
16dadc3d01SMarcel Holtmann #ifndef _GNU_SOURCE
17dadc3d01SMarcel Holtmann #define _GNU_SOURCE
18dadc3d01SMarcel Holtmann #endif
19dadc3d01SMarcel Holtmann 
200df750e9SMarc-André Lureau /* this code avoids GLib dependency */
210df750e9SMarc-André Lureau #include <stdlib.h>
220df750e9SMarc-André Lureau #include <stdio.h>
230df750e9SMarc-André Lureau #include <unistd.h>
240df750e9SMarc-André Lureau #include <stdarg.h>
250df750e9SMarc-André Lureau #include <errno.h>
260df750e9SMarc-André Lureau #include <string.h>
270df750e9SMarc-André Lureau #include <assert.h>
280df750e9SMarc-André Lureau #include <inttypes.h>
290df750e9SMarc-André Lureau #include <sys/types.h>
300df750e9SMarc-André Lureau #include <sys/socket.h>
310df750e9SMarc-André Lureau #include <sys/eventfd.h>
320df750e9SMarc-André Lureau #include <sys/mman.h>
330df750e9SMarc-André Lureau #include <endian.h>
340df750e9SMarc-André Lureau 
35193ba660SDavid 'Digit' Turner /* Necessary to provide VIRTIO_F_VERSION_1 on system
36193ba660SDavid 'Digit' Turner  * with older linux headers. Must appear before
37193ba660SDavid 'Digit' Turner  * <linux/vhost.h> below.
38193ba660SDavid 'Digit' Turner  */
39193ba660SDavid 'Digit' Turner #include "standard-headers/linux/virtio_config.h"
40193ba660SDavid 'Digit' Turner 
410df750e9SMarc-André Lureau #if defined(__linux__)
420df750e9SMarc-André Lureau #include <sys/syscall.h>
430df750e9SMarc-André Lureau #include <fcntl.h>
440df750e9SMarc-André Lureau #include <sys/ioctl.h>
450df750e9SMarc-André Lureau #include <linux/vhost.h>
46b2b63008SDavid Hildenbrand #include <sys/vfs.h>
47b2b63008SDavid Hildenbrand #include <linux/magic.h>
480df750e9SMarc-André Lureau 
490df750e9SMarc-André Lureau #ifdef __NR_userfaultfd
500df750e9SMarc-André Lureau #include <linux/userfaultfd.h>
510df750e9SMarc-André Lureau #endif
520df750e9SMarc-André Lureau 
530df750e9SMarc-André Lureau #endif
540df750e9SMarc-André Lureau 
553f55f97bSMarc-André Lureau #include "include/atomic.h"
560df750e9SMarc-André Lureau 
570df750e9SMarc-André Lureau #include "libvhost-user.h"
580df750e9SMarc-André Lureau 
/* usually provided by GLib */
#if     __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)
#if !defined(__clang__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
/*
 * The gnu_printf archetype is understood by GCC >= 4.4 (not clang).
 * It checks format strings against the GNU libc conventions even on
 * targets (e.g. MinGW) where plain "printf" would mean the native
 * C runtime's dialect.  The previous "__GNUC_MINOR__ == 4" test
 * matched only GCC 4.4 exactly, wrongly excluding every newer GCC.
 */
#define G_GNUC_PRINTF(format_idx, arg_idx) \
  __attribute__((__format__(gnu_printf, format_idx, arg_idx)))
#else
#define G_GNUC_PRINTF(format_idx, arg_idx) \
  __attribute__((__format__(__printf__, format_idx, arg_idx)))
#endif
#else   /* !__GNUC__ */
#define G_GNUC_PRINTF(format_idx, arg_idx)
#endif  /* !__GNUC__ */
/*
 * Type-safe minimum using a GNU statement expression: each argument is
 * evaluated exactly once, unlike the naive ((x) < (y) ? (x) : (y)).
 */
#ifndef MIN
#define MIN(x, y) ({                            \
            __typeof__(x) _min1 = (x);          \
            __typeof__(y) _min2 = (y);          \
            (void) (&_min1 == &_min2);          \
            _min1 < _min2 ? _min1 : _min2; })
#endif

/* Round number down to multiple */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))

/* Round number up to multiple */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))

/* Branch-prediction hint: the condition is expected to be false. */
#ifndef unlikely
#define unlikely(x)   __builtin_expect(!!(x), 0)
#endif

/* Align each region to cache line size in inflight buffer */
#define INFLIGHT_ALIGNMENT 64

/* The version of inflight buffer */
#define INFLIGHT_VERSION 1

/* The version of the protocol we support */
#define VHOST_USER_VERSION 1
#define LIBVHOST_USER_DEBUG 0

/*
 * Debug logging to stderr.  A runtime if() on a constant (rather than
 * #if) is used so the format string and arguments are always
 * type-checked by the compiler, even in non-debug builds.
 */
#define DPRINT(...)                             \
    do {                                        \
        if (LIBVHOST_USER_DEBUG) {              \
            fprintf(stderr, __VA_ARGS__);        \
        }                                       \
    } while (0)
1050df750e9SMarc-André Lureau 
/* Return whether bit @fbit is set in the 64-bit feature mask @features. */
static inline bool
has_feature(uint64_t features, unsigned int fbit)
{
    assert(fbit < 64);
    return (features >> fbit) & 1;
}
1120df750e9SMarc-André Lureau 
1130df750e9SMarc-André Lureau static inline
vu_has_feature(VuDev * dev,unsigned int fbit)1140df750e9SMarc-André Lureau bool vu_has_feature(VuDev *dev,
1150df750e9SMarc-André Lureau                     unsigned int fbit)
1160df750e9SMarc-André Lureau {
1170df750e9SMarc-André Lureau     return has_feature(dev->features, fbit);
1180df750e9SMarc-André Lureau }
1190df750e9SMarc-André Lureau 
/* Return whether protocol feature bit @fbit has been negotiated. */
static inline bool
vu_has_protocol_feature(VuDev *dev, unsigned int fbit)
{
    return has_feature(dev->protocol_features, fbit);
}
1240df750e9SMarc-André Lureau 
125467eeb0fSAlex Bennée const char *
vu_request_to_string(unsigned int req)1260df750e9SMarc-André Lureau vu_request_to_string(unsigned int req)
1270df750e9SMarc-André Lureau {
1280df750e9SMarc-André Lureau #define REQ(req) [req] = #req
1290df750e9SMarc-André Lureau     static const char *vu_request_str[] = {
1300df750e9SMarc-André Lureau         REQ(VHOST_USER_NONE),
1310df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_FEATURES),
1320df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_FEATURES),
1330df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_OWNER),
1340df750e9SMarc-André Lureau         REQ(VHOST_USER_RESET_OWNER),
1350df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_MEM_TABLE),
1360df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_LOG_BASE),
1370df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_LOG_FD),
1380df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_NUM),
1390df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ADDR),
1400df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_BASE),
1410df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_VRING_BASE),
1420df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_KICK),
1430df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_CALL),
1440df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ERR),
1450df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
1460df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
1470df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_QUEUE_NUM),
1480df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ENABLE),
1490df750e9SMarc-André Lureau         REQ(VHOST_USER_SEND_RARP),
1500df750e9SMarc-André Lureau         REQ(VHOST_USER_NET_SET_MTU),
151e608feedSMaxime Coquelin         REQ(VHOST_USER_SET_BACKEND_REQ_FD),
1520df750e9SMarc-André Lureau         REQ(VHOST_USER_IOTLB_MSG),
1530df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_VRING_ENDIAN),
1540df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_CONFIG),
1550df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_CONFIG),
1560df750e9SMarc-André Lureau         REQ(VHOST_USER_POSTCOPY_ADVISE),
1570df750e9SMarc-André Lureau         REQ(VHOST_USER_POSTCOPY_LISTEN),
1580df750e9SMarc-André Lureau         REQ(VHOST_USER_POSTCOPY_END),
1590df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_INFLIGHT_FD),
1600df750e9SMarc-André Lureau         REQ(VHOST_USER_SET_INFLIGHT_FD),
1610df750e9SMarc-André Lureau         REQ(VHOST_USER_GPU_SET_SOCKET),
1620df750e9SMarc-André Lureau         REQ(VHOST_USER_VRING_KICK),
1630df750e9SMarc-André Lureau         REQ(VHOST_USER_GET_MAX_MEM_SLOTS),
1640df750e9SMarc-André Lureau         REQ(VHOST_USER_ADD_MEM_REG),
1650df750e9SMarc-André Lureau         REQ(VHOST_USER_REM_MEM_REG),
166ce0f3b03SAlbert Esteve         REQ(VHOST_USER_GET_SHARED_OBJECT),
1670df750e9SMarc-André Lureau         REQ(VHOST_USER_MAX),
1680df750e9SMarc-André Lureau     };
1690df750e9SMarc-André Lureau #undef REQ
1700df750e9SMarc-André Lureau 
1710df750e9SMarc-André Lureau     if (req < VHOST_USER_MAX) {
1720df750e9SMarc-André Lureau         return vu_request_str[req];
1730df750e9SMarc-André Lureau     } else {
1740df750e9SMarc-André Lureau         return "unknown";
1750df750e9SMarc-André Lureau     }
1760df750e9SMarc-André Lureau }
1770df750e9SMarc-André Lureau 
/*
 * Mark the device broken and report a printf-formatted error through the
 * user-supplied panic callback.  The connection is not torn down here
 * (see FIXME below); the caller is expected to stop using the device.
 *
 * NOTE(review): if vasprintf() fails (OOM), dev->panic is invoked with a
 * NULL message — the callback is presumably expected to tolerate that;
 * confirm against the panic-callback contract in libvhost-user.h.
 */
static void G_GNUC_PRINTF(2, 3)
vu_panic(VuDev *dev, const char *msg, ...)
{
    char *buf = NULL;
    va_list ap;

    va_start(ap, msg);
    if (vasprintf(&buf, msg, ap) < 0) {
        /* vasprintf leaves buf indeterminate on failure; make it safe. */
        buf = NULL;
    }
    va_end(ap);

    dev->broken = true;
    dev->panic(dev, buf);
    free(buf);

    /*
     * FIXME:
     * find a way to call virtio_error, or perhaps close the connection?
     */
}
1990df750e9SMarc-André Lureau 
20060ccdca4SDavid Hildenbrand /* Search for a memory region that covers this guest physical address. */
20160ccdca4SDavid Hildenbrand static VuDevRegion *
vu_gpa_to_mem_region(VuDev * dev,uint64_t guest_addr)20260ccdca4SDavid Hildenbrand vu_gpa_to_mem_region(VuDev *dev, uint64_t guest_addr)
20360ccdca4SDavid Hildenbrand {
204a3c0118cSDavid Hildenbrand     int low = 0;
205a3c0118cSDavid Hildenbrand     int high = dev->nregions - 1;
20660ccdca4SDavid Hildenbrand 
20760ccdca4SDavid Hildenbrand     /*
20860ccdca4SDavid Hildenbrand      * Memory regions cannot overlap in guest physical address space. Each
20960ccdca4SDavid Hildenbrand      * GPA belongs to exactly one memory region, so there can only be one
21060ccdca4SDavid Hildenbrand      * match.
211a3c0118cSDavid Hildenbrand      *
212a3c0118cSDavid Hildenbrand      * We store our memory regions ordered by GPA and can simply perform a
213a3c0118cSDavid Hildenbrand      * binary search.
21460ccdca4SDavid Hildenbrand      */
215a3c0118cSDavid Hildenbrand     while (low <= high) {
216a3c0118cSDavid Hildenbrand         unsigned int mid = low + (high - low) / 2;
217a3c0118cSDavid Hildenbrand         VuDevRegion *cur = &dev->regions[mid];
21860ccdca4SDavid Hildenbrand 
21960ccdca4SDavid Hildenbrand         if (guest_addr >= cur->gpa && guest_addr < cur->gpa + cur->size) {
22060ccdca4SDavid Hildenbrand             return cur;
22160ccdca4SDavid Hildenbrand         }
222a3c0118cSDavid Hildenbrand         if (guest_addr >= cur->gpa + cur->size) {
223a3c0118cSDavid Hildenbrand             low = mid + 1;
224a3c0118cSDavid Hildenbrand         }
225a3c0118cSDavid Hildenbrand         if (guest_addr < cur->gpa) {
226a3c0118cSDavid Hildenbrand             high = mid - 1;
227a3c0118cSDavid Hildenbrand         }
22860ccdca4SDavid Hildenbrand     }
22960ccdca4SDavid Hildenbrand     return NULL;
23060ccdca4SDavid Hildenbrand }
23160ccdca4SDavid Hildenbrand 
2320df750e9SMarc-André Lureau /* Translate guest physical address to our virtual address.  */
2330df750e9SMarc-André Lureau void *
vu_gpa_to_va(VuDev * dev,uint64_t * plen,uint64_t guest_addr)2340df750e9SMarc-André Lureau vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr)
2350df750e9SMarc-André Lureau {
23660ccdca4SDavid Hildenbrand     VuDevRegion *r;
2370df750e9SMarc-André Lureau 
2380df750e9SMarc-André Lureau     if (*plen == 0) {
2390df750e9SMarc-André Lureau         return NULL;
2400df750e9SMarc-André Lureau     }
2410df750e9SMarc-André Lureau 
24260ccdca4SDavid Hildenbrand     r = vu_gpa_to_mem_region(dev, guest_addr);
24360ccdca4SDavid Hildenbrand     if (!r) {
24460ccdca4SDavid Hildenbrand         return NULL;
24560ccdca4SDavid Hildenbrand     }
2460df750e9SMarc-André Lureau 
2470df750e9SMarc-André Lureau     if ((guest_addr + *plen) > (r->gpa + r->size)) {
2480df750e9SMarc-André Lureau         *plen = r->gpa + r->size - guest_addr;
2490df750e9SMarc-André Lureau     }
25060ccdca4SDavid Hildenbrand     return (void *)(uintptr_t)guest_addr - r->gpa + r->mmap_addr +
25160ccdca4SDavid Hildenbrand            r->mmap_offset;
2520df750e9SMarc-André Lureau }
2530df750e9SMarc-André Lureau 
2540df750e9SMarc-André Lureau /* Translate qemu virtual address to our virtual address.  */
2550df750e9SMarc-André Lureau static void *
qva_to_va(VuDev * dev,uint64_t qemu_addr)2560df750e9SMarc-André Lureau qva_to_va(VuDev *dev, uint64_t qemu_addr)
2570df750e9SMarc-André Lureau {
25892bf2461SMarcel Holtmann     unsigned int i;
2590df750e9SMarc-André Lureau 
2600df750e9SMarc-André Lureau     /* Find matching memory region.  */
2610df750e9SMarc-André Lureau     for (i = 0; i < dev->nregions; i++) {
2620df750e9SMarc-André Lureau         VuDevRegion *r = &dev->regions[i];
2630df750e9SMarc-André Lureau 
2640df750e9SMarc-André Lureau         if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
2650df750e9SMarc-André Lureau             return (void *)(uintptr_t)
2660df750e9SMarc-André Lureau                 qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
2670df750e9SMarc-André Lureau         }
2680df750e9SMarc-André Lureau     }
2690df750e9SMarc-André Lureau 
2700df750e9SMarc-André Lureau     return NULL;
2710df750e9SMarc-André Lureau }
2720df750e9SMarc-André Lureau 
2730df750e9SMarc-André Lureau static void
vu_remove_all_mem_regs(VuDev * dev)274bec58209SDavid Hildenbrand vu_remove_all_mem_regs(VuDev *dev)
275bec58209SDavid Hildenbrand {
276bec58209SDavid Hildenbrand     unsigned int i;
277bec58209SDavid Hildenbrand 
278bec58209SDavid Hildenbrand     for (i = 0; i < dev->nregions; i++) {
279bec58209SDavid Hildenbrand         VuDevRegion *r = &dev->regions[i];
280bec58209SDavid Hildenbrand 
2814f865c3bSDavid Hildenbrand         munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
282bec58209SDavid Hildenbrand     }
283bec58209SDavid Hildenbrand     dev->nregions = 0;
284bec58209SDavid Hildenbrand }
285bec58209SDavid Hildenbrand 
2862a290227SDavid Hildenbrand static bool
map_ring(VuDev * dev,VuVirtq * vq)28767f4f663SDavid Hildenbrand map_ring(VuDev *dev, VuVirtq *vq)
28867f4f663SDavid Hildenbrand {
28967f4f663SDavid Hildenbrand     vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
29067f4f663SDavid Hildenbrand     vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
29167f4f663SDavid Hildenbrand     vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
29267f4f663SDavid Hildenbrand 
29367f4f663SDavid Hildenbrand     DPRINT("Setting virtq addresses:\n");
29467f4f663SDavid Hildenbrand     DPRINT("    vring_desc  at %p\n", vq->vring.desc);
29567f4f663SDavid Hildenbrand     DPRINT("    vring_used  at %p\n", vq->vring.used);
29667f4f663SDavid Hildenbrand     DPRINT("    vring_avail at %p\n", vq->vring.avail);
29767f4f663SDavid Hildenbrand 
29867f4f663SDavid Hildenbrand     return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
29967f4f663SDavid Hildenbrand }
30067f4f663SDavid Hildenbrand 
/*
 * Return whether @vq can currently be accessed: the device must not be
 * broken and the ring mappings must be valid (remapping them on demand
 * if they were dropped by unmap_rings()).
 */
static bool
vu_is_vq_usable(VuDev *dev, VuVirtq *vq)
{
    if (unlikely(dev->broken)) {
        return false;
    }

    /* Fast path: the rings are already mapped. */
    if (likely(vq->vring.avail)) {
        return true;
    }

    /*
     * In corner cases, we might temporarily remove a memory region that
     * mapped a ring. When removing a memory region we make sure to
     * unmap any rings that would be impacted. Let's try to remap if we
     * already succeeded mapping this ring once.
     */
    if (!vq->vra.desc_user_addr || !vq->vra.used_user_addr ||
        !vq->vra.avail_user_addr) {
        /* The ring was never set up, so there is nothing to remap. */
        return false;
    }
    /* map_ring() returns true on failure. */
    if (map_ring(dev, vq)) {
        vu_panic(dev, "remapping queue on access");
        return false;
    }
    return true;
}
32867f4f663SDavid Hildenbrand 
32967f4f663SDavid Hildenbrand static void
unmap_rings(VuDev * dev,VuDevRegion * r)33067f4f663SDavid Hildenbrand unmap_rings(VuDev *dev, VuDevRegion *r)
33167f4f663SDavid Hildenbrand {
33267f4f663SDavid Hildenbrand     int i;
33367f4f663SDavid Hildenbrand 
33467f4f663SDavid Hildenbrand     for (i = 0; i < dev->max_queues; i++) {
33567f4f663SDavid Hildenbrand         VuVirtq *vq = &dev->vq[i];
33667f4f663SDavid Hildenbrand         const uintptr_t desc = (uintptr_t)vq->vring.desc;
33767f4f663SDavid Hildenbrand         const uintptr_t used = (uintptr_t)vq->vring.used;
33867f4f663SDavid Hildenbrand         const uintptr_t avail = (uintptr_t)vq->vring.avail;
33967f4f663SDavid Hildenbrand 
34067f4f663SDavid Hildenbrand         if (desc < r->mmap_addr || desc >= r->mmap_addr + r->size) {
34167f4f663SDavid Hildenbrand             continue;
34267f4f663SDavid Hildenbrand         }
34367f4f663SDavid Hildenbrand         if (used < r->mmap_addr || used >= r->mmap_addr + r->size) {
34467f4f663SDavid Hildenbrand             continue;
34567f4f663SDavid Hildenbrand         }
34667f4f663SDavid Hildenbrand         if (avail < r->mmap_addr || avail >= r->mmap_addr + r->size) {
34767f4f663SDavid Hildenbrand             continue;
34867f4f663SDavid Hildenbrand         }
34967f4f663SDavid Hildenbrand 
35067f4f663SDavid Hildenbrand         DPRINT("Unmapping rings of queue %d\n", i);
35167f4f663SDavid Hildenbrand         vq->vring.desc = NULL;
35267f4f663SDavid Hildenbrand         vq->vring.used = NULL;
35367f4f663SDavid Hildenbrand         vq->vring.avail = NULL;
35467f4f663SDavid Hildenbrand     }
3552a290227SDavid Hildenbrand }
3562a290227SDavid Hildenbrand 
/*
 * If @fd refers to a file on hugetlbfs, return the huge page size in
 * use (reported as the filesystem block size); return 0 otherwise,
 * including on fstatfs() failure or on non-Linux builds.
 */
static size_t
get_fd_hugepagesize(int fd)
{
#if defined(__linux__)
    struct statfs fs;
    int ret;

    /* Retry if the call is interrupted by a signal. */
    while ((ret = fstatfs(fd, &fs)) != 0 && errno == EINTR) {
    }

    if (ret == 0 && (unsigned int)fs.f_type == HUGETLBFS_MAGIC) {
        return fs.f_bsize;
    }
#endif
    return 0;
}
374b2b63008SDavid Hildenbrand 
/*
 * mmap() the memory described by @msg_region (backed by @fd) and insert
 * it into dev->regions, keeping the array sorted by guest physical
 * address.  On failure the device is marked broken via vu_panic().
 *
 * NOTE(review): no check here that dev->nregions is below the array
 * capacity — presumably the callers validate the slot count before
 * calling; confirm at the call sites.
 */
static void
_vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
{
    const uint64_t start_gpa = msg_region->guest_phys_addr;
    const uint64_t end_gpa = start_gpa + msg_region->memory_size;
    int prot = PROT_READ | PROT_WRITE;
    uint64_t mmap_offset, fd_offset;
    size_t hugepagesize;
    VuDevRegion *r;
    void *mmap_addr;
    int low = 0;
    int high = dev->nregions - 1;
    unsigned int idx;

    DPRINT("Adding region %d\n", dev->nregions);
    DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
           msg_region->guest_phys_addr);
    DPRINT("    memory_size:     0x%016"PRIx64"\n",
           msg_region->memory_size);
    DPRINT("    userspace_addr:  0x%016"PRIx64"\n",
           msg_region->userspace_addr);
    DPRINT("    old mmap_offset: 0x%016"PRIx64"\n",
           msg_region->mmap_offset);

    if (dev->postcopy_listening) {
        /*
         * In postcopy we're using PROT_NONE here to catch anyone
         * accessing it before we userfault
         */
        prot = PROT_NONE;
    }

    /*
     * We will add memory regions into the array sorted by GPA. Perform a
     * binary search to locate the insertion point: it will be at the low
     * index.
     */
    while (low <= high) {
        unsigned int mid = low + (high - low)  / 2;
        VuDevRegion *cur = &dev->regions[mid];

        /* Overlap of GPA addresses. */
        if (start_gpa < cur->gpa + cur->size && cur->gpa < end_gpa) {
            vu_panic(dev, "regions with overlapping guest physical addresses");
            return;
        }
        if (start_gpa >= cur->gpa + cur->size) {
            low = mid + 1;
        }
        if (start_gpa < cur->gpa) {
            high = mid - 1;
        }
    }
    idx = low;

    /*
     * Convert most of msg_region->mmap_offset to fd_offset. In almost all
     * cases, this will leave us with mmap_offset == 0, mmap()'ing only
     * what we really need. Only if a memory region would partially cover
     * hugetlb pages, we'd get mmap_offset != 0, which usually doesn't happen
     * anymore (i.e., modern QEMU).
     *
     * Note that mmap() with hugetlb would fail if the offset into the file
     * is not aligned to the huge page size.
     */
    hugepagesize = get_fd_hugepagesize(fd);
    if (hugepagesize) {
        fd_offset = ALIGN_DOWN(msg_region->mmap_offset, hugepagesize);
        mmap_offset = msg_region->mmap_offset - fd_offset;
    } else {
        fd_offset = msg_region->mmap_offset;
        mmap_offset = 0;
    }

    DPRINT("    fd_offset:       0x%016"PRIx64"\n",
           fd_offset);
    DPRINT("    new mmap_offset: 0x%016"PRIx64"\n",
           mmap_offset);

    /* MAP_NORESERVE: don't reserve swap space for the whole guest RAM. */
    mmap_addr = mmap(0, msg_region->memory_size + mmap_offset,
                     prot, MAP_SHARED | MAP_NORESERVE, fd, fd_offset);
    if (mmap_addr == MAP_FAILED) {
        vu_panic(dev, "region mmap error: %s", strerror(errno));
        return;
    }
    DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
           (uint64_t)(uintptr_t)mmap_addr);

#if defined(__linux__)
    /* Don't include all guest memory in a coredump. */
    madvise(mmap_addr, msg_region->memory_size + mmap_offset,
            MADV_DONTDUMP);
#endif

    /* Shift all affected entries by 1 to open a hole at idx. */
    r = &dev->regions[idx];
    memmove(r + 1, r, sizeof(VuDevRegion) * (dev->nregions - idx));
    r->gpa = msg_region->guest_phys_addr;
    r->size = msg_region->memory_size;
    r->qva = msg_region->userspace_addr;
    r->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
    r->mmap_offset = mmap_offset;
    dev->nregions++;

    if (dev->postcopy_listening) {
        /*
         * Return the address to QEMU so that it can translate the ufd
         * fault addresses back.
         */
        msg_region->userspace_addr = r->mmap_addr + r->mmap_offset;
    }
}
48793fec23dSDavid Hildenbrand 
48893fec23dSDavid Hildenbrand static void
vmsg_close_fds(VhostUserMsg * vmsg)4890df750e9SMarc-André Lureau vmsg_close_fds(VhostUserMsg *vmsg)
4900df750e9SMarc-André Lureau {
4910df750e9SMarc-André Lureau     int i;
4920df750e9SMarc-André Lureau 
4930df750e9SMarc-André Lureau     for (i = 0; i < vmsg->fd_num; i++) {
4940df750e9SMarc-André Lureau         close(vmsg->fds[i]);
4950df750e9SMarc-André Lureau     }
4960df750e9SMarc-André Lureau }
4970df750e9SMarc-André Lureau 
4980df750e9SMarc-André Lureau /* Set reply payload.u64 and clear request flags and fd_num */
vmsg_set_reply_u64(VhostUserMsg * vmsg,uint64_t val)4990df750e9SMarc-André Lureau static void vmsg_set_reply_u64(VhostUserMsg *vmsg, uint64_t val)
5000df750e9SMarc-André Lureau {
5010df750e9SMarc-André Lureau     vmsg->flags = 0; /* defaults will be set by vu_send_reply() */
5020df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.u64);
5030df750e9SMarc-André Lureau     vmsg->payload.u64 = val;
5040df750e9SMarc-André Lureau     vmsg->fd_num = 0;
5050df750e9SMarc-André Lureau }
5060df750e9SMarc-André Lureau 
/* A test to see if we have userfault available */
static bool
have_userfault(void)
{
#if defined(__linux__) && defined(__NR_userfaultfd) &&\
        defined(UFFD_FEATURE_MISSING_SHMEM) &&\
        defined(UFFD_FEATURE_MISSING_HUGETLBFS)
    /* Now test the kernel we're running on really has the features */
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    struct uffdio_api api_struct;
    if (ufd < 0) {
        /* Kernel lacks userfaultfd, or we lack permission to use it. */
        return false;
    }

    /*
     * UFFDIO_API both negotiates the API version and reports which
     * features the kernel supports; it fails if a requested feature
     * (missing-page events on shmem and hugetlbfs) is unavailable.
     */
    api_struct.api = UFFD_API;
    api_struct.features = UFFD_FEATURE_MISSING_SHMEM |
                          UFFD_FEATURE_MISSING_HUGETLBFS;
    if (ioctl(ufd, UFFDIO_API, &api_struct)) {
        close(ufd);
        return false;
    }
    /* The probe fd is not kept; callers open their own when needed. */
    close(ufd);
    return true;

#else
    return false;
#endif
}
5350df750e9SMarc-André Lureau 
5360df750e9SMarc-André Lureau static bool
vu_message_read_default(VuDev * dev,int conn_fd,VhostUserMsg * vmsg)5370df750e9SMarc-André Lureau vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
5380df750e9SMarc-André Lureau {
5390df750e9SMarc-André Lureau     char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
5400df750e9SMarc-André Lureau     struct iovec iov = {
5410df750e9SMarc-André Lureau         .iov_base = (char *)vmsg,
5420df750e9SMarc-André Lureau         .iov_len = VHOST_USER_HDR_SIZE,
5430df750e9SMarc-André Lureau     };
5440df750e9SMarc-André Lureau     struct msghdr msg = {
5450df750e9SMarc-André Lureau         .msg_iov = &iov,
5460df750e9SMarc-André Lureau         .msg_iovlen = 1,
5470df750e9SMarc-André Lureau         .msg_control = control,
5480df750e9SMarc-André Lureau         .msg_controllen = sizeof(control),
5490df750e9SMarc-André Lureau     };
5500df750e9SMarc-André Lureau     size_t fd_size;
5510df750e9SMarc-André Lureau     struct cmsghdr *cmsg;
5520df750e9SMarc-André Lureau     int rc;
5530df750e9SMarc-André Lureau 
5540df750e9SMarc-André Lureau     do {
5550df750e9SMarc-André Lureau         rc = recvmsg(conn_fd, &msg, 0);
5560df750e9SMarc-André Lureau     } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
5570df750e9SMarc-André Lureau 
5580df750e9SMarc-André Lureau     if (rc < 0) {
5590df750e9SMarc-André Lureau         vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
5600df750e9SMarc-André Lureau         return false;
5610df750e9SMarc-André Lureau     }
5620df750e9SMarc-André Lureau 
5630df750e9SMarc-André Lureau     vmsg->fd_num = 0;
5640df750e9SMarc-André Lureau     for (cmsg = CMSG_FIRSTHDR(&msg);
5650df750e9SMarc-André Lureau          cmsg != NULL;
5660df750e9SMarc-André Lureau          cmsg = CMSG_NXTHDR(&msg, cmsg))
5670df750e9SMarc-André Lureau     {
5680df750e9SMarc-André Lureau         if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
5690df750e9SMarc-André Lureau             fd_size = cmsg->cmsg_len - CMSG_LEN(0);
5700df750e9SMarc-André Lureau             vmsg->fd_num = fd_size / sizeof(int);
571a6f4d2ecSVladimir Sementsov-Ogievskiy             assert(fd_size < VHOST_MEMORY_BASELINE_NREGIONS);
5720df750e9SMarc-André Lureau             memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
5730df750e9SMarc-André Lureau             break;
5740df750e9SMarc-André Lureau         }
5750df750e9SMarc-André Lureau     }
5760df750e9SMarc-André Lureau 
5770df750e9SMarc-André Lureau     if (vmsg->size > sizeof(vmsg->payload)) {
5780df750e9SMarc-André Lureau         vu_panic(dev,
5790df750e9SMarc-André Lureau                  "Error: too big message request: %d, size: vmsg->size: %u, "
5800df750e9SMarc-André Lureau                  "while sizeof(vmsg->payload) = %zu\n",
5810df750e9SMarc-André Lureau                  vmsg->request, vmsg->size, sizeof(vmsg->payload));
5820df750e9SMarc-André Lureau         goto fail;
5830df750e9SMarc-André Lureau     }
5840df750e9SMarc-André Lureau 
5850df750e9SMarc-André Lureau     if (vmsg->size) {
5860df750e9SMarc-André Lureau         do {
5870df750e9SMarc-André Lureau             rc = read(conn_fd, &vmsg->payload, vmsg->size);
5880df750e9SMarc-André Lureau         } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
5890df750e9SMarc-André Lureau 
5900df750e9SMarc-André Lureau         if (rc <= 0) {
5910df750e9SMarc-André Lureau             vu_panic(dev, "Error while reading: %s", strerror(errno));
5920df750e9SMarc-André Lureau             goto fail;
5930df750e9SMarc-André Lureau         }
5940df750e9SMarc-André Lureau 
59518fa7f1eSMarcel Holtmann         assert((uint32_t)rc == vmsg->size);
5960df750e9SMarc-André Lureau     }
5970df750e9SMarc-André Lureau 
5980df750e9SMarc-André Lureau     return true;
5990df750e9SMarc-André Lureau 
6000df750e9SMarc-André Lureau fail:
6010df750e9SMarc-André Lureau     vmsg_close_fds(vmsg);
6020df750e9SMarc-André Lureau 
6030df750e9SMarc-André Lureau     return false;
6040df750e9SMarc-André Lureau }
6050df750e9SMarc-André Lureau 
6060df750e9SMarc-André Lureau static bool
vu_message_write(VuDev * dev,int conn_fd,VhostUserMsg * vmsg)6070df750e9SMarc-André Lureau vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
6080df750e9SMarc-André Lureau {
6090df750e9SMarc-André Lureau     int rc;
6100df750e9SMarc-André Lureau     uint8_t *p = (uint8_t *)vmsg;
6110df750e9SMarc-André Lureau     char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
6120df750e9SMarc-André Lureau     struct iovec iov = {
6130df750e9SMarc-André Lureau         .iov_base = (char *)vmsg,
6140df750e9SMarc-André Lureau         .iov_len = VHOST_USER_HDR_SIZE,
6150df750e9SMarc-André Lureau     };
6160df750e9SMarc-André Lureau     struct msghdr msg = {
6170df750e9SMarc-André Lureau         .msg_iov = &iov,
6180df750e9SMarc-André Lureau         .msg_iovlen = 1,
6190df750e9SMarc-André Lureau         .msg_control = control,
6200df750e9SMarc-André Lureau     };
6210df750e9SMarc-André Lureau     struct cmsghdr *cmsg;
6220df750e9SMarc-André Lureau 
6230df750e9SMarc-André Lureau     memset(control, 0, sizeof(control));
6240df750e9SMarc-André Lureau     assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
6250df750e9SMarc-André Lureau     if (vmsg->fd_num > 0) {
6260df750e9SMarc-André Lureau         size_t fdsize = vmsg->fd_num * sizeof(int);
6270df750e9SMarc-André Lureau         msg.msg_controllen = CMSG_SPACE(fdsize);
6280df750e9SMarc-André Lureau         cmsg = CMSG_FIRSTHDR(&msg);
6290df750e9SMarc-André Lureau         cmsg->cmsg_len = CMSG_LEN(fdsize);
6300df750e9SMarc-André Lureau         cmsg->cmsg_level = SOL_SOCKET;
6310df750e9SMarc-André Lureau         cmsg->cmsg_type = SCM_RIGHTS;
6320df750e9SMarc-André Lureau         memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize);
6330df750e9SMarc-André Lureau     } else {
6340df750e9SMarc-André Lureau         msg.msg_controllen = 0;
6350df750e9SMarc-André Lureau     }
6360df750e9SMarc-André Lureau 
6370df750e9SMarc-André Lureau     do {
6380df750e9SMarc-André Lureau         rc = sendmsg(conn_fd, &msg, 0);
6390df750e9SMarc-André Lureau     } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
6400df750e9SMarc-André Lureau 
6410df750e9SMarc-André Lureau     if (vmsg->size) {
6420df750e9SMarc-André Lureau         do {
6430df750e9SMarc-André Lureau             if (vmsg->data) {
6440df750e9SMarc-André Lureau                 rc = write(conn_fd, vmsg->data, vmsg->size);
6450df750e9SMarc-André Lureau             } else {
6460df750e9SMarc-André Lureau                 rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
6470df750e9SMarc-André Lureau             }
6480df750e9SMarc-André Lureau         } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
6490df750e9SMarc-André Lureau     }
6500df750e9SMarc-André Lureau 
6510df750e9SMarc-André Lureau     if (rc <= 0) {
6520df750e9SMarc-André Lureau         vu_panic(dev, "Error while writing: %s", strerror(errno));
6530df750e9SMarc-André Lureau         return false;
6540df750e9SMarc-André Lureau     }
6550df750e9SMarc-André Lureau 
6560df750e9SMarc-André Lureau     return true;
6570df750e9SMarc-André Lureau }
6580df750e9SMarc-André Lureau 
6590df750e9SMarc-André Lureau static bool
vu_send_reply(VuDev * dev,int conn_fd,VhostUserMsg * vmsg)6600df750e9SMarc-André Lureau vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
6610df750e9SMarc-André Lureau {
6620df750e9SMarc-André Lureau     /* Set the version in the flags when sending the reply */
6630df750e9SMarc-André Lureau     vmsg->flags &= ~VHOST_USER_VERSION_MASK;
6640df750e9SMarc-André Lureau     vmsg->flags |= VHOST_USER_VERSION;
6650df750e9SMarc-André Lureau     vmsg->flags |= VHOST_USER_REPLY_MASK;
6660df750e9SMarc-André Lureau 
6670df750e9SMarc-André Lureau     return vu_message_write(dev, conn_fd, vmsg);
6680df750e9SMarc-André Lureau }
6690df750e9SMarc-André Lureau 
6700df750e9SMarc-André Lureau /*
671f8ed3648SManos Pitsidianakis  * Processes a reply on the backend channel.
672f8ed3648SManos Pitsidianakis  * Entered with backend_mutex held and releases it before exit.
6730df750e9SMarc-André Lureau  * Returns true on success.
6740df750e9SMarc-André Lureau  */
6750df750e9SMarc-André Lureau static bool
vu_process_message_reply(VuDev * dev,const VhostUserMsg * vmsg)6760df750e9SMarc-André Lureau vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
6770df750e9SMarc-André Lureau {
6780df750e9SMarc-André Lureau     VhostUserMsg msg_reply;
6790df750e9SMarc-André Lureau     bool result = false;
6800df750e9SMarc-André Lureau 
6810df750e9SMarc-André Lureau     if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
6820df750e9SMarc-André Lureau         result = true;
6830df750e9SMarc-André Lureau         goto out;
6840df750e9SMarc-André Lureau     }
6850df750e9SMarc-André Lureau 
686f8ed3648SManos Pitsidianakis     if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
6870df750e9SMarc-André Lureau         goto out;
6880df750e9SMarc-André Lureau     }
6890df750e9SMarc-André Lureau 
6900df750e9SMarc-André Lureau     if (msg_reply.request != vmsg->request) {
6910df750e9SMarc-André Lureau         DPRINT("Received unexpected msg type. Expected %d received %d",
6920df750e9SMarc-André Lureau                vmsg->request, msg_reply.request);
6930df750e9SMarc-André Lureau         goto out;
6940df750e9SMarc-André Lureau     }
6950df750e9SMarc-André Lureau 
6960df750e9SMarc-André Lureau     result = msg_reply.payload.u64 == 0;
6970df750e9SMarc-André Lureau 
6980df750e9SMarc-André Lureau out:
699f8ed3648SManos Pitsidianakis     pthread_mutex_unlock(&dev->backend_mutex);
7000df750e9SMarc-André Lureau     return result;
7010df750e9SMarc-André Lureau }
7020df750e9SMarc-André Lureau 
7030df750e9SMarc-André Lureau /* Kick the log_call_fd if required. */
7040df750e9SMarc-André Lureau static void
vu_log_kick(VuDev * dev)7050df750e9SMarc-André Lureau vu_log_kick(VuDev *dev)
7060df750e9SMarc-André Lureau {
7070df750e9SMarc-André Lureau     if (dev->log_call_fd != -1) {
7080df750e9SMarc-André Lureau         DPRINT("Kicking the QEMU's log...\n");
7090df750e9SMarc-André Lureau         if (eventfd_write(dev->log_call_fd, 1) < 0) {
7100df750e9SMarc-André Lureau             vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
7110df750e9SMarc-André Lureau         }
7120df750e9SMarc-André Lureau     }
7130df750e9SMarc-André Lureau }
7140df750e9SMarc-André Lureau 
/* Atomically set the dirty bit for @page in the shared log bitmap. */
static void
vu_log_page(uint8_t *log_table, uint64_t page)
{
    uint64_t byte_idx = page / 8;
    unsigned int bit = page % 8;

    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    qatomic_or(&log_table[byte_idx], 1 << bit);
}
7210df750e9SMarc-André Lureau 
7220df750e9SMarc-André Lureau static void
vu_log_write(VuDev * dev,uint64_t address,uint64_t length)7230df750e9SMarc-André Lureau vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
7240df750e9SMarc-André Lureau {
7250df750e9SMarc-André Lureau     uint64_t page;
7260df750e9SMarc-André Lureau 
7270df750e9SMarc-André Lureau     if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
7280df750e9SMarc-André Lureau         !dev->log_table || !length) {
7290df750e9SMarc-André Lureau         return;
7300df750e9SMarc-André Lureau     }
7310df750e9SMarc-André Lureau 
7320df750e9SMarc-André Lureau     assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
7330df750e9SMarc-André Lureau 
7340df750e9SMarc-André Lureau     page = address / VHOST_LOG_PAGE;
7350df750e9SMarc-André Lureau     while (page * VHOST_LOG_PAGE < address + length) {
7360df750e9SMarc-André Lureau         vu_log_page(dev->log_table, page);
7370df750e9SMarc-André Lureau         page += 1;
7380df750e9SMarc-André Lureau     }
7390df750e9SMarc-André Lureau 
7400df750e9SMarc-André Lureau     vu_log_kick(dev);
7410df750e9SMarc-André Lureau }
7420df750e9SMarc-André Lureau 
7430df750e9SMarc-André Lureau static void
vu_kick_cb(VuDev * dev,int condition,void * data)7440df750e9SMarc-André Lureau vu_kick_cb(VuDev *dev, int condition, void *data)
7450df750e9SMarc-André Lureau {
7460df750e9SMarc-André Lureau     int index = (intptr_t)data;
7470df750e9SMarc-André Lureau     VuVirtq *vq = &dev->vq[index];
7480df750e9SMarc-André Lureau     int sock = vq->kick_fd;
7490df750e9SMarc-André Lureau     eventfd_t kick_data;
7500df750e9SMarc-André Lureau     ssize_t rc;
7510df750e9SMarc-André Lureau 
7520df750e9SMarc-André Lureau     rc = eventfd_read(sock, &kick_data);
7530df750e9SMarc-André Lureau     if (rc == -1) {
7540df750e9SMarc-André Lureau         vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
7550df750e9SMarc-André Lureau         dev->remove_watch(dev, dev->vq[index].kick_fd);
7560df750e9SMarc-André Lureau     } else {
7570df750e9SMarc-André Lureau         DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
7580df750e9SMarc-André Lureau                kick_data, vq->handler, index);
7590df750e9SMarc-André Lureau         if (vq->handler) {
7600df750e9SMarc-André Lureau             vq->handler(dev, index);
7610df750e9SMarc-André Lureau         }
7620df750e9SMarc-André Lureau     }
7630df750e9SMarc-André Lureau }
7640df750e9SMarc-André Lureau 
7650df750e9SMarc-André Lureau static bool
vu_get_features_exec(VuDev * dev,VhostUserMsg * vmsg)7660df750e9SMarc-André Lureau vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
7670df750e9SMarc-André Lureau {
7680df750e9SMarc-André Lureau     vmsg->payload.u64 =
7690df750e9SMarc-André Lureau         /*
7700df750e9SMarc-André Lureau          * The following VIRTIO feature bits are supported by our virtqueue
7710df750e9SMarc-André Lureau          * implementation:
7720df750e9SMarc-André Lureau          */
7730df750e9SMarc-André Lureau         1ULL << VIRTIO_F_NOTIFY_ON_EMPTY |
7740df750e9SMarc-André Lureau         1ULL << VIRTIO_RING_F_INDIRECT_DESC |
7750df750e9SMarc-André Lureau         1ULL << VIRTIO_RING_F_EVENT_IDX |
7760df750e9SMarc-André Lureau         1ULL << VIRTIO_F_VERSION_1 |
7770df750e9SMarc-André Lureau 
7780df750e9SMarc-André Lureau         /* vhost-user feature bits */
7790df750e9SMarc-André Lureau         1ULL << VHOST_F_LOG_ALL |
7800df750e9SMarc-André Lureau         1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
7810df750e9SMarc-André Lureau 
7820df750e9SMarc-André Lureau     if (dev->iface->get_features) {
7830df750e9SMarc-André Lureau         vmsg->payload.u64 |= dev->iface->get_features(dev);
7840df750e9SMarc-André Lureau     }
7850df750e9SMarc-André Lureau 
7860df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.u64);
7870df750e9SMarc-André Lureau     vmsg->fd_num = 0;
7880df750e9SMarc-André Lureau 
7890df750e9SMarc-André Lureau     DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
7900df750e9SMarc-André Lureau 
7910df750e9SMarc-André Lureau     return true;
7920df750e9SMarc-André Lureau }
7930df750e9SMarc-André Lureau 
7940df750e9SMarc-André Lureau static void
vu_set_enable_all_rings(VuDev * dev,bool enabled)7950df750e9SMarc-André Lureau vu_set_enable_all_rings(VuDev *dev, bool enabled)
7960df750e9SMarc-André Lureau {
7970df750e9SMarc-André Lureau     uint16_t i;
7980df750e9SMarc-André Lureau 
7990df750e9SMarc-André Lureau     for (i = 0; i < dev->max_queues; i++) {
8000df750e9SMarc-André Lureau         dev->vq[i].enable = enabled;
8010df750e9SMarc-André Lureau     }
8020df750e9SMarc-André Lureau }
8030df750e9SMarc-André Lureau 
8040df750e9SMarc-André Lureau static bool
vu_set_features_exec(VuDev * dev,VhostUserMsg * vmsg)8050df750e9SMarc-André Lureau vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
8060df750e9SMarc-André Lureau {
8070df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
8080df750e9SMarc-André Lureau 
8090df750e9SMarc-André Lureau     dev->features = vmsg->payload.u64;
8100df750e9SMarc-André Lureau     if (!vu_has_feature(dev, VIRTIO_F_VERSION_1)) {
8110df750e9SMarc-André Lureau         /*
8120df750e9SMarc-André Lureau          * We only support devices conforming to VIRTIO 1.0 or
8130df750e9SMarc-André Lureau          * later
8140df750e9SMarc-André Lureau          */
8150df750e9SMarc-André Lureau         vu_panic(dev, "virtio legacy devices aren't supported by libvhost-user");
8160df750e9SMarc-André Lureau         return false;
8170df750e9SMarc-André Lureau     }
8180df750e9SMarc-André Lureau 
8190df750e9SMarc-André Lureau     if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
8200df750e9SMarc-André Lureau         vu_set_enable_all_rings(dev, true);
8210df750e9SMarc-André Lureau     }
8220df750e9SMarc-André Lureau 
8230df750e9SMarc-André Lureau     if (dev->iface->set_features) {
8240df750e9SMarc-André Lureau         dev->iface->set_features(dev, dev->features);
8250df750e9SMarc-André Lureau     }
8260df750e9SMarc-André Lureau 
8270df750e9SMarc-André Lureau     return false;
8280df750e9SMarc-André Lureau }
8290df750e9SMarc-André Lureau 
/*
 * VHOST_USER_SET_OWNER handler: intentionally a no-op in this
 * implementation.  Returning false follows the handler convention
 * visible in this file (e.g. vu_get_features_exec returns true to
 * send vmsg back as a reply): no reply is sent.
 */
static bool
vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    return false;
}
8350df750e9SMarc-André Lureau 
8360df750e9SMarc-André Lureau static void
vu_close_log(VuDev * dev)8370df750e9SMarc-André Lureau vu_close_log(VuDev *dev)
8380df750e9SMarc-André Lureau {
8390df750e9SMarc-André Lureau     if (dev->log_table) {
8400df750e9SMarc-André Lureau         if (munmap(dev->log_table, dev->log_size) != 0) {
8410df750e9SMarc-André Lureau             perror("close log munmap() error");
8420df750e9SMarc-André Lureau         }
8430df750e9SMarc-André Lureau 
8440df750e9SMarc-André Lureau         dev->log_table = NULL;
8450df750e9SMarc-André Lureau     }
8460df750e9SMarc-André Lureau     if (dev->log_call_fd != -1) {
8470df750e9SMarc-André Lureau         close(dev->log_call_fd);
8480df750e9SMarc-André Lureau         dev->log_call_fd = -1;
8490df750e9SMarc-André Lureau     }
8500df750e9SMarc-André Lureau }
8510df750e9SMarc-André Lureau 
/*
 * VHOST_USER_RESET_DEVICE / RESET_OWNER handler: disable every ring.
 * Returns false, i.e. no reply is sent.
 */
static bool
vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    vu_set_enable_all_rings(dev, false);

    return false;
}
8590df750e9SMarc-André Lureau 
/*
 * Register every mapped memory region with the device's userfaultfd
 * (dev->postcopy_ufd) so that missing-page accesses fault and can be
 * served remotely during postcopy migration.  Returns false if any
 * region fails to register; with no UFFDIO_REGISTER support in the
 * headers the loop body compiles away and the function returns true.
 */
static bool
generate_faults(VuDev *dev) {
    unsigned int i;
    for (i = 0; i < dev->nregions; i++) {
#ifdef UFFDIO_REGISTER
        VuDevRegion *dev_region = &dev->regions[i];
        int ret;
        struct uffdio_register reg_struct;

        /*
         * We should already have an open ufd. Mark each memory
         * range as ufd.
         * Discard any mapping we have here; note I can't use MADV_REMOVE
         * or fallocate to make the hole since I don't want to lose
         * data that's already arrived in the shared process.
         * TODO: How to do hugepage
         */
        ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
                      dev_region->size + dev_region->mmap_offset,
                      MADV_DONTNEED);
        if (ret) {
            /* Non-fatal: log and keep going. */
            fprintf(stderr,
                    "%s: Failed to madvise(DONTNEED) region %d: %s\n",
                    __func__, i, strerror(errno));
        }
        /*
         * Turn off transparent hugepages so we don't lose wakeups
         * in neighbouring pages.
         * TODO: Turn this back on later.
         */
        ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
                      dev_region->size + dev_region->mmap_offset,
                      MADV_NOHUGEPAGE);
        if (ret) {
            /*
             * Note: This can happen legally on kernels that are configured
             * without madvise'able hugepages
             */
            fprintf(stderr,
                    "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n",
                    __func__, i, strerror(errno));
        }

        /* Register the whole mmap'd span (including the file offset
         * prefix) for MISSING faults. */
        reg_struct.range.start = (uintptr_t)dev_region->mmap_addr;
        reg_struct.range.len = dev_region->size + dev_region->mmap_offset;
        reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

        if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, &reg_struct)) {
            vu_panic(dev, "%s: Failed to userfault region %d "
                          "@%" PRIx64 " + size:%" PRIx64 " offset: %" PRIx64
                          ": (ufd=%d)%s\n",
                     __func__, i,
                     dev_region->mmap_addr,
                     dev_region->size, dev_region->mmap_offset,
                     dev->postcopy_ufd, strerror(errno));
            return false;
        }
        /* The kernel reports which uffd ioctls work on this range;
         * UFFDIO_COPY is required to resolve faults. */
        if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) {
            vu_panic(dev, "%s Region (%d) doesn't support COPY",
                     __func__, i);
            return false;
        }
        DPRINT("%s: region %d: Registered userfault for %"
               PRIx64 " + %" PRIx64 "\n", __func__, i,
               (uint64_t)reg_struct.range.start,
               (uint64_t)reg_struct.range.len);
        /* Now it's registered we can let the client at it */
        if (mprotect((void *)(uintptr_t)dev_region->mmap_addr,
                     dev_region->size + dev_region->mmap_offset,
                     PROT_READ | PROT_WRITE)) {
            vu_panic(dev, "failed to mprotect region %d for postcopy (%s)",
                     i, strerror(errno));
            return false;
        }
        /* TODO: Stash 'zero' support flags somewhere */
#endif
    }

    return true;
}
9400df750e9SMarc-André Lureau 
/*
 * VHOST_USER_ADD_MEM_REG handler: hot-add one guest memory region,
 * whose backing memory arrives as the message's single fd.
 * Returns true only in postcopy mode, where the (address-filled)
 * message must be sent back as a reply; false otherwise.
 */
static bool
vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
    /* Copy the region descriptor out of the message payload. */
    VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;

    /* Exactly one fd (the region's backing memory) must accompany it. */
    if (vmsg->fd_num != 1) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_ADD_MEM_REG received %d fds - only 1 fd "
                      "should be sent for this message type", vmsg->fd_num);
        return false;
    }

    /* From here on exactly one fd is held, so plain close() suffices. */
    if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
        close(vmsg->fds[0]);
        vu_panic(dev, "VHOST_USER_ADD_MEM_REG requires a message size of at "
                      "least %zu bytes and only %d bytes were received",
                      VHOST_USER_MEM_REG_SIZE, vmsg->size);
        return false;
    }

    /* The region table has a fixed capacity. */
    if (dev->nregions == VHOST_USER_MAX_RAM_SLOTS) {
        close(vmsg->fds[0]);
        vu_panic(dev, "failing attempt to hot add memory via "
                      "VHOST_USER_ADD_MEM_REG message because the backend has "
                      "no free ram slots available");
        return false;
    }

    /*
     * If we are in postcopy mode and we receive a u64 payload with a 0 value
     * we know all the postcopy client bases have been received, and we
     * should start generating faults.
     *
     * NOTE(review): the size check above already rejects messages smaller
     * than VHOST_USER_MEM_REG_SIZE, so vmsg->size == sizeof(u64) looks
     * unreachable here — confirm against the postcopy handshake protocol.
     */
    if (dev->postcopy_listening &&
        vmsg->size == sizeof(vmsg->payload.u64) &&
        vmsg->payload.u64 == 0) {
        (void)generate_faults(dev);
        return false;
    }

    /* The helper dups/maps as needed; our copy of the fd is closed. */
    _vu_add_mem_reg(dev, msg_region, vmsg->fds[0]);
    close(vmsg->fds[0]);

    if (dev->postcopy_listening) {
        /* Send the message back to qemu with the addresses filled in. */
        vmsg->fd_num = 0;
        DPRINT("Successfully added new region in postcopy\n");
        return true;
    }
    DPRINT("Successfully added new region\n");
    return false;
}
9920df750e9SMarc-André Lureau 
reg_equal(VuDevRegion * vudev_reg,VhostUserMemoryRegion * msg_reg)9930df750e9SMarc-André Lureau static inline bool reg_equal(VuDevRegion *vudev_reg,
9940df750e9SMarc-André Lureau                              VhostUserMemoryRegion *msg_reg)
9950df750e9SMarc-André Lureau {
9960df750e9SMarc-André Lureau     if (vudev_reg->gpa == msg_reg->guest_phys_addr &&
9970df750e9SMarc-André Lureau         vudev_reg->qva == msg_reg->userspace_addr &&
9980df750e9SMarc-André Lureau         vudev_reg->size == msg_reg->memory_size) {
9990df750e9SMarc-André Lureau         return true;
10000df750e9SMarc-André Lureau     }
10010df750e9SMarc-André Lureau 
10020df750e9SMarc-André Lureau     return false;
10030df750e9SMarc-André Lureau }
10040df750e9SMarc-André Lureau 
/*
 * VHOST_USER_REM_MEM_REG handler: remove the memory region matching
 * the descriptor in the message payload.  Any fds attached to the
 * message are closed on every path.  Always returns false (no reply).
 */
static bool
vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
    VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;
    unsigned int idx;
    VuDevRegion *r;

    /* This message type carries at most one (ignored) fd. */
    if (vmsg->fd_num > 1) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_REM_MEM_REG received %d fds - at most 1 fd "
                      "should be sent for this message type", vmsg->fd_num);
        return false;
    }

    if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_REM_MEM_REG requires a message size of at "
                      "least %zu bytes and only %d bytes were received",
                      VHOST_USER_MEM_REG_SIZE, vmsg->size);
        return false;
    }

    DPRINT("Removing region:\n");
    DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
           msg_region->guest_phys_addr);
    DPRINT("    memory_size:     0x%016"PRIx64"\n",
           msg_region->memory_size);
    DPRINT("    userspace_addr   0x%016"PRIx64"\n",
           msg_region->userspace_addr);
    DPRINT("    mmap_offset      0x%016"PRIx64"\n",
           msg_region->mmap_offset);

    /* Locate the tracked region by GPA and verify it matches exactly. */
    r = vu_gpa_to_mem_region(dev, msg_region->guest_phys_addr);
    if (!r || !reg_equal(r, msg_region)) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Specified region not found\n");
        return false;
    }

    /*
     * There might be valid cases where we temporarily remove memory regions
     * to readd them again, or remove memory regions and don't use the rings
     * anymore before we set the ring addresses and restart the device.
     *
     * Unmap all affected rings, remapping them on demand later. This should
     * be a corner case.
     */
    unmap_rings(dev, r);

    /* Drop the whole mmap'd span, including the offset prefix. */
    munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);

    idx = r - dev->regions;
    assert(idx < dev->nregions);
    /* Shift all affected entries by 1 to close the hole. */
    memmove(r, r + 1, sizeof(VuDevRegion) * (dev->nregions - idx - 1));
    DPRINT("Successfully removed a region\n");
    dev->nregions--;

    vmsg_close_fds(vmsg);

    return false;
}
10660df750e9SMarc-André Lureau 
10670df750e9SMarc-André Lureau static bool
vu_get_shared_object(VuDev * dev,VhostUserMsg * vmsg)1068ce0f3b03SAlbert Esteve vu_get_shared_object(VuDev *dev, VhostUserMsg *vmsg)
1069ce0f3b03SAlbert Esteve {
1070ce0f3b03SAlbert Esteve     int fd_num = 0;
1071ce0f3b03SAlbert Esteve     int dmabuf_fd = -1;
1072ce0f3b03SAlbert Esteve     if (dev->iface->get_shared_object) {
1073ce0f3b03SAlbert Esteve         dmabuf_fd = dev->iface->get_shared_object(
1074ce0f3b03SAlbert Esteve             dev, &vmsg->payload.object.uuid[0]);
1075ce0f3b03SAlbert Esteve     }
1076ce0f3b03SAlbert Esteve     if (dmabuf_fd != -1) {
1077ce0f3b03SAlbert Esteve         DPRINT("dmabuf_fd found for requested UUID\n");
1078ce0f3b03SAlbert Esteve         vmsg->fds[fd_num++] = dmabuf_fd;
1079ce0f3b03SAlbert Esteve     }
1080ce0f3b03SAlbert Esteve     vmsg->fd_num = fd_num;
1081ce0f3b03SAlbert Esteve 
1082ce0f3b03SAlbert Esteve     return true;
1083ce0f3b03SAlbert Esteve }
1084ce0f3b03SAlbert Esteve 
static bool
vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    /*
     * Handle VHOST_USER_SET_MEM_TABLE: replace the complete guest memory
     * map with the regions described in the message, one mmap'able fd per
     * region (fds arrive in the same order as the region descriptors).
     *
     * NOTE(review): memory->nregions and vmsg->fd_num are assumed to have
     * been validated/bounded by the message read path — confirm.
     */
    VhostUserMemory m = vmsg->payload.memory, *memory = &m;
    unsigned int i;

    /* Drop every region of the previous memory table first. */
    vu_remove_all_mem_regs(dev);

    DPRINT("Nregions: %u\n", memory->nregions);
    for (i = 0; i < memory->nregions; i++) {
        /* The region is mapped by the helper; the fd can be closed after. */
        _vu_add_mem_reg(dev, &memory->regions[i], vmsg->fds[i]);
        close(vmsg->fds[i]);
    }

    if (dev->postcopy_listening) {
        /* Send the message back to qemu with the addresses filled in */
        vmsg->fd_num = 0;
        if (!vu_send_reply(dev, dev->sock, vmsg)) {
            vu_panic(dev, "failed to respond to set-mem-table for postcopy");
            return false;
        }

        /*
         * Wait for QEMU to confirm that it's registered the handler for the
         * faults.
         */
        if (!dev->read_msg(dev, dev->sock, vmsg) ||
            vmsg->size != sizeof(vmsg->payload.u64) ||
            vmsg->payload.u64 != 0) {
            vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
            return false;
        }

        /* OK, now we can go and register the memory and generate faults */
        (void)generate_faults(dev);
        return false;
    }

    /*
     * The mapping addresses changed, so any ring that was already set up
     * must be re-translated into the new memory table.
     */
    for (i = 0; i < dev->max_queues; i++) {
        if (dev->vq[i].vring.desc) {
            if (map_ring(dev, &dev->vq[i])) {
                vu_panic(dev, "remapping queue %d during setmemtable", i);
            }
        }
    }

    /* No reply is sent on the non-postcopy path. */
    return false;
}
11330df750e9SMarc-André Lureau 
11340df750e9SMarc-André Lureau static bool
vu_set_log_base_exec(VuDev * dev,VhostUserMsg * vmsg)11350df750e9SMarc-André Lureau vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
11360df750e9SMarc-André Lureau {
11370df750e9SMarc-André Lureau     int fd;
11380df750e9SMarc-André Lureau     uint64_t log_mmap_size, log_mmap_offset;
11390df750e9SMarc-André Lureau     void *rc;
11400df750e9SMarc-André Lureau 
11410df750e9SMarc-André Lureau     if (vmsg->fd_num != 1 ||
11420df750e9SMarc-André Lureau         vmsg->size != sizeof(vmsg->payload.log)) {
11430df750e9SMarc-André Lureau         vu_panic(dev, "Invalid log_base message");
11440df750e9SMarc-André Lureau         return true;
11450df750e9SMarc-André Lureau     }
11460df750e9SMarc-André Lureau 
11470df750e9SMarc-André Lureau     fd = vmsg->fds[0];
11480df750e9SMarc-André Lureau     log_mmap_offset = vmsg->payload.log.mmap_offset;
11490df750e9SMarc-André Lureau     log_mmap_size = vmsg->payload.log.mmap_size;
11500df750e9SMarc-André Lureau     DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
11510df750e9SMarc-André Lureau     DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);
11520df750e9SMarc-André Lureau 
11530df750e9SMarc-André Lureau     rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
11540df750e9SMarc-André Lureau               log_mmap_offset);
11550df750e9SMarc-André Lureau     close(fd);
11560df750e9SMarc-André Lureau     if (rc == MAP_FAILED) {
11570df750e9SMarc-André Lureau         perror("log mmap error");
11580df750e9SMarc-André Lureau     }
11590df750e9SMarc-André Lureau 
11600df750e9SMarc-André Lureau     if (dev->log_table) {
11610df750e9SMarc-André Lureau         munmap(dev->log_table, dev->log_size);
11620df750e9SMarc-André Lureau     }
11630df750e9SMarc-André Lureau     dev->log_table = rc;
11640df750e9SMarc-André Lureau     dev->log_size = log_mmap_size;
11650df750e9SMarc-André Lureau 
11660df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.u64);
11670df750e9SMarc-André Lureau     vmsg->fd_num = 0;
11680df750e9SMarc-André Lureau 
11690df750e9SMarc-André Lureau     return true;
11700df750e9SMarc-André Lureau }
11710df750e9SMarc-André Lureau 
11720df750e9SMarc-André Lureau static bool
vu_set_log_fd_exec(VuDev * dev,VhostUserMsg * vmsg)11730df750e9SMarc-André Lureau vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
11740df750e9SMarc-André Lureau {
11750df750e9SMarc-André Lureau     if (vmsg->fd_num != 1) {
11760df750e9SMarc-André Lureau         vu_panic(dev, "Invalid log_fd message");
11770df750e9SMarc-André Lureau         return false;
11780df750e9SMarc-André Lureau     }
11790df750e9SMarc-André Lureau 
11800df750e9SMarc-André Lureau     if (dev->log_call_fd != -1) {
11810df750e9SMarc-André Lureau         close(dev->log_call_fd);
11820df750e9SMarc-André Lureau     }
11830df750e9SMarc-André Lureau     dev->log_call_fd = vmsg->fds[0];
11840df750e9SMarc-André Lureau     DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
11850df750e9SMarc-André Lureau 
11860df750e9SMarc-André Lureau     return false;
11870df750e9SMarc-André Lureau }
11880df750e9SMarc-André Lureau 
11890df750e9SMarc-André Lureau static bool
vu_set_vring_num_exec(VuDev * dev,VhostUserMsg * vmsg)11900df750e9SMarc-André Lureau vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
11910df750e9SMarc-André Lureau {
11920df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
11930df750e9SMarc-André Lureau     unsigned int num = vmsg->payload.state.num;
11940df750e9SMarc-André Lureau 
11950df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
11960df750e9SMarc-André Lureau     DPRINT("State.num:   %u\n", num);
11970df750e9SMarc-André Lureau     dev->vq[index].vring.num = num;
11980df750e9SMarc-André Lureau 
11990df750e9SMarc-André Lureau     return false;
12000df750e9SMarc-André Lureau }
12010df750e9SMarc-André Lureau 
static bool
vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    /*
     * Handle VHOST_USER_SET_VRING_ADDR: record the front-end supplied
     * ring addresses and map the desc/avail/used rings into our address
     * space via map_ring().
     *
     * NOTE(review): vra->index is used to index dev->vq[] without a bounds
     * check against dev->max_queues here — confirm it is validated earlier
     * on this message path.
     */
    struct vhost_vring_addr addr = vmsg->payload.addr, *vra = &addr;
    unsigned int index = vra->index;
    VuVirtq *vq = &dev->vq[index];

    DPRINT("vhost_vring_addr:\n");
    DPRINT("    index:  %d\n", vra->index);
    DPRINT("    flags:  %d\n", vra->flags);
    DPRINT("    desc_user_addr:   0x%016" PRIx64 "\n", (uint64_t)vra->desc_user_addr);
    DPRINT("    used_user_addr:   0x%016" PRIx64 "\n", (uint64_t)vra->used_user_addr);
    DPRINT("    avail_user_addr:  0x%016" PRIx64 "\n", (uint64_t)vra->avail_user_addr);
    DPRINT("    log_guest_addr:   0x%016" PRIx64 "\n", (uint64_t)vra->log_guest_addr);

    /* Keep a copy of the raw addresses so the ring can be remapped later. */
    vq->vra = *vra;
    vq->vring.flags = vra->flags;
    vq->vring.log_guest_addr = vra->log_guest_addr;


    if (map_ring(dev, vq)) {
        vu_panic(dev, "Invalid vring_addr message");
        return false;
    }

    /* Ring fields are little-endian per the virtio spec. */
    vq->used_idx = le16toh(vq->vring.used->idx);

    if (vq->last_avail_idx != vq->used_idx) {
        /*
         * Only fast-forward last_avail_idx to used_idx when the device
         * implementation guarantees in-order processing for this queue.
         */
        bool resume = dev->iface->queue_is_processed_in_order &&
            dev->iface->queue_is_processed_in_order(dev, index);

        DPRINT("Last avail index != used index: %u != %u%s\n",
               vq->last_avail_idx, vq->used_idx,
               resume ? ", resuming" : "");

        if (resume) {
            vq->shadow_avail_idx = vq->last_avail_idx = vq->used_idx;
        }
    }

    return false;
}
12440df750e9SMarc-André Lureau 
12450df750e9SMarc-André Lureau static bool
vu_set_vring_base_exec(VuDev * dev,VhostUserMsg * vmsg)12460df750e9SMarc-André Lureau vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
12470df750e9SMarc-André Lureau {
12480df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
12490df750e9SMarc-André Lureau     unsigned int num = vmsg->payload.state.num;
12500df750e9SMarc-André Lureau 
12510df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
12520df750e9SMarc-André Lureau     DPRINT("State.num:   %u\n", num);
12530df750e9SMarc-André Lureau     dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
12540df750e9SMarc-André Lureau 
12550df750e9SMarc-André Lureau     return false;
12560df750e9SMarc-André Lureau }
12570df750e9SMarc-André Lureau 
12580df750e9SMarc-André Lureau static bool
vu_get_vring_base_exec(VuDev * dev,VhostUserMsg * vmsg)12590df750e9SMarc-André Lureau vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
12600df750e9SMarc-André Lureau {
12610df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
12620df750e9SMarc-André Lureau 
12630df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
12640df750e9SMarc-André Lureau     vmsg->payload.state.num = dev->vq[index].last_avail_idx;
12650df750e9SMarc-André Lureau     vmsg->size = sizeof(vmsg->payload.state);
12660df750e9SMarc-André Lureau 
12670df750e9SMarc-André Lureau     dev->vq[index].started = false;
12680df750e9SMarc-André Lureau     if (dev->iface->queue_set_started) {
12690df750e9SMarc-André Lureau         dev->iface->queue_set_started(dev, index, false);
12700df750e9SMarc-André Lureau     }
12710df750e9SMarc-André Lureau 
12720df750e9SMarc-André Lureau     if (dev->vq[index].call_fd != -1) {
12730df750e9SMarc-André Lureau         close(dev->vq[index].call_fd);
12740df750e9SMarc-André Lureau         dev->vq[index].call_fd = -1;
12750df750e9SMarc-André Lureau     }
12760df750e9SMarc-André Lureau     if (dev->vq[index].kick_fd != -1) {
12770df750e9SMarc-André Lureau         dev->remove_watch(dev, dev->vq[index].kick_fd);
12780df750e9SMarc-André Lureau         close(dev->vq[index].kick_fd);
12790df750e9SMarc-André Lureau         dev->vq[index].kick_fd = -1;
12800df750e9SMarc-André Lureau     }
12810df750e9SMarc-André Lureau 
12820df750e9SMarc-André Lureau     return true;
12830df750e9SMarc-André Lureau }
12840df750e9SMarc-André Lureau 
12850df750e9SMarc-André Lureau static bool
vu_check_queue_msg_file(VuDev * dev,VhostUserMsg * vmsg)12860df750e9SMarc-André Lureau vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
12870df750e9SMarc-André Lureau {
12880df750e9SMarc-André Lureau     int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
12890df750e9SMarc-André Lureau     bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
12900df750e9SMarc-André Lureau 
12910df750e9SMarc-André Lureau     if (index >= dev->max_queues) {
12920df750e9SMarc-André Lureau         vmsg_close_fds(vmsg);
12930df750e9SMarc-André Lureau         vu_panic(dev, "Invalid queue index: %u", index);
12940df750e9SMarc-André Lureau         return false;
12950df750e9SMarc-André Lureau     }
12960df750e9SMarc-André Lureau 
12970df750e9SMarc-André Lureau     if (nofd) {
12980df750e9SMarc-André Lureau         vmsg_close_fds(vmsg);
12990df750e9SMarc-André Lureau         return true;
13000df750e9SMarc-André Lureau     }
13010df750e9SMarc-André Lureau 
13020df750e9SMarc-André Lureau     if (vmsg->fd_num != 1) {
13030df750e9SMarc-André Lureau         vmsg_close_fds(vmsg);
13040df750e9SMarc-André Lureau         vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
13050df750e9SMarc-André Lureau         return false;
13060df750e9SMarc-André Lureau     }
13070df750e9SMarc-André Lureau 
13080df750e9SMarc-André Lureau     return true;
13090df750e9SMarc-André Lureau }
13100df750e9SMarc-André Lureau 
13110df750e9SMarc-André Lureau static int
inflight_desc_compare(const void * a,const void * b)13120df750e9SMarc-André Lureau inflight_desc_compare(const void *a, const void *b)
13130df750e9SMarc-André Lureau {
13140df750e9SMarc-André Lureau     VuVirtqInflightDesc *desc0 = (VuVirtqInflightDesc *)a,
13150df750e9SMarc-André Lureau                         *desc1 = (VuVirtqInflightDesc *)b;
13160df750e9SMarc-André Lureau 
13170df750e9SMarc-André Lureau     if (desc1->counter > desc0->counter &&
13180df750e9SMarc-André Lureau         (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
13190df750e9SMarc-André Lureau         return 1;
13200df750e9SMarc-André Lureau     }
13210df750e9SMarc-André Lureau 
13220df750e9SMarc-André Lureau     return -1;
13230df750e9SMarc-André Lureau }
13240df750e9SMarc-André Lureau 
static int
vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
{
    /*
     * Recover in-flight requests after a reconnect, using the shared
     * inflight buffer negotiated via VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD.
     * Builds vq->resubmit_list (sorted by submission order) and kicks the
     * queue so processing restarts. Returns 0 on success, -1 on error.
     */
    int i = 0;

    /* Nothing to do if the inflight-shmfd feature was not negotiated. */
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    if (unlikely(!vq->inflight->version)) {
        /* initialize the buffer */
        vq->inflight->version = INFLIGHT_VERSION;
        return 0;
    }

    vq->used_idx = le16toh(vq->vring.used->idx);
    vq->resubmit_num = 0;
    vq->resubmit_list = NULL;
    vq->counter = 0;

    if (unlikely(vq->inflight->used_idx != vq->used_idx)) {
        /*
         * We crashed between bumping the used ring and recording it in the
         * inflight buffer: the last batch head actually completed, so clear
         * its inflight flag before resyncing the recorded used index.
         */
        vq->inflight->desc[vq->inflight->last_batch_head].inflight = 0;

        /* Order the flag clear before the used_idx update. */
        barrier();

        vq->inflight->used_idx = vq->used_idx;
    }

    /* Count descriptors that were submitted but never completed. */
    for (i = 0; i < vq->inflight->desc_num; i++) {
        if (vq->inflight->desc[i].inflight == 1) {
            vq->inuse++;
        }
    }

    /* Rewind avail tracking so the inflight entries get fetched again. */
    vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;

    if (vq->inuse) {
        vq->resubmit_list = calloc(vq->inuse, sizeof(VuVirtqInflightDesc));
        if (!vq->resubmit_list) {
            return -1;
        }

        for (i = 0; i < vq->inflight->desc_num; i++) {
            if (vq->inflight->desc[i].inflight) {
                vq->resubmit_list[vq->resubmit_num].index = i;
                vq->resubmit_list[vq->resubmit_num].counter =
                                        vq->inflight->desc[i].counter;
                vq->resubmit_num++;
            }
        }

        /* Replay in original submission order (wrap-aware comparator). */
        if (vq->resubmit_num > 1) {
            qsort(vq->resubmit_list, vq->resubmit_num,
                  sizeof(VuVirtqInflightDesc), inflight_desc_compare);
        }
        vq->counter = vq->resubmit_list[0].counter + 1;
    }

    /* in case of I/O hang after reconnecting */
    if (eventfd_write(vq->kick_fd, 1)) {
        return -1;
    }

    return 0;
}
13940df750e9SMarc-André Lureau 
static bool
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    /*
     * Handle VHOST_USER_SET_VRING_KICK: install (or clear, with NOFD) the
     * eventfd the front-end kicks to notify us of new buffers, mark the
     * queue started, and arm the kick watch if a handler is set.
     */
    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
    bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    /* Validates index and fd count; closes fds on the NOFD/error paths. */
    if (!vu_check_queue_msg_file(dev, vmsg)) {
        return false;
    }

    /* Tear down any previous kick fd (watch first, then close). */
    if (dev->vq[index].kick_fd != -1) {
        dev->remove_watch(dev, dev->vq[index].kick_fd);
        close(dev->vq[index].kick_fd);
        dev->vq[index].kick_fd = -1;
    }

    dev->vq[index].kick_fd = nofd ? -1 : vmsg->fds[0];
    DPRINT("Got kick_fd: %d for vq: %d\n", dev->vq[index].kick_fd, index);

    dev->vq[index].started = true;
    if (dev->iface->queue_set_started) {
        dev->iface->queue_set_started(dev, index, true);
    }

    if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
        dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
                       vu_kick_cb, (void *)(long)index);

        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
               dev->vq[index].kick_fd, index);
    }

    /* Recover any requests left in flight across a reconnect. */
    if (vu_check_queue_inflights(dev, &dev->vq[index])) {
        vu_panic(dev, "Failed to check inflights for vq: %d\n", index);
    }

    return false;
}
14350df750e9SMarc-André Lureau 
vu_set_queue_handler(VuDev * dev,VuVirtq * vq,vu_queue_handler_cb handler)14360df750e9SMarc-André Lureau void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
14370df750e9SMarc-André Lureau                           vu_queue_handler_cb handler)
14380df750e9SMarc-André Lureau {
14390df750e9SMarc-André Lureau     int qidx = vq - dev->vq;
14400df750e9SMarc-André Lureau 
14410df750e9SMarc-André Lureau     vq->handler = handler;
14420df750e9SMarc-André Lureau     if (vq->kick_fd >= 0) {
14430df750e9SMarc-André Lureau         if (handler) {
14440df750e9SMarc-André Lureau             dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
14450df750e9SMarc-André Lureau                            vu_kick_cb, (void *)(long)qidx);
14460df750e9SMarc-André Lureau         } else {
14470df750e9SMarc-André Lureau             dev->remove_watch(dev, vq->kick_fd);
14480df750e9SMarc-André Lureau         }
14490df750e9SMarc-André Lureau     }
14500df750e9SMarc-André Lureau }
14510df750e9SMarc-André Lureau 
vu_set_queue_host_notifier(VuDev * dev,VuVirtq * vq,int fd,int size,int offset)14520df750e9SMarc-André Lureau bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
14530df750e9SMarc-André Lureau                                 int size, int offset)
14540df750e9SMarc-André Lureau {
14550df750e9SMarc-André Lureau     int qidx = vq - dev->vq;
14560df750e9SMarc-André Lureau     int fd_num = 0;
14570df750e9SMarc-André Lureau     VhostUserMsg vmsg = {
1458e608feedSMaxime Coquelin         .request = VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG,
14590df750e9SMarc-André Lureau         .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
14600df750e9SMarc-André Lureau         .size = sizeof(vmsg.payload.area),
14610df750e9SMarc-André Lureau         .payload.area = {
14620df750e9SMarc-André Lureau             .u64 = qidx & VHOST_USER_VRING_IDX_MASK,
14630df750e9SMarc-André Lureau             .size = size,
14640df750e9SMarc-André Lureau             .offset = offset,
14650df750e9SMarc-André Lureau         },
14660df750e9SMarc-André Lureau     };
14670df750e9SMarc-André Lureau 
14680df750e9SMarc-André Lureau     if (fd == -1) {
14690df750e9SMarc-André Lureau         vmsg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
14700df750e9SMarc-André Lureau     } else {
14710df750e9SMarc-André Lureau         vmsg.fds[fd_num++] = fd;
14720df750e9SMarc-André Lureau     }
14730df750e9SMarc-André Lureau 
14740df750e9SMarc-André Lureau     vmsg.fd_num = fd_num;
14750df750e9SMarc-André Lureau 
1476e608feedSMaxime Coquelin     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD)) {
14770df750e9SMarc-André Lureau         return false;
14780df750e9SMarc-André Lureau     }
14790df750e9SMarc-André Lureau 
1480f8ed3648SManos Pitsidianakis     pthread_mutex_lock(&dev->backend_mutex);
1481f8ed3648SManos Pitsidianakis     if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
1482f8ed3648SManos Pitsidianakis         pthread_mutex_unlock(&dev->backend_mutex);
14830df750e9SMarc-André Lureau         return false;
14840df750e9SMarc-André Lureau     }
14850df750e9SMarc-André Lureau 
1486f8ed3648SManos Pitsidianakis     /* Also unlocks the backend_mutex */
14870df750e9SMarc-André Lureau     return vu_process_message_reply(dev, &vmsg);
14880df750e9SMarc-André Lureau }
14890df750e9SMarc-André Lureau 
bool
vu_lookup_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN],
                        int *dmabuf_fd)
{
    /*
     * Ask the front-end to look up a shared object by UUID and hand back
     * its dma-buf fd in *dmabuf_fd. Requires the SHARED_OBJECT protocol
     * feature. Returns true only when a valid fd was received and the
     * reply payload signalled success (u64 == 0).
     */
    bool result = false;
    VhostUserMsg msg_reply;
    VhostUserMsg msg = {
        .request = VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP,
        .size = sizeof(msg.payload.object),
        .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };

    memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);

    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
        return false;
    }

    /* Serialize use of the backend channel with other senders. */
    pthread_mutex_lock(&dev->backend_mutex);
    if (!vu_message_write(dev, dev->backend_fd, &msg)) {
        goto out;
    }

    if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
        goto out;
    }

    if (msg_reply.request != msg.request) {
        DPRINT("Received unexpected msg type. Expected %d, received %d",
               msg.request, msg_reply.request);
        goto out;
    }

    if (msg_reply.fd_num != 1) {
        DPRINT("Received unexpected number of fds. Expected 1, received %d",
               msg_reply.fd_num);
        goto out;
    }

    /*
     * NOTE(review): the fd is stored in *dmabuf_fd even when the reply
     * payload signals failure (u64 != 0), in which case it is neither
     * closed here nor flagged to the caller — possible fd leak; confirm
     * against callers. Also, fd 0 would be rejected by the `> 0` check
     * despite being a technically valid descriptor.
     */
    *dmabuf_fd = msg_reply.fds[0];
    result = *dmabuf_fd > 0 && msg_reply.payload.u64 == 0;
out:
    pthread_mutex_unlock(&dev->backend_mutex);

    return result;
}
1536ce0f3b03SAlbert Esteve 
1537ce0f3b03SAlbert Esteve static bool
vu_send_message(VuDev * dev,VhostUserMsg * vmsg)1538ce0f3b03SAlbert Esteve vu_send_message(VuDev *dev, VhostUserMsg *vmsg)
1539ce0f3b03SAlbert Esteve {
1540ce0f3b03SAlbert Esteve     bool result = false;
1541ce0f3b03SAlbert Esteve     pthread_mutex_lock(&dev->backend_mutex);
1542ce0f3b03SAlbert Esteve     if (!vu_message_write(dev, dev->backend_fd, vmsg)) {
1543ce0f3b03SAlbert Esteve         goto out;
1544ce0f3b03SAlbert Esteve     }
1545ce0f3b03SAlbert Esteve 
1546ce0f3b03SAlbert Esteve     result = true;
1547ce0f3b03SAlbert Esteve out:
1548ce0f3b03SAlbert Esteve     pthread_mutex_unlock(&dev->backend_mutex);
1549ce0f3b03SAlbert Esteve 
1550ce0f3b03SAlbert Esteve     return result;
1551ce0f3b03SAlbert Esteve }
1552ce0f3b03SAlbert Esteve 
1553ce0f3b03SAlbert Esteve bool
vu_add_shared_object(VuDev * dev,unsigned char uuid[UUID_LEN])1554ce0f3b03SAlbert Esteve vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN])
1555ce0f3b03SAlbert Esteve {
1556ce0f3b03SAlbert Esteve     VhostUserMsg msg = {
1557ce0f3b03SAlbert Esteve         .request = VHOST_USER_BACKEND_SHARED_OBJECT_ADD,
1558ce0f3b03SAlbert Esteve         .size = sizeof(msg.payload.object),
1559ce0f3b03SAlbert Esteve         .flags = VHOST_USER_VERSION,
1560ce0f3b03SAlbert Esteve     };
1561ce0f3b03SAlbert Esteve 
1562ce0f3b03SAlbert Esteve     memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1563ce0f3b03SAlbert Esteve 
1564ce0f3b03SAlbert Esteve     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1565ce0f3b03SAlbert Esteve         return false;
1566ce0f3b03SAlbert Esteve     }
1567ce0f3b03SAlbert Esteve 
1568ce0f3b03SAlbert Esteve     return vu_send_message(dev, &msg);
1569ce0f3b03SAlbert Esteve }
1570ce0f3b03SAlbert Esteve 
1571ce0f3b03SAlbert Esteve bool
vu_rm_shared_object(VuDev * dev,unsigned char uuid[UUID_LEN])1572ce0f3b03SAlbert Esteve vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN])
1573ce0f3b03SAlbert Esteve {
1574ce0f3b03SAlbert Esteve     VhostUserMsg msg = {
1575ce0f3b03SAlbert Esteve         .request = VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE,
1576ce0f3b03SAlbert Esteve         .size = sizeof(msg.payload.object),
1577ce0f3b03SAlbert Esteve         .flags = VHOST_USER_VERSION,
1578ce0f3b03SAlbert Esteve     };
1579ce0f3b03SAlbert Esteve 
1580ce0f3b03SAlbert Esteve     memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1581ce0f3b03SAlbert Esteve 
1582ce0f3b03SAlbert Esteve     if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1583ce0f3b03SAlbert Esteve         return false;
1584ce0f3b03SAlbert Esteve     }
1585ce0f3b03SAlbert Esteve 
1586ce0f3b03SAlbert Esteve     return vu_send_message(dev, &msg);
1587ce0f3b03SAlbert Esteve }
1588ce0f3b03SAlbert Esteve 
15890df750e9SMarc-André Lureau static bool
vu_set_vring_call_exec(VuDev * dev,VhostUserMsg * vmsg)15900df750e9SMarc-André Lureau vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
15910df750e9SMarc-André Lureau {
15920df750e9SMarc-André Lureau     int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
15930df750e9SMarc-André Lureau     bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
15940df750e9SMarc-André Lureau 
15950df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
15960df750e9SMarc-André Lureau 
15970df750e9SMarc-André Lureau     if (!vu_check_queue_msg_file(dev, vmsg)) {
15980df750e9SMarc-André Lureau         return false;
15990df750e9SMarc-André Lureau     }
16000df750e9SMarc-André Lureau 
16010df750e9SMarc-André Lureau     if (dev->vq[index].call_fd != -1) {
16020df750e9SMarc-André Lureau         close(dev->vq[index].call_fd);
16030df750e9SMarc-André Lureau         dev->vq[index].call_fd = -1;
16040df750e9SMarc-André Lureau     }
16050df750e9SMarc-André Lureau 
16060df750e9SMarc-André Lureau     dev->vq[index].call_fd = nofd ? -1 : vmsg->fds[0];
16070df750e9SMarc-André Lureau 
16080df750e9SMarc-André Lureau     /* in case of I/O hang after reconnecting */
16090df750e9SMarc-André Lureau     if (dev->vq[index].call_fd != -1 && eventfd_write(vmsg->fds[0], 1)) {
16100df750e9SMarc-André Lureau         return -1;
16110df750e9SMarc-André Lureau     }
16120df750e9SMarc-André Lureau 
16130df750e9SMarc-André Lureau     DPRINT("Got call_fd: %d for vq: %d\n", dev->vq[index].call_fd, index);
16140df750e9SMarc-André Lureau 
16150df750e9SMarc-André Lureau     return false;
16160df750e9SMarc-André Lureau }
16170df750e9SMarc-André Lureau 
16180df750e9SMarc-André Lureau static bool
vu_set_vring_err_exec(VuDev * dev,VhostUserMsg * vmsg)16190df750e9SMarc-André Lureau vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
16200df750e9SMarc-André Lureau {
16210df750e9SMarc-André Lureau     int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
16220df750e9SMarc-André Lureau     bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
16230df750e9SMarc-André Lureau 
16240df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
16250df750e9SMarc-André Lureau 
16260df750e9SMarc-André Lureau     if (!vu_check_queue_msg_file(dev, vmsg)) {
16270df750e9SMarc-André Lureau         return false;
16280df750e9SMarc-André Lureau     }
16290df750e9SMarc-André Lureau 
16300df750e9SMarc-André Lureau     if (dev->vq[index].err_fd != -1) {
16310df750e9SMarc-André Lureau         close(dev->vq[index].err_fd);
16320df750e9SMarc-André Lureau         dev->vq[index].err_fd = -1;
16330df750e9SMarc-André Lureau     }
16340df750e9SMarc-André Lureau 
16350df750e9SMarc-André Lureau     dev->vq[index].err_fd = nofd ? -1 : vmsg->fds[0];
16360df750e9SMarc-André Lureau 
16370df750e9SMarc-André Lureau     return false;
16380df750e9SMarc-André Lureau }
16390df750e9SMarc-André Lureau 
16400df750e9SMarc-André Lureau static bool
vu_get_protocol_features_exec(VuDev * dev,VhostUserMsg * vmsg)16410df750e9SMarc-André Lureau vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
16420df750e9SMarc-André Lureau {
16430df750e9SMarc-André Lureau     /*
16440df750e9SMarc-André Lureau      * Note that we support, but intentionally do not set,
16450df750e9SMarc-André Lureau      * VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. This means that
16460df750e9SMarc-André Lureau      * a device implementation can return it in its callback
16470df750e9SMarc-André Lureau      * (get_protocol_features) if it wants to use this for
16480df750e9SMarc-André Lureau      * simulation, but it is otherwise not desirable (if even
1649f8ed3648SManos Pitsidianakis      * implemented by the frontend.)
16500df750e9SMarc-André Lureau      */
16510df750e9SMarc-André Lureau     uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
16520df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1653e608feedSMaxime Coquelin                         1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ |
16540df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
1655e608feedSMaxime Coquelin                         1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD |
16560df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |
16570df750e9SMarc-André Lureau                         1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS;
16580df750e9SMarc-André Lureau 
16590df750e9SMarc-André Lureau     if (have_userfault()) {
16600df750e9SMarc-André Lureau         features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
16610df750e9SMarc-André Lureau     }
16620df750e9SMarc-André Lureau 
16630df750e9SMarc-André Lureau     if (dev->iface->get_config && dev->iface->set_config) {
16640df750e9SMarc-André Lureau         features |= 1ULL << VHOST_USER_PROTOCOL_F_CONFIG;
16650df750e9SMarc-André Lureau     }
16660df750e9SMarc-André Lureau 
16670df750e9SMarc-André Lureau     if (dev->iface->get_protocol_features) {
16680df750e9SMarc-André Lureau         features |= dev->iface->get_protocol_features(dev);
16690df750e9SMarc-André Lureau     }
16700df750e9SMarc-André Lureau 
16710df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, features);
16720df750e9SMarc-André Lureau     return true;
16730df750e9SMarc-André Lureau }
16740df750e9SMarc-André Lureau 
16750df750e9SMarc-André Lureau static bool
vu_set_protocol_features_exec(VuDev * dev,VhostUserMsg * vmsg)16760df750e9SMarc-André Lureau vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
16770df750e9SMarc-André Lureau {
16780df750e9SMarc-André Lureau     uint64_t features = vmsg->payload.u64;
16790df750e9SMarc-André Lureau 
16800df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", features);
16810df750e9SMarc-André Lureau 
16820df750e9SMarc-André Lureau     dev->protocol_features = vmsg->payload.u64;
16830df750e9SMarc-André Lureau 
16840df750e9SMarc-André Lureau     if (vu_has_protocol_feature(dev,
16850df750e9SMarc-André Lureau                                 VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
1686e608feedSMaxime Coquelin         (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ) ||
16870df750e9SMarc-André Lureau          !vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
16880df750e9SMarc-André Lureau         /*
16890df750e9SMarc-André Lureau          * The use case for using messages for kick/call is simulation, to make
16900df750e9SMarc-André Lureau          * the kick and call synchronous. To actually get that behaviour, both
16910df750e9SMarc-André Lureau          * of the other features are required.
16920df750e9SMarc-André Lureau          * Theoretically, one could use only kick messages, or do them without
16930df750e9SMarc-André Lureau          * having F_REPLY_ACK, but too many (possibly pending) messages on the
1694f8ed3648SManos Pitsidianakis          * socket will eventually cause the frontend to hang, to avoid this in
16950df750e9SMarc-André Lureau          * scenarios where not desired enforce that the settings are in a way
16960df750e9SMarc-André Lureau          * that actually enables the simulation case.
16970df750e9SMarc-André Lureau          */
16980df750e9SMarc-André Lureau         vu_panic(dev,
1699e608feedSMaxime Coquelin                  "F_IN_BAND_NOTIFICATIONS requires F_BACKEND_REQ && F_REPLY_ACK");
17000df750e9SMarc-André Lureau         return false;
17010df750e9SMarc-André Lureau     }
17020df750e9SMarc-André Lureau 
17030df750e9SMarc-André Lureau     if (dev->iface->set_protocol_features) {
17040df750e9SMarc-André Lureau         dev->iface->set_protocol_features(dev, features);
17050df750e9SMarc-André Lureau     }
17060df750e9SMarc-André Lureau 
17070df750e9SMarc-André Lureau     return false;
17080df750e9SMarc-André Lureau }
17090df750e9SMarc-André Lureau 
17100df750e9SMarc-André Lureau static bool
vu_get_queue_num_exec(VuDev * dev,VhostUserMsg * vmsg)17110df750e9SMarc-André Lureau vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
17120df750e9SMarc-André Lureau {
17130df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, dev->max_queues);
17140df750e9SMarc-André Lureau     return true;
17150df750e9SMarc-André Lureau }
17160df750e9SMarc-André Lureau 
17170df750e9SMarc-André Lureau static bool
vu_set_vring_enable_exec(VuDev * dev,VhostUserMsg * vmsg)17180df750e9SMarc-André Lureau vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
17190df750e9SMarc-André Lureau {
17200df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
17210df750e9SMarc-André Lureau     unsigned int enable = vmsg->payload.state.num;
17220df750e9SMarc-André Lureau 
17230df750e9SMarc-André Lureau     DPRINT("State.index: %u\n", index);
17240df750e9SMarc-André Lureau     DPRINT("State.enable:   %u\n", enable);
17250df750e9SMarc-André Lureau 
17260df750e9SMarc-André Lureau     if (index >= dev->max_queues) {
17270df750e9SMarc-André Lureau         vu_panic(dev, "Invalid vring_enable index: %u", index);
17280df750e9SMarc-André Lureau         return false;
17290df750e9SMarc-André Lureau     }
17300df750e9SMarc-André Lureau 
17310df750e9SMarc-André Lureau     dev->vq[index].enable = enable;
17320df750e9SMarc-André Lureau     return false;
17330df750e9SMarc-André Lureau }
17340df750e9SMarc-André Lureau 
17350df750e9SMarc-André Lureau static bool
vu_set_backend_req_fd(VuDev * dev,VhostUserMsg * vmsg)1736f8ed3648SManos Pitsidianakis vu_set_backend_req_fd(VuDev *dev, VhostUserMsg *vmsg)
17370df750e9SMarc-André Lureau {
17380df750e9SMarc-André Lureau     if (vmsg->fd_num != 1) {
1739f8ed3648SManos Pitsidianakis         vu_panic(dev, "Invalid backend_req_fd message (%d fd's)", vmsg->fd_num);
17400df750e9SMarc-André Lureau         return false;
17410df750e9SMarc-André Lureau     }
17420df750e9SMarc-André Lureau 
1743f8ed3648SManos Pitsidianakis     if (dev->backend_fd != -1) {
1744f8ed3648SManos Pitsidianakis         close(dev->backend_fd);
17450df750e9SMarc-André Lureau     }
1746f8ed3648SManos Pitsidianakis     dev->backend_fd = vmsg->fds[0];
1747f8ed3648SManos Pitsidianakis     DPRINT("Got backend_fd: %d\n", vmsg->fds[0]);
17480df750e9SMarc-André Lureau 
17490df750e9SMarc-André Lureau     return false;
17500df750e9SMarc-André Lureau }
17510df750e9SMarc-André Lureau 
17520df750e9SMarc-André Lureau static bool
vu_get_config(VuDev * dev,VhostUserMsg * vmsg)17530df750e9SMarc-André Lureau vu_get_config(VuDev *dev, VhostUserMsg *vmsg)
17540df750e9SMarc-André Lureau {
17550df750e9SMarc-André Lureau     int ret = -1;
17560df750e9SMarc-André Lureau 
17570df750e9SMarc-André Lureau     if (dev->iface->get_config) {
17580df750e9SMarc-André Lureau         ret = dev->iface->get_config(dev, vmsg->payload.config.region,
17590df750e9SMarc-André Lureau                                      vmsg->payload.config.size);
17600df750e9SMarc-André Lureau     }
17610df750e9SMarc-André Lureau 
17620df750e9SMarc-André Lureau     if (ret) {
1763f8ed3648SManos Pitsidianakis         /* resize to zero to indicate an error to frontend */
17640df750e9SMarc-André Lureau         vmsg->size = 0;
17650df750e9SMarc-André Lureau     }
17660df750e9SMarc-André Lureau 
17670df750e9SMarc-André Lureau     return true;
17680df750e9SMarc-André Lureau }
17690df750e9SMarc-André Lureau 
17700df750e9SMarc-André Lureau static bool
vu_set_config(VuDev * dev,VhostUserMsg * vmsg)17710df750e9SMarc-André Lureau vu_set_config(VuDev *dev, VhostUserMsg *vmsg)
17720df750e9SMarc-André Lureau {
17730df750e9SMarc-André Lureau     int ret = -1;
17740df750e9SMarc-André Lureau 
17750df750e9SMarc-André Lureau     if (dev->iface->set_config) {
17760df750e9SMarc-André Lureau         ret = dev->iface->set_config(dev, vmsg->payload.config.region,
17770df750e9SMarc-André Lureau                                      vmsg->payload.config.offset,
17780df750e9SMarc-André Lureau                                      vmsg->payload.config.size,
17790df750e9SMarc-André Lureau                                      vmsg->payload.config.flags);
17800df750e9SMarc-André Lureau         if (ret) {
17810df750e9SMarc-André Lureau             vu_panic(dev, "Set virtio configuration space failed");
17820df750e9SMarc-André Lureau         }
17830df750e9SMarc-André Lureau     }
17840df750e9SMarc-André Lureau 
17850df750e9SMarc-André Lureau     return false;
17860df750e9SMarc-André Lureau }
17870df750e9SMarc-André Lureau 
/*
 * VHOST_USER_POSTCOPY_ADVISE: open a userfaultfd for postcopy migration
 * and hand it back to the frontend (QEMU) as the reply fd.
 *
 * Always returns true (a reply is sent); on failure the device is
 * panicked and fds[0] carries -1.
 */
static bool
vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg)
{
#ifdef UFFDIO_API
    struct uffdio_api api_struct;

    /* No glibc wrapper is assumed here; invoke the raw syscall. */
    dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    vmsg->size = 0;
#else
    /* Built without userfaultfd support: force the failure path below. */
    dev->postcopy_ufd = -1;
#endif

    if (dev->postcopy_ufd == -1) {
        vu_panic(dev, "Userfaultfd not available: %s", strerror(errno));
        goto out;
    }

#ifdef UFFDIO_API
    /* Handshake with the kernel; no optional features requested yet. */
    api_struct.api = UFFD_API;
    api_struct.features = 0;
    if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
        vu_panic(dev, "Failed UFFDIO_API: %s", strerror(errno));
        close(dev->postcopy_ufd);
        dev->postcopy_ufd = -1;
        goto out;
    }
    /* TODO: Stash feature flags somewhere */
#endif

out:
    /* Return a ufd to the QEMU */
    vmsg->fd_num = 1;
    vmsg->fds[0] = dev->postcopy_ufd;
    return true; /* = send a reply */
}
18230df750e9SMarc-André Lureau 
18240df750e9SMarc-André Lureau static bool
vu_set_postcopy_listen(VuDev * dev,VhostUserMsg * vmsg)18250df750e9SMarc-André Lureau vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg)
18260df750e9SMarc-André Lureau {
18270df750e9SMarc-André Lureau     if (dev->nregions) {
18280df750e9SMarc-André Lureau         vu_panic(dev, "Regions already registered at postcopy-listen");
18290df750e9SMarc-André Lureau         vmsg_set_reply_u64(vmsg, -1);
18300df750e9SMarc-André Lureau         return true;
18310df750e9SMarc-André Lureau     }
18320df750e9SMarc-André Lureau     dev->postcopy_listening = true;
18330df750e9SMarc-André Lureau 
18340df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, 0);
18350df750e9SMarc-André Lureau     return true;
18360df750e9SMarc-André Lureau }
18370df750e9SMarc-André Lureau 
18380df750e9SMarc-André Lureau static bool
vu_set_postcopy_end(VuDev * dev,VhostUserMsg * vmsg)18390df750e9SMarc-André Lureau vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
18400df750e9SMarc-André Lureau {
18410df750e9SMarc-André Lureau     DPRINT("%s: Entry\n", __func__);
18420df750e9SMarc-André Lureau     dev->postcopy_listening = false;
18430df750e9SMarc-André Lureau     if (dev->postcopy_ufd > 0) {
18440df750e9SMarc-André Lureau         close(dev->postcopy_ufd);
18450df750e9SMarc-André Lureau         dev->postcopy_ufd = -1;
18460df750e9SMarc-André Lureau         DPRINT("%s: Done close\n", __func__);
18470df750e9SMarc-André Lureau     }
18480df750e9SMarc-André Lureau 
18490df750e9SMarc-André Lureau     vmsg_set_reply_u64(vmsg, 0);
18500df750e9SMarc-André Lureau     DPRINT("%s: exit\n", __func__);
18510df750e9SMarc-André Lureau     return true;
18520df750e9SMarc-André Lureau }
18530df750e9SMarc-André Lureau 
18540df750e9SMarc-André Lureau static inline uint64_t
vu_inflight_queue_size(uint16_t queue_size)18550df750e9SMarc-André Lureau vu_inflight_queue_size(uint16_t queue_size)
18560df750e9SMarc-André Lureau {
18570df750e9SMarc-André Lureau     return ALIGN_UP(sizeof(VuDescStateSplit) * queue_size +
18580df750e9SMarc-André Lureau            sizeof(uint16_t), INFLIGHT_ALIGNMENT);
18590df750e9SMarc-André Lureau }
18600df750e9SMarc-André Lureau 
#ifdef MFD_ALLOW_SEALING
/*
 * Create a sealed memfd of @size bytes and map it shared read/write.
 *
 * @name:  debugging name passed to memfd_create()
 * @size:  length of the file and of the mapping
 * @flags: seals applied with F_ADD_SEALS (e.g. F_SEAL_GROW | ...)
 * @fd:    out parameter; receives the memfd on success, -1 on failure
 *
 * Returns the mapped address, or NULL on failure.  On failure the fd is
 * closed and *fd is reset to -1 so the caller is never left holding a
 * stale (already-closed) descriptor number.
 */
static void *
memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
{
    void *ptr;

    *fd = memfd_create(name, MFD_ALLOW_SEALING);
    if (*fd < 0) {
        *fd = -1;
        return NULL;
    }

    if (ftruncate(*fd, size) < 0) {
        goto err;
    }

    if (fcntl(*fd, F_ADD_SEALS, flags) < 0) {
        goto err;
    }

    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
    if (ptr == MAP_FAILED) {
        goto err;
    }

    return ptr;

err:
    close(*fd);
    *fd = -1; /* don't leak a dangling descriptor number to the caller */
    return NULL;
}
#endif
18940df750e9SMarc-André Lureau 
18950df750e9SMarc-André Lureau static bool
vu_get_inflight_fd(VuDev * dev,VhostUserMsg * vmsg)18960df750e9SMarc-André Lureau vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
18970df750e9SMarc-André Lureau {
18980df750e9SMarc-André Lureau     int fd = -1;
18990df750e9SMarc-André Lureau     void *addr = NULL;
19000df750e9SMarc-André Lureau     uint64_t mmap_size;
19010df750e9SMarc-André Lureau     uint16_t num_queues, queue_size;
19020df750e9SMarc-André Lureau 
19030df750e9SMarc-André Lureau     if (vmsg->size != sizeof(vmsg->payload.inflight)) {
19040df750e9SMarc-André Lureau         vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size);
19050df750e9SMarc-André Lureau         vmsg->payload.inflight.mmap_size = 0;
19060df750e9SMarc-André Lureau         return true;
19070df750e9SMarc-André Lureau     }
19080df750e9SMarc-André Lureau 
19090df750e9SMarc-André Lureau     num_queues = vmsg->payload.inflight.num_queues;
19100df750e9SMarc-André Lureau     queue_size = vmsg->payload.inflight.queue_size;
19110df750e9SMarc-André Lureau 
19120df750e9SMarc-André Lureau     DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
19130df750e9SMarc-André Lureau     DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
19140df750e9SMarc-André Lureau 
19150df750e9SMarc-André Lureau     mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
19160df750e9SMarc-André Lureau 
19170df750e9SMarc-André Lureau #ifdef MFD_ALLOW_SEALING
19180df750e9SMarc-André Lureau     addr = memfd_alloc("vhost-inflight", mmap_size,
19190df750e9SMarc-André Lureau                        F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
19200df750e9SMarc-André Lureau                        &fd);
19210df750e9SMarc-André Lureau #else
19220df750e9SMarc-André Lureau     vu_panic(dev, "Not implemented: memfd support is missing");
19230df750e9SMarc-André Lureau #endif
19240df750e9SMarc-André Lureau 
19250df750e9SMarc-André Lureau     if (!addr) {
19260df750e9SMarc-André Lureau         vu_panic(dev, "Failed to alloc vhost inflight area");
19270df750e9SMarc-André Lureau         vmsg->payload.inflight.mmap_size = 0;
19280df750e9SMarc-André Lureau         return true;
19290df750e9SMarc-André Lureau     }
19300df750e9SMarc-André Lureau 
19310df750e9SMarc-André Lureau     memset(addr, 0, mmap_size);
19320df750e9SMarc-André Lureau 
19330df750e9SMarc-André Lureau     dev->inflight_info.addr = addr;
19340df750e9SMarc-André Lureau     dev->inflight_info.size = vmsg->payload.inflight.mmap_size = mmap_size;
19350df750e9SMarc-André Lureau     dev->inflight_info.fd = vmsg->fds[0] = fd;
19360df750e9SMarc-André Lureau     vmsg->fd_num = 1;
19370df750e9SMarc-André Lureau     vmsg->payload.inflight.mmap_offset = 0;
19380df750e9SMarc-André Lureau 
19390df750e9SMarc-André Lureau     DPRINT("send inflight mmap_size: %"PRId64"\n",
19400df750e9SMarc-André Lureau            vmsg->payload.inflight.mmap_size);
19410df750e9SMarc-André Lureau     DPRINT("send inflight mmap offset: %"PRId64"\n",
19420df750e9SMarc-André Lureau            vmsg->payload.inflight.mmap_offset);
19430df750e9SMarc-André Lureau 
19440df750e9SMarc-André Lureau     return true;
19450df750e9SMarc-André Lureau }
19460df750e9SMarc-André Lureau 
19470df750e9SMarc-André Lureau static bool
vu_set_inflight_fd(VuDev * dev,VhostUserMsg * vmsg)19480df750e9SMarc-André Lureau vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
19490df750e9SMarc-André Lureau {
19500df750e9SMarc-André Lureau     int fd, i;
19510df750e9SMarc-André Lureau     uint64_t mmap_size, mmap_offset;
19520df750e9SMarc-André Lureau     uint16_t num_queues, queue_size;
19530df750e9SMarc-André Lureau     void *rc;
19540df750e9SMarc-André Lureau 
19550df750e9SMarc-André Lureau     if (vmsg->fd_num != 1 ||
19560df750e9SMarc-André Lureau         vmsg->size != sizeof(vmsg->payload.inflight)) {
19570df750e9SMarc-André Lureau         vu_panic(dev, "Invalid set_inflight_fd message size:%d fds:%d",
19580df750e9SMarc-André Lureau                  vmsg->size, vmsg->fd_num);
19590df750e9SMarc-André Lureau         return false;
19600df750e9SMarc-André Lureau     }
19610df750e9SMarc-André Lureau 
19620df750e9SMarc-André Lureau     fd = vmsg->fds[0];
19630df750e9SMarc-André Lureau     mmap_size = vmsg->payload.inflight.mmap_size;
19640df750e9SMarc-André Lureau     mmap_offset = vmsg->payload.inflight.mmap_offset;
19650df750e9SMarc-André Lureau     num_queues = vmsg->payload.inflight.num_queues;
19660df750e9SMarc-André Lureau     queue_size = vmsg->payload.inflight.queue_size;
19670df750e9SMarc-André Lureau 
19680df750e9SMarc-André Lureau     DPRINT("set_inflight_fd mmap_size: %"PRId64"\n", mmap_size);
19690df750e9SMarc-André Lureau     DPRINT("set_inflight_fd mmap_offset: %"PRId64"\n", mmap_offset);
19700df750e9SMarc-André Lureau     DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
19710df750e9SMarc-André Lureau     DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
19720df750e9SMarc-André Lureau 
19730df750e9SMarc-André Lureau     rc = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
19740df750e9SMarc-André Lureau               fd, mmap_offset);
19750df750e9SMarc-André Lureau 
19760df750e9SMarc-André Lureau     if (rc == MAP_FAILED) {
19770df750e9SMarc-André Lureau         vu_panic(dev, "set_inflight_fd mmap error: %s", strerror(errno));
19780df750e9SMarc-André Lureau         return false;
19790df750e9SMarc-André Lureau     }
19800df750e9SMarc-André Lureau 
19810df750e9SMarc-André Lureau     if (dev->inflight_info.fd) {
19820df750e9SMarc-André Lureau         close(dev->inflight_info.fd);
19830df750e9SMarc-André Lureau     }
19840df750e9SMarc-André Lureau 
19850df750e9SMarc-André Lureau     if (dev->inflight_info.addr) {
19860df750e9SMarc-André Lureau         munmap(dev->inflight_info.addr, dev->inflight_info.size);
19870df750e9SMarc-André Lureau     }
19880df750e9SMarc-André Lureau 
19890df750e9SMarc-André Lureau     dev->inflight_info.fd = fd;
19900df750e9SMarc-André Lureau     dev->inflight_info.addr = rc;
19910df750e9SMarc-André Lureau     dev->inflight_info.size = mmap_size;
19920df750e9SMarc-André Lureau 
19930df750e9SMarc-André Lureau     for (i = 0; i < num_queues; i++) {
19940df750e9SMarc-André Lureau         dev->vq[i].inflight = (VuVirtqInflight *)rc;
19950df750e9SMarc-André Lureau         dev->vq[i].inflight->desc_num = queue_size;
19960df750e9SMarc-André Lureau         rc = (void *)((char *)rc + vu_inflight_queue_size(queue_size));
19970df750e9SMarc-André Lureau     }
19980df750e9SMarc-André Lureau 
19990df750e9SMarc-André Lureau     return false;
20000df750e9SMarc-André Lureau }
20010df750e9SMarc-André Lureau 
20020df750e9SMarc-André Lureau static bool
vu_handle_vring_kick(VuDev * dev,VhostUserMsg * vmsg)20030df750e9SMarc-André Lureau vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg)
20040df750e9SMarc-André Lureau {
20050df750e9SMarc-André Lureau     unsigned int index = vmsg->payload.state.index;
20060df750e9SMarc-André Lureau 
20070df750e9SMarc-André Lureau     if (index >= dev->max_queues) {
20080df750e9SMarc-André Lureau         vu_panic(dev, "Invalid queue index: %u", index);
20090df750e9SMarc-André Lureau         return false;
20100df750e9SMarc-André Lureau     }
20110df750e9SMarc-André Lureau 
20120df750e9SMarc-André Lureau     DPRINT("Got kick message: handler:%p idx:%u\n",
20130df750e9SMarc-André Lureau            dev->vq[index].handler, index);
20140df750e9SMarc-André Lureau 
20150df750e9SMarc-André Lureau     if (!dev->vq[index].started) {
20160df750e9SMarc-André Lureau         dev->vq[index].started = true;
20170df750e9SMarc-André Lureau 
20180df750e9SMarc-André Lureau         if (dev->iface->queue_set_started) {
20190df750e9SMarc-André Lureau             dev->iface->queue_set_started(dev, index, true);
20200df750e9SMarc-André Lureau         }
20210df750e9SMarc-André Lureau     }
20220df750e9SMarc-André Lureau 
20230df750e9SMarc-André Lureau     if (dev->vq[index].handler) {
20240df750e9SMarc-André Lureau         dev->vq[index].handler(dev, index);
20250df750e9SMarc-André Lureau     }
20260df750e9SMarc-André Lureau 
20270df750e9SMarc-André Lureau     return false;
20280df750e9SMarc-André Lureau }
20290df750e9SMarc-André Lureau 
vu_handle_get_max_memslots(VuDev * dev,VhostUserMsg * vmsg)20300df750e9SMarc-André Lureau static bool vu_handle_get_max_memslots(VuDev *dev, VhostUserMsg *vmsg)
20310df750e9SMarc-André Lureau {
203269a5daecSKevin Wolf     vmsg_set_reply_u64(vmsg, VHOST_USER_MAX_RAM_SLOTS);
20330df750e9SMarc-André Lureau 
20340df750e9SMarc-André Lureau     DPRINT("u64: 0x%016"PRIx64"\n", (uint64_t) VHOST_USER_MAX_RAM_SLOTS);
20350df750e9SMarc-André Lureau 
203669a5daecSKevin Wolf     return true;
20370df750e9SMarc-André Lureau }
20380df750e9SMarc-André Lureau 
/*
 * Central dispatcher for one incoming vhost-user message.
 *
 * The device implementation's process_msg callback (if any) gets first
 * refusal; otherwise the request code is routed to the matching handler.
 *
 * Returns true when a reply to @vmsg must be sent, false otherwise.
 */
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
    int do_reply = 0;

    /* Print out generic part of the request. */
    DPRINT("================ Vhost user message ================\n");
    DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
           vmsg->request);
    DPRINT("Flags:   0x%x\n", vmsg->flags);
    DPRINT("Size:    %u\n", vmsg->size);

    if (vmsg->fd_num) {
        int i;
        DPRINT("Fds:");
        for (i = 0; i < vmsg->fd_num; i++) {
            DPRINT(" %d", vmsg->fds[i]);
        }
        DPRINT("\n");
    }

    /* Give the device implementation a chance to intercept the message. */
    if (dev->iface->process_msg &&
        dev->iface->process_msg(dev, vmsg, &do_reply)) {
        return do_reply;
    }

    /* Each handler returns whether a reply is required. */
    switch (vmsg->request) {
    case VHOST_USER_GET_FEATURES:
        return vu_get_features_exec(dev, vmsg);
    case VHOST_USER_SET_FEATURES:
        return vu_set_features_exec(dev, vmsg);
    case VHOST_USER_GET_PROTOCOL_FEATURES:
        return vu_get_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_PROTOCOL_FEATURES:
        return vu_set_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_OWNER:
        return vu_set_owner_exec(dev, vmsg);
    case VHOST_USER_RESET_OWNER:
        return vu_reset_device_exec(dev, vmsg);
    case VHOST_USER_SET_MEM_TABLE:
        return vu_set_mem_table_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_BASE:
        return vu_set_log_base_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_FD:
        return vu_set_log_fd_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_NUM:
        return vu_set_vring_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ADDR:
        return vu_set_vring_addr_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_BASE:
        return vu_set_vring_base_exec(dev, vmsg);
    case VHOST_USER_GET_VRING_BASE:
        return vu_get_vring_base_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_KICK:
        return vu_set_vring_kick_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_CALL:
        return vu_set_vring_call_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ERR:
        return vu_set_vring_err_exec(dev, vmsg);
    case VHOST_USER_GET_QUEUE_NUM:
        return vu_get_queue_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ENABLE:
        return vu_set_vring_enable_exec(dev, vmsg);
    case VHOST_USER_SET_BACKEND_REQ_FD:
        return vu_set_backend_req_fd(dev, vmsg);
    case VHOST_USER_GET_CONFIG:
        return vu_get_config(dev, vmsg);
    case VHOST_USER_SET_CONFIG:
        return vu_set_config(dev, vmsg);
    case VHOST_USER_NONE:
        /* if you need processing before exit, override iface->process_msg */
        exit(0);
    case VHOST_USER_POSTCOPY_ADVISE:
        return vu_set_postcopy_advise(dev, vmsg);
    case VHOST_USER_POSTCOPY_LISTEN:
        return vu_set_postcopy_listen(dev, vmsg);
    case VHOST_USER_POSTCOPY_END:
        return vu_set_postcopy_end(dev, vmsg);
    case VHOST_USER_GET_INFLIGHT_FD:
        return vu_get_inflight_fd(dev, vmsg);
    case VHOST_USER_SET_INFLIGHT_FD:
        return vu_set_inflight_fd(dev, vmsg);
    case VHOST_USER_VRING_KICK:
        return vu_handle_vring_kick(dev, vmsg);
    case VHOST_USER_GET_MAX_MEM_SLOTS:
        return vu_handle_get_max_memslots(dev, vmsg);
    case VHOST_USER_ADD_MEM_REG:
        return vu_add_mem_reg(dev, vmsg);
    case VHOST_USER_REM_MEM_REG:
        return vu_rem_mem_reg(dev, vmsg);
    case VHOST_USER_GET_SHARED_OBJECT:
        return vu_get_shared_object(dev, vmsg);
    default:
        /* Unknown request: drop its fds so they do not leak, then panic. */
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Unhandled request: %d", vmsg->request);
    }

    return false;
}
21380df750e9SMarc-André Lureau 
21390df750e9SMarc-André Lureau bool
vu_dispatch(VuDev * dev)21400df750e9SMarc-André Lureau vu_dispatch(VuDev *dev)
21410df750e9SMarc-André Lureau {
21420df750e9SMarc-André Lureau     VhostUserMsg vmsg = { 0, };
21430df750e9SMarc-André Lureau     int reply_requested;
21440df750e9SMarc-André Lureau     bool need_reply, success = false;
21450df750e9SMarc-André Lureau 
21460df750e9SMarc-André Lureau     if (!dev->read_msg(dev, dev->sock, &vmsg)) {
21470df750e9SMarc-André Lureau         goto end;
21480df750e9SMarc-André Lureau     }
21490df750e9SMarc-André Lureau 
21500df750e9SMarc-André Lureau     need_reply = vmsg.flags & VHOST_USER_NEED_REPLY_MASK;
21510df750e9SMarc-André Lureau 
21520df750e9SMarc-André Lureau     reply_requested = vu_process_message(dev, &vmsg);
21530df750e9SMarc-André Lureau     if (!reply_requested && need_reply) {
21540df750e9SMarc-André Lureau         vmsg_set_reply_u64(&vmsg, 0);
21550df750e9SMarc-André Lureau         reply_requested = 1;
21560df750e9SMarc-André Lureau     }
21570df750e9SMarc-André Lureau 
21580df750e9SMarc-André Lureau     if (!reply_requested) {
21590df750e9SMarc-André Lureau         success = true;
21600df750e9SMarc-André Lureau         goto end;
21610df750e9SMarc-André Lureau     }
21620df750e9SMarc-André Lureau 
21630df750e9SMarc-André Lureau     if (!vu_send_reply(dev, dev->sock, &vmsg)) {
21640df750e9SMarc-André Lureau         goto end;
21650df750e9SMarc-André Lureau     }
21660df750e9SMarc-André Lureau 
21670df750e9SMarc-André Lureau     success = true;
21680df750e9SMarc-André Lureau 
21690df750e9SMarc-André Lureau end:
21700df750e9SMarc-André Lureau     free(vmsg.data);
21710df750e9SMarc-André Lureau     return success;
21720df750e9SMarc-André Lureau }
21730df750e9SMarc-André Lureau 
/*
 * Release every resource owned by @dev: guest memory mappings, the
 * per-queue call/kick/err eventfds and resubmit lists, the inflight
 * shared region, the log, the backend channel and the vhost-user socket
 * itself.  Leaves fds set to -1 and pointers NULL where the fields
 * remain reachable.
 */
void
vu_deinit(VuDev *dev)
{
    unsigned int i;

    /* Unmap and drop all guest memory regions first. */
    vu_remove_all_mem_regs(dev);

    for (i = 0; i < dev->max_queues; i++) {
        VuVirtq *vq = &dev->vq[i];

        if (vq->call_fd != -1) {
            close(vq->call_fd);
            vq->call_fd = -1;
        }

        if (vq->kick_fd != -1) {
            /* The kick fd is registered with the event loop; unhook it
             * before closing. */
            dev->remove_watch(dev, vq->kick_fd);
            close(vq->kick_fd);
            vq->kick_fd = -1;
        }

        if (vq->err_fd != -1) {
            close(vq->err_fd);
            vq->err_fd = -1;
        }

        if (vq->resubmit_list) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        /* Points into inflight_info.addr, unmapped below; just forget it. */
        vq->inflight = NULL;
    }

    if (dev->inflight_info.addr) {
        munmap(dev->inflight_info.addr, dev->inflight_info.size);
        dev->inflight_info.addr = NULL;
    }

    /* fd stays 0 when no inflight fd was ever received (vu_init memsets
     * the device to zero), so only strictly positive fds are ours. */
    if (dev->inflight_info.fd > 0) {
        close(dev->inflight_info.fd);
        dev->inflight_info.fd = -1;
    }

    vu_close_log(dev);
    if (dev->backend_fd != -1) {
        close(dev->backend_fd);
        dev->backend_fd = -1;
    }
    pthread_mutex_destroy(&dev->backend_mutex);

    if (dev->sock != -1) {
        close(dev->sock);
    }

    free(dev->vq);
    dev->vq = NULL;
    free(dev->regions);
    dev->regions = NULL;
}
22340df750e9SMarc-André Lureau 
22350df750e9SMarc-André Lureau bool
vu_init(VuDev * dev,uint16_t max_queues,int socket,vu_panic_cb panic,vu_read_msg_cb read_msg,vu_set_watch_cb set_watch,vu_remove_watch_cb remove_watch,const VuDevIface * iface)22360df750e9SMarc-André Lureau vu_init(VuDev *dev,
22370df750e9SMarc-André Lureau         uint16_t max_queues,
22380df750e9SMarc-André Lureau         int socket,
22390df750e9SMarc-André Lureau         vu_panic_cb panic,
22400df750e9SMarc-André Lureau         vu_read_msg_cb read_msg,
22410df750e9SMarc-André Lureau         vu_set_watch_cb set_watch,
22420df750e9SMarc-André Lureau         vu_remove_watch_cb remove_watch,
22430df750e9SMarc-André Lureau         const VuDevIface *iface)
22440df750e9SMarc-André Lureau {
22450df750e9SMarc-André Lureau     uint16_t i;
22460df750e9SMarc-André Lureau 
22470df750e9SMarc-André Lureau     assert(max_queues > 0);
22480df750e9SMarc-André Lureau     assert(socket >= 0);
22490df750e9SMarc-André Lureau     assert(set_watch);
22500df750e9SMarc-André Lureau     assert(remove_watch);
22510df750e9SMarc-André Lureau     assert(iface);
22520df750e9SMarc-André Lureau     assert(panic);
22530df750e9SMarc-André Lureau 
22540df750e9SMarc-André Lureau     memset(dev, 0, sizeof(*dev));
22550df750e9SMarc-André Lureau 
22560df750e9SMarc-André Lureau     dev->sock = socket;
22570df750e9SMarc-André Lureau     dev->panic = panic;
22580df750e9SMarc-André Lureau     dev->read_msg = read_msg ? read_msg : vu_message_read_default;
22590df750e9SMarc-André Lureau     dev->set_watch = set_watch;
22600df750e9SMarc-André Lureau     dev->remove_watch = remove_watch;
22610df750e9SMarc-André Lureau     dev->iface = iface;
22620df750e9SMarc-André Lureau     dev->log_call_fd = -1;
2263f8ed3648SManos Pitsidianakis     pthread_mutex_init(&dev->backend_mutex, NULL);
2264f8ed3648SManos Pitsidianakis     dev->backend_fd = -1;
22650df750e9SMarc-André Lureau     dev->max_queues = max_queues;
22660df750e9SMarc-André Lureau 
2267d884e272SDavid Hildenbrand     dev->regions = malloc(VHOST_USER_MAX_RAM_SLOTS * sizeof(dev->regions[0]));
2268d884e272SDavid Hildenbrand     if (!dev->regions) {
2269d884e272SDavid Hildenbrand         DPRINT("%s: failed to malloc mem regions\n", __func__);
2270d884e272SDavid Hildenbrand         return false;
2271d884e272SDavid Hildenbrand     }
2272d884e272SDavid Hildenbrand 
22730df750e9SMarc-André Lureau     dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
22740df750e9SMarc-André Lureau     if (!dev->vq) {
22750df750e9SMarc-André Lureau         DPRINT("%s: failed to malloc virtqueues\n", __func__);
2276d884e272SDavid Hildenbrand         free(dev->regions);
2277d884e272SDavid Hildenbrand         dev->regions = NULL;
22780df750e9SMarc-André Lureau         return false;
22790df750e9SMarc-André Lureau     }
22800df750e9SMarc-André Lureau 
22810df750e9SMarc-André Lureau     for (i = 0; i < max_queues; i++) {
22820df750e9SMarc-André Lureau         dev->vq[i] = (VuVirtq) {
22830df750e9SMarc-André Lureau             .call_fd = -1, .kick_fd = -1, .err_fd = -1,
22840df750e9SMarc-André Lureau             .notification = true,
22850df750e9SMarc-André Lureau         };
22860df750e9SMarc-André Lureau     }
22870df750e9SMarc-André Lureau 
22880df750e9SMarc-André Lureau     return true;
22890df750e9SMarc-André Lureau }
22900df750e9SMarc-André Lureau 
22910df750e9SMarc-André Lureau VuVirtq *
vu_get_queue(VuDev * dev,int qidx)22920df750e9SMarc-André Lureau vu_get_queue(VuDev *dev, int qidx)
22930df750e9SMarc-André Lureau {
22940df750e9SMarc-André Lureau     assert(qidx < dev->max_queues);
22950df750e9SMarc-André Lureau     return &dev->vq[qidx];
22960df750e9SMarc-André Lureau }
22970df750e9SMarc-André Lureau 
/*
 * Whether @vq is currently enabled.  @dev is unused and kept for API
 * symmetry with the other queue accessors.
 */
bool
vu_queue_enabled(VuDev *dev, VuVirtq *vq)
{
    return vq->enable;
}
23030df750e9SMarc-André Lureau 
/*
 * Whether @vq has been started.  @dev is unused and kept for API
 * symmetry with the other queue accessors.
 */
bool
vu_queue_started(const VuDev *dev, const VuVirtq *vq)
{
    return vq->started;
}
23090df750e9SMarc-André Lureau 
/* Read the avail ring's flags field (little-endian, guest-shared memory). */
static inline uint16_t
vring_avail_flags(VuVirtq *vq)
{
    return le16toh(vq->vring.avail->flags);
}
23150df750e9SMarc-André Lureau 
/*
 * Read avail->idx from guest-shared memory and cache the value in
 * vq->shadow_avail_idx, so later emptiness checks can avoid touching
 * shared memory again.
 */
static inline uint16_t
vring_avail_idx(VuVirtq *vq)
{
    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);

    return vq->shadow_avail_idx;
}
23230df750e9SMarc-André Lureau 
/* Read avail->ring[i]: the i-th available descriptor head (little-endian). */
static inline uint16_t
vring_avail_ring(VuVirtq *vq, int i)
{
    return le16toh(vq->vring.avail->ring[i]);
}
23290df750e9SMarc-André Lureau 
/*
 * Read the driver's used_event value.  With VIRTIO_RING_F_EVENT_IDX it
 * is stored right after the avail ring, i.e. at ring[vring.num].
 */
static inline uint16_t
vring_get_used_event(VuVirtq *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}
23350df750e9SMarc-André Lureau 
/*
 * Number of new descriptor heads the guest has made available since
 * @idx (unsigned 16-bit wraparound is intentional).  Returns -1, after
 * panicking, if the index moved by more than the ring size — only
 * possible with a misbehaving guest.
 */
static int
virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        vu_panic(dev, "Guest moved used index from %u to %u",
                 idx, vq->shadow_avail_idx);
        return -1;
    }
    if (num_heads) {
        /* On success, callers read a descriptor at vq->last_avail_idx.
         * Make sure descriptor read does not bypass avail index read. */
        smp_rmb();
    }

    return num_heads;
}
23550df750e9SMarc-André Lureau 
23560df750e9SMarc-André Lureau static bool
virtqueue_get_head(VuDev * dev,VuVirtq * vq,unsigned int idx,unsigned int * head)23570df750e9SMarc-André Lureau virtqueue_get_head(VuDev *dev, VuVirtq *vq,
23580df750e9SMarc-André Lureau                    unsigned int idx, unsigned int *head)
23590df750e9SMarc-André Lureau {
23600df750e9SMarc-André Lureau     /* Grab the next descriptor number they're advertising, and increment
23610df750e9SMarc-André Lureau      * the index we've seen. */
23620df750e9SMarc-André Lureau     *head = vring_avail_ring(vq, idx % vq->vring.num);
23630df750e9SMarc-André Lureau 
23640df750e9SMarc-André Lureau     /* If their number is silly, that's a fatal mistake. */
23650df750e9SMarc-André Lureau     if (*head >= vq->vring.num) {
23660df750e9SMarc-André Lureau         vu_panic(dev, "Guest says index %u is available", *head);
23670df750e9SMarc-André Lureau         return false;
23680df750e9SMarc-André Lureau     }
23690df750e9SMarc-André Lureau 
23700df750e9SMarc-André Lureau     return true;
23710df750e9SMarc-André Lureau }
23720df750e9SMarc-André Lureau 
23730df750e9SMarc-André Lureau static int
virtqueue_read_indirect_desc(VuDev * dev,struct vring_desc * desc,uint64_t addr,size_t len)23740df750e9SMarc-André Lureau virtqueue_read_indirect_desc(VuDev *dev, struct vring_desc *desc,
23750df750e9SMarc-André Lureau                              uint64_t addr, size_t len)
23760df750e9SMarc-André Lureau {
23770df750e9SMarc-André Lureau     struct vring_desc *ori_desc;
23780df750e9SMarc-André Lureau     uint64_t read_len;
23790df750e9SMarc-André Lureau 
23800df750e9SMarc-André Lureau     if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
23810df750e9SMarc-André Lureau         return -1;
23820df750e9SMarc-André Lureau     }
23830df750e9SMarc-André Lureau 
23840df750e9SMarc-André Lureau     if (len == 0) {
23850df750e9SMarc-André Lureau         return -1;
23860df750e9SMarc-André Lureau     }
23870df750e9SMarc-André Lureau 
23880df750e9SMarc-André Lureau     while (len) {
23890df750e9SMarc-André Lureau         read_len = len;
23900df750e9SMarc-André Lureau         ori_desc = vu_gpa_to_va(dev, &read_len, addr);
23910df750e9SMarc-André Lureau         if (!ori_desc) {
23920df750e9SMarc-André Lureau             return -1;
23930df750e9SMarc-André Lureau         }
23940df750e9SMarc-André Lureau 
23950df750e9SMarc-André Lureau         memcpy(desc, ori_desc, read_len);
23960df750e9SMarc-André Lureau         len -= read_len;
23970df750e9SMarc-André Lureau         addr += read_len;
23980df750e9SMarc-André Lureau         desc += read_len;
23990df750e9SMarc-André Lureau     }
24000df750e9SMarc-André Lureau 
24010df750e9SMarc-André Lureau     return 0;
24020df750e9SMarc-André Lureau }
24030df750e9SMarc-André Lureau 
/* Return values of virtqueue_read_next_desc(). */
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
24090df750e9SMarc-André Lureau 
/*
 * Follow the VRING_DESC_F_NEXT link of desc[i].  On MORE, *next holds
 * the index of the next descriptor in the chain; DONE ends the chain;
 * ERROR (after a panic) means the next index was out of range (>= @max).
 */
static int
virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
                         int i, unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = le16toh(desc[i].next);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        vu_panic(dev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    return VIRTQUEUE_READ_DESC_MORE;
}
24310df750e9SMarc-André Lureau 
/*
 * Walk the available ring from vq->last_avail_idx and report how many
 * bytes of device-writable (*in_bytes) and device-readable (*out_bytes)
 * buffer space the guest has queued, following both direct and indirect
 * descriptor chains.  Stops early once both @max_in_bytes and
 * @max_out_bytes are satisfied.  On any malformed ring it panics and
 * reports 0/0.  Either output pointer may be NULL.
 */
void
vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
                         unsigned int *out_bytes,
                         unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    if (!vu_is_vq_usable(dev, vq)) {
        goto done;
    }

    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
        unsigned int max, desc_len, num_bufs, indirect = 0;
        uint64_t desc_addr, read_len;
        struct vring_desc *desc;
        struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
            goto err;
        }
        desc = vq->vring.desc;

        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
            /* Indirect tables must hold a whole number of descriptors. */
            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
                vu_panic(dev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            desc_addr = le64toh(desc[i].addr);
            desc_len = le32toh(desc[i].len);
            max = desc_len / sizeof(struct vring_desc);
            read_len = desc_len;
            desc = vu_gpa_to_va(dev, &read_len, desc_addr);
            if (unlikely(desc && read_len != desc_len)) {
                /* Failed to use zero copy: the table crosses a region
                 * boundary, so fall back to copying it into desc_buf. */
                desc = NULL;
                if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                                  desc_addr,
                                                  desc_len)) {
                    desc = desc_buf;
                }
            }
            if (!desc) {
                vu_panic(dev, "Invalid indirect buffer table");
                goto err;
            }
            /* Restart counting inside the indirect table. */
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
                in_total += le32toh(desc[i].len);
            } else {
                out_total += le32toh(desc[i].len);
            }
            /* Caller has all the bytes it asked about; stop early. */
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        /* An indirect chain counts as a single buffer toward the loop
         * check on the outer ring. */
        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
    if (rc < 0) {
        goto err;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
25410df750e9SMarc-André Lureau 
25420df750e9SMarc-André Lureau bool
vu_queue_avail_bytes(VuDev * dev,VuVirtq * vq,unsigned int in_bytes,unsigned int out_bytes)25430df750e9SMarc-André Lureau vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
25440df750e9SMarc-André Lureau                      unsigned int out_bytes)
25450df750e9SMarc-André Lureau {
25460df750e9SMarc-André Lureau     unsigned int in_total, out_total;
25470df750e9SMarc-André Lureau 
25480df750e9SMarc-André Lureau     vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
25490df750e9SMarc-André Lureau                              in_bytes, out_bytes);
25500df750e9SMarc-André Lureau 
25510df750e9SMarc-André Lureau     return in_bytes <= in_total && out_bytes <= out_total;
25520df750e9SMarc-André Lureau }
25530df750e9SMarc-André Lureau 
25540df750e9SMarc-André Lureau /* Fetch avail_idx from VQ memory only when we really need to know if
25550df750e9SMarc-André Lureau  * guest has added some buffers. */
25560df750e9SMarc-André Lureau bool
vu_queue_empty(VuDev * dev,VuVirtq * vq)25570df750e9SMarc-André Lureau vu_queue_empty(VuDev *dev, VuVirtq *vq)
25580df750e9SMarc-André Lureau {
25592a290227SDavid Hildenbrand     if (!vu_is_vq_usable(dev, vq)) {
25600df750e9SMarc-André Lureau         return true;
25610df750e9SMarc-André Lureau     }
25620df750e9SMarc-André Lureau 
25630df750e9SMarc-André Lureau     if (vq->shadow_avail_idx != vq->last_avail_idx) {
25640df750e9SMarc-André Lureau         return false;
25650df750e9SMarc-André Lureau     }
25660df750e9SMarc-André Lureau 
25670df750e9SMarc-André Lureau     return vring_avail_idx(vq) == vq->last_avail_idx;
25680df750e9SMarc-André Lureau }
25690df750e9SMarc-André Lureau 
/*
 * Decide whether the driver needs a used-buffer notification for @vq.
 * Honors VIRTIO_F_NOTIFY_ON_EMPTY (always notify an empty queue) and,
 * with VIRTIO_RING_F_EVENT_IDX, the driver's used_event threshold;
 * without EVENT_IDX, falls back to the avail ring's NO_INTERRUPT flag.
 */
static bool
vring_notify(VuDev *dev, VuVirtq *vq)
{
    uint16_t old, new;
    bool v;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vu_queue_empty(dev, vq)) {
        return true;
    }

    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* EVENT_IDX: notify if used_idx crossed the driver's used_event
     * since the last signalled value, or if that value isn't valid yet. */
    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
25950df750e9SMarc-André Lureau 
_vu_queue_notify(VuDev * dev,VuVirtq * vq,bool sync)25960df750e9SMarc-André Lureau static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
25970df750e9SMarc-André Lureau {
25982a290227SDavid Hildenbrand     if (!vu_is_vq_usable(dev, vq)) {
25990df750e9SMarc-André Lureau         return;
26000df750e9SMarc-André Lureau     }
26010df750e9SMarc-André Lureau 
26020df750e9SMarc-André Lureau     if (!vring_notify(dev, vq)) {
26030df750e9SMarc-André Lureau         DPRINT("skipped notify...\n");
26040df750e9SMarc-André Lureau         return;
26050df750e9SMarc-André Lureau     }
26060df750e9SMarc-André Lureau 
26070df750e9SMarc-André Lureau     if (vq->call_fd < 0 &&
26080df750e9SMarc-André Lureau         vu_has_protocol_feature(dev,
26090df750e9SMarc-André Lureau                                 VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
2610e608feedSMaxime Coquelin         vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
26110df750e9SMarc-André Lureau         VhostUserMsg vmsg = {
2612e608feedSMaxime Coquelin             .request = VHOST_USER_BACKEND_VRING_CALL,
26130df750e9SMarc-André Lureau             .flags = VHOST_USER_VERSION,
26140df750e9SMarc-André Lureau             .size = sizeof(vmsg.payload.state),
26150df750e9SMarc-André Lureau             .payload.state = {
26160df750e9SMarc-André Lureau                 .index = vq - dev->vq,
26170df750e9SMarc-André Lureau             },
26180df750e9SMarc-André Lureau         };
26190df750e9SMarc-André Lureau         bool ack = sync &&
26200df750e9SMarc-André Lureau                    vu_has_protocol_feature(dev,
26210df750e9SMarc-André Lureau                                            VHOST_USER_PROTOCOL_F_REPLY_ACK);
26220df750e9SMarc-André Lureau 
26230df750e9SMarc-André Lureau         if (ack) {
26240df750e9SMarc-André Lureau             vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
26250df750e9SMarc-André Lureau         }
26260df750e9SMarc-André Lureau 
2627f8ed3648SManos Pitsidianakis         vu_message_write(dev, dev->backend_fd, &vmsg);
26280df750e9SMarc-André Lureau         if (ack) {
2629f8ed3648SManos Pitsidianakis             vu_message_read_default(dev, dev->backend_fd, &vmsg);
26300df750e9SMarc-André Lureau         }
26310df750e9SMarc-André Lureau         return;
26320df750e9SMarc-André Lureau     }
26330df750e9SMarc-André Lureau 
26340df750e9SMarc-André Lureau     if (eventfd_write(vq->call_fd, 1) < 0) {
26350df750e9SMarc-André Lureau         vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
26360df750e9SMarc-André Lureau     }
26370df750e9SMarc-André Lureau }
26380df750e9SMarc-André Lureau 
vu_queue_notify(VuDev * dev,VuVirtq * vq)26390df750e9SMarc-André Lureau void vu_queue_notify(VuDev *dev, VuVirtq *vq)
26400df750e9SMarc-André Lureau {
26410df750e9SMarc-André Lureau     _vu_queue_notify(dev, vq, false);
26420df750e9SMarc-André Lureau }
26430df750e9SMarc-André Lureau 
vu_queue_notify_sync(VuDev * dev,VuVirtq * vq)26440df750e9SMarc-André Lureau void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
26450df750e9SMarc-André Lureau {
26460df750e9SMarc-André Lureau     _vu_queue_notify(dev, vq, true);
26470df750e9SMarc-André Lureau }
26480df750e9SMarc-André Lureau 
vu_config_change_msg(VuDev * dev)2649ca858a5fSVladimir Sementsov-Ogievskiy void vu_config_change_msg(VuDev *dev)
2650ca858a5fSVladimir Sementsov-Ogievskiy {
2651ca858a5fSVladimir Sementsov-Ogievskiy     VhostUserMsg vmsg = {
2652ca858a5fSVladimir Sementsov-Ogievskiy         .request = VHOST_USER_BACKEND_CONFIG_CHANGE_MSG,
2653ca858a5fSVladimir Sementsov-Ogievskiy         .flags = VHOST_USER_VERSION,
2654ca858a5fSVladimir Sementsov-Ogievskiy     };
2655ca858a5fSVladimir Sementsov-Ogievskiy 
2656f8ed3648SManos Pitsidianakis     vu_message_write(dev, dev->backend_fd, &vmsg);
2657ca858a5fSVladimir Sementsov-Ogievskiy }
2658ca858a5fSVladimir Sementsov-Ogievskiy 
/*
 * OR @mask into the used ring's flags field (guest-shared memory),
 * preserving little-endian byte order.  The field is located via
 * offsetof rather than a typed member access.
 */
static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char*)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags = htole16(le16toh(*flags) | mask);
}
26680df750e9SMarc-André Lureau 
/*
 * Clear @mask from the used ring's flags field (guest-shared memory),
 * preserving little-endian byte order.  Mirror of
 * vring_used_flags_set_bit().
 */
static inline void
vring_used_flags_unset_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char*)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags = htole16(le16toh(*flags) & ~mask);
}
26780df750e9SMarc-André Lureau 
26790df750e9SMarc-André Lureau static inline void
vring_set_avail_event(VuVirtq * vq,uint16_t val)26800df750e9SMarc-André Lureau vring_set_avail_event(VuVirtq *vq, uint16_t val)
26810df750e9SMarc-André Lureau {
2682950a2f2eSMarcel Holtmann     uint16_t val_le = htole16(val);
26830df750e9SMarc-André Lureau 
26840df750e9SMarc-André Lureau     if (!vq->notification) {
26850df750e9SMarc-André Lureau         return;
26860df750e9SMarc-André Lureau     }
26870df750e9SMarc-André Lureau 
2688950a2f2eSMarcel Holtmann     memcpy(&vq->vring.used->ring[vq->vring.num], &val_le, sizeof(uint16_t));
26890df750e9SMarc-André Lureau }
26900df750e9SMarc-André Lureau 
/*
 * Enable or disable driver->device notifications for @vq.
 *
 * With VIRTIO_RING_F_EVENT_IDX negotiated, the avail-event index is
 * published instead of toggling VRING_USED_F_NO_NOTIFY.  When enabling,
 * a full barrier makes the new event/flags visible before the caller
 * re-checks the avail index, so a concurrently-adding driver cannot be
 * missed.
 */
void
vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
{
    vq->notification = enable;
    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
27070df750e9SMarc-André Lureau 
/*
 * Map one guest-physical buffer [pa, pa+sz) into host iovecs, appending
 * at iov[*p_num_sg] and updating *p_num_sg on success.  A buffer that
 * crosses memory-region boundaries consumes several iovec entries.
 * @is_write marks a device-writable buffer (not used by the mapping
 * itself).  Returns false after vu_panic() on zero-length buffers,
 * iovec exhaustion, or unmappable addresses.
 */
static bool
virtqueue_map_desc(VuDev *dev,
                   unsigned int *p_num_sg, struct iovec *iov,
                   unsigned int max_num_sg, bool is_write,
                   uint64_t pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        vu_panic(dev, "virtio: zero sized buffers are not allowed");
        return false;
    }

    while (sz) {
        uint64_t len = sz;

        if (num_sg == max_num_sg) {
            vu_panic(dev, "virtio: too many descriptors in indirect table");
            return false;
        }

        /* vu_gpa_to_va() may shrink len to the contiguous mapping size. */
        iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
        if (iov[num_sg].iov_base == NULL) {
            vu_panic(dev, "virtio: invalid address for buffers");
            return false;
        }
        iov[num_sg].iov_len = len;
        num_sg++;
        sz -= len;
        pa += len;
    }

    *p_num_sg = num_sg;
    return true;
}
27450df750e9SMarc-André Lureau 
27460df750e9SMarc-André Lureau static void *
virtqueue_alloc_element(size_t sz,unsigned out_num,unsigned in_num)27470df750e9SMarc-André Lureau virtqueue_alloc_element(size_t sz,
27480df750e9SMarc-André Lureau                                      unsigned out_num, unsigned in_num)
27490df750e9SMarc-André Lureau {
27500df750e9SMarc-André Lureau     VuVirtqElement *elem;
27510df750e9SMarc-André Lureau     size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
27520df750e9SMarc-André Lureau     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
27530df750e9SMarc-André Lureau     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
27540df750e9SMarc-André Lureau 
27550df750e9SMarc-André Lureau     assert(sz >= sizeof(VuVirtqElement));
27560df750e9SMarc-André Lureau     elem = malloc(out_sg_end);
27579c191605SCarlos López     if (!elem) {
27589c191605SCarlos López         DPRINT("%s: failed to malloc virtqueue element\n", __func__);
27599c191605SCarlos López         return NULL;
27609c191605SCarlos López     }
27610df750e9SMarc-André Lureau     elem->out_num = out_num;
27620df750e9SMarc-André Lureau     elem->in_num = in_num;
27630df750e9SMarc-André Lureau     elem->in_sg = (void *)elem + in_sg_ofs;
27640df750e9SMarc-André Lureau     elem->out_sg = (void *)elem + out_sg_ofs;
27650df750e9SMarc-André Lureau     return elem;
27660df750e9SMarc-André Lureau }
27670df750e9SMarc-André Lureau 
/*
 * Walk the descriptor chain headed at @idx and map every buffer into a
 * freshly allocated VuVirtqElement (@sz header bytes followed by the
 * iovec arrays).  Handles VRING_DESC_F_INDIRECT tables, falling back to
 * a bounce copy when the table cannot be mapped contiguously.  Returns
 * NULL on malformed rings (after vu_panic()) or allocation failure.
 */
static void *
vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
{
    struct vring_desc *desc = vq->vring.desc;
    uint64_t desc_addr, read_len;
    unsigned int desc_len;
    unsigned int max = vq->vring.num;
    unsigned int i = idx;
    VuVirtqElement *elem;
    unsigned int out_num = 0, in_num = 0;
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    int rc;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = vu_gpa_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            /* Table spans regions: copy it into desc_buf instead. */
            if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                              desc_addr,
                                              desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            vu_panic(dev, "Invalid indirect buffer table");
            return NULL;
        }
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            /* Device-writable buffer: goes in the in_sg set. */
            if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
                               VIRTQUEUE_MAX_SIZE - out_num, true,
                               le64toh(desc[i].addr),
                               le32toh(desc[i].len))) {
                return NULL;
            }
        } else {
            /* Readable descriptors must all precede writable ones. */
            if (in_num) {
                vu_panic(dev, "Incorrect order for descriptors");
                return NULL;
            }
            if (!virtqueue_map_desc(dev, &out_num, iov,
                               VIRTQUEUE_MAX_SIZE, false,
                               le64toh(desc[i].addr),
                               le32toh(desc[i].len))) {
                return NULL;
            }
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            vu_panic(dev, "Looped descriptor");
            return NULL;
        }
        rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        vu_panic(dev, "read descriptor error");
        return NULL;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    if (!elem) {
        return NULL;
    }
    elem->index = idx;
    for (i = 0; i < out_num; i++) {
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_sg[i] = iov[out_num + i];
    }

    return elem;
}
28600df750e9SMarc-André Lureau 
/*
 * Mark descriptor chain @desc_idx as in flight in the shared inflight
 * region so it can be resubmitted after a crash/reconnect.  No-op (0)
 * when VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD is not negotiated; -1 if
 * the region was never set up.
 */
static int
vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    /* The monotonically increasing counter orders resubmission. */
    vq->inflight->desc[desc_idx].counter = vq->counter++;
    vq->inflight->desc[desc_idx].inflight = 1;

    return 0;
}
28770df750e9SMarc-André Lureau 
/*
 * Record @desc_idx as the head of the batch about to be pushed to the
 * used ring (called before vu_queue_flush()), so recovery can tell
 * whether the used-ring update completed.  Returns 0 when inflight
 * tracking is off, -1 if the shared region is missing.
 */
static int
vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    vq->inflight->last_batch_head = desc_idx;

    return 0;
}
28930df750e9SMarc-André Lureau 
/*
 * Retire @desc_idx from inflight tracking after its used-ring update
 * (called after vu_queue_flush()).  The compiler barriers keep the
 * clear-inflight / record-used_idx stores in program order so a crash
 * between them leaves a recoverable state.  Returns 0 when inflight
 * tracking is off, -1 if the shared region is missing.
 */
static int
vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    barrier();

    vq->inflight->desc[desc_idx].inflight = 0;

    barrier();

    vq->inflight->used_idx = vq->used_idx;

    return 0;
}
29150df750e9SMarc-André Lureau 
/*
 * Pop the next available descriptor chain from @vq as a heap-allocated
 * element of @sz header bytes (free() it when done).  Chains pending
 * resubmission after a reconnect are returned first.  Returns NULL when
 * the queue is unusable, empty, or malformed.
 */
void *
vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
{
    int i;
    unsigned int head;
    VuVirtqElement *elem;

    if (!vu_is_vq_usable(dev, vq)) {
        return NULL;
    }

    /* Drain the resubmit list (inflight entries recovered on reconnect)
     * before touching the live avail ring. */
    if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
        i = (--vq->resubmit_num);
        elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz);

        if (!vq->resubmit_num) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        return elem;
    }

    if (vu_queue_empty(dev, vq)) {
        return NULL;
    }
    /*
     * Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads().
     */
    smp_rmb();

    if (vq->inuse >= vq->vring.num) {
        vu_panic(dev, "Virtqueue size exceeded");
        return NULL;
    }

    if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    /* Tell the driver we only need a notification past this point. */
    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    elem = vu_queue_map_desc(dev, vq, head, sz);

    if (!elem) {
        return NULL;
    }

    vq->inuse++;

    vu_queue_inflight_get(dev, vq, head);

    return elem;
}
29730df750e9SMarc-André Lureau 
29740df750e9SMarc-André Lureau static void
vu_queue_detach_element(VuDev * dev,VuVirtq * vq,VuVirtqElement * elem,size_t len)29750df750e9SMarc-André Lureau vu_queue_detach_element(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
29760df750e9SMarc-André Lureau                         size_t len)
29770df750e9SMarc-André Lureau {
29780df750e9SMarc-André Lureau     vq->inuse--;
29790df750e9SMarc-André Lureau     /* unmap, when DMA support is added */
29800df750e9SMarc-André Lureau }
29810df750e9SMarc-André Lureau 
29820df750e9SMarc-André Lureau void
vu_queue_unpop(VuDev * dev,VuVirtq * vq,VuVirtqElement * elem,size_t len)29830df750e9SMarc-André Lureau vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
29840df750e9SMarc-André Lureau                size_t len)
29850df750e9SMarc-André Lureau {
29860df750e9SMarc-André Lureau     vq->last_avail_idx--;
29870df750e9SMarc-André Lureau     vu_queue_detach_element(dev, vq, elem, len);
29880df750e9SMarc-André Lureau }
29890df750e9SMarc-André Lureau 
29900df750e9SMarc-André Lureau bool
vu_queue_rewind(VuDev * dev,VuVirtq * vq,unsigned int num)29910df750e9SMarc-André Lureau vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
29920df750e9SMarc-André Lureau {
29930df750e9SMarc-André Lureau     if (num > vq->inuse) {
29940df750e9SMarc-André Lureau         return false;
29950df750e9SMarc-André Lureau     }
29960df750e9SMarc-André Lureau     vq->last_avail_idx -= num;
29970df750e9SMarc-André Lureau     vq->inuse -= num;
29980df750e9SMarc-André Lureau     return true;
29990df750e9SMarc-André Lureau }
30000df750e9SMarc-André Lureau 
30010df750e9SMarc-André Lureau static inline
vring_used_write(VuDev * dev,VuVirtq * vq,struct vring_used_elem * uelem,int i)30020df750e9SMarc-André Lureau void vring_used_write(VuDev *dev, VuVirtq *vq,
30030df750e9SMarc-André Lureau                       struct vring_used_elem *uelem, int i)
30040df750e9SMarc-André Lureau {
30050df750e9SMarc-André Lureau     struct vring_used *used = vq->vring.used;
30060df750e9SMarc-André Lureau 
30070df750e9SMarc-André Lureau     used->ring[i] = *uelem;
30080df750e9SMarc-André Lureau     vu_log_write(dev, vq->vring.log_guest_addr +
30090df750e9SMarc-André Lureau                  offsetof(struct vring_used, ring[i]),
30100df750e9SMarc-André Lureau                  sizeof(used->ring[i]));
30110df750e9SMarc-André Lureau }
30120df750e9SMarc-André Lureau 
30130df750e9SMarc-André Lureau 
/*
 * Dirty-log the first @len bytes of the device-writable buffers of
 * @elem for live migration, re-walking its descriptor chain (including
 * indirect tables, with a bounce-copy fallback) to find the guest
 * addresses that were written.
 */
static void
vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
                  const VuVirtqElement *elem,
                  unsigned int len)
{
    struct vring_desc *desc = vq->vring.desc;
    unsigned int i, max, min, desc_len;
    uint64_t desc_addr, read_len;
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    unsigned num_bufs = 0;

    max = vq->vring.num;
    i = elem->index;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
            return;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = vu_gpa_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            /* Table spans regions: copy it into desc_buf instead. */
            if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                              desc_addr,
                                              desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            vu_panic(dev, "Invalid indirect buffer table");
            return;
        }
        i = 0;
    }

    do {
        /* Chain longer than the table implies a loop. */
        if (++num_bufs > max) {
            vu_panic(dev, "Looped descriptor");
            return;
        }

        /* Only device-writable buffers were modified; log those. */
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            min = MIN(le32toh(desc[i].len), len);
            vu_log_write(dev, le64toh(desc[i].addr), min);
            len -= min;
        }

    } while (len > 0 &&
             (virtqueue_read_next_desc(dev, desc, i, max, &i)
              == VIRTQUEUE_READ_DESC_MORE));
}
30720df750e9SMarc-André Lureau 
30730df750e9SMarc-André Lureau void
vu_queue_fill(VuDev * dev,VuVirtq * vq,const VuVirtqElement * elem,unsigned int len,unsigned int idx)30740df750e9SMarc-André Lureau vu_queue_fill(VuDev *dev, VuVirtq *vq,
30750df750e9SMarc-André Lureau               const VuVirtqElement *elem,
30760df750e9SMarc-André Lureau               unsigned int len, unsigned int idx)
30770df750e9SMarc-André Lureau {
30780df750e9SMarc-André Lureau     struct vring_used_elem uelem;
30790df750e9SMarc-André Lureau 
30802a290227SDavid Hildenbrand     if (!vu_is_vq_usable(dev, vq)) {
30810df750e9SMarc-André Lureau         return;
30820df750e9SMarc-André Lureau     }
30830df750e9SMarc-André Lureau 
30840df750e9SMarc-André Lureau     vu_log_queue_fill(dev, vq, elem, len);
30850df750e9SMarc-André Lureau 
30860df750e9SMarc-André Lureau     idx = (idx + vq->used_idx) % vq->vring.num;
30870df750e9SMarc-André Lureau 
30880df750e9SMarc-André Lureau     uelem.id = htole32(elem->index);
30890df750e9SMarc-André Lureau     uelem.len = htole32(len);
30900df750e9SMarc-André Lureau     vring_used_write(dev, vq, &uelem, idx);
30910df750e9SMarc-André Lureau }
30920df750e9SMarc-André Lureau 
30930df750e9SMarc-André Lureau static inline
vring_used_idx_set(VuDev * dev,VuVirtq * vq,uint16_t val)30940df750e9SMarc-André Lureau void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
30950df750e9SMarc-André Lureau {
30960df750e9SMarc-André Lureau     vq->vring.used->idx = htole16(val);
30970df750e9SMarc-André Lureau     vu_log_write(dev,
30980df750e9SMarc-André Lureau                  vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
30990df750e9SMarc-André Lureau                  sizeof(vq->vring.used->idx));
31000df750e9SMarc-André Lureau 
31010df750e9SMarc-André Lureau     vq->used_idx = val;
31020df750e9SMarc-André Lureau }
31030df750e9SMarc-André Lureau 
/*
 * Advance the used index by @count previously-filled entries, making
 * them visible to the driver, and release those chains from the in-use
 * count.  Invalidates the cached signalled_used when the used index
 * passes it (mod-2^16), so the next notification is not wrongly
 * suppressed under VIRTIO_RING_F_EVENT_IDX.
 */
void
vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
{
    uint16_t old, new;

    if (!vu_is_vq_usable(dev, vq)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(dev, vq, new);
    vq->inuse -= count;
    /* Did [old, new] step over signalled_used (wrap-safe comparison)? */
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}
31240df750e9SMarc-André Lureau 
/*
 * Complete @elem: fill it into the used ring, advance the used index,
 * and retire its inflight-tracking entry.  The inflight pre/post hooks
 * are deliberately split around vu_queue_flush() so a crash at any
 * point leaves a consistent record for resubmission after reconnect.
 */
void
vu_queue_push(VuDev *dev, VuVirtq *vq,
              const VuVirtqElement *elem, unsigned int len)
{
    vu_queue_fill(dev, vq, elem, len, 0);
    vu_queue_inflight_pre_put(dev, vq, elem->index);
    vu_queue_flush(dev, vq, 1);
    vu_queue_inflight_post_put(dev, vq, elem->index);
}
3134