10df750e9SMarc-André Lureau /*
20df750e9SMarc-André Lureau * Vhost User library
30df750e9SMarc-André Lureau *
40df750e9SMarc-André Lureau * Copyright IBM, Corp. 2007
50df750e9SMarc-André Lureau * Copyright (c) 2016 Red Hat, Inc.
60df750e9SMarc-André Lureau *
70df750e9SMarc-André Lureau * Authors:
80df750e9SMarc-André Lureau * Anthony Liguori <aliguori@us.ibm.com>
90df750e9SMarc-André Lureau * Marc-André Lureau <mlureau@redhat.com>
100df750e9SMarc-André Lureau * Victor Kaplansky <victork@redhat.com>
110df750e9SMarc-André Lureau *
120df750e9SMarc-André Lureau * This work is licensed under the terms of the GNU GPL, version 2 or
130df750e9SMarc-André Lureau * later. See the COPYING file in the top-level directory.
140df750e9SMarc-André Lureau */
150df750e9SMarc-André Lureau
16dadc3d01SMarcel Holtmann #ifndef _GNU_SOURCE
17dadc3d01SMarcel Holtmann #define _GNU_SOURCE
18dadc3d01SMarcel Holtmann #endif
19dadc3d01SMarcel Holtmann
200df750e9SMarc-André Lureau /* this code avoids GLib dependency */
210df750e9SMarc-André Lureau #include <stdlib.h>
220df750e9SMarc-André Lureau #include <stdio.h>
230df750e9SMarc-André Lureau #include <unistd.h>
240df750e9SMarc-André Lureau #include <stdarg.h>
250df750e9SMarc-André Lureau #include <errno.h>
260df750e9SMarc-André Lureau #include <string.h>
270df750e9SMarc-André Lureau #include <assert.h>
280df750e9SMarc-André Lureau #include <inttypes.h>
290df750e9SMarc-André Lureau #include <sys/types.h>
300df750e9SMarc-André Lureau #include <sys/socket.h>
310df750e9SMarc-André Lureau #include <sys/eventfd.h>
320df750e9SMarc-André Lureau #include <sys/mman.h>
330df750e9SMarc-André Lureau #include <endian.h>
340df750e9SMarc-André Lureau
35193ba660SDavid 'Digit' Turner /* Necessary to provide VIRTIO_F_VERSION_1 on system
36193ba660SDavid 'Digit' Turner * with older linux headers. Must appear before
37193ba660SDavid 'Digit' Turner * <linux/vhost.h> below.
38193ba660SDavid 'Digit' Turner */
39193ba660SDavid 'Digit' Turner #include "standard-headers/linux/virtio_config.h"
40193ba660SDavid 'Digit' Turner
410df750e9SMarc-André Lureau #if defined(__linux__)
420df750e9SMarc-André Lureau #include <sys/syscall.h>
430df750e9SMarc-André Lureau #include <fcntl.h>
440df750e9SMarc-André Lureau #include <sys/ioctl.h>
450df750e9SMarc-André Lureau #include <linux/vhost.h>
46b2b63008SDavid Hildenbrand #include <sys/vfs.h>
47b2b63008SDavid Hildenbrand #include <linux/magic.h>
480df750e9SMarc-André Lureau
490df750e9SMarc-André Lureau #ifdef __NR_userfaultfd
500df750e9SMarc-André Lureau #include <linux/userfaultfd.h>
510df750e9SMarc-André Lureau #endif
520df750e9SMarc-André Lureau
530df750e9SMarc-André Lureau #endif
540df750e9SMarc-André Lureau
553f55f97bSMarc-André Lureau #include "include/atomic.h"
560df750e9SMarc-André Lureau
570df750e9SMarc-André Lureau #include "libvhost-user.h"
580df750e9SMarc-André Lureau
/* usually provided by GLib */
#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)
#if !defined(__clang__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 4)
/* gcc 4.4 only understands the GNU printf dialect under this name. */
#define G_GNUC_PRINTF(format_idx, arg_idx) \
    __attribute__((__format__(gnu_printf, format_idx, arg_idx)))
#else
#define G_GNUC_PRINTF(format_idx, arg_idx) \
    __attribute__((__format__(__printf__, format_idx, arg_idx)))
#endif
#else /* !__GNUC__ */
/* Non-GNU compilers: no format-string checking available. */
#define G_GNUC_PRINTF(format_idx, arg_idx)
#endif /* !__GNUC__ */
/*
 * Type-generic minimum. Statement-expression form evaluates each argument
 * exactly once; the pointer comparison only exists to make the compiler
 * warn when x and y have incompatible types.
 */
#ifndef MIN
#define MIN(x, y) ({ \
    __typeof__(x) _min1 = (x); \
    __typeof__(y) _min2 = (y); \
    (void) (&_min1 == &_min2); \
    _min1 < _min2 ? _min1 : _min2; })
#endif

/* Round number down to multiple */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))

/* Round number up to multiple */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))

/* Branch-prediction hint for rarely-taken paths. */
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif

/* Align each region to cache line size in inflight buffer */
#define INFLIGHT_ALIGNMENT 64

/* The version of inflight buffer */
#define INFLIGHT_VERSION 1

/* The version of the protocol we support */
#define VHOST_USER_VERSION 1
/* Set to 1 to enable verbose tracing on stderr. */
#define LIBVHOST_USER_DEBUG 0

/* Constant-folded away entirely when LIBVHOST_USER_DEBUG is 0. */
#define DPRINT(...) \
    do { \
        if (LIBVHOST_USER_DEBUG) { \
            fprintf(stderr, __VA_ARGS__); \
        } \
    } while (0)
1050df750e9SMarc-André Lureau
/* Test a single feature bit in a 64-bit feature mask. */
static inline
bool has_feature(uint64_t features, unsigned int fbit)
{
    assert(fbit < 64);
    return (features >> fbit) & 1;
}
1120df750e9SMarc-André Lureau
1130df750e9SMarc-André Lureau static inline
vu_has_feature(VuDev * dev,unsigned int fbit)1140df750e9SMarc-André Lureau bool vu_has_feature(VuDev *dev,
1150df750e9SMarc-André Lureau unsigned int fbit)
1160df750e9SMarc-André Lureau {
1170df750e9SMarc-André Lureau return has_feature(dev->features, fbit);
1180df750e9SMarc-André Lureau }
1190df750e9SMarc-André Lureau
/* Has the given vhost-user protocol feature bit been negotiated? */
static inline bool vu_has_protocol_feature(VuDev *dev, unsigned int fbit)
{
    return has_feature(dev->protocol_features, fbit);
}
1240df750e9SMarc-André Lureau
125467eeb0fSAlex Bennée const char *
vu_request_to_string(unsigned int req)1260df750e9SMarc-André Lureau vu_request_to_string(unsigned int req)
1270df750e9SMarc-André Lureau {
1280df750e9SMarc-André Lureau #define REQ(req) [req] = #req
1290df750e9SMarc-André Lureau static const char *vu_request_str[] = {
1300df750e9SMarc-André Lureau REQ(VHOST_USER_NONE),
1310df750e9SMarc-André Lureau REQ(VHOST_USER_GET_FEATURES),
1320df750e9SMarc-André Lureau REQ(VHOST_USER_SET_FEATURES),
1330df750e9SMarc-André Lureau REQ(VHOST_USER_SET_OWNER),
1340df750e9SMarc-André Lureau REQ(VHOST_USER_RESET_OWNER),
1350df750e9SMarc-André Lureau REQ(VHOST_USER_SET_MEM_TABLE),
1360df750e9SMarc-André Lureau REQ(VHOST_USER_SET_LOG_BASE),
1370df750e9SMarc-André Lureau REQ(VHOST_USER_SET_LOG_FD),
1380df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_NUM),
1390df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_ADDR),
1400df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_BASE),
1410df750e9SMarc-André Lureau REQ(VHOST_USER_GET_VRING_BASE),
1420df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_KICK),
1430df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_CALL),
1440df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_ERR),
1450df750e9SMarc-André Lureau REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
1460df750e9SMarc-André Lureau REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
1470df750e9SMarc-André Lureau REQ(VHOST_USER_GET_QUEUE_NUM),
1480df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_ENABLE),
1490df750e9SMarc-André Lureau REQ(VHOST_USER_SEND_RARP),
1500df750e9SMarc-André Lureau REQ(VHOST_USER_NET_SET_MTU),
151e608feedSMaxime Coquelin REQ(VHOST_USER_SET_BACKEND_REQ_FD),
1520df750e9SMarc-André Lureau REQ(VHOST_USER_IOTLB_MSG),
1530df750e9SMarc-André Lureau REQ(VHOST_USER_SET_VRING_ENDIAN),
1540df750e9SMarc-André Lureau REQ(VHOST_USER_GET_CONFIG),
1550df750e9SMarc-André Lureau REQ(VHOST_USER_SET_CONFIG),
1560df750e9SMarc-André Lureau REQ(VHOST_USER_POSTCOPY_ADVISE),
1570df750e9SMarc-André Lureau REQ(VHOST_USER_POSTCOPY_LISTEN),
1580df750e9SMarc-André Lureau REQ(VHOST_USER_POSTCOPY_END),
1590df750e9SMarc-André Lureau REQ(VHOST_USER_GET_INFLIGHT_FD),
1600df750e9SMarc-André Lureau REQ(VHOST_USER_SET_INFLIGHT_FD),
1610df750e9SMarc-André Lureau REQ(VHOST_USER_GPU_SET_SOCKET),
1620df750e9SMarc-André Lureau REQ(VHOST_USER_VRING_KICK),
1630df750e9SMarc-André Lureau REQ(VHOST_USER_GET_MAX_MEM_SLOTS),
1640df750e9SMarc-André Lureau REQ(VHOST_USER_ADD_MEM_REG),
1650df750e9SMarc-André Lureau REQ(VHOST_USER_REM_MEM_REG),
166ce0f3b03SAlbert Esteve REQ(VHOST_USER_GET_SHARED_OBJECT),
1670df750e9SMarc-André Lureau REQ(VHOST_USER_MAX),
1680df750e9SMarc-André Lureau };
1690df750e9SMarc-André Lureau #undef REQ
1700df750e9SMarc-André Lureau
1710df750e9SMarc-André Lureau if (req < VHOST_USER_MAX) {
1720df750e9SMarc-André Lureau return vu_request_str[req];
1730df750e9SMarc-André Lureau } else {
1740df750e9SMarc-André Lureau return "unknown";
1750df750e9SMarc-André Lureau }
1760df750e9SMarc-André Lureau }
1770df750e9SMarc-André Lureau
17852a57d8dSStefan Weil via static void G_GNUC_PRINTF(2, 3)
vu_panic(VuDev * dev,const char * msg,...)1790df750e9SMarc-André Lureau vu_panic(VuDev *dev, const char *msg, ...)
1800df750e9SMarc-André Lureau {
1810df750e9SMarc-André Lureau char *buf = NULL;
1820df750e9SMarc-André Lureau va_list ap;
1830df750e9SMarc-André Lureau
1840df750e9SMarc-André Lureau va_start(ap, msg);
1850df750e9SMarc-André Lureau if (vasprintf(&buf, msg, ap) < 0) {
1860df750e9SMarc-André Lureau buf = NULL;
1870df750e9SMarc-André Lureau }
1880df750e9SMarc-André Lureau va_end(ap);
1890df750e9SMarc-André Lureau
1900df750e9SMarc-André Lureau dev->broken = true;
1910df750e9SMarc-André Lureau dev->panic(dev, buf);
1920df750e9SMarc-André Lureau free(buf);
1930df750e9SMarc-André Lureau
1940df750e9SMarc-André Lureau /*
1950df750e9SMarc-André Lureau * FIXME:
1960df750e9SMarc-André Lureau * find a way to call virtio_error, or perhaps close the connection?
1970df750e9SMarc-André Lureau */
1980df750e9SMarc-André Lureau }
1990df750e9SMarc-André Lureau
20060ccdca4SDavid Hildenbrand /* Search for a memory region that covers this guest physical address. */
20160ccdca4SDavid Hildenbrand static VuDevRegion *
vu_gpa_to_mem_region(VuDev * dev,uint64_t guest_addr)20260ccdca4SDavid Hildenbrand vu_gpa_to_mem_region(VuDev *dev, uint64_t guest_addr)
20360ccdca4SDavid Hildenbrand {
204a3c0118cSDavid Hildenbrand int low = 0;
205a3c0118cSDavid Hildenbrand int high = dev->nregions - 1;
20660ccdca4SDavid Hildenbrand
20760ccdca4SDavid Hildenbrand /*
20860ccdca4SDavid Hildenbrand * Memory regions cannot overlap in guest physical address space. Each
20960ccdca4SDavid Hildenbrand * GPA belongs to exactly one memory region, so there can only be one
21060ccdca4SDavid Hildenbrand * match.
211a3c0118cSDavid Hildenbrand *
212a3c0118cSDavid Hildenbrand * We store our memory regions ordered by GPA and can simply perform a
213a3c0118cSDavid Hildenbrand * binary search.
21460ccdca4SDavid Hildenbrand */
215a3c0118cSDavid Hildenbrand while (low <= high) {
216a3c0118cSDavid Hildenbrand unsigned int mid = low + (high - low) / 2;
217a3c0118cSDavid Hildenbrand VuDevRegion *cur = &dev->regions[mid];
21860ccdca4SDavid Hildenbrand
21960ccdca4SDavid Hildenbrand if (guest_addr >= cur->gpa && guest_addr < cur->gpa + cur->size) {
22060ccdca4SDavid Hildenbrand return cur;
22160ccdca4SDavid Hildenbrand }
222a3c0118cSDavid Hildenbrand if (guest_addr >= cur->gpa + cur->size) {
223a3c0118cSDavid Hildenbrand low = mid + 1;
224a3c0118cSDavid Hildenbrand }
225a3c0118cSDavid Hildenbrand if (guest_addr < cur->gpa) {
226a3c0118cSDavid Hildenbrand high = mid - 1;
227a3c0118cSDavid Hildenbrand }
22860ccdca4SDavid Hildenbrand }
22960ccdca4SDavid Hildenbrand return NULL;
23060ccdca4SDavid Hildenbrand }
23160ccdca4SDavid Hildenbrand
2320df750e9SMarc-André Lureau /* Translate guest physical address to our virtual address. */
2330df750e9SMarc-André Lureau void *
vu_gpa_to_va(VuDev * dev,uint64_t * plen,uint64_t guest_addr)2340df750e9SMarc-André Lureau vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr)
2350df750e9SMarc-André Lureau {
23660ccdca4SDavid Hildenbrand VuDevRegion *r;
2370df750e9SMarc-André Lureau
2380df750e9SMarc-André Lureau if (*plen == 0) {
2390df750e9SMarc-André Lureau return NULL;
2400df750e9SMarc-André Lureau }
2410df750e9SMarc-André Lureau
24260ccdca4SDavid Hildenbrand r = vu_gpa_to_mem_region(dev, guest_addr);
24360ccdca4SDavid Hildenbrand if (!r) {
24460ccdca4SDavid Hildenbrand return NULL;
24560ccdca4SDavid Hildenbrand }
2460df750e9SMarc-André Lureau
2470df750e9SMarc-André Lureau if ((guest_addr + *plen) > (r->gpa + r->size)) {
2480df750e9SMarc-André Lureau *plen = r->gpa + r->size - guest_addr;
2490df750e9SMarc-André Lureau }
25060ccdca4SDavid Hildenbrand return (void *)(uintptr_t)guest_addr - r->gpa + r->mmap_addr +
25160ccdca4SDavid Hildenbrand r->mmap_offset;
2520df750e9SMarc-André Lureau }
2530df750e9SMarc-André Lureau
2540df750e9SMarc-André Lureau /* Translate qemu virtual address to our virtual address. */
2550df750e9SMarc-André Lureau static void *
qva_to_va(VuDev * dev,uint64_t qemu_addr)2560df750e9SMarc-André Lureau qva_to_va(VuDev *dev, uint64_t qemu_addr)
2570df750e9SMarc-André Lureau {
25892bf2461SMarcel Holtmann unsigned int i;
2590df750e9SMarc-André Lureau
2600df750e9SMarc-André Lureau /* Find matching memory region. */
2610df750e9SMarc-André Lureau for (i = 0; i < dev->nregions; i++) {
2620df750e9SMarc-André Lureau VuDevRegion *r = &dev->regions[i];
2630df750e9SMarc-André Lureau
2640df750e9SMarc-André Lureau if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
2650df750e9SMarc-André Lureau return (void *)(uintptr_t)
2660df750e9SMarc-André Lureau qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
2670df750e9SMarc-André Lureau }
2680df750e9SMarc-André Lureau }
2690df750e9SMarc-André Lureau
2700df750e9SMarc-André Lureau return NULL;
2710df750e9SMarc-André Lureau }
2720df750e9SMarc-André Lureau
2730df750e9SMarc-André Lureau static void
vu_remove_all_mem_regs(VuDev * dev)274bec58209SDavid Hildenbrand vu_remove_all_mem_regs(VuDev *dev)
275bec58209SDavid Hildenbrand {
276bec58209SDavid Hildenbrand unsigned int i;
277bec58209SDavid Hildenbrand
278bec58209SDavid Hildenbrand for (i = 0; i < dev->nregions; i++) {
279bec58209SDavid Hildenbrand VuDevRegion *r = &dev->regions[i];
280bec58209SDavid Hildenbrand
2814f865c3bSDavid Hildenbrand munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);
282bec58209SDavid Hildenbrand }
283bec58209SDavid Hildenbrand dev->nregions = 0;
284bec58209SDavid Hildenbrand }
285bec58209SDavid Hildenbrand
2862a290227SDavid Hildenbrand static bool
map_ring(VuDev * dev,VuVirtq * vq)28767f4f663SDavid Hildenbrand map_ring(VuDev *dev, VuVirtq *vq)
28867f4f663SDavid Hildenbrand {
28967f4f663SDavid Hildenbrand vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
29067f4f663SDavid Hildenbrand vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
29167f4f663SDavid Hildenbrand vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
29267f4f663SDavid Hildenbrand
29367f4f663SDavid Hildenbrand DPRINT("Setting virtq addresses:\n");
29467f4f663SDavid Hildenbrand DPRINT(" vring_desc at %p\n", vq->vring.desc);
29567f4f663SDavid Hildenbrand DPRINT(" vring_used at %p\n", vq->vring.used);
29667f4f663SDavid Hildenbrand DPRINT(" vring_avail at %p\n", vq->vring.avail);
29767f4f663SDavid Hildenbrand
29867f4f663SDavid Hildenbrand return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
29967f4f663SDavid Hildenbrand }
30067f4f663SDavid Hildenbrand
30167f4f663SDavid Hildenbrand static bool
vu_is_vq_usable(VuDev * dev,VuVirtq * vq)3022a290227SDavid Hildenbrand vu_is_vq_usable(VuDev *dev, VuVirtq *vq)
3032a290227SDavid Hildenbrand {
30467f4f663SDavid Hildenbrand if (unlikely(dev->broken)) {
30567f4f663SDavid Hildenbrand return false;
30667f4f663SDavid Hildenbrand }
30767f4f663SDavid Hildenbrand
30867f4f663SDavid Hildenbrand if (likely(vq->vring.avail)) {
30967f4f663SDavid Hildenbrand return true;
31067f4f663SDavid Hildenbrand }
31167f4f663SDavid Hildenbrand
31267f4f663SDavid Hildenbrand /*
31367f4f663SDavid Hildenbrand * In corner cases, we might temporarily remove a memory region that
31467f4f663SDavid Hildenbrand * mapped a ring. When removing a memory region we make sure to
31567f4f663SDavid Hildenbrand * unmap any rings that would be impacted. Let's try to remap if we
31667f4f663SDavid Hildenbrand * already succeeded mapping this ring once.
31767f4f663SDavid Hildenbrand */
31867f4f663SDavid Hildenbrand if (!vq->vra.desc_user_addr || !vq->vra.used_user_addr ||
31967f4f663SDavid Hildenbrand !vq->vra.avail_user_addr) {
32067f4f663SDavid Hildenbrand return false;
32167f4f663SDavid Hildenbrand }
32267f4f663SDavid Hildenbrand if (map_ring(dev, vq)) {
32367f4f663SDavid Hildenbrand vu_panic(dev, "remapping queue on access");
32467f4f663SDavid Hildenbrand return false;
32567f4f663SDavid Hildenbrand }
32667f4f663SDavid Hildenbrand return true;
32767f4f663SDavid Hildenbrand }
32867f4f663SDavid Hildenbrand
32967f4f663SDavid Hildenbrand static void
unmap_rings(VuDev * dev,VuDevRegion * r)33067f4f663SDavid Hildenbrand unmap_rings(VuDev *dev, VuDevRegion *r)
33167f4f663SDavid Hildenbrand {
33267f4f663SDavid Hildenbrand int i;
33367f4f663SDavid Hildenbrand
33467f4f663SDavid Hildenbrand for (i = 0; i < dev->max_queues; i++) {
33567f4f663SDavid Hildenbrand VuVirtq *vq = &dev->vq[i];
33667f4f663SDavid Hildenbrand const uintptr_t desc = (uintptr_t)vq->vring.desc;
33767f4f663SDavid Hildenbrand const uintptr_t used = (uintptr_t)vq->vring.used;
33867f4f663SDavid Hildenbrand const uintptr_t avail = (uintptr_t)vq->vring.avail;
33967f4f663SDavid Hildenbrand
34067f4f663SDavid Hildenbrand if (desc < r->mmap_addr || desc >= r->mmap_addr + r->size) {
34167f4f663SDavid Hildenbrand continue;
34267f4f663SDavid Hildenbrand }
34367f4f663SDavid Hildenbrand if (used < r->mmap_addr || used >= r->mmap_addr + r->size) {
34467f4f663SDavid Hildenbrand continue;
34567f4f663SDavid Hildenbrand }
34667f4f663SDavid Hildenbrand if (avail < r->mmap_addr || avail >= r->mmap_addr + r->size) {
34767f4f663SDavid Hildenbrand continue;
34867f4f663SDavid Hildenbrand }
34967f4f663SDavid Hildenbrand
35067f4f663SDavid Hildenbrand DPRINT("Unmapping rings of queue %d\n", i);
35167f4f663SDavid Hildenbrand vq->vring.desc = NULL;
35267f4f663SDavid Hildenbrand vq->vring.used = NULL;
35367f4f663SDavid Hildenbrand vq->vring.avail = NULL;
35467f4f663SDavid Hildenbrand }
3552a290227SDavid Hildenbrand }
3562a290227SDavid Hildenbrand
/*
 * If fd refers to a file on hugetlbfs, return the huge page size in use;
 * otherwise (or on non-Linux hosts) return 0.
 */
static size_t
get_fd_hugepagesize(int fd)
{
#if defined(__linux__)
    struct statfs fs;
    int err;

    /* fstatfs may be interrupted by a signal; retry on EINTR. */
    while ((err = fstatfs(fd, &fs)) != 0 && errno == EINTR) {
    }

    if (err == 0 && (unsigned int)fs.f_type == HUGETLBFS_MAGIC) {
        /* On hugetlbfs, f_bsize is the huge page size. */
        return fs.f_bsize;
    }
#endif
    return 0;
}
374b2b63008SDavid Hildenbrand
/*
 * mmap() the region described by msg_region (backed by fd) and insert it
 * into dev->regions, keeping the array sorted by guest physical address.
 *
 * On overlap with an existing region or mmap failure the device is
 * panicked and nothing is inserted. In postcopy mode the region is mapped
 * PROT_NONE and msg_region->userspace_addr is rewritten so QEMU can
 * translate userfault addresses back.
 *
 * NOTE(review): assumes the caller has verified there is a free slot in
 * dev->regions before calling — TODO confirm against callers.
 */
static void
_vu_add_mem_reg(VuDev *dev, VhostUserMemoryRegion *msg_region, int fd)
{
    const uint64_t start_gpa = msg_region->guest_phys_addr;
    const uint64_t end_gpa = start_gpa + msg_region->memory_size;
    int prot = PROT_READ | PROT_WRITE;
    uint64_t mmap_offset, fd_offset;
    size_t hugepagesize;
    VuDevRegion *r;
    void *mmap_addr;
    int low = 0;
    int high = dev->nregions - 1;
    unsigned int idx;

    DPRINT("Adding region %d\n", dev->nregions);
    DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
           msg_region->guest_phys_addr);
    DPRINT(" memory_size: 0x%016"PRIx64"\n",
           msg_region->memory_size);
    DPRINT(" userspace_addr: 0x%016"PRIx64"\n",
           msg_region->userspace_addr);
    DPRINT(" old mmap_offset: 0x%016"PRIx64"\n",
           msg_region->mmap_offset);

    if (dev->postcopy_listening) {
        /*
         * In postcopy we're using PROT_NONE here to catch anyone
         * accessing it before we userfault
         */
        prot = PROT_NONE;
    }

    /*
     * We will add memory regions into the array sorted by GPA. Perform a
     * binary search to locate the insertion point: it will be at the low
     * index.
     */
    while (low <= high) {
        unsigned int mid = low + (high - low) / 2;
        VuDevRegion *cur = &dev->regions[mid];

        /* Overlap of GPA addresses. */
        if (start_gpa < cur->gpa + cur->size && cur->gpa < end_gpa) {
            vu_panic(dev, "regions with overlapping guest physical addresses");
            return;
        }
        if (start_gpa >= cur->gpa + cur->size) {
            low = mid + 1;
        }
        if (start_gpa < cur->gpa) {
            high = mid - 1;
        }
    }
    idx = low;

    /*
     * Convert most of msg_region->mmap_offset to fd_offset. In almost all
     * cases, this will leave us with mmap_offset == 0, mmap()'ing only
     * what we really need. Only if a memory region would partially cover
     * hugetlb pages, we'd get mmap_offset != 0, which usually doesn't happen
     * anymore (i.e., modern QEMU).
     *
     * Note that mmap() with hugetlb would fail if the offset into the file
     * is not aligned to the huge page size.
     */
    hugepagesize = get_fd_hugepagesize(fd);
    if (hugepagesize) {
        /* Keep the fd offset huge-page aligned; mmap the small remainder. */
        fd_offset = ALIGN_DOWN(msg_region->mmap_offset, hugepagesize);
        mmap_offset = msg_region->mmap_offset - fd_offset;
    } else {
        fd_offset = msg_region->mmap_offset;
        mmap_offset = 0;
    }

    DPRINT(" fd_offset: 0x%016"PRIx64"\n",
           fd_offset);
    DPRINT(" new mmap_offset: 0x%016"PRIx64"\n",
           mmap_offset);

    /* MAP_NORESERVE: don't reserve swap for what may be a huge mapping. */
    mmap_addr = mmap(0, msg_region->memory_size + mmap_offset,
                     prot, MAP_SHARED | MAP_NORESERVE, fd, fd_offset);
    if (mmap_addr == MAP_FAILED) {
        vu_panic(dev, "region mmap error: %s", strerror(errno));
        return;
    }
    DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
           (uint64_t)(uintptr_t)mmap_addr);

#if defined(__linux__)
    /* Don't include all guest memory in a coredump. */
    madvise(mmap_addr, msg_region->memory_size + mmap_offset,
            MADV_DONTDUMP);
#endif

    /* Shift all affected entries by 1 to open a hole at idx. */
    r = &dev->regions[idx];
    memmove(r + 1, r, sizeof(VuDevRegion) * (dev->nregions - idx));
    r->gpa = msg_region->guest_phys_addr;
    r->size = msg_region->memory_size;
    r->qva = msg_region->userspace_addr;
    r->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
    r->mmap_offset = mmap_offset;
    dev->nregions++;

    if (dev->postcopy_listening) {
        /*
         * Return the address to QEMU so that it can translate the ufd
         * fault addresses back.
         */
        msg_region->userspace_addr = r->mmap_addr + r->mmap_offset;
    }
}
48793fec23dSDavid Hildenbrand
48893fec23dSDavid Hildenbrand static void
vmsg_close_fds(VhostUserMsg * vmsg)4890df750e9SMarc-André Lureau vmsg_close_fds(VhostUserMsg *vmsg)
4900df750e9SMarc-André Lureau {
4910df750e9SMarc-André Lureau int i;
4920df750e9SMarc-André Lureau
4930df750e9SMarc-André Lureau for (i = 0; i < vmsg->fd_num; i++) {
4940df750e9SMarc-André Lureau close(vmsg->fds[i]);
4950df750e9SMarc-André Lureau }
4960df750e9SMarc-André Lureau }
4970df750e9SMarc-André Lureau
4980df750e9SMarc-André Lureau /* Set reply payload.u64 and clear request flags and fd_num */
vmsg_set_reply_u64(VhostUserMsg * vmsg,uint64_t val)4990df750e9SMarc-André Lureau static void vmsg_set_reply_u64(VhostUserMsg *vmsg, uint64_t val)
5000df750e9SMarc-André Lureau {
5010df750e9SMarc-André Lureau vmsg->flags = 0; /* defaults will be set by vu_send_reply() */
5020df750e9SMarc-André Lureau vmsg->size = sizeof(vmsg->payload.u64);
5030df750e9SMarc-André Lureau vmsg->payload.u64 = val;
5040df750e9SMarc-André Lureau vmsg->fd_num = 0;
5050df750e9SMarc-André Lureau }
5060df750e9SMarc-André Lureau
5070df750e9SMarc-André Lureau /* A test to see if we have userfault available */
5080df750e9SMarc-André Lureau static bool
have_userfault(void)5090df750e9SMarc-André Lureau have_userfault(void)
5100df750e9SMarc-André Lureau {
5110df750e9SMarc-André Lureau #if defined(__linux__) && defined(__NR_userfaultfd) &&\
5120df750e9SMarc-André Lureau defined(UFFD_FEATURE_MISSING_SHMEM) &&\
5130df750e9SMarc-André Lureau defined(UFFD_FEATURE_MISSING_HUGETLBFS)
5140df750e9SMarc-André Lureau /* Now test the kernel we're running on really has the features */
5150df750e9SMarc-André Lureau int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
5160df750e9SMarc-André Lureau struct uffdio_api api_struct;
5170df750e9SMarc-André Lureau if (ufd < 0) {
5180df750e9SMarc-André Lureau return false;
5190df750e9SMarc-André Lureau }
5200df750e9SMarc-André Lureau
5210df750e9SMarc-André Lureau api_struct.api = UFFD_API;
5220df750e9SMarc-André Lureau api_struct.features = UFFD_FEATURE_MISSING_SHMEM |
5230df750e9SMarc-André Lureau UFFD_FEATURE_MISSING_HUGETLBFS;
5240df750e9SMarc-André Lureau if (ioctl(ufd, UFFDIO_API, &api_struct)) {
5250df750e9SMarc-André Lureau close(ufd);
5260df750e9SMarc-André Lureau return false;
5270df750e9SMarc-André Lureau }
5280df750e9SMarc-André Lureau close(ufd);
5290df750e9SMarc-André Lureau return true;
5300df750e9SMarc-André Lureau
5310df750e9SMarc-André Lureau #else
5320df750e9SMarc-André Lureau return false;
5330df750e9SMarc-André Lureau #endif
5340df750e9SMarc-André Lureau }
5350df750e9SMarc-André Lureau
5360df750e9SMarc-André Lureau static bool
vu_message_read_default(VuDev * dev,int conn_fd,VhostUserMsg * vmsg)5370df750e9SMarc-André Lureau vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
5380df750e9SMarc-André Lureau {
5390df750e9SMarc-André Lureau char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
5400df750e9SMarc-André Lureau struct iovec iov = {
5410df750e9SMarc-André Lureau .iov_base = (char *)vmsg,
5420df750e9SMarc-André Lureau .iov_len = VHOST_USER_HDR_SIZE,
5430df750e9SMarc-André Lureau };
5440df750e9SMarc-André Lureau struct msghdr msg = {
5450df750e9SMarc-André Lureau .msg_iov = &iov,
5460df750e9SMarc-André Lureau .msg_iovlen = 1,
5470df750e9SMarc-André Lureau .msg_control = control,
5480df750e9SMarc-André Lureau .msg_controllen = sizeof(control),
5490df750e9SMarc-André Lureau };
5500df750e9SMarc-André Lureau size_t fd_size;
5510df750e9SMarc-André Lureau struct cmsghdr *cmsg;
5520df750e9SMarc-André Lureau int rc;
5530df750e9SMarc-André Lureau
5540df750e9SMarc-André Lureau do {
5550df750e9SMarc-André Lureau rc = recvmsg(conn_fd, &msg, 0);
5560df750e9SMarc-André Lureau } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
5570df750e9SMarc-André Lureau
5580df750e9SMarc-André Lureau if (rc < 0) {
5590df750e9SMarc-André Lureau vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
5600df750e9SMarc-André Lureau return false;
5610df750e9SMarc-André Lureau }
5620df750e9SMarc-André Lureau
5630df750e9SMarc-André Lureau vmsg->fd_num = 0;
5640df750e9SMarc-André Lureau for (cmsg = CMSG_FIRSTHDR(&msg);
5650df750e9SMarc-André Lureau cmsg != NULL;
5660df750e9SMarc-André Lureau cmsg = CMSG_NXTHDR(&msg, cmsg))
5670df750e9SMarc-André Lureau {
5680df750e9SMarc-André Lureau if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
5690df750e9SMarc-André Lureau fd_size = cmsg->cmsg_len - CMSG_LEN(0);
5700df750e9SMarc-André Lureau vmsg->fd_num = fd_size / sizeof(int);
571a6f4d2ecSVladimir Sementsov-Ogievskiy assert(fd_size < VHOST_MEMORY_BASELINE_NREGIONS);
5720df750e9SMarc-André Lureau memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
5730df750e9SMarc-André Lureau break;
5740df750e9SMarc-André Lureau }
5750df750e9SMarc-André Lureau }
5760df750e9SMarc-André Lureau
5770df750e9SMarc-André Lureau if (vmsg->size > sizeof(vmsg->payload)) {
5780df750e9SMarc-André Lureau vu_panic(dev,
5790df750e9SMarc-André Lureau "Error: too big message request: %d, size: vmsg->size: %u, "
5800df750e9SMarc-André Lureau "while sizeof(vmsg->payload) = %zu\n",
5810df750e9SMarc-André Lureau vmsg->request, vmsg->size, sizeof(vmsg->payload));
5820df750e9SMarc-André Lureau goto fail;
5830df750e9SMarc-André Lureau }
5840df750e9SMarc-André Lureau
5850df750e9SMarc-André Lureau if (vmsg->size) {
5860df750e9SMarc-André Lureau do {
5870df750e9SMarc-André Lureau rc = read(conn_fd, &vmsg->payload, vmsg->size);
5880df750e9SMarc-André Lureau } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
5890df750e9SMarc-André Lureau
5900df750e9SMarc-André Lureau if (rc <= 0) {
5910df750e9SMarc-André Lureau vu_panic(dev, "Error while reading: %s", strerror(errno));
5920df750e9SMarc-André Lureau goto fail;
5930df750e9SMarc-André Lureau }
5940df750e9SMarc-André Lureau
59518fa7f1eSMarcel Holtmann assert((uint32_t)rc == vmsg->size);
5960df750e9SMarc-André Lureau }
5970df750e9SMarc-André Lureau
5980df750e9SMarc-André Lureau return true;
5990df750e9SMarc-André Lureau
6000df750e9SMarc-André Lureau fail:
6010df750e9SMarc-André Lureau vmsg_close_fds(vmsg);
6020df750e9SMarc-André Lureau
6030df750e9SMarc-André Lureau return false;
6040df750e9SMarc-André Lureau }
6050df750e9SMarc-André Lureau
6060df750e9SMarc-André Lureau static bool
vu_message_write(VuDev * dev,int conn_fd,VhostUserMsg * vmsg)6070df750e9SMarc-André Lureau vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
6080df750e9SMarc-André Lureau {
6090df750e9SMarc-André Lureau int rc;
6100df750e9SMarc-André Lureau uint8_t *p = (uint8_t *)vmsg;
6110df750e9SMarc-André Lureau char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
6120df750e9SMarc-André Lureau struct iovec iov = {
6130df750e9SMarc-André Lureau .iov_base = (char *)vmsg,
6140df750e9SMarc-André Lureau .iov_len = VHOST_USER_HDR_SIZE,
6150df750e9SMarc-André Lureau };
6160df750e9SMarc-André Lureau struct msghdr msg = {
6170df750e9SMarc-André Lureau .msg_iov = &iov,
6180df750e9SMarc-André Lureau .msg_iovlen = 1,
6190df750e9SMarc-André Lureau .msg_control = control,
6200df750e9SMarc-André Lureau };
6210df750e9SMarc-André Lureau struct cmsghdr *cmsg;
6220df750e9SMarc-André Lureau
6230df750e9SMarc-André Lureau memset(control, 0, sizeof(control));
6240df750e9SMarc-André Lureau assert(vmsg->fd_num <= VHOST_MEMORY_BASELINE_NREGIONS);
6250df750e9SMarc-André Lureau if (vmsg->fd_num > 0) {
6260df750e9SMarc-André Lureau size_t fdsize = vmsg->fd_num * sizeof(int);
6270df750e9SMarc-André Lureau msg.msg_controllen = CMSG_SPACE(fdsize);
6280df750e9SMarc-André Lureau cmsg = CMSG_FIRSTHDR(&msg);
6290df750e9SMarc-André Lureau cmsg->cmsg_len = CMSG_LEN(fdsize);
6300df750e9SMarc-André Lureau cmsg->cmsg_level = SOL_SOCKET;
6310df750e9SMarc-André Lureau cmsg->cmsg_type = SCM_RIGHTS;
6320df750e9SMarc-André Lureau memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize);
6330df750e9SMarc-André Lureau } else {
6340df750e9SMarc-André Lureau msg.msg_controllen = 0;
6350df750e9SMarc-André Lureau }
6360df750e9SMarc-André Lureau
6370df750e9SMarc-André Lureau do {
6380df750e9SMarc-André Lureau rc = sendmsg(conn_fd, &msg, 0);
6390df750e9SMarc-André Lureau } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
6400df750e9SMarc-André Lureau
6410df750e9SMarc-André Lureau if (vmsg->size) {
6420df750e9SMarc-André Lureau do {
6430df750e9SMarc-André Lureau if (vmsg->data) {
6440df750e9SMarc-André Lureau rc = write(conn_fd, vmsg->data, vmsg->size);
6450df750e9SMarc-André Lureau } else {
6460df750e9SMarc-André Lureau rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
6470df750e9SMarc-André Lureau }
6480df750e9SMarc-André Lureau } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
6490df750e9SMarc-André Lureau }
6500df750e9SMarc-André Lureau
6510df750e9SMarc-André Lureau if (rc <= 0) {
6520df750e9SMarc-André Lureau vu_panic(dev, "Error while writing: %s", strerror(errno));
6530df750e9SMarc-André Lureau return false;
6540df750e9SMarc-André Lureau }
6550df750e9SMarc-André Lureau
6560df750e9SMarc-André Lureau return true;
6570df750e9SMarc-André Lureau }
6580df750e9SMarc-André Lureau
6590df750e9SMarc-André Lureau static bool
vu_send_reply(VuDev * dev,int conn_fd,VhostUserMsg * vmsg)6600df750e9SMarc-André Lureau vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
6610df750e9SMarc-André Lureau {
6620df750e9SMarc-André Lureau /* Set the version in the flags when sending the reply */
6630df750e9SMarc-André Lureau vmsg->flags &= ~VHOST_USER_VERSION_MASK;
6640df750e9SMarc-André Lureau vmsg->flags |= VHOST_USER_VERSION;
6650df750e9SMarc-André Lureau vmsg->flags |= VHOST_USER_REPLY_MASK;
6660df750e9SMarc-André Lureau
6670df750e9SMarc-André Lureau return vu_message_write(dev, conn_fd, vmsg);
6680df750e9SMarc-André Lureau }
6690df750e9SMarc-André Lureau
6700df750e9SMarc-André Lureau /*
671f8ed3648SManos Pitsidianakis * Processes a reply on the backend channel.
672f8ed3648SManos Pitsidianakis * Entered with backend_mutex held and releases it before exit.
6730df750e9SMarc-André Lureau * Returns true on success.
6740df750e9SMarc-André Lureau */
6750df750e9SMarc-André Lureau static bool
vu_process_message_reply(VuDev * dev,const VhostUserMsg * vmsg)6760df750e9SMarc-André Lureau vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
6770df750e9SMarc-André Lureau {
6780df750e9SMarc-André Lureau VhostUserMsg msg_reply;
6790df750e9SMarc-André Lureau bool result = false;
6800df750e9SMarc-André Lureau
6810df750e9SMarc-André Lureau if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
6820df750e9SMarc-André Lureau result = true;
6830df750e9SMarc-André Lureau goto out;
6840df750e9SMarc-André Lureau }
6850df750e9SMarc-André Lureau
686f8ed3648SManos Pitsidianakis if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
6870df750e9SMarc-André Lureau goto out;
6880df750e9SMarc-André Lureau }
6890df750e9SMarc-André Lureau
6900df750e9SMarc-André Lureau if (msg_reply.request != vmsg->request) {
6910df750e9SMarc-André Lureau DPRINT("Received unexpected msg type. Expected %d received %d",
6920df750e9SMarc-André Lureau vmsg->request, msg_reply.request);
6930df750e9SMarc-André Lureau goto out;
6940df750e9SMarc-André Lureau }
6950df750e9SMarc-André Lureau
6960df750e9SMarc-André Lureau result = msg_reply.payload.u64 == 0;
6970df750e9SMarc-André Lureau
6980df750e9SMarc-André Lureau out:
699f8ed3648SManos Pitsidianakis pthread_mutex_unlock(&dev->backend_mutex);
7000df750e9SMarc-André Lureau return result;
7010df750e9SMarc-André Lureau }
7020df750e9SMarc-André Lureau
7030df750e9SMarc-André Lureau /* Kick the log_call_fd if required. */
7040df750e9SMarc-André Lureau static void
vu_log_kick(VuDev * dev)7050df750e9SMarc-André Lureau vu_log_kick(VuDev *dev)
7060df750e9SMarc-André Lureau {
7070df750e9SMarc-André Lureau if (dev->log_call_fd != -1) {
7080df750e9SMarc-André Lureau DPRINT("Kicking the QEMU's log...\n");
7090df750e9SMarc-André Lureau if (eventfd_write(dev->log_call_fd, 1) < 0) {
7100df750e9SMarc-André Lureau vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
7110df750e9SMarc-André Lureau }
7120df750e9SMarc-André Lureau }
7130df750e9SMarc-André Lureau }
7140df750e9SMarc-André Lureau
/* Atomically set the dirty bit for @page in the shared dirty-log bitmap. */
static void
vu_log_page(uint8_t *log_table, uint64_t page)
{
    uint8_t mask = 1 << (page % 8);

    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    /* Atomic OR: the front-end may scan/clear the bitmap concurrently. */
    qatomic_or(&log_table[page / 8], mask);
}
7210df750e9SMarc-André Lureau
7220df750e9SMarc-André Lureau static void
vu_log_write(VuDev * dev,uint64_t address,uint64_t length)7230df750e9SMarc-André Lureau vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
7240df750e9SMarc-André Lureau {
7250df750e9SMarc-André Lureau uint64_t page;
7260df750e9SMarc-André Lureau
7270df750e9SMarc-André Lureau if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
7280df750e9SMarc-André Lureau !dev->log_table || !length) {
7290df750e9SMarc-André Lureau return;
7300df750e9SMarc-André Lureau }
7310df750e9SMarc-André Lureau
7320df750e9SMarc-André Lureau assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
7330df750e9SMarc-André Lureau
7340df750e9SMarc-André Lureau page = address / VHOST_LOG_PAGE;
7350df750e9SMarc-André Lureau while (page * VHOST_LOG_PAGE < address + length) {
7360df750e9SMarc-André Lureau vu_log_page(dev->log_table, page);
7370df750e9SMarc-André Lureau page += 1;
7380df750e9SMarc-André Lureau }
7390df750e9SMarc-André Lureau
7400df750e9SMarc-André Lureau vu_log_kick(dev);
7410df750e9SMarc-André Lureau }
7420df750e9SMarc-André Lureau
7430df750e9SMarc-André Lureau static void
vu_kick_cb(VuDev * dev,int condition,void * data)7440df750e9SMarc-André Lureau vu_kick_cb(VuDev *dev, int condition, void *data)
7450df750e9SMarc-André Lureau {
7460df750e9SMarc-André Lureau int index = (intptr_t)data;
7470df750e9SMarc-André Lureau VuVirtq *vq = &dev->vq[index];
7480df750e9SMarc-André Lureau int sock = vq->kick_fd;
7490df750e9SMarc-André Lureau eventfd_t kick_data;
7500df750e9SMarc-André Lureau ssize_t rc;
7510df750e9SMarc-André Lureau
7520df750e9SMarc-André Lureau rc = eventfd_read(sock, &kick_data);
7530df750e9SMarc-André Lureau if (rc == -1) {
7540df750e9SMarc-André Lureau vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
7550df750e9SMarc-André Lureau dev->remove_watch(dev, dev->vq[index].kick_fd);
7560df750e9SMarc-André Lureau } else {
7570df750e9SMarc-André Lureau DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
7580df750e9SMarc-André Lureau kick_data, vq->handler, index);
7590df750e9SMarc-André Lureau if (vq->handler) {
7600df750e9SMarc-André Lureau vq->handler(dev, index);
7610df750e9SMarc-André Lureau }
7620df750e9SMarc-André Lureau }
7630df750e9SMarc-André Lureau }
7640df750e9SMarc-André Lureau
7650df750e9SMarc-André Lureau static bool
vu_get_features_exec(VuDev * dev,VhostUserMsg * vmsg)7660df750e9SMarc-André Lureau vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
7670df750e9SMarc-André Lureau {
7680df750e9SMarc-André Lureau vmsg->payload.u64 =
7690df750e9SMarc-André Lureau /*
7700df750e9SMarc-André Lureau * The following VIRTIO feature bits are supported by our virtqueue
7710df750e9SMarc-André Lureau * implementation:
7720df750e9SMarc-André Lureau */
7730df750e9SMarc-André Lureau 1ULL << VIRTIO_F_NOTIFY_ON_EMPTY |
7740df750e9SMarc-André Lureau 1ULL << VIRTIO_RING_F_INDIRECT_DESC |
7750df750e9SMarc-André Lureau 1ULL << VIRTIO_RING_F_EVENT_IDX |
7760df750e9SMarc-André Lureau 1ULL << VIRTIO_F_VERSION_1 |
7770df750e9SMarc-André Lureau
7780df750e9SMarc-André Lureau /* vhost-user feature bits */
7790df750e9SMarc-André Lureau 1ULL << VHOST_F_LOG_ALL |
7800df750e9SMarc-André Lureau 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
7810df750e9SMarc-André Lureau
7820df750e9SMarc-André Lureau if (dev->iface->get_features) {
7830df750e9SMarc-André Lureau vmsg->payload.u64 |= dev->iface->get_features(dev);
7840df750e9SMarc-André Lureau }
7850df750e9SMarc-André Lureau
7860df750e9SMarc-André Lureau vmsg->size = sizeof(vmsg->payload.u64);
7870df750e9SMarc-André Lureau vmsg->fd_num = 0;
7880df750e9SMarc-André Lureau
7890df750e9SMarc-André Lureau DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
7900df750e9SMarc-André Lureau
7910df750e9SMarc-André Lureau return true;
7920df750e9SMarc-André Lureau }
7930df750e9SMarc-André Lureau
7940df750e9SMarc-André Lureau static void
vu_set_enable_all_rings(VuDev * dev,bool enabled)7950df750e9SMarc-André Lureau vu_set_enable_all_rings(VuDev *dev, bool enabled)
7960df750e9SMarc-André Lureau {
7970df750e9SMarc-André Lureau uint16_t i;
7980df750e9SMarc-André Lureau
7990df750e9SMarc-André Lureau for (i = 0; i < dev->max_queues; i++) {
8000df750e9SMarc-André Lureau dev->vq[i].enable = enabled;
8010df750e9SMarc-André Lureau }
8020df750e9SMarc-André Lureau }
8030df750e9SMarc-André Lureau
8040df750e9SMarc-André Lureau static bool
vu_set_features_exec(VuDev * dev,VhostUserMsg * vmsg)8050df750e9SMarc-André Lureau vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
8060df750e9SMarc-André Lureau {
8070df750e9SMarc-André Lureau DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
8080df750e9SMarc-André Lureau
8090df750e9SMarc-André Lureau dev->features = vmsg->payload.u64;
8100df750e9SMarc-André Lureau if (!vu_has_feature(dev, VIRTIO_F_VERSION_1)) {
8110df750e9SMarc-André Lureau /*
8120df750e9SMarc-André Lureau * We only support devices conforming to VIRTIO 1.0 or
8130df750e9SMarc-André Lureau * later
8140df750e9SMarc-André Lureau */
8150df750e9SMarc-André Lureau vu_panic(dev, "virtio legacy devices aren't supported by libvhost-user");
8160df750e9SMarc-André Lureau return false;
8170df750e9SMarc-André Lureau }
8180df750e9SMarc-André Lureau
8190df750e9SMarc-André Lureau if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
8200df750e9SMarc-André Lureau vu_set_enable_all_rings(dev, true);
8210df750e9SMarc-André Lureau }
8220df750e9SMarc-André Lureau
8230df750e9SMarc-André Lureau if (dev->iface->set_features) {
8240df750e9SMarc-André Lureau dev->iface->set_features(dev, dev->features);
8250df750e9SMarc-André Lureau }
8260df750e9SMarc-André Lureau
8270df750e9SMarc-André Lureau return false;
8280df750e9SMarc-André Lureau }
8290df750e9SMarc-André Lureau
/*
 * VHOST_USER_SET_OWNER handler: this library keeps no per-owner state, so
 * the message is accepted as a no-op.  Returning false means "no reply".
 */
static bool
vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    return false;
}
8350df750e9SMarc-André Lureau
8360df750e9SMarc-André Lureau static void
vu_close_log(VuDev * dev)8370df750e9SMarc-André Lureau vu_close_log(VuDev *dev)
8380df750e9SMarc-André Lureau {
8390df750e9SMarc-André Lureau if (dev->log_table) {
8400df750e9SMarc-André Lureau if (munmap(dev->log_table, dev->log_size) != 0) {
8410df750e9SMarc-André Lureau perror("close log munmap() error");
8420df750e9SMarc-André Lureau }
8430df750e9SMarc-André Lureau
8440df750e9SMarc-André Lureau dev->log_table = NULL;
8450df750e9SMarc-André Lureau }
8460df750e9SMarc-André Lureau if (dev->log_call_fd != -1) {
8470df750e9SMarc-André Lureau close(dev->log_call_fd);
8480df750e9SMarc-André Lureau dev->log_call_fd = -1;
8490df750e9SMarc-André Lureau }
8500df750e9SMarc-André Lureau }
8510df750e9SMarc-André Lureau
/*
 * VHOST_USER_RESET_OWNER / reset handler: stop processing by disabling all
 * rings.  Returns false (no reply is sent).
 */
static bool
vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    vu_set_enable_all_rings(dev, false);

    return false;
}
8590df750e9SMarc-André Lureau
8600df750e9SMarc-André Lureau static bool
generate_faults(VuDev * dev)8610df750e9SMarc-André Lureau generate_faults(VuDev *dev) {
86292bf2461SMarcel Holtmann unsigned int i;
8630df750e9SMarc-André Lureau for (i = 0; i < dev->nregions; i++) {
864bb302772SPierre Labatut #ifdef UFFDIO_REGISTER
8650df750e9SMarc-André Lureau VuDevRegion *dev_region = &dev->regions[i];
8660df750e9SMarc-André Lureau int ret;
867d87a6424SMarcel Holtmann struct uffdio_register reg_struct;
868d87a6424SMarcel Holtmann
8690df750e9SMarc-André Lureau /*
8700df750e9SMarc-André Lureau * We should already have an open ufd. Mark each memory
8710df750e9SMarc-André Lureau * range as ufd.
8720df750e9SMarc-André Lureau * Discard any mapping we have here; note I can't use MADV_REMOVE
8730df750e9SMarc-André Lureau * or fallocate to make the hole since I don't want to lose
8740df750e9SMarc-André Lureau * data that's already arrived in the shared process.
8750df750e9SMarc-André Lureau * TODO: How to do hugepage
8760df750e9SMarc-André Lureau */
8770df750e9SMarc-André Lureau ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
8780df750e9SMarc-André Lureau dev_region->size + dev_region->mmap_offset,
8790df750e9SMarc-André Lureau MADV_DONTNEED);
8800df750e9SMarc-André Lureau if (ret) {
8810df750e9SMarc-André Lureau fprintf(stderr,
8820df750e9SMarc-André Lureau "%s: Failed to madvise(DONTNEED) region %d: %s\n",
8830df750e9SMarc-André Lureau __func__, i, strerror(errno));
8840df750e9SMarc-André Lureau }
8850df750e9SMarc-André Lureau /*
8860df750e9SMarc-André Lureau * Turn off transparent hugepages so we dont get lose wakeups
8870df750e9SMarc-André Lureau * in neighbouring pages.
8880df750e9SMarc-André Lureau * TODO: Turn this backon later.
8890df750e9SMarc-André Lureau */
8900df750e9SMarc-André Lureau ret = madvise((void *)(uintptr_t)dev_region->mmap_addr,
8910df750e9SMarc-André Lureau dev_region->size + dev_region->mmap_offset,
8920df750e9SMarc-André Lureau MADV_NOHUGEPAGE);
8930df750e9SMarc-André Lureau if (ret) {
8940df750e9SMarc-André Lureau /*
8950df750e9SMarc-André Lureau * Note: This can happen legally on kernels that are configured
8960df750e9SMarc-André Lureau * without madvise'able hugepages
8970df750e9SMarc-André Lureau */
8980df750e9SMarc-André Lureau fprintf(stderr,
8990df750e9SMarc-André Lureau "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n",
9000df750e9SMarc-André Lureau __func__, i, strerror(errno));
9010df750e9SMarc-André Lureau }
902d87a6424SMarcel Holtmann
9030df750e9SMarc-André Lureau reg_struct.range.start = (uintptr_t)dev_region->mmap_addr;
9040df750e9SMarc-André Lureau reg_struct.range.len = dev_region->size + dev_region->mmap_offset;
9050df750e9SMarc-André Lureau reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
9060df750e9SMarc-André Lureau
9070df750e9SMarc-André Lureau if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, ®_struct)) {
9080df750e9SMarc-André Lureau vu_panic(dev, "%s: Failed to userfault region %d "
9097d4774e6SStefan Weil via "@%" PRIx64 " + size:%" PRIx64 " offset: %" PRIx64
9107d4774e6SStefan Weil via ": (ufd=%d)%s\n",
9110df750e9SMarc-André Lureau __func__, i,
9120df750e9SMarc-André Lureau dev_region->mmap_addr,
9130df750e9SMarc-André Lureau dev_region->size, dev_region->mmap_offset,
9140df750e9SMarc-André Lureau dev->postcopy_ufd, strerror(errno));
9150df750e9SMarc-André Lureau return false;
9160df750e9SMarc-André Lureau }
91773b49878SPaolo Bonzini if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) {
9180df750e9SMarc-André Lureau vu_panic(dev, "%s Region (%d) doesn't support COPY",
9190df750e9SMarc-André Lureau __func__, i);
9200df750e9SMarc-André Lureau return false;
9210df750e9SMarc-André Lureau }
9220df750e9SMarc-André Lureau DPRINT("%s: region %d: Registered userfault for %"
9230df750e9SMarc-André Lureau PRIx64 " + %" PRIx64 "\n", __func__, i,
9240df750e9SMarc-André Lureau (uint64_t)reg_struct.range.start,
9250df750e9SMarc-André Lureau (uint64_t)reg_struct.range.len);
9260df750e9SMarc-André Lureau /* Now it's registered we can let the client at it */
9270df750e9SMarc-André Lureau if (mprotect((void *)(uintptr_t)dev_region->mmap_addr,
9280df750e9SMarc-André Lureau dev_region->size + dev_region->mmap_offset,
9290df750e9SMarc-André Lureau PROT_READ | PROT_WRITE)) {
9300df750e9SMarc-André Lureau vu_panic(dev, "failed to mprotect region %d for postcopy (%s)",
9310df750e9SMarc-André Lureau i, strerror(errno));
9320df750e9SMarc-André Lureau return false;
9330df750e9SMarc-André Lureau }
9340df750e9SMarc-André Lureau /* TODO: Stash 'zero' support flags somewhere */
9350df750e9SMarc-André Lureau #endif
9360df750e9SMarc-André Lureau }
9370df750e9SMarc-André Lureau
9380df750e9SMarc-André Lureau return true;
9390df750e9SMarc-André Lureau }
9400df750e9SMarc-André Lureau
/*
 * VHOST_USER_ADD_MEM_REG handler: hot-add one guest memory region, backed
 * by the single fd attached to the message.  Validates fd count, message
 * size and free region slots before mapping.  Every error path releases
 * the received fd(s) before panicking so no descriptor leaks.
 *
 * Returns true when a reply must be sent (postcopy only), false otherwise.
 */
static bool
vu_add_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
    /* Copy the region descriptor out of the message payload. */
    VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;

    if (vmsg->fd_num != 1) {
        /* Close all received fds before panicking — avoid fd leaks. */
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_ADD_MEM_REG received %d fds - only 1 fd "
                      "should be sent for this message type", vmsg->fd_num);
        return false;
    }

    if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
        close(vmsg->fds[0]);
        vu_panic(dev, "VHOST_USER_ADD_MEM_REG requires a message size of at "
                      "least %zu bytes and only %d bytes were received",
                      VHOST_USER_MEM_REG_SIZE, vmsg->size);
        return false;
    }

    if (dev->nregions == VHOST_USER_MAX_RAM_SLOTS) {
        close(vmsg->fds[0]);
        vu_panic(dev, "failing attempt to hot add memory via "
                      "VHOST_USER_ADD_MEM_REG message because the backend has "
                      "no free ram slots available");
        return false;
    }

    /*
     * If we are in postcopy mode and we receive a u64 payload with a 0 value
     * we know all the postcopy client bases have been received, and we
     * should start generating faults.
     */
    if (dev->postcopy_listening &&
        vmsg->size == sizeof(vmsg->payload.u64) &&
        vmsg->payload.u64 == 0) {
        (void)generate_faults(dev);
        return false;
    }

    /* Map the region; the fd is duplicated by mmap, so close our copy. */
    _vu_add_mem_reg(dev, msg_region, vmsg->fds[0]);
    close(vmsg->fds[0]);

    if (dev->postcopy_listening) {
        /* Send the message back to qemu with the addresses filled in. */
        vmsg->fd_num = 0;
        DPRINT("Successfully added new region in postcopy\n");
        return true;
    }
    DPRINT("Successfully added new region\n");
    return false;
}
9920df750e9SMarc-André Lureau
/*
 * Compare a locally-tracked region against a region descriptor from a
 * message: they match when guest-physical address, userspace (QEMU virtual)
 * address and size all agree.
 */
static inline bool reg_equal(VuDevRegion *vudev_reg,
                             VhostUserMemoryRegion *msg_reg)
{
    return vudev_reg->gpa == msg_reg->guest_phys_addr &&
           vudev_reg->qva == msg_reg->userspace_addr &&
           vudev_reg->size == msg_reg->memory_size;
}
10040df750e9SMarc-André Lureau
/*
 * VHOST_USER_REM_MEM_REG handler: remove the memory region matching the
 * descriptor in the payload.  The region must match by GPA, QVA and size.
 * Any ring mapped inside the region is unmapped first and will be remapped
 * on demand.  All received fds are closed on every path.
 *
 * Returns false (no reply is sent).
 */
static bool
vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) {
    VhostUserMemoryRegion m = vmsg->payload.memreg.region, *msg_region = &m;
    unsigned int idx;
    VuDevRegion *r;

    if (vmsg->fd_num > 1) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_REM_MEM_REG received %d fds - at most 1 fd "
                      "should be sent for this message type", vmsg->fd_num);
        return false;
    }

    if (vmsg->size < VHOST_USER_MEM_REG_SIZE) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "VHOST_USER_REM_MEM_REG requires a message size of at "
                      "least %zu bytes and only %d bytes were received",
                      VHOST_USER_MEM_REG_SIZE, vmsg->size);
        return false;
    }

    DPRINT("Removing region:\n");
    DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
           msg_region->guest_phys_addr);
    DPRINT("    memory_size:     0x%016"PRIx64"\n",
           msg_region->memory_size);
    DPRINT("    userspace_addr   0x%016"PRIx64"\n",
           msg_region->userspace_addr);
    DPRINT("    mmap_offset      0x%016"PRIx64"\n",
           msg_region->mmap_offset);

    /* Look the region up by GPA, then verify the full descriptor matches. */
    r = vu_gpa_to_mem_region(dev, msg_region->guest_phys_addr);
    if (!r || !reg_equal(r, msg_region)) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Specified region not found\n");
        return false;
    }

    /*
     * There might be valid cases where we temporarily remove memory regions
     * to readd them again, or remove memory regions and don't use the rings
     * anymore before we set the ring addresses and restart the device.
     *
     * Unmap all affected rings, remapping them on demand later. This should
     * be a corner case.
     */
    unmap_rings(dev, r);

    munmap((void *)(uintptr_t)r->mmap_addr, r->size + r->mmap_offset);

    /* Close the hole in the region array by shifting later entries down. */
    idx = r - dev->regions;
    assert(idx < dev->nregions);
    /* Shift all affected entries by 1 to close the hole. */
    memmove(r, r + 1, sizeof(VuDevRegion) * (dev->nregions - idx - 1));
    DPRINT("Successfully removed a region\n");
    dev->nregions--;

    vmsg_close_fds(vmsg);

    return false;
}
10660df750e9SMarc-André Lureau
10670df750e9SMarc-André Lureau static bool
vu_get_shared_object(VuDev * dev,VhostUserMsg * vmsg)1068ce0f3b03SAlbert Esteve vu_get_shared_object(VuDev *dev, VhostUserMsg *vmsg)
1069ce0f3b03SAlbert Esteve {
1070ce0f3b03SAlbert Esteve int fd_num = 0;
1071ce0f3b03SAlbert Esteve int dmabuf_fd = -1;
1072ce0f3b03SAlbert Esteve if (dev->iface->get_shared_object) {
1073ce0f3b03SAlbert Esteve dmabuf_fd = dev->iface->get_shared_object(
1074ce0f3b03SAlbert Esteve dev, &vmsg->payload.object.uuid[0]);
1075ce0f3b03SAlbert Esteve }
1076ce0f3b03SAlbert Esteve if (dmabuf_fd != -1) {
1077ce0f3b03SAlbert Esteve DPRINT("dmabuf_fd found for requested UUID\n");
1078ce0f3b03SAlbert Esteve vmsg->fds[fd_num++] = dmabuf_fd;
1079ce0f3b03SAlbert Esteve }
1080ce0f3b03SAlbert Esteve vmsg->fd_num = fd_num;
1081ce0f3b03SAlbert Esteve
1082ce0f3b03SAlbert Esteve return true;
1083ce0f3b03SAlbert Esteve }
1084ce0f3b03SAlbert Esteve
1085ce0f3b03SAlbert Esteve static bool
vu_set_mem_table_exec(VuDev * dev,VhostUserMsg * vmsg)108605a58ce4SDavid Hildenbrand vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
10870df750e9SMarc-André Lureau {
10880df750e9SMarc-André Lureau VhostUserMemory m = vmsg->payload.memory, *memory = &m;
108905a58ce4SDavid Hildenbrand unsigned int i;
10900df750e9SMarc-André Lureau
1091bec58209SDavid Hildenbrand vu_remove_all_mem_regs(dev);
10920df750e9SMarc-André Lureau
10930df750e9SMarc-André Lureau DPRINT("Nregions: %u\n", memory->nregions);
109493fec23dSDavid Hildenbrand for (i = 0; i < memory->nregions; i++) {
109593fec23dSDavid Hildenbrand _vu_add_mem_reg(dev, &memory->regions[i], vmsg->fds[i]);
10960df750e9SMarc-André Lureau close(vmsg->fds[i]);
10970df750e9SMarc-André Lureau }
10980df750e9SMarc-André Lureau
109905a58ce4SDavid Hildenbrand if (dev->postcopy_listening) {
110005a58ce4SDavid Hildenbrand /* Send the message back to qemu with the addresses filled in */
110105a58ce4SDavid Hildenbrand vmsg->fd_num = 0;
110205a58ce4SDavid Hildenbrand if (!vu_send_reply(dev, dev->sock, vmsg)) {
110305a58ce4SDavid Hildenbrand vu_panic(dev, "failed to respond to set-mem-table for postcopy");
110405a58ce4SDavid Hildenbrand return false;
110505a58ce4SDavid Hildenbrand }
110605a58ce4SDavid Hildenbrand
110705a58ce4SDavid Hildenbrand /*
110805a58ce4SDavid Hildenbrand * Wait for QEMU to confirm that it's registered the handler for the
110905a58ce4SDavid Hildenbrand * faults.
111005a58ce4SDavid Hildenbrand */
111105a58ce4SDavid Hildenbrand if (!dev->read_msg(dev, dev->sock, vmsg) ||
111205a58ce4SDavid Hildenbrand vmsg->size != sizeof(vmsg->payload.u64) ||
111305a58ce4SDavid Hildenbrand vmsg->payload.u64 != 0) {
111405a58ce4SDavid Hildenbrand vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
111505a58ce4SDavid Hildenbrand return false;
111605a58ce4SDavid Hildenbrand }
111705a58ce4SDavid Hildenbrand
111805a58ce4SDavid Hildenbrand /* OK, now we can go and register the memory and generate faults */
111905a58ce4SDavid Hildenbrand (void)generate_faults(dev);
112005a58ce4SDavid Hildenbrand return false;
112105a58ce4SDavid Hildenbrand }
112205a58ce4SDavid Hildenbrand
11230df750e9SMarc-André Lureau for (i = 0; i < dev->max_queues; i++) {
11240df750e9SMarc-André Lureau if (dev->vq[i].vring.desc) {
11250df750e9SMarc-André Lureau if (map_ring(dev, &dev->vq[i])) {
11260df750e9SMarc-André Lureau vu_panic(dev, "remapping queue %d during setmemtable", i);
11270df750e9SMarc-André Lureau }
11280df750e9SMarc-André Lureau }
11290df750e9SMarc-André Lureau }
11300df750e9SMarc-André Lureau
11310df750e9SMarc-André Lureau return false;
11320df750e9SMarc-André Lureau }
11330df750e9SMarc-André Lureau
11340df750e9SMarc-André Lureau static bool
vu_set_log_base_exec(VuDev * dev,VhostUserMsg * vmsg)11350df750e9SMarc-André Lureau vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
11360df750e9SMarc-André Lureau {
11370df750e9SMarc-André Lureau int fd;
11380df750e9SMarc-André Lureau uint64_t log_mmap_size, log_mmap_offset;
11390df750e9SMarc-André Lureau void *rc;
11400df750e9SMarc-André Lureau
11410df750e9SMarc-André Lureau if (vmsg->fd_num != 1 ||
11420df750e9SMarc-André Lureau vmsg->size != sizeof(vmsg->payload.log)) {
11430df750e9SMarc-André Lureau vu_panic(dev, "Invalid log_base message");
11440df750e9SMarc-André Lureau return true;
11450df750e9SMarc-André Lureau }
11460df750e9SMarc-André Lureau
11470df750e9SMarc-André Lureau fd = vmsg->fds[0];
11480df750e9SMarc-André Lureau log_mmap_offset = vmsg->payload.log.mmap_offset;
11490df750e9SMarc-André Lureau log_mmap_size = vmsg->payload.log.mmap_size;
11500df750e9SMarc-André Lureau DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
11510df750e9SMarc-André Lureau DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
11520df750e9SMarc-André Lureau
11530df750e9SMarc-André Lureau rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
11540df750e9SMarc-André Lureau log_mmap_offset);
11550df750e9SMarc-André Lureau close(fd);
11560df750e9SMarc-André Lureau if (rc == MAP_FAILED) {
11570df750e9SMarc-André Lureau perror("log mmap error");
11580df750e9SMarc-André Lureau }
11590df750e9SMarc-André Lureau
11600df750e9SMarc-André Lureau if (dev->log_table) {
11610df750e9SMarc-André Lureau munmap(dev->log_table, dev->log_size);
11620df750e9SMarc-André Lureau }
11630df750e9SMarc-André Lureau dev->log_table = rc;
11640df750e9SMarc-André Lureau dev->log_size = log_mmap_size;
11650df750e9SMarc-André Lureau
11660df750e9SMarc-André Lureau vmsg->size = sizeof(vmsg->payload.u64);
11670df750e9SMarc-André Lureau vmsg->fd_num = 0;
11680df750e9SMarc-André Lureau
11690df750e9SMarc-André Lureau return true;
11700df750e9SMarc-André Lureau }
11710df750e9SMarc-André Lureau
11720df750e9SMarc-André Lureau static bool
vu_set_log_fd_exec(VuDev * dev,VhostUserMsg * vmsg)11730df750e9SMarc-André Lureau vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
11740df750e9SMarc-André Lureau {
11750df750e9SMarc-André Lureau if (vmsg->fd_num != 1) {
11760df750e9SMarc-André Lureau vu_panic(dev, "Invalid log_fd message");
11770df750e9SMarc-André Lureau return false;
11780df750e9SMarc-André Lureau }
11790df750e9SMarc-André Lureau
11800df750e9SMarc-André Lureau if (dev->log_call_fd != -1) {
11810df750e9SMarc-André Lureau close(dev->log_call_fd);
11820df750e9SMarc-André Lureau }
11830df750e9SMarc-André Lureau dev->log_call_fd = vmsg->fds[0];
11840df750e9SMarc-André Lureau DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
11850df750e9SMarc-André Lureau
11860df750e9SMarc-André Lureau return false;
11870df750e9SMarc-André Lureau }
11880df750e9SMarc-André Lureau
11890df750e9SMarc-André Lureau static bool
vu_set_vring_num_exec(VuDev * dev,VhostUserMsg * vmsg)11900df750e9SMarc-André Lureau vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
11910df750e9SMarc-André Lureau {
11920df750e9SMarc-André Lureau unsigned int index = vmsg->payload.state.index;
11930df750e9SMarc-André Lureau unsigned int num = vmsg->payload.state.num;
11940df750e9SMarc-André Lureau
11950df750e9SMarc-André Lureau DPRINT("State.index: %u\n", index);
11960df750e9SMarc-André Lureau DPRINT("State.num: %u\n", num);
11970df750e9SMarc-André Lureau dev->vq[index].vring.num = num;
11980df750e9SMarc-André Lureau
11990df750e9SMarc-André Lureau return false;
12000df750e9SMarc-André Lureau }
12010df750e9SMarc-André Lureau
/*
 * Handle VHOST_USER_SET_VRING_ADDR: record the frontend-supplied ring
 * addresses for one virtqueue and map them into our address space via
 * map_ring().  Returns false: no reply is sent.
 *
 * NOTE(review): the queue index is used without a bounds check against
 * dev->max_queues here — verify the frontend is trusted to send valid
 * indices on this path.
 */
static bool
vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    struct vhost_vring_addr addr = vmsg->payload.addr, *vra = &addr;
    unsigned int index = vra->index;
    VuVirtq *vq = &dev->vq[index];

    DPRINT("vhost_vring_addr:\n");
    DPRINT(" index: %d\n", vra->index);
    DPRINT(" flags: %d\n", vra->flags);
    DPRINT(" desc_user_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->desc_user_addr);
    DPRINT(" used_user_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->used_user_addr);
    DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->avail_user_addr);
    DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", (uint64_t)vra->log_guest_addr);

    /* Keep the raw addresses around so the rings can be remapped later. */
    vq->vra = *vra;
    vq->vring.flags = vra->flags;
    vq->vring.log_guest_addr = vra->log_guest_addr;


    /* Translate the user addresses into usable ring pointers. */
    if (map_ring(dev, vq)) {
        vu_panic(dev, "Invalid vring_addr message");
        return false;
    }

    /* Ring fields are little-endian on the wire. */
    vq->used_idx = le16toh(vq->vring.used->idx);

    if (vq->last_avail_idx != vq->used_idx) {
        /*
         * After a reconnect the device's used index can be ahead of our
         * last avail index.  If the device declares that it completes
         * requests in order, it is safe to fast-forward to used_idx.
         */
        bool resume = dev->iface->queue_is_processed_in_order &&
            dev->iface->queue_is_processed_in_order(dev, index);

        DPRINT("Last avail index != used index: %u != %u%s\n",
               vq->last_avail_idx, vq->used_idx,
               resume ? ", resuming" : "");

        if (resume) {
            vq->shadow_avail_idx = vq->last_avail_idx = vq->used_idx;
        }
    }

    return false;
}
12440df750e9SMarc-André Lureau
12450df750e9SMarc-André Lureau static bool
vu_set_vring_base_exec(VuDev * dev,VhostUserMsg * vmsg)12460df750e9SMarc-André Lureau vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
12470df750e9SMarc-André Lureau {
12480df750e9SMarc-André Lureau unsigned int index = vmsg->payload.state.index;
12490df750e9SMarc-André Lureau unsigned int num = vmsg->payload.state.num;
12500df750e9SMarc-André Lureau
12510df750e9SMarc-André Lureau DPRINT("State.index: %u\n", index);
12520df750e9SMarc-André Lureau DPRINT("State.num: %u\n", num);
12530df750e9SMarc-André Lureau dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
12540df750e9SMarc-André Lureau
12550df750e9SMarc-André Lureau return false;
12560df750e9SMarc-André Lureau }
12570df750e9SMarc-André Lureau
12580df750e9SMarc-André Lureau static bool
vu_get_vring_base_exec(VuDev * dev,VhostUserMsg * vmsg)12590df750e9SMarc-André Lureau vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
12600df750e9SMarc-André Lureau {
12610df750e9SMarc-André Lureau unsigned int index = vmsg->payload.state.index;
12620df750e9SMarc-André Lureau
12630df750e9SMarc-André Lureau DPRINT("State.index: %u\n", index);
12640df750e9SMarc-André Lureau vmsg->payload.state.num = dev->vq[index].last_avail_idx;
12650df750e9SMarc-André Lureau vmsg->size = sizeof(vmsg->payload.state);
12660df750e9SMarc-André Lureau
12670df750e9SMarc-André Lureau dev->vq[index].started = false;
12680df750e9SMarc-André Lureau if (dev->iface->queue_set_started) {
12690df750e9SMarc-André Lureau dev->iface->queue_set_started(dev, index, false);
12700df750e9SMarc-André Lureau }
12710df750e9SMarc-André Lureau
12720df750e9SMarc-André Lureau if (dev->vq[index].call_fd != -1) {
12730df750e9SMarc-André Lureau close(dev->vq[index].call_fd);
12740df750e9SMarc-André Lureau dev->vq[index].call_fd = -1;
12750df750e9SMarc-André Lureau }
12760df750e9SMarc-André Lureau if (dev->vq[index].kick_fd != -1) {
12770df750e9SMarc-André Lureau dev->remove_watch(dev, dev->vq[index].kick_fd);
12780df750e9SMarc-André Lureau close(dev->vq[index].kick_fd);
12790df750e9SMarc-André Lureau dev->vq[index].kick_fd = -1;
12800df750e9SMarc-André Lureau }
12810df750e9SMarc-André Lureau
12820df750e9SMarc-André Lureau return true;
12830df750e9SMarc-André Lureau }
12840df750e9SMarc-André Lureau
12850df750e9SMarc-André Lureau static bool
vu_check_queue_msg_file(VuDev * dev,VhostUserMsg * vmsg)12860df750e9SMarc-André Lureau vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
12870df750e9SMarc-André Lureau {
12880df750e9SMarc-André Lureau int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
12890df750e9SMarc-André Lureau bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
12900df750e9SMarc-André Lureau
12910df750e9SMarc-André Lureau if (index >= dev->max_queues) {
12920df750e9SMarc-André Lureau vmsg_close_fds(vmsg);
12930df750e9SMarc-André Lureau vu_panic(dev, "Invalid queue index: %u", index);
12940df750e9SMarc-André Lureau return false;
12950df750e9SMarc-André Lureau }
12960df750e9SMarc-André Lureau
12970df750e9SMarc-André Lureau if (nofd) {
12980df750e9SMarc-André Lureau vmsg_close_fds(vmsg);
12990df750e9SMarc-André Lureau return true;
13000df750e9SMarc-André Lureau }
13010df750e9SMarc-André Lureau
13020df750e9SMarc-André Lureau if (vmsg->fd_num != 1) {
13030df750e9SMarc-André Lureau vmsg_close_fds(vmsg);
13040df750e9SMarc-André Lureau vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
13050df750e9SMarc-André Lureau return false;
13060df750e9SMarc-André Lureau }
13070df750e9SMarc-André Lureau
13080df750e9SMarc-André Lureau return true;
13090df750e9SMarc-André Lureau }
13100df750e9SMarc-André Lureau
/*
 * qsort() comparator for VuVirtqInflightDesc entries.
 *
 * Returns 1 (a sorts after b) only when b's counter is ahead of a's by
 * less than 2 * VIRTQUEUE_MAX_SIZE — i.e. b is genuinely newer within
 * the wrap window — and -1 otherwise, so the array ends up ordered
 * newest-first by submission counter.  It never returns 0: entries are
 * assumed to carry distinct counters.
 */
static int
inflight_desc_compare(const void *a, const void *b)
{
    VuVirtqInflightDesc *desc0 = (VuVirtqInflightDesc *)a,
                        *desc1 = (VuVirtqInflightDesc *)b;

    if (desc1->counter > desc0->counter &&
        (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
        return 1;
    }

    return -1;
}
13240df750e9SMarc-André Lureau
/*
 * Rebuild the in-flight (submitted but not yet completed) request state
 * for @vq from the shared inflight buffer after a (re)connect, building
 * a resubmit list for requests that must be replayed.
 *
 * Returns 0 on success (or when inflight tracking is not negotiated),
 * -1 on failure.
 */
static int
vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
{
    int i = 0;

    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    if (unlikely(!vq->inflight->version)) {
        /* initialize the buffer */
        vq->inflight->version = INFLIGHT_VERSION;
        return 0;
    }

    vq->used_idx = le16toh(vq->vring.used->idx);
    vq->resubmit_num = 0;
    vq->resubmit_list = NULL;
    vq->counter = 0;

    if (unlikely(vq->inflight->used_idx != vq->used_idx)) {
        /*
         * The used ring was updated but the inflight buffer's used_idx
         * was not (e.g. a crash between the two writes): the recorded
         * last batch head did in fact complete, so clear its flag.
         */
        vq->inflight->desc[vq->inflight->last_batch_head].inflight = 0;

        /* Order the flag clear before publishing the new used_idx. */
        barrier();

        vq->inflight->used_idx = vq->used_idx;
    }

    /* Count descriptors that are still outstanding. */
    for (i = 0; i < vq->inflight->desc_num; i++) {
        if (vq->inflight->desc[i].inflight == 1) {
            vq->inuse++;
        }
    }

    /* Avail index resumes past everything used or still in flight. */
    vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;

    if (vq->inuse) {
        /* Collect the outstanding descriptors for resubmission. */
        vq->resubmit_list = calloc(vq->inuse, sizeof(VuVirtqInflightDesc));
        if (!vq->resubmit_list) {
            return -1;
        }

        for (i = 0; i < vq->inflight->desc_num; i++) {
            if (vq->inflight->desc[i].inflight) {
                vq->resubmit_list[vq->resubmit_num].index = i;
                vq->resubmit_list[vq->resubmit_num].counter =
                    vq->inflight->desc[i].counter;
                vq->resubmit_num++;
            }
        }

        if (vq->resubmit_num > 1) {
            /* Sort newest-first by submission counter. */
            qsort(vq->resubmit_list, vq->resubmit_num,
                  sizeof(VuVirtqInflightDesc), inflight_desc_compare);
        }
        /* Continue the submission counter after the newest entry. */
        vq->counter = vq->resubmit_list[0].counter + 1;
    }

    /* in case of I/O hang after reconnecting */
    if (eventfd_write(vq->kick_fd, 1)) {
        return -1;
    }

    return 0;
}
13940df750e9SMarc-André Lureau
/*
 * Handle VHOST_USER_SET_VRING_KICK: install the eventfd the frontend
 * signals when new buffers are available, mark the queue started, and
 * begin watching the fd if a handler is registered.
 * Returns false: no reply is sent.
 */
static bool
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
    bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    /* Validates the index/fd count; closes the fds on error or nofd. */
    if (!vu_check_queue_msg_file(dev, vmsg)) {
        return false;
    }

    /* Tear down any previous kick fd: remove the watch before closing. */
    if (dev->vq[index].kick_fd != -1) {
        dev->remove_watch(dev, dev->vq[index].kick_fd);
        close(dev->vq[index].kick_fd);
        dev->vq[index].kick_fd = -1;
    }

    dev->vq[index].kick_fd = nofd ? -1 : vmsg->fds[0];
    DPRINT("Got kick_fd: %d for vq: %d\n", dev->vq[index].kick_fd, index);

    dev->vq[index].started = true;
    if (dev->iface->queue_set_started) {
        dev->iface->queue_set_started(dev, index, true);
    }

    /* Only watch the fd when there is a handler to dispatch kicks to. */
    if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
        dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
                       vu_kick_cb, (void *)(long)index);

        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
               dev->vq[index].kick_fd, index);
    }

    /* Recover any requests left in flight across a reconnect. */
    if (vu_check_queue_inflights(dev, &dev->vq[index])) {
        vu_panic(dev, "Failed to check inflights for vq: %d\n", index);
    }

    return false;
}
14350df750e9SMarc-André Lureau
/*
 * Install (or clear, when @handler is NULL) the kick callback for @vq,
 * adding or removing the watch on the queue's kick fd accordingly.
 */
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler)
{
    int qidx = vq - dev->vq;

    vq->handler = handler;

    /* Nothing to (un)watch without a kick fd. */
    if (vq->kick_fd < 0) {
        return;
    }

    if (handler) {
        dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
                       vu_kick_cb, (void *)(long)qidx);
    } else {
        dev->remove_watch(dev, vq->kick_fd);
    }
}
14510df750e9SMarc-André Lureau
/*
 * Ask the frontend to install @fd (at @offset/@size) as a host notifier
 * for @vq, or to remove the notifier when @fd is -1.
 *
 * Requires VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD.  Returns true if the
 * frontend acknowledged the request, false otherwise.
 */
bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
                                int size, int offset)
{
    int qidx = vq - dev->vq;
    int fd_num = 0;
    VhostUserMsg vmsg = {
        .request = VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG,
        .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .size = sizeof(vmsg.payload.area),
        .payload.area = {
            .u64 = qidx & VHOST_USER_VRING_IDX_MASK,
            .size = size,
            .offset = offset,
        },
    };

    /* fd == -1 means "remove the notifier" rather than install one. */
    if (fd == -1) {
        vmsg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
    } else {
        vmsg.fds[fd_num++] = fd;
    }

    vmsg.fd_num = fd_num;

    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD)) {
        return false;
    }

    /* Serialize use of the backend channel. */
    pthread_mutex_lock(&dev->backend_mutex);
    if (!vu_message_write(dev, dev->backend_fd, &vmsg)) {
        pthread_mutex_unlock(&dev->backend_mutex);
        return false;
    }

    /* Also unlocks the backend_mutex */
    return vu_process_message_reply(dev, &vmsg);
}
14890df750e9SMarc-André Lureau
1490ce0f3b03SAlbert Esteve bool
vu_lookup_shared_object(VuDev * dev,unsigned char uuid[UUID_LEN],int * dmabuf_fd)1491ce0f3b03SAlbert Esteve vu_lookup_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN],
1492ce0f3b03SAlbert Esteve int *dmabuf_fd)
1493ce0f3b03SAlbert Esteve {
1494ce0f3b03SAlbert Esteve bool result = false;
1495ce0f3b03SAlbert Esteve VhostUserMsg msg_reply;
1496ce0f3b03SAlbert Esteve VhostUserMsg msg = {
1497ce0f3b03SAlbert Esteve .request = VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP,
1498ce0f3b03SAlbert Esteve .size = sizeof(msg.payload.object),
1499ce0f3b03SAlbert Esteve .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1500ce0f3b03SAlbert Esteve };
1501ce0f3b03SAlbert Esteve
1502ce0f3b03SAlbert Esteve memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1503ce0f3b03SAlbert Esteve
1504ce0f3b03SAlbert Esteve if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1505ce0f3b03SAlbert Esteve return false;
1506ce0f3b03SAlbert Esteve }
1507ce0f3b03SAlbert Esteve
1508ce0f3b03SAlbert Esteve pthread_mutex_lock(&dev->backend_mutex);
1509ce0f3b03SAlbert Esteve if (!vu_message_write(dev, dev->backend_fd, &msg)) {
1510ce0f3b03SAlbert Esteve goto out;
1511ce0f3b03SAlbert Esteve }
1512ce0f3b03SAlbert Esteve
1513ce0f3b03SAlbert Esteve if (!vu_message_read_default(dev, dev->backend_fd, &msg_reply)) {
1514ce0f3b03SAlbert Esteve goto out;
1515ce0f3b03SAlbert Esteve }
1516ce0f3b03SAlbert Esteve
1517ce0f3b03SAlbert Esteve if (msg_reply.request != msg.request) {
1518ce0f3b03SAlbert Esteve DPRINT("Received unexpected msg type. Expected %d, received %d",
1519ce0f3b03SAlbert Esteve msg.request, msg_reply.request);
1520ce0f3b03SAlbert Esteve goto out;
1521ce0f3b03SAlbert Esteve }
1522ce0f3b03SAlbert Esteve
1523ce0f3b03SAlbert Esteve if (msg_reply.fd_num != 1) {
1524ce0f3b03SAlbert Esteve DPRINT("Received unexpected number of fds. Expected 1, received %d",
1525ce0f3b03SAlbert Esteve msg_reply.fd_num);
1526ce0f3b03SAlbert Esteve goto out;
1527ce0f3b03SAlbert Esteve }
1528ce0f3b03SAlbert Esteve
1529ce0f3b03SAlbert Esteve *dmabuf_fd = msg_reply.fds[0];
1530ce0f3b03SAlbert Esteve result = *dmabuf_fd > 0 && msg_reply.payload.u64 == 0;
1531ce0f3b03SAlbert Esteve out:
1532ce0f3b03SAlbert Esteve pthread_mutex_unlock(&dev->backend_mutex);
1533ce0f3b03SAlbert Esteve
1534ce0f3b03SAlbert Esteve return result;
1535ce0f3b03SAlbert Esteve }
1536ce0f3b03SAlbert Esteve
1537ce0f3b03SAlbert Esteve static bool
vu_send_message(VuDev * dev,VhostUserMsg * vmsg)1538ce0f3b03SAlbert Esteve vu_send_message(VuDev *dev, VhostUserMsg *vmsg)
1539ce0f3b03SAlbert Esteve {
1540ce0f3b03SAlbert Esteve bool result = false;
1541ce0f3b03SAlbert Esteve pthread_mutex_lock(&dev->backend_mutex);
1542ce0f3b03SAlbert Esteve if (!vu_message_write(dev, dev->backend_fd, vmsg)) {
1543ce0f3b03SAlbert Esteve goto out;
1544ce0f3b03SAlbert Esteve }
1545ce0f3b03SAlbert Esteve
1546ce0f3b03SAlbert Esteve result = true;
1547ce0f3b03SAlbert Esteve out:
1548ce0f3b03SAlbert Esteve pthread_mutex_unlock(&dev->backend_mutex);
1549ce0f3b03SAlbert Esteve
1550ce0f3b03SAlbert Esteve return result;
1551ce0f3b03SAlbert Esteve }
1552ce0f3b03SAlbert Esteve
1553ce0f3b03SAlbert Esteve bool
vu_add_shared_object(VuDev * dev,unsigned char uuid[UUID_LEN])1554ce0f3b03SAlbert Esteve vu_add_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN])
1555ce0f3b03SAlbert Esteve {
1556ce0f3b03SAlbert Esteve VhostUserMsg msg = {
1557ce0f3b03SAlbert Esteve .request = VHOST_USER_BACKEND_SHARED_OBJECT_ADD,
1558ce0f3b03SAlbert Esteve .size = sizeof(msg.payload.object),
1559ce0f3b03SAlbert Esteve .flags = VHOST_USER_VERSION,
1560ce0f3b03SAlbert Esteve };
1561ce0f3b03SAlbert Esteve
1562ce0f3b03SAlbert Esteve memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1563ce0f3b03SAlbert Esteve
1564ce0f3b03SAlbert Esteve if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1565ce0f3b03SAlbert Esteve return false;
1566ce0f3b03SAlbert Esteve }
1567ce0f3b03SAlbert Esteve
1568ce0f3b03SAlbert Esteve return vu_send_message(dev, &msg);
1569ce0f3b03SAlbert Esteve }
1570ce0f3b03SAlbert Esteve
1571ce0f3b03SAlbert Esteve bool
vu_rm_shared_object(VuDev * dev,unsigned char uuid[UUID_LEN])1572ce0f3b03SAlbert Esteve vu_rm_shared_object(VuDev *dev, unsigned char uuid[UUID_LEN])
1573ce0f3b03SAlbert Esteve {
1574ce0f3b03SAlbert Esteve VhostUserMsg msg = {
1575ce0f3b03SAlbert Esteve .request = VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE,
1576ce0f3b03SAlbert Esteve .size = sizeof(msg.payload.object),
1577ce0f3b03SAlbert Esteve .flags = VHOST_USER_VERSION,
1578ce0f3b03SAlbert Esteve };
1579ce0f3b03SAlbert Esteve
1580ce0f3b03SAlbert Esteve memcpy(msg.payload.object.uuid, uuid, sizeof(uuid[0]) * UUID_LEN);
1581ce0f3b03SAlbert Esteve
1582ce0f3b03SAlbert Esteve if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SHARED_OBJECT)) {
1583ce0f3b03SAlbert Esteve return false;
1584ce0f3b03SAlbert Esteve }
1585ce0f3b03SAlbert Esteve
1586ce0f3b03SAlbert Esteve return vu_send_message(dev, &msg);
1587ce0f3b03SAlbert Esteve }
1588ce0f3b03SAlbert Esteve
15890df750e9SMarc-André Lureau static bool
vu_set_vring_call_exec(VuDev * dev,VhostUserMsg * vmsg)15900df750e9SMarc-André Lureau vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
15910df750e9SMarc-André Lureau {
15920df750e9SMarc-André Lureau int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
15930df750e9SMarc-André Lureau bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
15940df750e9SMarc-André Lureau
15950df750e9SMarc-André Lureau DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
15960df750e9SMarc-André Lureau
15970df750e9SMarc-André Lureau if (!vu_check_queue_msg_file(dev, vmsg)) {
15980df750e9SMarc-André Lureau return false;
15990df750e9SMarc-André Lureau }
16000df750e9SMarc-André Lureau
16010df750e9SMarc-André Lureau if (dev->vq[index].call_fd != -1) {
16020df750e9SMarc-André Lureau close(dev->vq[index].call_fd);
16030df750e9SMarc-André Lureau dev->vq[index].call_fd = -1;
16040df750e9SMarc-André Lureau }
16050df750e9SMarc-André Lureau
16060df750e9SMarc-André Lureau dev->vq[index].call_fd = nofd ? -1 : vmsg->fds[0];
16070df750e9SMarc-André Lureau
16080df750e9SMarc-André Lureau /* in case of I/O hang after reconnecting */
16090df750e9SMarc-André Lureau if (dev->vq[index].call_fd != -1 && eventfd_write(vmsg->fds[0], 1)) {
16100df750e9SMarc-André Lureau return -1;
16110df750e9SMarc-André Lureau }
16120df750e9SMarc-André Lureau
16130df750e9SMarc-André Lureau DPRINT("Got call_fd: %d for vq: %d\n", dev->vq[index].call_fd, index);
16140df750e9SMarc-André Lureau
16150df750e9SMarc-André Lureau return false;
16160df750e9SMarc-André Lureau }
16170df750e9SMarc-André Lureau
16180df750e9SMarc-André Lureau static bool
vu_set_vring_err_exec(VuDev * dev,VhostUserMsg * vmsg)16190df750e9SMarc-André Lureau vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
16200df750e9SMarc-André Lureau {
16210df750e9SMarc-André Lureau int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
16220df750e9SMarc-André Lureau bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
16230df750e9SMarc-André Lureau
16240df750e9SMarc-André Lureau DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
16250df750e9SMarc-André Lureau
16260df750e9SMarc-André Lureau if (!vu_check_queue_msg_file(dev, vmsg)) {
16270df750e9SMarc-André Lureau return false;
16280df750e9SMarc-André Lureau }
16290df750e9SMarc-André Lureau
16300df750e9SMarc-André Lureau if (dev->vq[index].err_fd != -1) {
16310df750e9SMarc-André Lureau close(dev->vq[index].err_fd);
16320df750e9SMarc-André Lureau dev->vq[index].err_fd = -1;
16330df750e9SMarc-André Lureau }
16340df750e9SMarc-André Lureau
16350df750e9SMarc-André Lureau dev->vq[index].err_fd = nofd ? -1 : vmsg->fds[0];
16360df750e9SMarc-André Lureau
16370df750e9SMarc-André Lureau return false;
16380df750e9SMarc-André Lureau }
16390df750e9SMarc-André Lureau
/*
 * Handle VHOST_USER_GET_PROTOCOL_FEATURES: build the mask of protocol
 * features this library supports, add optional ones that depend on the
 * environment (userfaultfd) or on the device implementation's
 * callbacks, and reply with it.  Returns true: a u64 reply is sent.
 */
static bool
vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    /*
     * Note that we support, but intentionally do not set,
     * VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. This means that
     * a device implementation can return it in its callback
     * (get_protocol_features) if it wants to use this for
     * simulation, but it is otherwise not desirable (if even
     * implemented by the frontend.)
     */
    uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
                        1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
                        1ULL << VHOST_USER_PROTOCOL_F_BACKEND_REQ |
                        1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
                        1ULL << VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD |
                        1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |
                        1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS;

    /* Postcopy page-fault handling needs userfaultfd support. */
    if (have_userfault()) {
        features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
    }

    /* Config space access only works if the device implements both ops. */
    if (dev->iface->get_config && dev->iface->set_config) {
        features |= 1ULL << VHOST_USER_PROTOCOL_F_CONFIG;
    }

    /* Let the device implementation advertise extra features. */
    if (dev->iface->get_protocol_features) {
        features |= dev->iface->get_protocol_features(dev);
    }

    vmsg_set_reply_u64(vmsg, features);
    return true;
}
16740df750e9SMarc-André Lureau
/*
 * Handle VHOST_USER_SET_PROTOCOL_FEATURES: record the protocol feature
 * set negotiated by the frontend, enforce the in-band-notification
 * feature-dependency rule, and notify the device implementation.
 * Returns false: no reply is sent.
 */
static bool
vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    uint64_t features = vmsg->payload.u64;

    DPRINT("u64: 0x%016"PRIx64"\n", features);

    dev->protocol_features = vmsg->payload.u64;

    /* F_INBAND_NOTIFICATIONS is only usable with both of these set. */
    if (vu_has_protocol_feature(dev,
                                VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
        (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ) ||
         !vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
        /*
         * The use case for using messages for kick/call is simulation, to make
         * the kick and call synchronous. To actually get that behaviour, both
         * of the other features are required.
         * Theoretically, one could use only kick messages, or do them without
         * having F_REPLY_ACK, but too many (possibly pending) messages on the
         * socket will eventually cause the frontend to hang, to avoid this in
         * scenarios where not desired enforce that the settings are in a way
         * that actually enables the simulation case.
         */
        vu_panic(dev,
                 "F_IN_BAND_NOTIFICATIONS requires F_BACKEND_REQ && F_REPLY_ACK");
        return false;
    }

    if (dev->iface->set_protocol_features) {
        dev->iface->set_protocol_features(dev, features);
    }

    return false;
}
17090df750e9SMarc-André Lureau
17100df750e9SMarc-André Lureau static bool
vu_get_queue_num_exec(VuDev * dev,VhostUserMsg * vmsg)17110df750e9SMarc-André Lureau vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
17120df750e9SMarc-André Lureau {
17130df750e9SMarc-André Lureau vmsg_set_reply_u64(vmsg, dev->max_queues);
17140df750e9SMarc-André Lureau return true;
17150df750e9SMarc-André Lureau }
17160df750e9SMarc-André Lureau
17170df750e9SMarc-André Lureau static bool
vu_set_vring_enable_exec(VuDev * dev,VhostUserMsg * vmsg)17180df750e9SMarc-André Lureau vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
17190df750e9SMarc-André Lureau {
17200df750e9SMarc-André Lureau unsigned int index = vmsg->payload.state.index;
17210df750e9SMarc-André Lureau unsigned int enable = vmsg->payload.state.num;
17220df750e9SMarc-André Lureau
17230df750e9SMarc-André Lureau DPRINT("State.index: %u\n", index);
17240df750e9SMarc-André Lureau DPRINT("State.enable: %u\n", enable);
17250df750e9SMarc-André Lureau
17260df750e9SMarc-André Lureau if (index >= dev->max_queues) {
17270df750e9SMarc-André Lureau vu_panic(dev, "Invalid vring_enable index: %u", index);
17280df750e9SMarc-André Lureau return false;
17290df750e9SMarc-André Lureau }
17300df750e9SMarc-André Lureau
17310df750e9SMarc-André Lureau dev->vq[index].enable = enable;
17320df750e9SMarc-André Lureau return false;
17330df750e9SMarc-André Lureau }
17340df750e9SMarc-André Lureau
17350df750e9SMarc-André Lureau static bool
vu_set_backend_req_fd(VuDev * dev,VhostUserMsg * vmsg)1736f8ed3648SManos Pitsidianakis vu_set_backend_req_fd(VuDev *dev, VhostUserMsg *vmsg)
17370df750e9SMarc-André Lureau {
17380df750e9SMarc-André Lureau if (vmsg->fd_num != 1) {
1739f8ed3648SManos Pitsidianakis vu_panic(dev, "Invalid backend_req_fd message (%d fd's)", vmsg->fd_num);
17400df750e9SMarc-André Lureau return false;
17410df750e9SMarc-André Lureau }
17420df750e9SMarc-André Lureau
1743f8ed3648SManos Pitsidianakis if (dev->backend_fd != -1) {
1744f8ed3648SManos Pitsidianakis close(dev->backend_fd);
17450df750e9SMarc-André Lureau }
1746f8ed3648SManos Pitsidianakis dev->backend_fd = vmsg->fds[0];
1747f8ed3648SManos Pitsidianakis DPRINT("Got backend_fd: %d\n", vmsg->fds[0]);
17480df750e9SMarc-André Lureau
17490df750e9SMarc-André Lureau return false;
17500df750e9SMarc-André Lureau }
17510df750e9SMarc-André Lureau
17520df750e9SMarc-André Lureau static bool
vu_get_config(VuDev * dev,VhostUserMsg * vmsg)17530df750e9SMarc-André Lureau vu_get_config(VuDev *dev, VhostUserMsg *vmsg)
17540df750e9SMarc-André Lureau {
17550df750e9SMarc-André Lureau int ret = -1;
17560df750e9SMarc-André Lureau
17570df750e9SMarc-André Lureau if (dev->iface->get_config) {
17580df750e9SMarc-André Lureau ret = dev->iface->get_config(dev, vmsg->payload.config.region,
17590df750e9SMarc-André Lureau vmsg->payload.config.size);
17600df750e9SMarc-André Lureau }
17610df750e9SMarc-André Lureau
17620df750e9SMarc-André Lureau if (ret) {
1763f8ed3648SManos Pitsidianakis /* resize to zero to indicate an error to frontend */
17640df750e9SMarc-André Lureau vmsg->size = 0;
17650df750e9SMarc-André Lureau }
17660df750e9SMarc-André Lureau
17670df750e9SMarc-André Lureau return true;
17680df750e9SMarc-André Lureau }
17690df750e9SMarc-André Lureau
17700df750e9SMarc-André Lureau static bool
vu_set_config(VuDev * dev,VhostUserMsg * vmsg)17710df750e9SMarc-André Lureau vu_set_config(VuDev *dev, VhostUserMsg *vmsg)
17720df750e9SMarc-André Lureau {
17730df750e9SMarc-André Lureau int ret = -1;
17740df750e9SMarc-André Lureau
17750df750e9SMarc-André Lureau if (dev->iface->set_config) {
17760df750e9SMarc-André Lureau ret = dev->iface->set_config(dev, vmsg->payload.config.region,
17770df750e9SMarc-André Lureau vmsg->payload.config.offset,
17780df750e9SMarc-André Lureau vmsg->payload.config.size,
17790df750e9SMarc-André Lureau vmsg->payload.config.flags);
17800df750e9SMarc-André Lureau if (ret) {
17810df750e9SMarc-André Lureau vu_panic(dev, "Set virtio configuration space failed");
17820df750e9SMarc-André Lureau }
17830df750e9SMarc-André Lureau }
17840df750e9SMarc-André Lureau
17850df750e9SMarc-André Lureau return false;
17860df750e9SMarc-André Lureau }
17870df750e9SMarc-André Lureau
/*
 * VHOST_USER_POSTCOPY_ADVISE: open a userfaultfd, negotiate the UFFD
 * API with the kernel, and hand the descriptor back to the frontend so
 * it can service page faults during postcopy migration.  Always sends
 * a reply; on failure the reply carries fd -1.
 */
static bool
vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg)
{
#ifdef UFFDIO_API
    struct uffdio_api api_struct;

    dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    vmsg->size = 0;
#else
    /* Headers lack userfaultfd support: force the failure path below.
     * NOTE(review): vmsg->size is only zeroed in the UFFDIO_API branch
     * above, not here — confirm whether the stale reply size matters to
     * the frontend on this path. */
    dev->postcopy_ufd = -1;
#endif

    if (dev->postcopy_ufd == -1) {
        vu_panic(dev, "Userfaultfd not available: %s", strerror(errno));
        goto out;
    }

#ifdef UFFDIO_API
    api_struct.api = UFFD_API;
    api_struct.features = 0; /* no optional UFFD features requested */
    if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
        vu_panic(dev, "Failed UFFDIO_API: %s", strerror(errno));
        close(dev->postcopy_ufd);
        dev->postcopy_ufd = -1;
        goto out;
    }
    /* TODO: Stash feature flags somewhere */
#endif

out:
    /* Return a ufd to the QEMU */
    vmsg->fd_num = 1;
    vmsg->fds[0] = dev->postcopy_ufd;
    return true; /* = send a reply */
}
18230df750e9SMarc-André Lureau
18240df750e9SMarc-André Lureau static bool
vu_set_postcopy_listen(VuDev * dev,VhostUserMsg * vmsg)18250df750e9SMarc-André Lureau vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg)
18260df750e9SMarc-André Lureau {
18270df750e9SMarc-André Lureau if (dev->nregions) {
18280df750e9SMarc-André Lureau vu_panic(dev, "Regions already registered at postcopy-listen");
18290df750e9SMarc-André Lureau vmsg_set_reply_u64(vmsg, -1);
18300df750e9SMarc-André Lureau return true;
18310df750e9SMarc-André Lureau }
18320df750e9SMarc-André Lureau dev->postcopy_listening = true;
18330df750e9SMarc-André Lureau
18340df750e9SMarc-André Lureau vmsg_set_reply_u64(vmsg, 0);
18350df750e9SMarc-André Lureau return true;
18360df750e9SMarc-André Lureau }
18370df750e9SMarc-André Lureau
18380df750e9SMarc-André Lureau static bool
vu_set_postcopy_end(VuDev * dev,VhostUserMsg * vmsg)18390df750e9SMarc-André Lureau vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
18400df750e9SMarc-André Lureau {
18410df750e9SMarc-André Lureau DPRINT("%s: Entry\n", __func__);
18420df750e9SMarc-André Lureau dev->postcopy_listening = false;
18430df750e9SMarc-André Lureau if (dev->postcopy_ufd > 0) {
18440df750e9SMarc-André Lureau close(dev->postcopy_ufd);
18450df750e9SMarc-André Lureau dev->postcopy_ufd = -1;
18460df750e9SMarc-André Lureau DPRINT("%s: Done close\n", __func__);
18470df750e9SMarc-André Lureau }
18480df750e9SMarc-André Lureau
18490df750e9SMarc-André Lureau vmsg_set_reply_u64(vmsg, 0);
18500df750e9SMarc-André Lureau DPRINT("%s: exit\n", __func__);
18510df750e9SMarc-André Lureau return true;
18520df750e9SMarc-André Lureau }
18530df750e9SMarc-André Lureau
18540df750e9SMarc-André Lureau static inline uint64_t
vu_inflight_queue_size(uint16_t queue_size)18550df750e9SMarc-André Lureau vu_inflight_queue_size(uint16_t queue_size)
18560df750e9SMarc-André Lureau {
18570df750e9SMarc-André Lureau return ALIGN_UP(sizeof(VuDescStateSplit) * queue_size +
18580df750e9SMarc-André Lureau sizeof(uint16_t), INFLIGHT_ALIGNMENT);
18590df750e9SMarc-André Lureau }
18600df750e9SMarc-André Lureau
#ifdef MFD_ALLOW_SEALING
/*
 * Create a sealed shared-memory area of @size bytes backed by a memfd
 * named @name.  @flags are the F_SEAL_* seals applied after sizing.
 *
 * On success returns the mapped address and stores the memfd in *fd
 * (owned by the caller).  On failure returns NULL; any descriptor that
 * was created is closed and *fd is reset to -1, so the caller never
 * sees a stale, already-closed fd (the previous code left the closed
 * value in *fd on the ftruncate/fcntl/mmap error paths).
 */
static void *
memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
{
    void *ptr;
    int ret;

    *fd = memfd_create(name, MFD_ALLOW_SEALING);
    if (*fd < 0) {
        *fd = -1;
        return NULL;
    }

    ret = ftruncate(*fd, size);
    if (ret < 0) {
        close(*fd);
        *fd = -1;
        return NULL;
    }

    ret = fcntl(*fd, F_ADD_SEALS, flags);
    if (ret < 0) {
        close(*fd);
        *fd = -1;
        return NULL;
    }

    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
    if (ptr == MAP_FAILED) {
        close(*fd);
        *fd = -1;
        return NULL;
    }

    return ptr;
}
#endif
18940df750e9SMarc-André Lureau
18950df750e9SMarc-André Lureau static bool
vu_get_inflight_fd(VuDev * dev,VhostUserMsg * vmsg)18960df750e9SMarc-André Lureau vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
18970df750e9SMarc-André Lureau {
18980df750e9SMarc-André Lureau int fd = -1;
18990df750e9SMarc-André Lureau void *addr = NULL;
19000df750e9SMarc-André Lureau uint64_t mmap_size;
19010df750e9SMarc-André Lureau uint16_t num_queues, queue_size;
19020df750e9SMarc-André Lureau
19030df750e9SMarc-André Lureau if (vmsg->size != sizeof(vmsg->payload.inflight)) {
19040df750e9SMarc-André Lureau vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size);
19050df750e9SMarc-André Lureau vmsg->payload.inflight.mmap_size = 0;
19060df750e9SMarc-André Lureau return true;
19070df750e9SMarc-André Lureau }
19080df750e9SMarc-André Lureau
19090df750e9SMarc-André Lureau num_queues = vmsg->payload.inflight.num_queues;
19100df750e9SMarc-André Lureau queue_size = vmsg->payload.inflight.queue_size;
19110df750e9SMarc-André Lureau
19120df750e9SMarc-André Lureau DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
19130df750e9SMarc-André Lureau DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
19140df750e9SMarc-André Lureau
19150df750e9SMarc-André Lureau mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
19160df750e9SMarc-André Lureau
19170df750e9SMarc-André Lureau #ifdef MFD_ALLOW_SEALING
19180df750e9SMarc-André Lureau addr = memfd_alloc("vhost-inflight", mmap_size,
19190df750e9SMarc-André Lureau F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
19200df750e9SMarc-André Lureau &fd);
19210df750e9SMarc-André Lureau #else
19220df750e9SMarc-André Lureau vu_panic(dev, "Not implemented: memfd support is missing");
19230df750e9SMarc-André Lureau #endif
19240df750e9SMarc-André Lureau
19250df750e9SMarc-André Lureau if (!addr) {
19260df750e9SMarc-André Lureau vu_panic(dev, "Failed to alloc vhost inflight area");
19270df750e9SMarc-André Lureau vmsg->payload.inflight.mmap_size = 0;
19280df750e9SMarc-André Lureau return true;
19290df750e9SMarc-André Lureau }
19300df750e9SMarc-André Lureau
19310df750e9SMarc-André Lureau memset(addr, 0, mmap_size);
19320df750e9SMarc-André Lureau
19330df750e9SMarc-André Lureau dev->inflight_info.addr = addr;
19340df750e9SMarc-André Lureau dev->inflight_info.size = vmsg->payload.inflight.mmap_size = mmap_size;
19350df750e9SMarc-André Lureau dev->inflight_info.fd = vmsg->fds[0] = fd;
19360df750e9SMarc-André Lureau vmsg->fd_num = 1;
19370df750e9SMarc-André Lureau vmsg->payload.inflight.mmap_offset = 0;
19380df750e9SMarc-André Lureau
19390df750e9SMarc-André Lureau DPRINT("send inflight mmap_size: %"PRId64"\n",
19400df750e9SMarc-André Lureau vmsg->payload.inflight.mmap_size);
19410df750e9SMarc-André Lureau DPRINT("send inflight mmap offset: %"PRId64"\n",
19420df750e9SMarc-André Lureau vmsg->payload.inflight.mmap_offset);
19430df750e9SMarc-André Lureau
19440df750e9SMarc-André Lureau return true;
19450df750e9SMarc-André Lureau }
19460df750e9SMarc-André Lureau
19470df750e9SMarc-André Lureau static bool
vu_set_inflight_fd(VuDev * dev,VhostUserMsg * vmsg)19480df750e9SMarc-André Lureau vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
19490df750e9SMarc-André Lureau {
19500df750e9SMarc-André Lureau int fd, i;
19510df750e9SMarc-André Lureau uint64_t mmap_size, mmap_offset;
19520df750e9SMarc-André Lureau uint16_t num_queues, queue_size;
19530df750e9SMarc-André Lureau void *rc;
19540df750e9SMarc-André Lureau
19550df750e9SMarc-André Lureau if (vmsg->fd_num != 1 ||
19560df750e9SMarc-André Lureau vmsg->size != sizeof(vmsg->payload.inflight)) {
19570df750e9SMarc-André Lureau vu_panic(dev, "Invalid set_inflight_fd message size:%d fds:%d",
19580df750e9SMarc-André Lureau vmsg->size, vmsg->fd_num);
19590df750e9SMarc-André Lureau return false;
19600df750e9SMarc-André Lureau }
19610df750e9SMarc-André Lureau
19620df750e9SMarc-André Lureau fd = vmsg->fds[0];
19630df750e9SMarc-André Lureau mmap_size = vmsg->payload.inflight.mmap_size;
19640df750e9SMarc-André Lureau mmap_offset = vmsg->payload.inflight.mmap_offset;
19650df750e9SMarc-André Lureau num_queues = vmsg->payload.inflight.num_queues;
19660df750e9SMarc-André Lureau queue_size = vmsg->payload.inflight.queue_size;
19670df750e9SMarc-André Lureau
19680df750e9SMarc-André Lureau DPRINT("set_inflight_fd mmap_size: %"PRId64"\n", mmap_size);
19690df750e9SMarc-André Lureau DPRINT("set_inflight_fd mmap_offset: %"PRId64"\n", mmap_offset);
19700df750e9SMarc-André Lureau DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
19710df750e9SMarc-André Lureau DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
19720df750e9SMarc-André Lureau
19730df750e9SMarc-André Lureau rc = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
19740df750e9SMarc-André Lureau fd, mmap_offset);
19750df750e9SMarc-André Lureau
19760df750e9SMarc-André Lureau if (rc == MAP_FAILED) {
19770df750e9SMarc-André Lureau vu_panic(dev, "set_inflight_fd mmap error: %s", strerror(errno));
19780df750e9SMarc-André Lureau return false;
19790df750e9SMarc-André Lureau }
19800df750e9SMarc-André Lureau
19810df750e9SMarc-André Lureau if (dev->inflight_info.fd) {
19820df750e9SMarc-André Lureau close(dev->inflight_info.fd);
19830df750e9SMarc-André Lureau }
19840df750e9SMarc-André Lureau
19850df750e9SMarc-André Lureau if (dev->inflight_info.addr) {
19860df750e9SMarc-André Lureau munmap(dev->inflight_info.addr, dev->inflight_info.size);
19870df750e9SMarc-André Lureau }
19880df750e9SMarc-André Lureau
19890df750e9SMarc-André Lureau dev->inflight_info.fd = fd;
19900df750e9SMarc-André Lureau dev->inflight_info.addr = rc;
19910df750e9SMarc-André Lureau dev->inflight_info.size = mmap_size;
19920df750e9SMarc-André Lureau
19930df750e9SMarc-André Lureau for (i = 0; i < num_queues; i++) {
19940df750e9SMarc-André Lureau dev->vq[i].inflight = (VuVirtqInflight *)rc;
19950df750e9SMarc-André Lureau dev->vq[i].inflight->desc_num = queue_size;
19960df750e9SMarc-André Lureau rc = (void *)((char *)rc + vu_inflight_queue_size(queue_size));
19970df750e9SMarc-André Lureau }
19980df750e9SMarc-André Lureau
19990df750e9SMarc-André Lureau return false;
20000df750e9SMarc-André Lureau }
20010df750e9SMarc-André Lureau
20020df750e9SMarc-André Lureau static bool
vu_handle_vring_kick(VuDev * dev,VhostUserMsg * vmsg)20030df750e9SMarc-André Lureau vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg)
20040df750e9SMarc-André Lureau {
20050df750e9SMarc-André Lureau unsigned int index = vmsg->payload.state.index;
20060df750e9SMarc-André Lureau
20070df750e9SMarc-André Lureau if (index >= dev->max_queues) {
20080df750e9SMarc-André Lureau vu_panic(dev, "Invalid queue index: %u", index);
20090df750e9SMarc-André Lureau return false;
20100df750e9SMarc-André Lureau }
20110df750e9SMarc-André Lureau
20120df750e9SMarc-André Lureau DPRINT("Got kick message: handler:%p idx:%u\n",
20130df750e9SMarc-André Lureau dev->vq[index].handler, index);
20140df750e9SMarc-André Lureau
20150df750e9SMarc-André Lureau if (!dev->vq[index].started) {
20160df750e9SMarc-André Lureau dev->vq[index].started = true;
20170df750e9SMarc-André Lureau
20180df750e9SMarc-André Lureau if (dev->iface->queue_set_started) {
20190df750e9SMarc-André Lureau dev->iface->queue_set_started(dev, index, true);
20200df750e9SMarc-André Lureau }
20210df750e9SMarc-André Lureau }
20220df750e9SMarc-André Lureau
20230df750e9SMarc-André Lureau if (dev->vq[index].handler) {
20240df750e9SMarc-André Lureau dev->vq[index].handler(dev, index);
20250df750e9SMarc-André Lureau }
20260df750e9SMarc-André Lureau
20270df750e9SMarc-André Lureau return false;
20280df750e9SMarc-André Lureau }
20290df750e9SMarc-André Lureau
/*
 * VHOST_USER_GET_MAX_MEM_SLOTS: report how many memory regions this
 * library can track.  Always sends a reply.
 */
static bool vu_handle_get_max_memslots(VuDev *dev, VhostUserMsg *vmsg)
{
    uint64_t slots = VHOST_USER_MAX_RAM_SLOTS;

    vmsg_set_reply_u64(vmsg, slots);
    DPRINT("u64: 0x%016"PRIx64"\n", slots);

    return true;
}
20380df750e9SMarc-André Lureau
/*
 * Decode one vhost-user request and dispatch it to the matching
 * handler.  Returns true when a reply has been prepared in @vmsg and
 * must be sent back to the frontend, false otherwise.
 *
 * The device's optional iface->process_msg hook gets first refusal and
 * may fully override the default handling below.
 */
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
    int do_reply = 0;

    /* Print out generic part of the request. */
    DPRINT("================ Vhost user message ================\n");
    DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
           vmsg->request);
    DPRINT("Flags:   0x%x\n", vmsg->flags);
    DPRINT("Size:    %u\n", vmsg->size);

    if (vmsg->fd_num) {
        int i;
        DPRINT("Fds:");
        for (i = 0; i < vmsg->fd_num; i++) {
            DPRINT(" %d", vmsg->fds[i]);
        }
        DPRINT("\n");
    }

    /* Device-specific override: the hook decides whether to reply. */
    if (dev->iface->process_msg &&
        dev->iface->process_msg(dev, vmsg, &do_reply)) {
        return do_reply;
    }

    /* Each handler returns whether a reply must be sent. */
    switch (vmsg->request) {
    case VHOST_USER_GET_FEATURES:
        return vu_get_features_exec(dev, vmsg);
    case VHOST_USER_SET_FEATURES:
        return vu_set_features_exec(dev, vmsg);
    case VHOST_USER_GET_PROTOCOL_FEATURES:
        return vu_get_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_PROTOCOL_FEATURES:
        return vu_set_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_OWNER:
        return vu_set_owner_exec(dev, vmsg);
    case VHOST_USER_RESET_OWNER:
        return vu_reset_device_exec(dev, vmsg);
    case VHOST_USER_SET_MEM_TABLE:
        return vu_set_mem_table_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_BASE:
        return vu_set_log_base_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_FD:
        return vu_set_log_fd_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_NUM:
        return vu_set_vring_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ADDR:
        return vu_set_vring_addr_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_BASE:
        return vu_set_vring_base_exec(dev, vmsg);
    case VHOST_USER_GET_VRING_BASE:
        return vu_get_vring_base_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_KICK:
        return vu_set_vring_kick_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_CALL:
        return vu_set_vring_call_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ERR:
        return vu_set_vring_err_exec(dev, vmsg);
    case VHOST_USER_GET_QUEUE_NUM:
        return vu_get_queue_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ENABLE:
        return vu_set_vring_enable_exec(dev, vmsg);
    case VHOST_USER_SET_BACKEND_REQ_FD:
        return vu_set_backend_req_fd(dev, vmsg);
    case VHOST_USER_GET_CONFIG:
        return vu_get_config(dev, vmsg);
    case VHOST_USER_SET_CONFIG:
        return vu_set_config(dev, vmsg);
    case VHOST_USER_NONE:
        /* if you need processing before exit, override iface->process_msg */
        exit(0);
    case VHOST_USER_POSTCOPY_ADVISE:
        return vu_set_postcopy_advise(dev, vmsg);
    case VHOST_USER_POSTCOPY_LISTEN:
        return vu_set_postcopy_listen(dev, vmsg);
    case VHOST_USER_POSTCOPY_END:
        return vu_set_postcopy_end(dev, vmsg);
    case VHOST_USER_GET_INFLIGHT_FD:
        return vu_get_inflight_fd(dev, vmsg);
    case VHOST_USER_SET_INFLIGHT_FD:
        return vu_set_inflight_fd(dev, vmsg);
    case VHOST_USER_VRING_KICK:
        return vu_handle_vring_kick(dev, vmsg);
    case VHOST_USER_GET_MAX_MEM_SLOTS:
        return vu_handle_get_max_memslots(dev, vmsg);
    case VHOST_USER_ADD_MEM_REG:
        return vu_add_mem_reg(dev, vmsg);
    case VHOST_USER_REM_MEM_REG:
        return vu_rem_mem_reg(dev, vmsg);
    case VHOST_USER_GET_SHARED_OBJECT:
        return vu_get_shared_object(dev, vmsg);
    default:
        /* Unknown request: release any fds it carried before panicking. */
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Unhandled request: %d", vmsg->request);
    }

    return false;
}
21380df750e9SMarc-André Lureau
21390df750e9SMarc-André Lureau bool
vu_dispatch(VuDev * dev)21400df750e9SMarc-André Lureau vu_dispatch(VuDev *dev)
21410df750e9SMarc-André Lureau {
21420df750e9SMarc-André Lureau VhostUserMsg vmsg = { 0, };
21430df750e9SMarc-André Lureau int reply_requested;
21440df750e9SMarc-André Lureau bool need_reply, success = false;
21450df750e9SMarc-André Lureau
21460df750e9SMarc-André Lureau if (!dev->read_msg(dev, dev->sock, &vmsg)) {
21470df750e9SMarc-André Lureau goto end;
21480df750e9SMarc-André Lureau }
21490df750e9SMarc-André Lureau
21500df750e9SMarc-André Lureau need_reply = vmsg.flags & VHOST_USER_NEED_REPLY_MASK;
21510df750e9SMarc-André Lureau
21520df750e9SMarc-André Lureau reply_requested = vu_process_message(dev, &vmsg);
21530df750e9SMarc-André Lureau if (!reply_requested && need_reply) {
21540df750e9SMarc-André Lureau vmsg_set_reply_u64(&vmsg, 0);
21550df750e9SMarc-André Lureau reply_requested = 1;
21560df750e9SMarc-André Lureau }
21570df750e9SMarc-André Lureau
21580df750e9SMarc-André Lureau if (!reply_requested) {
21590df750e9SMarc-André Lureau success = true;
21600df750e9SMarc-André Lureau goto end;
21610df750e9SMarc-André Lureau }
21620df750e9SMarc-André Lureau
21630df750e9SMarc-André Lureau if (!vu_send_reply(dev, dev->sock, &vmsg)) {
21640df750e9SMarc-André Lureau goto end;
21650df750e9SMarc-André Lureau }
21660df750e9SMarc-André Lureau
21670df750e9SMarc-André Lureau success = true;
21680df750e9SMarc-André Lureau
21690df750e9SMarc-André Lureau end:
21700df750e9SMarc-André Lureau free(vmsg.data);
21710df750e9SMarc-André Lureau return success;
21720df750e9SMarc-André Lureau }
21730df750e9SMarc-André Lureau
/*
 * Release every resource owned by @dev: per-queue file descriptors and
 * resubmit lists, the inflight mapping, the dirty log, the backend
 * channel and the frontend socket.  The VuDev structure itself is
 * caller-owned and is not freed.
 */
void
vu_deinit(VuDev *dev)
{
    unsigned int i;

    vu_remove_all_mem_regs(dev);

    for (i = 0; i < dev->max_queues; i++) {
        VuVirtq *vq = &dev->vq[i];

        if (vq->call_fd != -1) {
            close(vq->call_fd);
            vq->call_fd = -1;
        }

        if (vq->kick_fd != -1) {
            /* unregister from the event loop before closing */
            dev->remove_watch(dev, vq->kick_fd);
            close(vq->kick_fd);
            vq->kick_fd = -1;
        }

        if (vq->err_fd != -1) {
            close(vq->err_fd);
            vq->err_fd = -1;
        }

        if (vq->resubmit_list) {
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        /* points into inflight_info.addr, unmapped below — just forget it */
        vq->inflight = NULL;
    }

    if (dev->inflight_info.addr) {
        munmap(dev->inflight_info.addr, dev->inflight_info.size);
        dev->inflight_info.addr = NULL;
    }

    /* inflight_info.fd is 0 when it was never set (vu_init zeroes the
     * device), so only strictly-positive descriptors are closed. */
    if (dev->inflight_info.fd > 0) {
        close(dev->inflight_info.fd);
        dev->inflight_info.fd = -1;
    }

    vu_close_log(dev);
    if (dev->backend_fd != -1) {
        close(dev->backend_fd);
        dev->backend_fd = -1;
    }
    pthread_mutex_destroy(&dev->backend_mutex);

    if (dev->sock != -1) {
        close(dev->sock);
    }

    free(dev->vq);
    dev->vq = NULL;
    free(dev->regions);
    dev->regions = NULL;
}
22340df750e9SMarc-André Lureau
22350df750e9SMarc-André Lureau bool
vu_init(VuDev * dev,uint16_t max_queues,int socket,vu_panic_cb panic,vu_read_msg_cb read_msg,vu_set_watch_cb set_watch,vu_remove_watch_cb remove_watch,const VuDevIface * iface)22360df750e9SMarc-André Lureau vu_init(VuDev *dev,
22370df750e9SMarc-André Lureau uint16_t max_queues,
22380df750e9SMarc-André Lureau int socket,
22390df750e9SMarc-André Lureau vu_panic_cb panic,
22400df750e9SMarc-André Lureau vu_read_msg_cb read_msg,
22410df750e9SMarc-André Lureau vu_set_watch_cb set_watch,
22420df750e9SMarc-André Lureau vu_remove_watch_cb remove_watch,
22430df750e9SMarc-André Lureau const VuDevIface *iface)
22440df750e9SMarc-André Lureau {
22450df750e9SMarc-André Lureau uint16_t i;
22460df750e9SMarc-André Lureau
22470df750e9SMarc-André Lureau assert(max_queues > 0);
22480df750e9SMarc-André Lureau assert(socket >= 0);
22490df750e9SMarc-André Lureau assert(set_watch);
22500df750e9SMarc-André Lureau assert(remove_watch);
22510df750e9SMarc-André Lureau assert(iface);
22520df750e9SMarc-André Lureau assert(panic);
22530df750e9SMarc-André Lureau
22540df750e9SMarc-André Lureau memset(dev, 0, sizeof(*dev));
22550df750e9SMarc-André Lureau
22560df750e9SMarc-André Lureau dev->sock = socket;
22570df750e9SMarc-André Lureau dev->panic = panic;
22580df750e9SMarc-André Lureau dev->read_msg = read_msg ? read_msg : vu_message_read_default;
22590df750e9SMarc-André Lureau dev->set_watch = set_watch;
22600df750e9SMarc-André Lureau dev->remove_watch = remove_watch;
22610df750e9SMarc-André Lureau dev->iface = iface;
22620df750e9SMarc-André Lureau dev->log_call_fd = -1;
2263f8ed3648SManos Pitsidianakis pthread_mutex_init(&dev->backend_mutex, NULL);
2264f8ed3648SManos Pitsidianakis dev->backend_fd = -1;
22650df750e9SMarc-André Lureau dev->max_queues = max_queues;
22660df750e9SMarc-André Lureau
2267d884e272SDavid Hildenbrand dev->regions = malloc(VHOST_USER_MAX_RAM_SLOTS * sizeof(dev->regions[0]));
2268d884e272SDavid Hildenbrand if (!dev->regions) {
2269d884e272SDavid Hildenbrand DPRINT("%s: failed to malloc mem regions\n", __func__);
2270d884e272SDavid Hildenbrand return false;
2271d884e272SDavid Hildenbrand }
2272d884e272SDavid Hildenbrand
22730df750e9SMarc-André Lureau dev->vq = malloc(max_queues * sizeof(dev->vq[0]));
22740df750e9SMarc-André Lureau if (!dev->vq) {
22750df750e9SMarc-André Lureau DPRINT("%s: failed to malloc virtqueues\n", __func__);
2276d884e272SDavid Hildenbrand free(dev->regions);
2277d884e272SDavid Hildenbrand dev->regions = NULL;
22780df750e9SMarc-André Lureau return false;
22790df750e9SMarc-André Lureau }
22800df750e9SMarc-André Lureau
22810df750e9SMarc-André Lureau for (i = 0; i < max_queues; i++) {
22820df750e9SMarc-André Lureau dev->vq[i] = (VuVirtq) {
22830df750e9SMarc-André Lureau .call_fd = -1, .kick_fd = -1, .err_fd = -1,
22840df750e9SMarc-André Lureau .notification = true,
22850df750e9SMarc-André Lureau };
22860df750e9SMarc-André Lureau }
22870df750e9SMarc-André Lureau
22880df750e9SMarc-André Lureau return true;
22890df750e9SMarc-André Lureau }
22900df750e9SMarc-André Lureau
22910df750e9SMarc-André Lureau VuVirtq *
vu_get_queue(VuDev * dev,int qidx)22920df750e9SMarc-André Lureau vu_get_queue(VuDev *dev, int qidx)
22930df750e9SMarc-André Lureau {
22940df750e9SMarc-André Lureau assert(qidx < dev->max_queues);
22950df750e9SMarc-André Lureau return &dev->vq[qidx];
22960df750e9SMarc-André Lureau }
22970df750e9SMarc-André Lureau
22980df750e9SMarc-André Lureau bool
vu_queue_enabled(VuDev * dev,VuVirtq * vq)22990df750e9SMarc-André Lureau vu_queue_enabled(VuDev *dev, VuVirtq *vq)
23000df750e9SMarc-André Lureau {
23010df750e9SMarc-André Lureau return vq->enable;
23020df750e9SMarc-André Lureau }
23030df750e9SMarc-André Lureau
23040df750e9SMarc-André Lureau bool
vu_queue_started(const VuDev * dev,const VuVirtq * vq)23050df750e9SMarc-André Lureau vu_queue_started(const VuDev *dev, const VuVirtq *vq)
23060df750e9SMarc-André Lureau {
23070df750e9SMarc-André Lureau return vq->started;
23080df750e9SMarc-André Lureau }
23090df750e9SMarc-André Lureau
23100df750e9SMarc-André Lureau static inline uint16_t
vring_avail_flags(VuVirtq * vq)23110df750e9SMarc-André Lureau vring_avail_flags(VuVirtq *vq)
23120df750e9SMarc-André Lureau {
23130df750e9SMarc-André Lureau return le16toh(vq->vring.avail->flags);
23140df750e9SMarc-André Lureau }
23150df750e9SMarc-André Lureau
/*
 * Host-endian read of the guest's avail index.  Side effect: the value
 * is cached in vq->shadow_avail_idx, which other code (e.g.
 * virtqueue_num_heads's panic message) relies on.
 */
static inline uint16_t
vring_avail_idx(VuVirtq *vq)
{
    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);

    return vq->shadow_avail_idx;
}
23230df750e9SMarc-André Lureau
23240df750e9SMarc-André Lureau static inline uint16_t
vring_avail_ring(VuVirtq * vq,int i)23250df750e9SMarc-André Lureau vring_avail_ring(VuVirtq *vq, int i)
23260df750e9SMarc-André Lureau {
23270df750e9SMarc-André Lureau return le16toh(vq->vring.avail->ring[i]);
23280df750e9SMarc-André Lureau }
23290df750e9SMarc-André Lureau
23300df750e9SMarc-André Lureau static inline uint16_t
vring_get_used_event(VuVirtq * vq)23310df750e9SMarc-André Lureau vring_get_used_event(VuVirtq *vq)
23320df750e9SMarc-André Lureau {
23330df750e9SMarc-André Lureau return vring_avail_ring(vq, vq->vring.num);
23340df750e9SMarc-André Lureau }
23350df750e9SMarc-André Lureau
/* Return how many new descriptor-chain heads the driver has made
 * available beyond @idx, or -1 (after vu_panic) if the distance exceeds
 * the ring size, which a well-behaved driver can never produce.
 * Relies on 16-bit wrap-around arithmetic for the index difference. */
static int
virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        vu_panic(dev, "Guest moved used index from %u to %u",
                 idx, vq->shadow_avail_idx);
        return -1;
    }
    if (num_heads) {
        /* On success, callers read a descriptor at vq->last_avail_idx.
         * Make sure descriptor read does not bypass avail index read. */
        smp_rmb();
    }

    return num_heads;
}
23550df750e9SMarc-André Lureau
23560df750e9SMarc-André Lureau static bool
virtqueue_get_head(VuDev * dev,VuVirtq * vq,unsigned int idx,unsigned int * head)23570df750e9SMarc-André Lureau virtqueue_get_head(VuDev *dev, VuVirtq *vq,
23580df750e9SMarc-André Lureau unsigned int idx, unsigned int *head)
23590df750e9SMarc-André Lureau {
23600df750e9SMarc-André Lureau /* Grab the next descriptor number they're advertising, and increment
23610df750e9SMarc-André Lureau * the index we've seen. */
23620df750e9SMarc-André Lureau *head = vring_avail_ring(vq, idx % vq->vring.num);
23630df750e9SMarc-André Lureau
23640df750e9SMarc-André Lureau /* If their number is silly, that's a fatal mistake. */
23650df750e9SMarc-André Lureau if (*head >= vq->vring.num) {
23660df750e9SMarc-André Lureau vu_panic(dev, "Guest says index %u is available", *head);
23670df750e9SMarc-André Lureau return false;
23680df750e9SMarc-André Lureau }
23690df750e9SMarc-André Lureau
23700df750e9SMarc-André Lureau return true;
23710df750e9SMarc-André Lureau }
23720df750e9SMarc-André Lureau
23730df750e9SMarc-André Lureau static int
virtqueue_read_indirect_desc(VuDev * dev,struct vring_desc * desc,uint64_t addr,size_t len)23740df750e9SMarc-André Lureau virtqueue_read_indirect_desc(VuDev *dev, struct vring_desc *desc,
23750df750e9SMarc-André Lureau uint64_t addr, size_t len)
23760df750e9SMarc-André Lureau {
23770df750e9SMarc-André Lureau struct vring_desc *ori_desc;
23780df750e9SMarc-André Lureau uint64_t read_len;
23790df750e9SMarc-André Lureau
23800df750e9SMarc-André Lureau if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) {
23810df750e9SMarc-André Lureau return -1;
23820df750e9SMarc-André Lureau }
23830df750e9SMarc-André Lureau
23840df750e9SMarc-André Lureau if (len == 0) {
23850df750e9SMarc-André Lureau return -1;
23860df750e9SMarc-André Lureau }
23870df750e9SMarc-André Lureau
23880df750e9SMarc-André Lureau while (len) {
23890df750e9SMarc-André Lureau read_len = len;
23900df750e9SMarc-André Lureau ori_desc = vu_gpa_to_va(dev, &read_len, addr);
23910df750e9SMarc-André Lureau if (!ori_desc) {
23920df750e9SMarc-André Lureau return -1;
23930df750e9SMarc-André Lureau }
23940df750e9SMarc-André Lureau
23950df750e9SMarc-André Lureau memcpy(desc, ori_desc, read_len);
23960df750e9SMarc-André Lureau len -= read_len;
23970df750e9SMarc-André Lureau addr += read_len;
23980df750e9SMarc-André Lureau desc += read_len;
23990df750e9SMarc-André Lureau }
24000df750e9SMarc-André Lureau
24010df750e9SMarc-André Lureau return 0;
24020df750e9SMarc-André Lureau }
24030df750e9SMarc-André Lureau
/* Return values of virtqueue_read_next_desc(). */
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,  /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,  /* more buffers in chain */
};
24090df750e9SMarc-André Lureau
24100df750e9SMarc-André Lureau static int
virtqueue_read_next_desc(VuDev * dev,struct vring_desc * desc,int i,unsigned int max,unsigned int * next)24110df750e9SMarc-André Lureau virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
24120df750e9SMarc-André Lureau int i, unsigned int max, unsigned int *next)
24130df750e9SMarc-André Lureau {
24140df750e9SMarc-André Lureau /* If this descriptor says it doesn't chain, we're done. */
24150df750e9SMarc-André Lureau if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
24160df750e9SMarc-André Lureau return VIRTQUEUE_READ_DESC_DONE;
24170df750e9SMarc-André Lureau }
24180df750e9SMarc-André Lureau
24190df750e9SMarc-André Lureau /* Check they're not leading us off end of descriptors. */
24200df750e9SMarc-André Lureau *next = le16toh(desc[i].next);
24210df750e9SMarc-André Lureau /* Make sure compiler knows to grab that: we don't want it changing! */
24220df750e9SMarc-André Lureau smp_wmb();
24230df750e9SMarc-André Lureau
24240df750e9SMarc-André Lureau if (*next >= max) {
24250df750e9SMarc-André Lureau vu_panic(dev, "Desc next is %u", *next);
24260df750e9SMarc-André Lureau return VIRTQUEUE_READ_DESC_ERROR;
24270df750e9SMarc-André Lureau }
24280df750e9SMarc-André Lureau
24290df750e9SMarc-André Lureau return VIRTQUEUE_READ_DESC_MORE;
24300df750e9SMarc-André Lureau }
24310df750e9SMarc-André Lureau
/* Walk all currently available descriptor chains of @vq and report how
 * many bytes the device could read (*out_bytes, driver-written buffers
 * without VRING_DESC_F_WRITE) and write (*in_bytes, VRING_DESC_F_WRITE
 * buffers).  Scanning stops early once both max_in_bytes and
 * max_out_bytes are reached.  On any malformed ring (after vu_panic)
 * both counts are reported as 0.  Either output pointer may be NULL. */
void
vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
                         unsigned int *out_bytes,
                         unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    if (!vu_is_vq_usable(dev, vq)) {
        goto done;
    }

    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
        unsigned int max, desc_len, num_bufs, indirect = 0;
        uint64_t desc_addr, read_len;
        struct vring_desc *desc;
        struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
            goto err;
        }
        desc = vq->vring.desc;

        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
                vu_panic(dev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            desc_addr = le64toh(desc[i].addr);
            desc_len = le32toh(desc[i].len);
            max = desc_len / sizeof(struct vring_desc);
            read_len = desc_len;
            desc = vu_gpa_to_va(dev, &read_len, desc_addr);
            if (unlikely(desc && read_len != desc_len)) {
                /* Failed to use zero copy: the table spans regions, so
                 * fall back to copying it into desc_buf. */
                desc = NULL;
                if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                                  desc_addr,
                                                  desc_len)) {
                    desc = desc_buf;
                }
            }
            if (!desc) {
                vu_panic(dev, "Invalid indirect buffer table");
                goto err;
            }
            /* Restart counting within the indirect table. */
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
                in_total += le32toh(desc[i].len);
            } else {
                out_total += le32toh(desc[i].len);
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        /* An indirect chain counts as one buffer at the top level. */
        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
    if (rc < 0) {
        goto err;
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}
25410df750e9SMarc-André Lureau
25420df750e9SMarc-André Lureau bool
vu_queue_avail_bytes(VuDev * dev,VuVirtq * vq,unsigned int in_bytes,unsigned int out_bytes)25430df750e9SMarc-André Lureau vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
25440df750e9SMarc-André Lureau unsigned int out_bytes)
25450df750e9SMarc-André Lureau {
25460df750e9SMarc-André Lureau unsigned int in_total, out_total;
25470df750e9SMarc-André Lureau
25480df750e9SMarc-André Lureau vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
25490df750e9SMarc-André Lureau in_bytes, out_bytes);
25500df750e9SMarc-André Lureau
25510df750e9SMarc-André Lureau return in_bytes <= in_total && out_bytes <= out_total;
25520df750e9SMarc-André Lureau }
25530df750e9SMarc-André Lureau
25540df750e9SMarc-André Lureau /* Fetch avail_idx from VQ memory only when we really need to know if
25550df750e9SMarc-André Lureau * guest has added some buffers. */
25560df750e9SMarc-André Lureau bool
vu_queue_empty(VuDev * dev,VuVirtq * vq)25570df750e9SMarc-André Lureau vu_queue_empty(VuDev *dev, VuVirtq *vq)
25580df750e9SMarc-André Lureau {
25592a290227SDavid Hildenbrand if (!vu_is_vq_usable(dev, vq)) {
25600df750e9SMarc-André Lureau return true;
25610df750e9SMarc-André Lureau }
25620df750e9SMarc-André Lureau
25630df750e9SMarc-André Lureau if (vq->shadow_avail_idx != vq->last_avail_idx) {
25640df750e9SMarc-André Lureau return false;
25650df750e9SMarc-André Lureau }
25660df750e9SMarc-André Lureau
25670df750e9SMarc-André Lureau return vring_avail_idx(vq) == vq->last_avail_idx;
25680df750e9SMarc-André Lureau }
25690df750e9SMarc-André Lureau
/* Decide whether the driver should be notified about new used entries
 * on @vq.  Without VIRTIO_RING_F_EVENT_IDX this honours the driver's
 * VRING_AVAIL_F_NO_INTERRUPT suppression flag; with event idx it uses
 * used_event and the window of used entries signalled so far
 * (vring_need_event). */
static bool
vring_notify(VuDev *dev, VuVirtq *vq)
{
    uint16_t old, new;
    bool v;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vu_queue_empty(dev, vq)) {
        return true;
    }

    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* First notification after the valid flag was reset is always sent. */
    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
25950df750e9SMarc-André Lureau
/* Notify the driver that @vq has new used buffers, if vring_notify()
 * says a notification is warranted.  When no call eventfd is set and
 * in-band notifications were negotiated, the notification is sent as a
 * VHOST_USER_BACKEND_VRING_CALL message over the backend channel
 * instead; with @sync set (and REPLY_ACK negotiated) the message waits
 * for the frontend's acknowledgement. */
static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
{
    if (!vu_is_vq_usable(dev, vq)) {
        return;
    }

    if (!vring_notify(dev, vq)) {
        DPRINT("skipped notify...\n");
        return;
    }

    if (vq->call_fd < 0 &&
        vu_has_protocol_feature(dev,
                                VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
        vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        VhostUserMsg vmsg = {
            .request = VHOST_USER_BACKEND_VRING_CALL,
            .flags = VHOST_USER_VERSION,
            .size = sizeof(vmsg.payload.state),
            .payload.state = {
                .index = vq - dev->vq,
            },
        };
        bool ack = sync &&
                   vu_has_protocol_feature(dev,
                                           VHOST_USER_PROTOCOL_F_REPLY_ACK);

        if (ack) {
            vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
        }

        vu_message_write(dev, dev->backend_fd, &vmsg);
        if (ack) {
            /* Wait for the frontend's reply before returning. */
            vu_message_read_default(dev, dev->backend_fd, &vmsg);
        }
        return;
    }

    if (eventfd_write(vq->call_fd, 1) < 0) {
        vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
    }
}
26380df750e9SMarc-André Lureau
/* Notify the driver about new used buffers without waiting for any
 * acknowledgement. */
void vu_queue_notify(VuDev *dev, VuVirtq *vq)
{
    _vu_queue_notify(dev, vq, false);
}
26430df750e9SMarc-André Lureau
/* Notify the driver and, when in-band notifications with REPLY_ACK are
 * in use, wait for the frontend to acknowledge the notification. */
void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
{
    _vu_queue_notify(dev, vq, true);
}
26480df750e9SMarc-André Lureau
/* Send a VHOST_USER_BACKEND_CONFIG_CHANGE_MSG on the backend channel to
 * tell the frontend that the device configuration space changed.  The
 * message carries no payload and no reply is awaited. */
void vu_config_change_msg(VuDev *dev)
{
    VhostUserMsg vmsg = {
        .request = VHOST_USER_BACKEND_CONFIG_CHANGE_MSG,
        .flags = VHOST_USER_VERSION,
    };

    vu_message_write(dev, dev->backend_fd, &vmsg);
}
2658ca858a5fSVladimir Sementsov-Ogievskiy
/* Set @mask bits in the used ring's flags field (guest-visible,
 * little-endian).  The offsetof/char* arithmetic avoids taking the
 * address of the member directly. */
static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char*)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags = htole16(le16toh(*flags) | mask);
}
26680df750e9SMarc-André Lureau
/* Clear @mask bits in the used ring's flags field (guest-visible,
 * little-endian); counterpart of vring_used_flags_set_bit(). */
static inline void
vring_used_flags_unset_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char*)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags = htole16(le16toh(*flags) & ~mask);
}
26780df750e9SMarc-André Lureau
/* Publish @val as the avail_event value (the slot just past the used
 * ring entries, used with VIRTIO_RING_F_EVENT_IDX) unless notifications
 * are currently disabled for this queue.  memcpy is used for the
 * guest-memory store rather than a direct assignment. */
static inline void
vring_set_avail_event(VuVirtq *vq, uint16_t val)
{
    uint16_t val_le = htole16(val);

    if (!vq->notification) {
        return;
    }

    memcpy(&vq->vring.used->ring[vq->vring.num], &val_le, sizeof(uint16_t));
}
26900df750e9SMarc-André Lureau
/* Enable or disable guest->device notifications for @vq.  With
 * VIRTIO_RING_F_EVENT_IDX this publishes the current avail index as
 * avail_event; otherwise it toggles VRING_USED_F_NO_NOTIFY in the used
 * ring flags. */
void
vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
{
    vq->notification = enable;
    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}
27070df750e9SMarc-André Lureau
/* Map a guest buffer [pa, pa+sz) into iovec entries starting at
 * iov[*p_num_sg], splitting across mapped memory regions as needed
 * (vu_gpa_to_va may shorten 'len' at a region boundary).
 *
 * @p_num_sg:   in/out count of iovec entries used so far
 * @max_num_sg: capacity of @iov
 * @is_write:   currently unused here; kept for symmetry with callers
 *
 * Returns false (after vu_panic) on a zero-sized buffer, iovec
 * exhaustion, or an untranslatable address. */
static bool
virtqueue_map_desc(VuDev *dev,
                   unsigned int *p_num_sg, struct iovec *iov,
                   unsigned int max_num_sg, bool is_write,
                   uint64_t pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        vu_panic(dev, "virtio: zero sized buffers are not allowed");
        return false;
    }

    while (sz) {
        uint64_t len = sz;

        if (num_sg == max_num_sg) {
            vu_panic(dev, "virtio: too many descriptors in indirect table");
            return false;
        }

        iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa);
        if (iov[num_sg].iov_base == NULL) {
            vu_panic(dev, "virtio: invalid address for buffers");
            return false;
        }
        /* len may have been clamped to the end of a memory region;
         * continue with the remainder on the next iteration. */
        iov[num_sg].iov_len = len;
        num_sg++;
        sz -= len;
        pa += len;
    }

    *p_num_sg = num_sg;
    return true;
}
27450df750e9SMarc-André Lureau
/* Allocate a virtqueue element in a single malloc'd buffer laid out as:
 *   [sz bytes of header][in_num in_sg iovecs][out_num out_sg iovecs]
 * @sz must be at least sizeof(VuVirtqElement) (asserted); extra space
 * presumably lets callers embed the element in a larger struct.
 * Returns NULL on allocation failure. */
static void *
virtqueue_alloc_element(size_t sz,
                        unsigned out_num, unsigned in_num)
{
    VuVirtqElement *elem;
    /* Align the in_sg array that follows the caller's header. */
    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VuVirtqElement));
    elem = malloc(out_sg_end);
    if (!elem) {
        DPRINT("%s: failed to malloc virtqueue element\n", __func__);
        return NULL;
    }
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
27670df750e9SMarc-André Lureau
/* Build a VuVirtqElement for the descriptor chain whose head is @idx.
 * Follows indirect tables (copying them if they span memory regions),
 * maps every buffer into iovecs, and returns a freshly allocated
 * element of header size @sz.  Returns NULL (after vu_panic for
 * malformed chains) on any error. */
static void *
vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
{
    struct vring_desc *desc = vq->vring.desc;
    uint64_t desc_addr, read_len;
    unsigned int desc_len;
    unsigned int max = vq->vring.num;
    unsigned int i = idx;
    VuVirtqElement *elem;
    unsigned int out_num = 0, in_num = 0;
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    int rc;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = vu_gpa_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy: table spans memory regions, so
             * fall back to copying it into desc_buf. */
            desc = NULL;
            if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                              desc_addr,
                                              desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            vu_panic(dev, "Invalid indirect buffer table");
            return NULL;
        }
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
                                    VIRTQUEUE_MAX_SIZE - out_num, true,
                                    le64toh(desc[i].addr),
                                    le32toh(desc[i].len))) {
                return NULL;
            }
        } else {
            /* Readable descriptors must all precede writable ones. */
            if (in_num) {
                vu_panic(dev, "Incorrect order for descriptors");
                return NULL;
            }
            if (!virtqueue_map_desc(dev, &out_num, iov,
                                    VIRTQUEUE_MAX_SIZE, false,
                                    le64toh(desc[i].addr),
                                    le32toh(desc[i].len))) {
                return NULL;
            }
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            vu_panic(dev, "Looped descriptor");
            return NULL;
        }
        rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        vu_panic(dev, "read descriptor error");
        return NULL;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    if (!elem) {
        return NULL;
    }
    elem->index = idx;
    for (i = 0; i < out_num; i++) {
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_sg[i] = iov[out_num + i];
    }

    return elem;
}
28600df750e9SMarc-André Lureau
/* Record descriptor @desc_idx as in-flight in the shared inflight
 * region (crash-recovery bookkeeping).  A no-op (returns 0) when
 * VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD was not negotiated; returns -1
 * if the feature is on but no inflight region was set up. */
static int
vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    /* The per-queue counter orders resubmission after a crash. */
    vq->inflight->desc[desc_idx].counter = vq->counter++;
    vq->inflight->desc[desc_idx].inflight = 1;

    return 0;
}
28770df750e9SMarc-André Lureau
/* Before pushing @desc_idx to the used ring, remember it as the head of
 * the current batch in the shared inflight region.  Same feature/NULL
 * guards as vu_queue_inflight_get(). */
static int
vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    vq->inflight->last_batch_head = desc_idx;

    return 0;
}
28930df750e9SMarc-André Lureau
/* After pushing @desc_idx to the used ring, mark it no longer
 * in-flight and record the new used index in the shared inflight
 * region.  The compiler barriers keep the clear of the inflight flag
 * strictly between the used-ring update and the used_idx snapshot so a
 * crash leaves a consistent view for recovery. */
static int
vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx)
{
    if (!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (unlikely(!vq->inflight)) {
        return -1;
    }

    barrier();

    vq->inflight->desc[desc_idx].inflight = 0;

    barrier();

    vq->inflight->used_idx = vq->used_idx;

    return 0;
}
29150df750e9SMarc-André Lureau
/*
 * Pop the next available element from @vq.  @sz is the allocation size
 * handed to vu_queue_map_desc(), which builds the returned
 * VuVirtqElement (ownership follows vu_queue_map_desc()'s allocation —
 * presumably the caller frees it after vu_queue_push(); confirm there).
 *
 * Returns NULL when the queue is unusable, empty, or the descriptor
 * chain cannot be mapped.
 */
void *
vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
{
    int i;
    unsigned int head;
    VuVirtqElement *elem;

    if (!vu_is_vq_usable(dev, vq)) {
        return NULL;
    }

    /*
     * After a reconnect, requests that were inflight in the previous
     * incarnation are handed out first, from resubmit_list, before any
     * new entries are taken off the avail ring.
     */
    if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
        i = (--vq->resubmit_num);
        elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz);

        if (!vq->resubmit_num) {
            /* Last resubmitted entry consumed; release the list. */
            free(vq->resubmit_list);
            vq->resubmit_list = NULL;
        }

        return elem;
    }

    if (vu_queue_empty(dev, vq)) {
        return NULL;
    }
    /*
     * Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads().
     */
    smp_rmb();

    if (vq->inuse >= vq->vring.num) {
        /* More outstanding elements than ring slots: driver is broken. */
        vu_panic(dev, "Virtqueue size exceeded");
        return NULL;
    }

    /* Consume one avail-ring slot; last_avail_idx advances regardless. */
    if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        /* Ask the driver to kick us only past the index just consumed. */
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    elem = vu_queue_map_desc(dev, vq, head, sz);

    if (!elem) {
        return NULL;
    }

    vq->inuse++;

    /*
     * Record the head as inflight for crash recovery (no-op when the
     * INFLIGHT_SHMFD protocol feature was not negotiated).
     */
    vu_queue_inflight_get(dev, vq, head);

    return elem;
}
29730df750e9SMarc-André Lureau
29740df750e9SMarc-André Lureau static void
vu_queue_detach_element(VuDev * dev,VuVirtq * vq,VuVirtqElement * elem,size_t len)29750df750e9SMarc-André Lureau vu_queue_detach_element(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
29760df750e9SMarc-André Lureau size_t len)
29770df750e9SMarc-André Lureau {
29780df750e9SMarc-André Lureau vq->inuse--;
29790df750e9SMarc-André Lureau /* unmap, when DMA support is added */
29800df750e9SMarc-André Lureau }
29810df750e9SMarc-André Lureau
29820df750e9SMarc-André Lureau void
vu_queue_unpop(VuDev * dev,VuVirtq * vq,VuVirtqElement * elem,size_t len)29830df750e9SMarc-André Lureau vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
29840df750e9SMarc-André Lureau size_t len)
29850df750e9SMarc-André Lureau {
29860df750e9SMarc-André Lureau vq->last_avail_idx--;
29870df750e9SMarc-André Lureau vu_queue_detach_element(dev, vq, elem, len);
29880df750e9SMarc-André Lureau }
29890df750e9SMarc-André Lureau
29900df750e9SMarc-André Lureau bool
vu_queue_rewind(VuDev * dev,VuVirtq * vq,unsigned int num)29910df750e9SMarc-André Lureau vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
29920df750e9SMarc-André Lureau {
29930df750e9SMarc-André Lureau if (num > vq->inuse) {
29940df750e9SMarc-André Lureau return false;
29950df750e9SMarc-André Lureau }
29960df750e9SMarc-André Lureau vq->last_avail_idx -= num;
29970df750e9SMarc-André Lureau vq->inuse -= num;
29980df750e9SMarc-André Lureau return true;
29990df750e9SMarc-André Lureau }
30000df750e9SMarc-André Lureau
30010df750e9SMarc-André Lureau static inline
vring_used_write(VuDev * dev,VuVirtq * vq,struct vring_used_elem * uelem,int i)30020df750e9SMarc-André Lureau void vring_used_write(VuDev *dev, VuVirtq *vq,
30030df750e9SMarc-André Lureau struct vring_used_elem *uelem, int i)
30040df750e9SMarc-André Lureau {
30050df750e9SMarc-André Lureau struct vring_used *used = vq->vring.used;
30060df750e9SMarc-André Lureau
30070df750e9SMarc-André Lureau used->ring[i] = *uelem;
30080df750e9SMarc-André Lureau vu_log_write(dev, vq->vring.log_guest_addr +
30090df750e9SMarc-André Lureau offsetof(struct vring_used, ring[i]),
30100df750e9SMarc-André Lureau sizeof(used->ring[i]));
30110df750e9SMarc-André Lureau }
30120df750e9SMarc-André Lureau
30130df750e9SMarc-André Lureau
/*
 * Walk @elem's descriptor chain and mark the first @len bytes of its
 * device-writable buffers dirty in the migration log.  Panics the
 * device on a malformed chain (bad indirect table, looped descriptors).
 */
static void
vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
                  const VuVirtqElement *elem,
                  unsigned int len)
{
    struct vring_desc *desc = vq->vring.desc;
    unsigned int i, max, min, desc_len;
    uint64_t desc_addr, read_len;
    /* Bounce buffer when the indirect table can't be mapped directly. */
    struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
    unsigned num_bufs = 0;

    max = vq->vring.num;
    i = elem->index;

    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
        /* Table size must be a whole number of descriptors. */
        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
            return;
        }

        /* loop over the indirect descriptor table */
        desc_addr = le64toh(desc[i].addr);
        desc_len = le32toh(desc[i].len);
        max = desc_len / sizeof(struct vring_desc);
        read_len = desc_len;
        desc = vu_gpa_to_va(dev, &read_len, desc_addr);
        if (unlikely(desc && read_len != desc_len)) {
            /* Failed to use zero copy */
            desc = NULL;
            /* Table spans a region boundary: copy it out piecewise. */
            if (!virtqueue_read_indirect_desc(dev, desc_buf,
                                              desc_addr,
                                              desc_len)) {
                desc = desc_buf;
            }
        }
        if (!desc) {
            vu_panic(dev, "Invalid indirect buffer table");
            return;
        }
        /* From here on, indices are into the indirect table. */
        i = 0;
    }

    do {
        /* Bound the walk to detect loops in a malformed chain. */
        if (++num_bufs > max) {
            vu_panic(dev, "Looped descriptor");
            return;
        }

        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
            /* Only the bytes actually written (@len total) need logging. */
            min = MIN(le32toh(desc[i].len), len);
            vu_log_write(dev, le64toh(desc[i].addr), min);
            len -= min;
        }

    } while (len > 0 &&
             (virtqueue_read_next_desc(dev, desc, i, max, &i)
              == VIRTQUEUE_READ_DESC_MORE));
}
30720df750e9SMarc-André Lureau
30730df750e9SMarc-André Lureau void
vu_queue_fill(VuDev * dev,VuVirtq * vq,const VuVirtqElement * elem,unsigned int len,unsigned int idx)30740df750e9SMarc-André Lureau vu_queue_fill(VuDev *dev, VuVirtq *vq,
30750df750e9SMarc-André Lureau const VuVirtqElement *elem,
30760df750e9SMarc-André Lureau unsigned int len, unsigned int idx)
30770df750e9SMarc-André Lureau {
30780df750e9SMarc-André Lureau struct vring_used_elem uelem;
30790df750e9SMarc-André Lureau
30802a290227SDavid Hildenbrand if (!vu_is_vq_usable(dev, vq)) {
30810df750e9SMarc-André Lureau return;
30820df750e9SMarc-André Lureau }
30830df750e9SMarc-André Lureau
30840df750e9SMarc-André Lureau vu_log_queue_fill(dev, vq, elem, len);
30850df750e9SMarc-André Lureau
30860df750e9SMarc-André Lureau idx = (idx + vq->used_idx) % vq->vring.num;
30870df750e9SMarc-André Lureau
30880df750e9SMarc-André Lureau uelem.id = htole32(elem->index);
30890df750e9SMarc-André Lureau uelem.len = htole32(len);
30900df750e9SMarc-André Lureau vring_used_write(dev, vq, &uelem, idx);
30910df750e9SMarc-André Lureau }
30920df750e9SMarc-André Lureau
30930df750e9SMarc-André Lureau static inline
vring_used_idx_set(VuDev * dev,VuVirtq * vq,uint16_t val)30940df750e9SMarc-André Lureau void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
30950df750e9SMarc-André Lureau {
30960df750e9SMarc-André Lureau vq->vring.used->idx = htole16(val);
30970df750e9SMarc-André Lureau vu_log_write(dev,
30980df750e9SMarc-André Lureau vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
30990df750e9SMarc-André Lureau sizeof(vq->vring.used->idx));
31000df750e9SMarc-André Lureau
31010df750e9SMarc-André Lureau vq->used_idx = val;
31020df750e9SMarc-André Lureau }
31030df750e9SMarc-André Lureau
31040df750e9SMarc-André Lureau void
vu_queue_flush(VuDev * dev,VuVirtq * vq,unsigned int count)31050df750e9SMarc-André Lureau vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
31060df750e9SMarc-André Lureau {
31070df750e9SMarc-André Lureau uint16_t old, new;
31080df750e9SMarc-André Lureau
31092a290227SDavid Hildenbrand if (!vu_is_vq_usable(dev, vq)) {
31100df750e9SMarc-André Lureau return;
31110df750e9SMarc-André Lureau }
31120df750e9SMarc-André Lureau
31130df750e9SMarc-André Lureau /* Make sure buffer is written before we update index. */
31140df750e9SMarc-André Lureau smp_wmb();
31150df750e9SMarc-André Lureau
31160df750e9SMarc-André Lureau old = vq->used_idx;
31170df750e9SMarc-André Lureau new = old + count;
31180df750e9SMarc-André Lureau vring_used_idx_set(dev, vq, new);
31190df750e9SMarc-André Lureau vq->inuse -= count;
31200df750e9SMarc-André Lureau if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
31210df750e9SMarc-André Lureau vq->signalled_used_valid = false;
31220df750e9SMarc-André Lureau }
31230df750e9SMarc-André Lureau }
31240df750e9SMarc-André Lureau
31250df750e9SMarc-André Lureau void
vu_queue_push(VuDev * dev,VuVirtq * vq,const VuVirtqElement * elem,unsigned int len)31260df750e9SMarc-André Lureau vu_queue_push(VuDev *dev, VuVirtq *vq,
31270df750e9SMarc-André Lureau const VuVirtqElement *elem, unsigned int len)
31280df750e9SMarc-André Lureau {
31290df750e9SMarc-André Lureau vu_queue_fill(dev, vq, elem, len, 0);
31300df750e9SMarc-André Lureau vu_queue_inflight_pre_put(dev, vq, elem->index);
31310df750e9SMarc-André Lureau vu_queue_flush(dev, vq, 1);
31320df750e9SMarc-André Lureau vu_queue_inflight_post_put(dev, vq, elem->index);
31330df750e9SMarc-André Lureau }
3134