xref: /qemu/hw/virtio/vhost-user.c (revision d5657258)
1 /*
2  * vhost-user
3  *
4  * Copyright (c) 2013 Virtual Open Systems Sarl.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  *
9  */
10 
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-user.h"
15 #include "hw/virtio/vhost-backend.h"
16 #include "hw/virtio/virtio.h"
17 #include "hw/virtio/virtio-net.h"
18 #include "chardev/char-fe.h"
19 #include "io/channel-socket.h"
20 #include "sysemu/kvm.h"
21 #include "qemu/error-report.h"
22 #include "qemu/main-loop.h"
23 #include "qemu/sockets.h"
24 #include "sysemu/runstate.h"
25 #include "sysemu/cryptodev.h"
26 #include "migration/migration.h"
27 #include "migration/postcopy-ram.h"
28 #include "trace.h"
29 #include "exec/ramblock.h"
30 
31 #include <sys/ioctl.h>
32 #include <sys/socket.h>
33 #include <sys/un.h>
34 
35 #include "standard-headers/linux/vhost_types.h"
36 
37 #ifdef CONFIG_LINUX
38 #include <linux/userfaultfd.h>
39 #endif
40 
41 #define VHOST_MEMORY_BASELINE_NREGIONS    8
42 #define VHOST_USER_F_PROTOCOL_FEATURES 30
43 #define VHOST_USER_BACKEND_MAX_FDS     8
44 
45 #if defined(TARGET_PPC) || defined(TARGET_PPC64)
46 #include "hw/ppc/spapr.h"
47 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
48 
49 #else
50 #define VHOST_USER_MAX_RAM_SLOTS 512
51 #endif
52 
53 /*
54  * Maximum size of virtio device config space
55  */
56 #define VHOST_USER_MAX_CONFIG_SIZE 256
57 
58 enum VhostUserProtocolFeature {
59     VHOST_USER_PROTOCOL_F_MQ = 0,
60     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
61     VHOST_USER_PROTOCOL_F_RARP = 2,
62     VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
63     VHOST_USER_PROTOCOL_F_NET_MTU = 4,
64     VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
65     VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
66     VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
67     VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
68     VHOST_USER_PROTOCOL_F_CONFIG = 9,
69     VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
70     VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
71     VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
72     VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
73     /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
74     VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
75     VHOST_USER_PROTOCOL_F_STATUS = 16,
76     VHOST_USER_PROTOCOL_F_MAX
77 };
78 
79 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
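/*
 * With VHOST_USER_PROTOCOL_F_MAX currently 17, the mask above works out
 * to (1 << 17) - 1 = 0x1ffff, i.e. every defined protocol feature bit
 * (including the reserved bit 14) survives the masking of a backend's
 * advertised protocol features.
 */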
80 
81 typedef enum VhostUserRequest {
82     VHOST_USER_NONE = 0,
83     VHOST_USER_GET_FEATURES = 1,
84     VHOST_USER_SET_FEATURES = 2,
85     VHOST_USER_SET_OWNER = 3,
86     VHOST_USER_RESET_OWNER = 4,
87     VHOST_USER_SET_MEM_TABLE = 5,
88     VHOST_USER_SET_LOG_BASE = 6,
89     VHOST_USER_SET_LOG_FD = 7,
90     VHOST_USER_SET_VRING_NUM = 8,
91     VHOST_USER_SET_VRING_ADDR = 9,
92     VHOST_USER_SET_VRING_BASE = 10,
93     VHOST_USER_GET_VRING_BASE = 11,
94     VHOST_USER_SET_VRING_KICK = 12,
95     VHOST_USER_SET_VRING_CALL = 13,
96     VHOST_USER_SET_VRING_ERR = 14,
97     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
98     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
99     VHOST_USER_GET_QUEUE_NUM = 17,
100     VHOST_USER_SET_VRING_ENABLE = 18,
101     VHOST_USER_SEND_RARP = 19,
102     VHOST_USER_NET_SET_MTU = 20,
103     VHOST_USER_SET_BACKEND_REQ_FD = 21,
104     VHOST_USER_IOTLB_MSG = 22,
105     VHOST_USER_SET_VRING_ENDIAN = 23,
106     VHOST_USER_GET_CONFIG = 24,
107     VHOST_USER_SET_CONFIG = 25,
108     VHOST_USER_CREATE_CRYPTO_SESSION = 26,
109     VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
110     VHOST_USER_POSTCOPY_ADVISE  = 28,
111     VHOST_USER_POSTCOPY_LISTEN  = 29,
112     VHOST_USER_POSTCOPY_END     = 30,
113     VHOST_USER_GET_INFLIGHT_FD = 31,
114     VHOST_USER_SET_INFLIGHT_FD = 32,
115     VHOST_USER_GPU_SET_SOCKET = 33,
116     VHOST_USER_RESET_DEVICE = 34,
117     /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
118     VHOST_USER_GET_MAX_MEM_SLOTS = 36,
119     VHOST_USER_ADD_MEM_REG = 37,
120     VHOST_USER_REM_MEM_REG = 38,
121     VHOST_USER_SET_STATUS = 39,
122     VHOST_USER_GET_STATUS = 40,
123     VHOST_USER_MAX
124 } VhostUserRequest;
125 
126 typedef enum VhostUserSlaveRequest {
127     VHOST_USER_BACKEND_NONE = 0,
128     VHOST_USER_BACKEND_IOTLB_MSG = 1,
129     VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
130     VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
131     VHOST_USER_BACKEND_MAX
132 } VhostUserSlaveRequest;
133 
134 typedef struct VhostUserMemoryRegion {
135     uint64_t guest_phys_addr;
136     uint64_t memory_size;
137     uint64_t userspace_addr;
138     uint64_t mmap_offset;
139 } VhostUserMemoryRegion;
140 
141 typedef struct VhostUserMemory {
142     uint32_t nregions;
143     uint32_t padding;
144     VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
145 } VhostUserMemory;
146 
147 typedef struct VhostUserMemRegMsg {
148     uint64_t padding;
149     VhostUserMemoryRegion region;
150 } VhostUserMemRegMsg;
151 
152 typedef struct VhostUserLog {
153     uint64_t mmap_size;
154     uint64_t mmap_offset;
155 } VhostUserLog;
156 
157 typedef struct VhostUserConfig {
158     uint32_t offset;
159     uint32_t size;
160     uint32_t flags;
161     uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
162 } VhostUserConfig;
163 
164 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
165 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
166 
167 typedef struct VhostUserCryptoSession {
168     /* session id on success, -1 on error */
169     int64_t session_id;
170     CryptoDevBackendSymSessionInfo session_setup_data;
171     uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
172     uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
173 } VhostUserCryptoSession;
174 
175 static VhostUserConfig c __attribute__ ((unused));
176 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
177                                    + sizeof(c.size) \
178                                    + sizeof(c.flags))
179 
180 typedef struct VhostUserVringArea {
181     uint64_t u64;
182     uint64_t size;
183     uint64_t offset;
184 } VhostUserVringArea;
185 
186 typedef struct VhostUserInflight {
187     uint64_t mmap_size;
188     uint64_t mmap_offset;
189     uint16_t num_queues;
190     uint16_t queue_size;
191 } VhostUserInflight;
192 
193 typedef struct {
194     VhostUserRequest request;
195 
196 #define VHOST_USER_VERSION_MASK     (0x3)
197 #define VHOST_USER_REPLY_MASK       (0x1 << 2)
198 #define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
199     uint32_t flags;
200     uint32_t size; /* the following payload size */
201 } QEMU_PACKED VhostUserHeader;
202 
203 typedef union {
204 #define VHOST_USER_VRING_IDX_MASK   (0xff)
205 #define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
206         uint64_t u64;
207         struct vhost_vring_state state;
208         struct vhost_vring_addr addr;
209         VhostUserMemory memory;
210         VhostUserMemRegMsg mem_reg;
211         VhostUserLog log;
212         struct vhost_iotlb_msg iotlb;
213         VhostUserConfig config;
214         VhostUserCryptoSession session;
215         VhostUserVringArea area;
216         VhostUserInflight inflight;
217 } VhostUserPayload;
218 
219 typedef struct VhostUserMsg {
220     VhostUserHeader hdr;
221     VhostUserPayload payload;
222 } QEMU_PACKED VhostUserMsg;
223 
224 static VhostUserMsg m __attribute__ ((unused));
225 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
226 
227 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
228 
229 /* The version of the protocol we support */
230 #define VHOST_USER_VERSION    (0x1)
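/*
 * On the wire, every message is therefore the 12-byte packed header
 * (request, flags, size), optionally followed by "size" bytes of
 * payload; file descriptors travel out-of-band as SCM_RIGHTS ancillary
 * data on the unix socket. As an illustrative sketch, a bare
 * VHOST_USER_GET_FEATURES request could be composed as:
 *
 *     VhostUserMsg msg = {
 *         .hdr.request = VHOST_USER_GET_FEATURES,
 *         .hdr.flags = VHOST_USER_VERSION,
 *         .hdr.size = 0,
 *     };
 */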
231 
232 struct vhost_user {
233     struct vhost_dev *dev;
234     /* Shared between vhost devs of the same virtio device */
235     VhostUserState *user;
236     QIOChannel *slave_ioc;
237     GSource *slave_src;
238     NotifierWithReturn postcopy_notifier;
239     struct PostCopyFD  postcopy_fd;
240     uint64_t           postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
241     /* Length of the region_rb and region_rb_offset arrays */
242     size_t             region_rb_len;
243     /* RAMBlock associated with a given region */
244     RAMBlock         **region_rb;
245     /*
246      * The offset from the start of the RAMBlock to the start of the
247      * vhost region.
248      */
249     ram_addr_t        *region_rb_offset;
250 
251     /* True once we've entered postcopy_listen */
252     bool               postcopy_listen;
253 
254     /* Our current regions */
255     int num_shadow_regions;
256     struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
257 };
258 
259 struct scrub_regions {
260     struct vhost_memory_region *region;
261     int reg_idx;
262     int fd_idx;
263 };
264 
265 static bool ioeventfd_enabled(void)
266 {
267     return !kvm_enabled() || kvm_eventfds_enabled();
268 }
269 
270 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
271 {
272     struct vhost_user *u = dev->opaque;
273     CharBackend *chr = u->user->chr;
274     uint8_t *p = (uint8_t *) msg;
275     int r, size = VHOST_USER_HDR_SIZE;
276 
277     r = qemu_chr_fe_read_all(chr, p, size);
278     if (r != size) {
279         int saved_errno = errno;
280         error_report("Failed to read msg header. Read %d instead of %d."
281                      " Original request %d.", r, size, msg->hdr.request);
282         return r < 0 ? -saved_errno : -EIO;
283     }
284 
285     /* validate received flags */
286     if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
287         error_report("Failed to read msg header."
288                 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
289                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
290         return -EPROTO;
291     }
292 
293     trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
294 
295     return 0;
296 }
297 
298 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
299 {
300     struct vhost_user *u = dev->opaque;
301     CharBackend *chr = u->user->chr;
302     uint8_t *p = (uint8_t *) msg;
303     int r, size;
304 
305     r = vhost_user_read_header(dev, msg);
306     if (r < 0) {
307         return r;
308     }
309 
310     /* validate message size is sane */
311     if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
312         error_report("Failed to read msg header."
313                 " Size %d exceeds the maximum %zu.", msg->hdr.size,
314                 VHOST_USER_PAYLOAD_SIZE);
315         return -EPROTO;
316     }
317 
318     if (msg->hdr.size) {
319         p += VHOST_USER_HDR_SIZE;
320         size = msg->hdr.size;
321         r = qemu_chr_fe_read_all(chr, p, size);
322         if (r != size) {
323             int saved_errno = errno;
324             error_report("Failed to read msg payload."
325                          " Read %d instead of %d.", r, msg->hdr.size);
326             return r < 0 ? -saved_errno : -EIO;
327         }
328     }
329 
330     return 0;
331 }
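/*
 * Reads are framed in two steps: the fixed-size header first, then
 * exactly hdr.size bytes of payload. Rejecting an oversized hdr.size
 * before touching the payload keeps a misbehaving backend from
 * overrunning the on-stack VhostUserMsg.
 */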
332 
333 static int process_message_reply(struct vhost_dev *dev,
334                                  const VhostUserMsg *msg)
335 {
336     int ret;
337     VhostUserMsg msg_reply;
338 
339     if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
340         return 0;
341     }
342 
343     ret = vhost_user_read(dev, &msg_reply);
344     if (ret < 0) {
345         return ret;
346     }
347 
348     if (msg_reply.hdr.request != msg->hdr.request) {
349         error_report("Received unexpected msg type. "
350                      "Expected %d received %d",
351                      msg->hdr.request, msg_reply.hdr.request);
352         return -EPROTO;
353     }
354 
355     return msg_reply.payload.u64 ? -EIO : 0;
356 }
357 
358 static bool vhost_user_one_time_request(VhostUserRequest request)
359 {
360     switch (request) {
361     case VHOST_USER_SET_OWNER:
362     case VHOST_USER_RESET_OWNER:
363     case VHOST_USER_SET_MEM_TABLE:
364     case VHOST_USER_GET_QUEUE_NUM:
365     case VHOST_USER_NET_SET_MTU:
366     case VHOST_USER_ADD_MEM_REG:
367     case VHOST_USER_REM_MEM_REG:
368         return true;
369     default:
370         return false;
371     }
372 }
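/*
 * These requests are global to the device rather than per-virtqueue, so
 * when a single virtio device is backed by several vhost_dev instances
 * (e.g. a multiqueue net device), only the instance owning vq_index 0
 * actually sends them; see vhost_user_write() below.
 */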
373 
374 /* most non-init callers ignore the error */
375 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
376                             int *fds, int fd_num)
377 {
378     struct vhost_user *u = dev->opaque;
379     CharBackend *chr = u->user->chr;
380     int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
381 
382     /*
383      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
384      * we only need to send it once, the first time. Any later such
385      * request is simply ignored.
386      */
387     if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
388         msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
389         return 0;
390     }
391 
392     if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
393         error_report("Failed to set msg fds.");
394         return -EINVAL;
395     }
396 
397     ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
398     if (ret != size) {
399         int saved_errno = errno;
400         error_report("Failed to write msg."
401                      " Wrote %d instead of %d.", ret, size);
402         return ret < 0 ? -saved_errno : -EIO;
403     }
404 
405     trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
406 
407     return 0;
408 }
409 
410 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
411 {
412     VhostUserMsg msg = {
413         .hdr.request = VHOST_USER_GPU_SET_SOCKET,
414         .hdr.flags = VHOST_USER_VERSION,
415     };
416 
417     return vhost_user_write(dev, &msg, &fd, 1);
418 }
419 
420 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
421                                    struct vhost_log *log)
422 {
423     int fds[VHOST_USER_MAX_RAM_SLOTS];
424     size_t fd_num = 0;
425     bool shmfd = virtio_has_feature(dev->protocol_features,
426                                     VHOST_USER_PROTOCOL_F_LOG_SHMFD);
427     int ret;
428     VhostUserMsg msg = {
429         .hdr.request = VHOST_USER_SET_LOG_BASE,
430         .hdr.flags = VHOST_USER_VERSION,
431         .payload.log.mmap_size = log->size * sizeof(*(log->log)),
432         .payload.log.mmap_offset = 0,
433         .hdr.size = sizeof(msg.payload.log),
434     };
435 
436     /* Send only once with first queue pair */
437     if (dev->vq_index != 0) {
438         return 0;
439     }
440 
441     if (shmfd && log->fd != -1) {
442         fds[fd_num++] = log->fd;
443     }
444 
445     ret = vhost_user_write(dev, &msg, fds, fd_num);
446     if (ret < 0) {
447         return ret;
448     }
449 
450     if (shmfd) {
451         msg.hdr.size = 0;
452         ret = vhost_user_read(dev, &msg);
453         if (ret < 0) {
454             return ret;
455         }
456 
457         if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
458             error_report("Received unexpected msg type. "
459                          "Expected %d received %d",
460                          VHOST_USER_SET_LOG_BASE, msg.hdr.request);
461             return -EPROTO;
462         }
463     }
464 
465     return 0;
466 }
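/*
 * Note the handshake above: with VHOST_USER_PROTOCOL_F_LOG_SHMFD the
 * dirty log is handed over as a shared-memory fd, and QEMU then waits
 * for the backend's reply, the idea being that the backend has mapped
 * the new log before the previous one is torn down.
 */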
467 
468 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
469                                             int *fd)
470 {
471     MemoryRegion *mr;
472 
473     assert((uintptr_t)addr == addr);
474     mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
475     *fd = memory_region_get_fd(mr);
476 
477     return mr;
478 }
479 
480 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
481                                        struct vhost_memory_region *src,
482                                        uint64_t mmap_offset)
483 {
484     assert(src != NULL && dst != NULL);
485     dst->userspace_addr = src->userspace_addr;
486     dst->memory_size = src->memory_size;
487     dst->guest_phys_addr = src->guest_phys_addr;
488     dst->mmap_offset = mmap_offset;
489 }
490 
491 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
492                                              struct vhost_dev *dev,
493                                              VhostUserMsg *msg,
494                                              int *fds, size_t *fd_num,
495                                              bool track_ramblocks)
496 {
497     int i, fd;
498     ram_addr_t offset;
499     MemoryRegion *mr;
500     struct vhost_memory_region *reg;
501     VhostUserMemoryRegion region_buffer;
502 
503     msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
504 
505     for (i = 0; i < dev->mem->nregions; ++i) {
506         reg = dev->mem->regions + i;
507 
508         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
509         if (fd > 0) {
510             if (track_ramblocks) {
511                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
512                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
513                                                       reg->memory_size,
514                                                       reg->guest_phys_addr,
515                                                       reg->userspace_addr,
516                                                       offset);
517                 u->region_rb_offset[i] = offset;
518                 u->region_rb[i] = mr->ram_block;
519             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
520                 error_report("Failed preparing vhost-user memory table msg");
521                 return -ENOBUFS;
522             }
523             vhost_user_fill_msg_region(&region_buffer, reg, offset);
524             msg->payload.memory.regions[*fd_num] = region_buffer;
525             fds[(*fd_num)++] = fd;
526         } else if (track_ramblocks) {
527             u->region_rb_offset[i] = 0;
528             u->region_rb[i] = NULL;
529         }
530     }
531 
532     msg->payload.memory.nregions = *fd_num;
533 
534     if (!*fd_num) {
535         error_report("Failed initializing vhost-user memory map, "
536                      "consider using -object memory-backend-file share=on");
537         return -EINVAL;
538     }
539 
540     msg->hdr.size = sizeof(msg->payload.memory.nregions);
541     msg->hdr.size += sizeof(msg->payload.memory.padding);
542     msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
543 
544     return 0;
545 }
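/*
 * For a sense of scale: VhostUserMemoryRegion is four uint64_t fields
 * (32 bytes), so a table with e.g. two fd-backed regions goes out with
 * hdr.size = 4 (nregions) + 4 (padding) + 2 * 32 = 72 bytes of payload
 * and the two fds attached as ancillary data.
 */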
546 
547 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
548                              struct vhost_memory_region *vdev_reg)
549 {
550     return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
551         shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
552         shadow_reg->memory_size == vdev_reg->memory_size;
553 }
554 
555 static void scrub_shadow_regions(struct vhost_dev *dev,
556                                  struct scrub_regions *add_reg,
557                                  int *nr_add_reg,
558                                  struct scrub_regions *rem_reg,
559                                  int *nr_rem_reg, uint64_t *shadow_pcb,
560                                  bool track_ramblocks)
561 {
562     struct vhost_user *u = dev->opaque;
563     bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
564     struct vhost_memory_region *reg, *shadow_reg;
565     int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
566     ram_addr_t offset;
567     MemoryRegion *mr;
568     bool matching;
569 
570     /*
571      * Find memory regions present in our shadow state which are not in
572      * the device's current memory state.
573      *
574      * Mark regions in both the shadow and device state as "found".
575      */
576     for (i = 0; i < u->num_shadow_regions; i++) {
577         shadow_reg = &u->shadow_regions[i];
578         matching = false;
579 
580         for (j = 0; j < dev->mem->nregions; j++) {
581             reg = &dev->mem->regions[j];
582 
583             mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
584 
585             if (reg_equal(shadow_reg, reg)) {
586                 matching = true;
587                 found[j] = true;
588                 if (track_ramblocks) {
589                     /*
590                      * Reset postcopy client bases, region_rb, and
591                      * region_rb_offset in case regions are removed.
592                      */
593                     if (fd > 0) {
594                         u->region_rb_offset[j] = offset;
595                         u->region_rb[j] = mr->ram_block;
596                         shadow_pcb[j] = u->postcopy_client_bases[i];
597                     } else {
598                         u->region_rb_offset[j] = 0;
599                         u->region_rb[j] = NULL;
600                     }
601                 }
602                 break;
603             }
604         }
605 
606         /*
607          * If the region was not found in the current device memory state
608          * create an entry for it in the removed list.
609          */
610         if (!matching) {
611             rem_reg[rm_idx].region = shadow_reg;
612             rem_reg[rm_idx++].reg_idx = i;
613         }
614     }
615 
616     /*
617      * For regions not marked "found", create entries in the added list.
618      *
619      * Note their indexes in the device memory state and the indexes of their
620      * file descriptors.
621      */
622     for (i = 0; i < dev->mem->nregions; i++) {
623         reg = &dev->mem->regions[i];
624         vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
625         if (fd > 0) {
626             ++fd_num;
627         }
628 
629         /*
630          * If the region was in both the shadow and device state we don't
631          * need to send a VHOST_USER_ADD_MEM_REG message for it.
632          */
633         if (found[i]) {
634             continue;
635         }
636 
637         add_reg[add_idx].region = reg;
638         add_reg[add_idx].reg_idx = i;
639         add_reg[add_idx++].fd_idx = fd_num;
640     }
641     *nr_rem_reg = rm_idx;
642     *nr_add_reg = add_idx;
643 
644     return;
645 }
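/*
 * A small worked example of the scrub above: if the shadow table holds
 * regions {A, B} while the device now reports {B, C}, then B is marked
 * "found" in both, A ends up in rem_reg, and C ends up in add_reg along
 * with the index of its file descriptor.
 */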
646 
647 static int send_remove_regions(struct vhost_dev *dev,
648                                struct scrub_regions *remove_reg,
649                                int nr_rem_reg, VhostUserMsg *msg,
650                                bool reply_supported)
651 {
652     struct vhost_user *u = dev->opaque;
653     struct vhost_memory_region *shadow_reg;
654     int i, fd, shadow_reg_idx, ret;
655     ram_addr_t offset;
656     VhostUserMemoryRegion region_buffer;
657 
658     /*
659      * The regions in remove_reg appear in the same order they do in the
660      * shadow table. Therefore we can minimize memory copies by iterating
661      * through remove_reg backwards.
662      */
663     for (i = nr_rem_reg - 1; i >= 0; i--) {
664         shadow_reg = remove_reg[i].region;
665         shadow_reg_idx = remove_reg[i].reg_idx;
666 
667         vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
668 
669         if (fd > 0) {
670             msg->hdr.request = VHOST_USER_REM_MEM_REG;
671             vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
672             msg->payload.mem_reg.region = region_buffer;
673 
674             ret = vhost_user_write(dev, msg, NULL, 0);
675             if (ret < 0) {
676                 return ret;
677             }
678 
679             if (reply_supported) {
680                 ret = process_message_reply(dev, msg);
681                 if (ret) {
682                     return ret;
683                 }
684             }
685         }
686 
687         /*
688          * At this point we know the backend has unmapped the region. It is now
689          * safe to remove it from the shadow table.
690          */
691         memmove(&u->shadow_regions[shadow_reg_idx],
692                 &u->shadow_regions[shadow_reg_idx + 1],
693                 sizeof(struct vhost_memory_region) *
694                 (u->num_shadow_regions - shadow_reg_idx - 1));
695         u->num_shadow_regions--;
696     }
697 
698     return 0;
699 }
700 
701 static int send_add_regions(struct vhost_dev *dev,
702                             struct scrub_regions *add_reg, int nr_add_reg,
703                             VhostUserMsg *msg, uint64_t *shadow_pcb,
704                             bool reply_supported, bool track_ramblocks)
705 {
706     struct vhost_user *u = dev->opaque;
707     int i, fd, ret, reg_idx, reg_fd_idx;
708     struct vhost_memory_region *reg;
709     MemoryRegion *mr;
710     ram_addr_t offset;
711     VhostUserMsg msg_reply;
712     VhostUserMemoryRegion region_buffer;
713 
714     for (i = 0; i < nr_add_reg; i++) {
715         reg = add_reg[i].region;
716         reg_idx = add_reg[i].reg_idx;
717         reg_fd_idx = add_reg[i].fd_idx;
718 
719         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
720 
721         if (fd > 0) {
722             if (track_ramblocks) {
723                 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
724                                                       reg->memory_size,
725                                                       reg->guest_phys_addr,
726                                                       reg->userspace_addr,
727                                                       offset);
728                 u->region_rb_offset[reg_idx] = offset;
729                 u->region_rb[reg_idx] = mr->ram_block;
730             }
731             msg->hdr.request = VHOST_USER_ADD_MEM_REG;
732             vhost_user_fill_msg_region(&region_buffer, reg, offset);
733             msg->payload.mem_reg.region = region_buffer;
734 
735             ret = vhost_user_write(dev, msg, &fd, 1);
736             if (ret < 0) {
737                 return ret;
738             }
739 
740             if (track_ramblocks) {
741                 uint64_t reply_gpa;
742 
743                 ret = vhost_user_read(dev, &msg_reply);
744                 if (ret < 0) {
745                     return ret;
746                 }
747 
748                 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
749 
750                 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
751                     error_report("%s: Received unexpected msg type."
752                                  "Expected %d received %d", __func__,
753                                  VHOST_USER_ADD_MEM_REG,
754                                  msg_reply.hdr.request);
755                     return -EPROTO;
756                 }
757 
758                 /*
759                  * We're using the same structure, just reusing one of the
760                  * fields, so it should be the same size.
761                  */
762                 if (msg_reply.hdr.size != msg->hdr.size) {
763                     error_report("%s: Unexpected size for postcopy reply "
764                                  "%d vs %d", __func__, msg_reply.hdr.size,
765                                  msg->hdr.size);
766                     return -EPROTO;
767                 }
768 
769                 /* Get the postcopy client base from the backend's reply. */
770                 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
771                     shadow_pcb[reg_idx] =
772                         msg_reply.payload.mem_reg.region.userspace_addr;
773                     trace_vhost_user_set_mem_table_postcopy(
774                         msg_reply.payload.mem_reg.region.userspace_addr,
775                         msg->payload.mem_reg.region.userspace_addr,
776                         reg_fd_idx, reg_idx);
777                 } else {
778                     error_report("%s: invalid postcopy reply for region. "
779                                  "Got guest physical address %" PRIX64 ", expected "
780                                  "%" PRIX64, __func__, reply_gpa,
781                                  dev->mem->regions[reg_idx].guest_phys_addr);
782                     return -EPROTO;
783                 }
784             } else if (reply_supported) {
785                 ret = process_message_reply(dev, msg);
786                 if (ret) {
787                     return ret;
788                 }
789             }
790         } else if (track_ramblocks) {
791             u->region_rb_offset[reg_idx] = 0;
792             u->region_rb[reg_idx] = NULL;
793         }
794 
795         /*
796          * At this point, we know the backend has mapped in the new
797          * region, if the region has a valid file descriptor.
798          *
799          * The region should now be added to the shadow table.
800          */
801         u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
802             reg->guest_phys_addr;
803         u->shadow_regions[u->num_shadow_regions].userspace_addr =
804             reg->userspace_addr;
805         u->shadow_regions[u->num_shadow_regions].memory_size =
806             reg->memory_size;
807         u->num_shadow_regions++;
808     }
809 
810     return 0;
811 }
812 
813 static int vhost_user_add_remove_regions(struct vhost_dev *dev,
814                                          VhostUserMsg *msg,
815                                          bool reply_supported,
816                                          bool track_ramblocks)
817 {
818     struct vhost_user *u = dev->opaque;
819     struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
820     struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
821     uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
822     int nr_add_reg, nr_rem_reg;
823     int ret;
824 
825     msg->hdr.size = sizeof(msg->payload.mem_reg);
826 
827     /* Find the regions which need to be removed or added. */
828     scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
829                          shadow_pcb, track_ramblocks);
830 
831     if (nr_rem_reg) {
832         ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
833                                   reply_supported);
834         if (ret < 0) {
835             goto err;
836         }
837     }
838 
839     if (nr_add_reg) {
840         ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
841                                reply_supported, track_ramblocks);
842         if (ret < 0) {
843             goto err;
844         }
845     }
846 
847     if (track_ramblocks) {
848         memcpy(u->postcopy_client_bases, shadow_pcb,
849                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
850         /*
851          * Now that we've registered this with the postcopy code, we ack to
852          * the client, because now we're in a position to deal with any
853          * faults it generates.
854          */
855         /* TODO: Use this for failure cases as well with a bad value. */
856         msg->hdr.size = sizeof(msg->payload.u64);
857         msg->payload.u64 = 0; /* OK */
858 
859         ret = vhost_user_write(dev, msg, NULL, 0);
860         if (ret < 0) {
861             return ret;
862         }
863     }
864 
865     return 0;
866 
867 err:
868     if (track_ramblocks) {
869         memcpy(u->postcopy_client_bases, shadow_pcb,
870                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
871     }
872 
873     return ret;
874 }
875 
876 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
877                                              struct vhost_memory *mem,
878                                              bool reply_supported,
879                                              bool config_mem_slots)
880 {
881     struct vhost_user *u = dev->opaque;
882     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
883     size_t fd_num = 0;
884     VhostUserMsg msg_reply;
885     int region_i, msg_i;
886     int ret;
887 
888     VhostUserMsg msg = {
889         .hdr.flags = VHOST_USER_VERSION,
890     };
891 
892     if (u->region_rb_len < dev->mem->nregions) {
893         u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
894         u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
895                                       dev->mem->nregions);
896         memset(&(u->region_rb[u->region_rb_len]), '\0',
897                sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
898         memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
899                sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
900         u->region_rb_len = dev->mem->nregions;
901     }
902 
903     if (config_mem_slots) {
904         ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
905         if (ret < 0) {
906             return ret;
907         }
908     } else {
909         ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
910                                                 true);
911         if (ret < 0) {
912             return ret;
913         }
914 
915         ret = vhost_user_write(dev, &msg, fds, fd_num);
916         if (ret < 0) {
917             return ret;
918         }
919 
920         ret = vhost_user_read(dev, &msg_reply);
921         if (ret < 0) {
922             return ret;
923         }
924 
925         if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
926             error_report("%s: Received unexpected msg type."
927                          "Expected %d received %d", __func__,
928                          VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
929             return -EPROTO;
930         }
931 
932         /*
933          * We're using the same structure, just reusing one of the
934          * fields, so it should be the same size.
935          */
936         if (msg_reply.hdr.size != msg.hdr.size) {
937             error_report("%s: Unexpected size for postcopy reply "
938                          "%d vs %d", __func__, msg_reply.hdr.size,
939                          msg.hdr.size);
940             return -EPROTO;
941         }
942 
943         memset(u->postcopy_client_bases, 0,
944                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
945 
946         /*
947          * The replies are in the same order as the regions that were
948          * sent, but some of the regions were skipped (above) if they
949          * didn't have fds.
950          */
951         for (msg_i = 0, region_i = 0;
952              region_i < dev->mem->nregions;
953              region_i++) {
954             if (msg_i < fd_num &&
955                 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
956                 dev->mem->regions[region_i].guest_phys_addr) {
957                 u->postcopy_client_bases[region_i] =
958                     msg_reply.payload.memory.regions[msg_i].userspace_addr;
959                 trace_vhost_user_set_mem_table_postcopy(
960                     msg_reply.payload.memory.regions[msg_i].userspace_addr,
961                     msg.payload.memory.regions[msg_i].userspace_addr,
962                     msg_i, region_i);
963                 msg_i++;
964             }
965         }
966         if (msg_i != fd_num) {
967             error_report("%s: postcopy reply not fully consumed "
968                          "%d vs %zd",
969                          __func__, msg_i, fd_num);
970             return -EIO;
971         }
972 
973         /*
974          * Now that we've registered this with the postcopy code, we ack to
975          * the client, because now we're in a position to deal with any
976          * faults it generates.
977          */
978         /* TODO: Use this for failure cases as well with a bad value. */
979         msg.hdr.size = sizeof(msg.payload.u64);
980         msg.payload.u64 = 0; /* OK */
981         ret = vhost_user_write(dev, &msg, NULL, 0);
982         if (ret < 0) {
983             return ret;
984         }
985     }
986 
987     return 0;
988 }
989 
990 static int vhost_user_set_mem_table(struct vhost_dev *dev,
991                                     struct vhost_memory *mem)
992 {
993     struct vhost_user *u = dev->opaque;
994     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
995     size_t fd_num = 0;
996     bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
997     bool reply_supported = virtio_has_feature(dev->protocol_features,
998                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
999     bool config_mem_slots =
1000         virtio_has_feature(dev->protocol_features,
1001                            VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
1002     int ret;
1003 
1004     if (do_postcopy) {
1005         /*
1006          * Postcopy has enough differences that it's best done in its own
1007          * version.
1008          */
1009         return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
1010                                                  config_mem_slots);
1011     }
1012 
1013     VhostUserMsg msg = {
1014         .hdr.flags = VHOST_USER_VERSION,
1015     };
1016 
1017     if (reply_supported) {
1018         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1019     }
1020 
1021     if (config_mem_slots) {
1022         ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
1023         if (ret < 0) {
1024             return ret;
1025         }
1026     } else {
1027         ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
1028                                                 false);
1029         if (ret < 0) {
1030             return ret;
1031         }
1032 
1033         ret = vhost_user_write(dev, &msg, fds, fd_num);
1034         if (ret < 0) {
1035             return ret;
1036         }
1037 
1038         if (reply_supported) {
1039             return process_message_reply(dev, &msg);
1040         }
1041     }
1042 
1043     return 0;
1044 }
1045 
1046 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1047                                        struct vhost_vring_state *ring)
1048 {
1049     bool cross_endian = virtio_has_feature(dev->protocol_features,
1050                                            VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1051     VhostUserMsg msg = {
1052         .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1053         .hdr.flags = VHOST_USER_VERSION,
1054         .payload.state = *ring,
1055         .hdr.size = sizeof(msg.payload.state),
1056     };
1057 
1058     if (!cross_endian) {
1059         error_report("vhost-user trying to send unhandled ioctl");
1060         return -ENOTSUP;
1061     }
1062 
1063     return vhost_user_write(dev, &msg, NULL, 0);
1064 }
1065 
1066 static int vhost_set_vring(struct vhost_dev *dev,
1067                            unsigned long int request,
1068                            struct vhost_vring_state *ring)
1069 {
1070     VhostUserMsg msg = {
1071         .hdr.request = request,
1072         .hdr.flags = VHOST_USER_VERSION,
1073         .payload.state = *ring,
1074         .hdr.size = sizeof(msg.payload.state),
1075     };
1076 
1077     return vhost_user_write(dev, &msg, NULL, 0);
1078 }
1079 
1080 static int vhost_user_set_vring_num(struct vhost_dev *dev,
1081                                     struct vhost_vring_state *ring)
1082 {
1083     return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1084 }
1085 
1086 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
1087 {
1088     assert(n && n->unmap_addr);
1089     munmap(n->unmap_addr, qemu_real_host_page_size());
1090     n->unmap_addr = NULL;
1091 }
1092 
1093 /*
1094  * Clean-up function for the notifier; the structure is finally freed
1095  * under RCU.
1096  */
1097 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
1098                                             VirtIODevice *vdev)
1099 {
1100     if (n->addr) {
1101         if (vdev) {
1102             virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
1103         }
1104         assert(!n->unmap_addr);
1105         n->unmap_addr = n->addr;
1106         n->addr = NULL;
1107         call_rcu(n, vhost_user_host_notifier_free, rcu);
1108     }
1109 }
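/*
 * The call_rcu() deferral above means the page is only munmapped once
 * any reader still dereferencing n->addr has left its RCU critical
 * section; until then the stale mapping is parked in n->unmap_addr.
 */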
1110 
1111 static int vhost_user_set_vring_base(struct vhost_dev *dev,
1112                                      struct vhost_vring_state *ring)
1113 {
1114     return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1115 }
1116 
1117 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1118 {
1119     int i;
1120 
1121     if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1122         return -EINVAL;
1123     }
1124 
1125     for (i = 0; i < dev->nvqs; ++i) {
1126         int ret;
1127         struct vhost_vring_state state = {
1128             .index = dev->vq_index + i,
1129             .num   = enable,
1130         };
1131 
1132         ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1133         if (ret < 0) {
1134             /*
1135              * Restoring the previous state is likely infeasible, as is
1136              * proceeding regardless of the error, so just bail out and hope
1137              * for device-level recovery.
1138              */
1139             return ret;
1140         }
1141     }
1142 
1143     return 0;
1144 }
1145 
1146 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
1147                                              int idx)
1148 {
1149     if (idx >= u->notifiers->len) {
1150         return NULL;
1151     }
1152     return g_ptr_array_index(u->notifiers, idx);
1153 }
1154 
1155 static int vhost_user_get_vring_base(struct vhost_dev *dev,
1156                                      struct vhost_vring_state *ring)
1157 {
1158     int ret;
1159     VhostUserMsg msg = {
1160         .hdr.request = VHOST_USER_GET_VRING_BASE,
1161         .hdr.flags = VHOST_USER_VERSION,
1162         .payload.state = *ring,
1163         .hdr.size = sizeof(msg.payload.state),
1164     };
1165     struct vhost_user *u = dev->opaque;
1166 
1167     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
1168     if (n) {
1169         vhost_user_host_notifier_remove(n, dev->vdev);
1170     }
1171 
1172     ret = vhost_user_write(dev, &msg, NULL, 0);
1173     if (ret < 0) {
1174         return ret;
1175     }
1176 
1177     ret = vhost_user_read(dev, &msg);
1178     if (ret < 0) {
1179         return ret;
1180     }
1181 
1182     if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1183         error_report("Received unexpected msg type. Expected %d received %d",
1184                      VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1185         return -EPROTO;
1186     }
1187 
1188     if (msg.hdr.size != sizeof(msg.payload.state)) {
1189         error_report("Received bad msg size.");
1190         return -EPROTO;
1191     }
1192 
1193     *ring = msg.payload.state;
1194 
1195     return 0;
1196 }
1197 
1198 static int vhost_set_vring_file(struct vhost_dev *dev,
1199                                 VhostUserRequest request,
1200                                 struct vhost_vring_file *file)
1201 {
1202     int fds[VHOST_USER_MAX_RAM_SLOTS];
1203     size_t fd_num = 0;
1204     VhostUserMsg msg = {
1205         .hdr.request = request,
1206         .hdr.flags = VHOST_USER_VERSION,
1207         .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1208         .hdr.size = sizeof(msg.payload.u64),
1209     };
1210 
1211     if (ioeventfd_enabled() && file->fd > 0) {
1212         fds[fd_num++] = file->fd;
1213     } else {
1214         msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1215     }
1216 
1217     return vhost_user_write(dev, &msg, fds, fd_num);
1218 }
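/*
 * When no usable descriptor is available, VHOST_USER_VRING_NOFD_MASK is
 * set in the payload instead of attaching an fd; per the vhost-user
 * specification this "invalid FD" flag tells the backend to fall back
 * to polling instead of waiting on an eventfd.
 */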
1219 
1220 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1221                                      struct vhost_vring_file *file)
1222 {
1223     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1224 }
1225 
1226 static int vhost_user_set_vring_call(struct vhost_dev *dev,
1227                                      struct vhost_vring_file *file)
1228 {
1229     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1230 }
1231 
1232 static int vhost_user_set_vring_err(struct vhost_dev *dev,
1233                                     struct vhost_vring_file *file)
1234 {
1235     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
1236 }
1237 
1238 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1239 {
1240     int ret;
1241     VhostUserMsg msg = {
1242         .hdr.request = request,
1243         .hdr.flags = VHOST_USER_VERSION,
1244     };
1245 
1246     if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
1247         return 0;
1248     }
1249 
1250     ret = vhost_user_write(dev, &msg, NULL, 0);
1251     if (ret < 0) {
1252         return ret;
1253     }
1254 
1255     ret = vhost_user_read(dev, &msg);
1256     if (ret < 0) {
1257         return ret;
1258     }
1259 
1260     if (msg.hdr.request != request) {
1261         error_report("Received unexpected msg type. Expected %d received %d",
1262                      request, msg.hdr.request);
1263         return -EPROTO;
1264     }
1265 
1266     if (msg.hdr.size != sizeof(msg.payload.u64)) {
1267         error_report("Received bad msg size.");
1268         return -EPROTO;
1269     }
1270 
1271     *u64 = msg.payload.u64;
1272 
1273     return 0;
1274 }
1275 
1276 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1277 {
1278     if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1279         return -EPROTO;
1280     }
1281 
1282     return 0;
1283 }
1284 
1285 static int enforce_reply(struct vhost_dev *dev,
1286                          const VhostUserMsg *msg)
1287 {
1288     uint64_t dummy;
1289 
1290     if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1291         return process_message_reply(dev, msg);
1292     }
1293 
1294     /*
1295      * We need to wait for a reply but the backend does not
1296      * support replies for the command we just sent.
1297      * Send VHOST_USER_GET_FEATURES which makes all backends
1298      * send a reply.
1299      */
1300     return vhost_user_get_features(dev, &dummy);
1301 }
1302 
1303 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1304                                      struct vhost_vring_addr *addr)
1305 {
1306     int ret;
1307     VhostUserMsg msg = {
1308         .hdr.request = VHOST_USER_SET_VRING_ADDR,
1309         .hdr.flags = VHOST_USER_VERSION,
1310         .payload.addr = *addr,
1311         .hdr.size = sizeof(msg.payload.addr),
1312     };
1313 
1314     bool reply_supported = virtio_has_feature(dev->protocol_features,
1315                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1316 
1317     /*
1318      * Wait for a reply if logging is enabled, to make sure the
1319      * backend is actually logging changes.
1320      */
1321     bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1322 
1323     if (reply_supported && wait_for_reply) {
1324         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1325     }
1326 
1327     ret = vhost_user_write(dev, &msg, NULL, 0);
1328     if (ret < 0) {
1329         return ret;
1330     }
1331 
1332     if (wait_for_reply) {
1333         return enforce_reply(dev, &msg);
1334     }
1335 
1336     return 0;
1337 }
1338 
1339 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1340                               bool wait_for_reply)
1341 {
1342     VhostUserMsg msg = {
1343         .hdr.request = request,
1344         .hdr.flags = VHOST_USER_VERSION,
1345         .payload.u64 = u64,
1346         .hdr.size = sizeof(msg.payload.u64),
1347     };
1348     int ret;
1349 
1350     if (wait_for_reply) {
1351         bool reply_supported = virtio_has_feature(dev->protocol_features,
1352                                           VHOST_USER_PROTOCOL_F_REPLY_ACK);
1353         if (reply_supported) {
1354             msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1355         }
1356     }
1357 
1358     ret = vhost_user_write(dev, &msg, NULL, 0);
1359     if (ret < 0) {
1360         return ret;
1361     }
1362 
1363     if (wait_for_reply) {
1364         return enforce_reply(dev, &msg);
1365     }
1366 
1367     return 0;
1368 }
1369 
1370 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
1371 {
1372     return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
1373 }
1374 
1375 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
1376 {
1377     uint64_t value;
1378     int ret;
1379 
1380     ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
1381     if (ret < 0) {
1382         return ret;
1383     }
1384     *status = value;
1385 
1386     return 0;
1387 }
1388 
1389 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
1390 {
1391     uint8_t s;
1392     int ret;
1393 
1394     ret = vhost_user_get_status(dev, &s);
1395     if (ret < 0) {
1396         return ret;
1397     }
1398 
1399     if ((s & status) == status) {
1400         return 0;
1401     }
1402     s |= status;
1403 
1404     return vhost_user_set_status(dev, s);
1405 }
1406 
1407 static int vhost_user_set_features(struct vhost_dev *dev,
1408                                    uint64_t features)
1409 {
1410     /*
1411      * Wait for a reply if logging is enabled, to make sure the
1412      * backend is actually logging changes.
1413      */
1414     bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
1415     int ret;
1416 
1417     /*
1418      * We need to include any extra backend-only feature bits that
1419      * might be needed by our device. Currently this includes the
1420      * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
1421      * features.
1422      */
1423     ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
1424                               features | dev->backend_features,
1425                               log_enabled);
1426 
1427     if (virtio_has_feature(dev->protocol_features,
1428                            VHOST_USER_PROTOCOL_F_STATUS)) {
1429         if (!ret) {
1430             return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
1431         }
1432     }
1433 
1434     return ret;
1435 }
1436 
1437 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1438                                             uint64_t features)
1439 {
1440     return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1441                               false);
1442 }
1443 
1444 static int vhost_user_set_owner(struct vhost_dev *dev)
1445 {
1446     VhostUserMsg msg = {
1447         .hdr.request = VHOST_USER_SET_OWNER,
1448         .hdr.flags = VHOST_USER_VERSION,
1449     };
1450 
1451     return vhost_user_write(dev, &msg, NULL, 0);
1452 }
1453 
1454 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1455                                        uint64_t *max_memslots)
1456 {
1457     uint64_t backend_max_memslots;
1458     int err;
1459 
1460     err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1461                              &backend_max_memslots);
1462     if (err < 0) {
1463         return err;
1464     }
1465 
1466     *max_memslots = backend_max_memslots;
1467 
1468     return 0;
1469 }
1470 
1471 static int vhost_user_reset_device(struct vhost_dev *dev)
1472 {
1473     VhostUserMsg msg = {
1474         .hdr.flags = VHOST_USER_VERSION,
1475     };
1476 
1477     msg.hdr.request = virtio_has_feature(dev->protocol_features,
1478                                          VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1479         ? VHOST_USER_RESET_DEVICE
1480         : VHOST_USER_RESET_OWNER;
1481 
1482     return vhost_user_write(dev, &msg, NULL, 0);
1483 }
1484 
1485 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1486 {
1487     if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1488         return -ENOSYS;
1489     }
1490 
1491     return dev->config_ops->vhost_dev_config_notifier(dev);
1492 }
1493 
1494 /*
1495  * Fetch or create the notifier for a given idx. Newly created
1496  * notifiers are added to the pointer array that tracks them.
1497  */
1498 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
1499                                                        int idx)
1500 {
1501     VhostUserHostNotifier *n = NULL;
1502     if (idx >= u->notifiers->len) {
1503         g_ptr_array_set_size(u->notifiers, idx + 1);
1504     }
1505 
1506     n = g_ptr_array_index(u->notifiers, idx);
1507     if (!n) {
1508         /*
1509          * In case notifications arrive out-of-order,
1510          * make room for the current index.
1511          */
1512         g_ptr_array_remove_index(u->notifiers, idx);
1513         n = g_new0(VhostUserHostNotifier, 1);
1514         n->idx = idx;
1515         g_ptr_array_insert(u->notifiers, idx, n);
1516         trace_vhost_user_create_notifier(idx, n);
1517     }
1518 
1519     return n;
1520 }
1521 
1522 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
1523                                                        VhostUserVringArea *area,
1524                                                        int fd)
1525 {
1526     int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1527     size_t page_size = qemu_real_host_page_size();
1528     struct vhost_user *u = dev->opaque;
1529     VhostUserState *user = u->user;
1530     VirtIODevice *vdev = dev->vdev;
1531     VhostUserHostNotifier *n;
1532     void *addr;
1533     char *name;
1534 
1535     if (!virtio_has_feature(dev->protocol_features,
1536                             VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1537         vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1538         return -EINVAL;
1539     }
1540 
1541     /*
1542      * Fetch notifier and invalidate any old data before setting up
1543      * new mapped address.
1544      */
1545     n = fetch_or_create_notifier(user, queue_idx);
1546     vhost_user_host_notifier_remove(n, vdev);
1547 
1548     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1549         return 0;
1550     }
1551 
1552     /* Sanity check. */
1553     if (area->size != page_size) {
1554         return -EINVAL;
1555     }
1556 
1557     addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1558                 fd, area->offset);
1559     if (addr == MAP_FAILED) {
1560         return -EFAULT;
1561     }
1562 
1563     name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1564                            user, queue_idx);
1565     if (!n->mr.ram) { /* Don't init again after suspend. */
1566         memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1567                                           page_size, addr);
1568     } else {
1569         n->mr.ram_block->host = addr;
1570     }
1571     g_free(name);
1572 
1573     if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1574         object_unparent(OBJECT(&n->mr));
1575         munmap(addr, page_size);
1576         return -ENXIO;
1577     }
1578 
1579     n->addr = addr;
1580 
1581     return 0;
1582 }
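/*
 * The net effect, as a sketch of the intent rather than device-specific
 * detail: the backend shares one page of its notification area, QEMU
 * mmaps it and installs it as the queue's host notifier memory region,
 * so guest doorbell writes land directly in backend memory instead of
 * trapping out to QEMU.
 */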
1583 
1584 static void close_slave_channel(struct vhost_user *u)
1585 {
1586     g_source_destroy(u->slave_src);
1587     g_source_unref(u->slave_src);
1588     u->slave_src = NULL;
1589     object_unref(OBJECT(u->slave_ioc));
1590     u->slave_ioc = NULL;
1591 }
1592 
1593 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
1594                            gpointer opaque)
1595 {
1596     struct vhost_dev *dev = opaque;
1597     struct vhost_user *u = dev->opaque;
1598     VhostUserHeader hdr = { 0, };
1599     VhostUserPayload payload = { 0, };
1600     Error *local_err = NULL;
1601     gboolean rc = G_SOURCE_CONTINUE;
1602     int ret = 0;
1603     struct iovec iov;
1604     g_autofree int *fd = NULL;
1605     size_t fdsize = 0;
1606     int i;
1607 
1608     /* Read header */
1609     iov.iov_base = &hdr;
1610     iov.iov_len = VHOST_USER_HDR_SIZE;
1611 
1612     if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
1613         error_report_err(local_err);
1614         goto err;
1615     }
1616 
1617     if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1618         error_report("Failed to read msg header."
1619                 " Size %d exceeds the maximum %zu.", hdr.size,
1620                 VHOST_USER_PAYLOAD_SIZE);
1621         goto err;
1622     }
1623 
1624     /* Read payload */
1625     if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
1626         error_report_err(local_err);
1627         goto err;
1628     }
1629 
1630     switch (hdr.request) {
1631     case VHOST_USER_BACKEND_IOTLB_MSG:
1632         ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1633         break;
1634     case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
1635         ret = vhost_user_slave_handle_config_change(dev);
1636         break;
1637     case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
1638         ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1639                                                           fd ? fd[0] : -1);
1640         break;
1641     default:
1642         error_report("Received unexpected msg type: %d.", hdr.request);
1643         ret = -EINVAL;
1644     }
1645 
1646     /*
1647      * REPLY_ACK feature handling. Other reply types have to be
1648      * managed directly in their request handlers.
1649      */
1650     if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1651         struct iovec iovec[2];
1652 
1654         hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1655         hdr.flags |= VHOST_USER_REPLY_MASK;
1656 
1657         payload.u64 = !!ret;
1658         hdr.size = sizeof(payload.u64);
1659 
1660         iovec[0].iov_base = &hdr;
1661         iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1662         iovec[1].iov_base = &payload;
1663         iovec[1].iov_len = hdr.size;
1664 
1665         if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
1666             error_report_err(local_err);
1667             goto err;
1668         }
1669     }
1670 
1671     goto fdcleanup;
1672 
1673 err:
1674     close_slave_channel(u);
1675     rc = G_SOURCE_REMOVE;
1676 
1677 fdcleanup:
1678     if (fd) {
1679         for (i = 0; i < fdsize; i++) {
1680             close(fd[i]);
1681         }
1682     }
1683     return rc;
1684 }
1685 
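/*
 * Create the backend (slave) request channel: a socketpair whose peer
 * end is handed to the backend with VHOST_USER_SET_BACKEND_REQ_FD,
 * while our end is wrapped in a QIOChannel and watched by slave_read().
 */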
1686 static int vhost_setup_slave_channel(struct vhost_dev *dev)
1687 {
1688     VhostUserMsg msg = {
1689         .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
1690         .hdr.flags = VHOST_USER_VERSION,
1691     };
1692     struct vhost_user *u = dev->opaque;
1693     int sv[2], ret = 0;
1694     bool reply_supported = virtio_has_feature(dev->protocol_features,
1695                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1696     Error *local_err = NULL;
1697     QIOChannel *ioc;
1698 
1699     if (!virtio_has_feature(dev->protocol_features,
1700                             VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
1701         return 0;
1702     }
1703 
1704     if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1705         int saved_errno = errno;
1706         error_report("socketpair() failed");
1707         return -saved_errno;
1708     }
1709 
1710     ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
1711     if (!ioc) {
1712         error_report_err(local_err);
1713         return -ECONNREFUSED;
1714     }
1715     u->slave_ioc = ioc;
1716     u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
1717                                                 G_IO_IN | G_IO_HUP,
1718                                                 slave_read, dev, NULL, NULL);
1719 
1720     if (reply_supported) {
1721         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1722     }
1723 
1724     ret = vhost_user_write(dev, &msg, &sv[1], 1);
1725     if (ret) {
1726         goto out;
1727     }
1728 
1729     if (reply_supported) {
1730         ret = process_message_reply(dev, &msg);
1731     }
1732 
1733 out:
1734     close(sv[1]);
1735     if (ret) {
1736         close_slave_channel(u);
1737     }
1738 
1739     return ret;
1740 }
1741 
1742 #ifdef CONFIG_LINUX
1743 /*
1744  * Called back from the postcopy fault thread when a fault is received on our
1745  * ufd.
1746  * TODO: This is Linux specific
1747  */
1748 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1749                                              void *ufd)
1750 {
1751     struct vhost_dev *dev = pcfd->data;
1752     struct vhost_user *u = dev->opaque;
1753     struct uffd_msg *msg = ufd;
1754     uint64_t faultaddr = msg->arg.pagefault.address;
1755     RAMBlock *rb = NULL;
1756     uint64_t rb_offset;
1757     int i;
1758 
1759     trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1760                                             dev->mem->nregions);
1761     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1762         trace_vhost_user_postcopy_fault_handler_loop(i,
1763                 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1764         if (faultaddr >= u->postcopy_client_bases[i]) {
1765             /* Offset of the fault address in the vhost region */
1766             uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1767             if (region_offset < dev->mem->regions[i].memory_size) {
1768                 rb_offset = region_offset + u->region_rb_offset[i];
1769                 trace_vhost_user_postcopy_fault_handler_found(i,
1770                         region_offset, rb_offset);
1771                 rb = u->region_rb[i];
1772                 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1773                                                     rb_offset);
1774             }
1775         }
1776     }
1777     error_report("%s: Failed to find region for fault %" PRIx64,
1778                  __func__, faultaddr);
1779     return -1;
1780 }
1781 
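/*
 * Called when a page has arrived during postcopy; wake any vhost-user
 * client that faulted on it by translating the RAMBlock offset back
 * into the client's address space.
 */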
1782 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1783                                      uint64_t offset)
1784 {
1785     struct vhost_dev *dev = pcfd->data;
1786     struct vhost_user *u = dev->opaque;
1787     int i;
1788 
1789     trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1790 
1791     if (!u) {
1792         return 0;
1793     }
1794     /* Translate the offset into an address in the client's address space */
1795     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1796         if (u->region_rb[i] == rb &&
1797             offset >= u->region_rb_offset[i] &&
1798             offset < (u->region_rb_offset[i] +
1799                       dev->mem->regions[i].memory_size)) {
1800             uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1801                                    u->postcopy_client_bases[i];
1802             trace_vhost_user_postcopy_waker_found(client_addr);
1803             return postcopy_wake_shared(pcfd, client_addr, rb);
1804         }
1805     }
1806 
1807     trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1808     return 0;
1809 }
1810 #endif
1811 
1812 /*
1813  * Called at the start of an inbound postcopy on reception of the
1814  * 'advise' command.
1815  */
1816 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1817 {
1818 #ifdef CONFIG_LINUX
1819     struct vhost_user *u = dev->opaque;
1820     CharBackend *chr = u->user->chr;
1821     int ufd;
1822     int ret;
1823     VhostUserMsg msg = {
1824         .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1825         .hdr.flags = VHOST_USER_VERSION,
1826     };
1827 
1828     ret = vhost_user_write(dev, &msg, NULL, 0);
1829     if (ret < 0) {
1830         error_setg(errp, "Failed to send postcopy_advise to vhost");
1831         return ret;
1832     }
1833 
1834     ret = vhost_user_read(dev, &msg);
1835     if (ret < 0) {
1836         error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1837         return ret;
1838     }
1839 
1840     if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1841         error_setg(errp, "Unexpected msg type. Expected %d received %d",
1842                      VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1843         return -EPROTO;
1844     }
1845 
1846     if (msg.hdr.size) {
1847         error_setg(errp, "Received bad msg size.");
1848         return -EPROTO;
1849     }
1850     ufd = qemu_chr_fe_get_msgfd(chr);
1851     if (ufd < 0) {
1852         error_setg(errp, "%s: Failed to get ufd", __func__);
1853         return -EIO;
1854     }
1855     qemu_socket_set_nonblock(ufd);
1856 
1857     /* register ufd with userfault thread */
1858     u->postcopy_fd.fd = ufd;
1859     u->postcopy_fd.data = dev;
1860     u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1861     u->postcopy_fd.waker = vhost_user_postcopy_waker;
1862     u->postcopy_fd.idstr = "vhost-user"; /* TODO: needs a unique per-device name */
1863     postcopy_register_shared_ufd(&u->postcopy_fd);
1864     return 0;
1865 #else
1866     error_setg(errp, "Postcopy not supported on non-Linux systems");
1867     return -ENOSYS;
1868 #endif
1869 }
1870 
1871 /*
1872  * Called at the switch to postcopy on reception of the 'listen' command.
1873  */
1874 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1875 {
1876     struct vhost_user *u = dev->opaque;
1877     int ret;
1878     VhostUserMsg msg = {
1879         .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1880         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1881     };
1882     u->postcopy_listen = true;
1883 
1884     trace_vhost_user_postcopy_listen();
1885 
1886     ret = vhost_user_write(dev, &msg, NULL, 0);
1887     if (ret < 0) {
1888         error_setg(errp, "Failed to send postcopy_listen to vhost");
1889         return ret;
1890     }
1891 
1892     ret = process_message_reply(dev, &msg);
1893     if (ret) {
1894         error_setg(errp, "Failed to receive reply to postcopy_listen");
1895         return ret;
1896     }
1897 
1898     return 0;
1899 }
1900 
1901 /*
1902  * Called at the end of postcopy
1903  */
1904 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1905 {
1906     VhostUserMsg msg = {
1907         .hdr.request = VHOST_USER_POSTCOPY_END,
1908         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1909     };
1910     int ret;
1911     struct vhost_user *u = dev->opaque;
1912 
1913     trace_vhost_user_postcopy_end_entry();
1914 
1915     ret = vhost_user_write(dev, &msg, NULL, 0);
1916     if (ret < 0) {
1917         error_setg(errp, "Failed to send postcopy_end to vhost");
1918         return ret;
1919     }
1920 
1921     ret = process_message_reply(dev, &msg);
1922     if (ret) {
1923         error_setg(errp, "Failed to receive reply to postcopy_end");
1924         return ret;
1925     }
1926     postcopy_unregister_shared_ufd(&u->postcopy_fd);
1927     close(u->postcopy_fd.fd);
1928     u->postcopy_fd.handler = NULL;
1929 
1930     trace_vhost_user_postcopy_end_exit();
1931 
1932     return 0;
1933 }
1934 
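/*
 * Postcopy notifier: checks at probe time that the backend advertises
 * VHOST_USER_PROTOCOL_F_PAGEFAULT, and forwards the advise/listen/end
 * phases of an inbound postcopy to the handlers above.
 */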
1935 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1936                                         void *opaque)
1937 {
1938     struct PostcopyNotifyData *pnd = opaque;
1939     struct vhost_user *u = container_of(notifier, struct vhost_user,
1940                                          postcopy_notifier);
1941     struct vhost_dev *dev = u->dev;
1942 
1943     switch (pnd->reason) {
1944     case POSTCOPY_NOTIFY_PROBE:
1945         if (!virtio_has_feature(dev->protocol_features,
1946                                 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1947             /* TODO: Get the device name into this error somehow */
1948             error_setg(pnd->errp,
1949                        "vhost-user backend not capable of postcopy");
1950             return -ENOENT;
1951         }
1952         break;
1953 
1954     case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1955         return vhost_user_postcopy_advise(dev, pnd->errp);
1956 
1957     case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1958         return vhost_user_postcopy_listen(dev, pnd->errp);
1959 
1960     case POSTCOPY_NOTIFY_INBOUND_END:
1961         return vhost_user_postcopy_end(dev, pnd->errp);
1962 
1963     default:
1964         /* We ignore notifications we don't know */
1965         break;
1966     }
1967 
1968     return 0;
1969 }
1970 
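/*
 * Backend init: negotiates features and protocol features with the
 * backend, validates queue and memory-slot limits, optionally blocks
 * migration, and sets up the backend request channel and the postcopy
 * notifier.
 */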
1971 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
1972                                    Error **errp)
1973 {
1974     uint64_t features, ram_slots;
1975     struct vhost_user *u;
1976     VhostUserState *vus = (VhostUserState *) opaque;
1977     int err;
1978 
1979     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1980 
1981     u = g_new0(struct vhost_user, 1);
1982     u->user = vus;
1983     u->dev = dev;
1984     dev->opaque = u;
1985 
1986     err = vhost_user_get_features(dev, &features);
1987     if (err < 0) {
1988         error_setg_errno(errp, -err, "vhost_backend_init failed");
1989         return err;
1990     }
1991 
1992     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1993         bool supports_f_config = vus->supports_config ||
1994             (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
1995         uint64_t protocol_features;
1996 
1997         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1998 
1999         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
2000                                  &protocol_features);
2001         if (err < 0) {
2002             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2003             return -EPROTO;
2004         }
2005 
2006         /*
2007          * We will use all the protocol features we support, although
2008          * we suppress F_CONFIG if we know QEMU's internal code cannot
2009          * support it.
2010          */
2011         protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
2012 
2013         if (supports_f_config) {
2014             if (!virtio_has_feature(protocol_features,
2015                                     VHOST_USER_PROTOCOL_F_CONFIG)) {
2016                 error_setg(errp, "vhost-user device expecting "
2017                            "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
2018                            "not support it.");
2019                 return -EPROTO;
2020             }
2021         } else {
2022             if (virtio_has_feature(protocol_features,
2023                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
2024                 warn_report("vhost-user backend supports "
2025                             "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
2026                 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
2027             }
2028         }
2029 
2030         /* final set of protocol features */
2031         dev->protocol_features = protocol_features;
2032         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
2033         if (err < 0) {
2034             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2035             return -EPROTO;
2036         }
2037 
2038         /* Query the max number of queues if the backend supports multiple queues. */
2039         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
2040             err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
2041                                      &dev->max_queues);
2042             if (err < 0) {
2043                 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2044                 return -EPROTO;
2045             }
2046         } else {
2047             dev->max_queues = 1;
2048         }
2049 
2050         if (dev->num_queues && dev->max_queues < dev->num_queues) {
2051             error_setg(errp, "The maximum number of queues supported by the "
2052                        "backend is %" PRIu64, dev->max_queues);
2053             return -EINVAL;
2054         }
2055 
2056         if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
2057                 !(virtio_has_feature(dev->protocol_features,
2058                     VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
2059                  virtio_has_feature(dev->protocol_features,
2060                     VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
2061             error_setg(errp, "IOMMU support requires reply-ack and "
2062                        "slave-req protocol features.");
2063             return -EINVAL;
2064         }
2065 
2066         /* get max memory regions if backend supports configurable RAM slots */
2067         if (!virtio_has_feature(dev->protocol_features,
2068                                 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
2069             u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
2070         } else {
2071             err = vhost_user_get_max_memslots(dev, &ram_slots);
2072             if (err < 0) {
2073                 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2074                 return -EPROTO;
2075             }
2076 
2077             if (ram_slots < u->user->memory_slots) {
2078                 error_setg(errp, "The backend specified a max ram slots limit "
2079                            "of %" PRIu64", when the prior validated limit was "
2080                            "%d. This limit should never decrease.", ram_slots,
2081                            u->user->memory_slots);
2082                 return -EINVAL;
2083             }
2084 
2085             u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
2086         }
2087     }
2088 
2089     if (dev->migration_blocker == NULL &&
2090         !virtio_has_feature(dev->protocol_features,
2091                             VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
2092         error_setg(&dev->migration_blocker,
2093                    "Migration disabled: vhost-user backend lacks "
2094                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
2095     }
2096 
2097     if (dev->vq_index == 0) {
2098         err = vhost_setup_slave_channel(dev);
2099         if (err < 0) {
2100             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2101             return -EPROTO;
2102         }
2103     }
2104 
2105     u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
2106     postcopy_add_notifier(&u->postcopy_notifier);
2107 
2108     return 0;
2109 }
2110 
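/*
 * Undo vhost_user_backend_init(): unregister the postcopy state, close
 * the backend channel and free the per-device vhost_user struct.
 */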
2111 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
2112 {
2113     struct vhost_user *u;
2114 
2115     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2116 
2117     u = dev->opaque;
2118     if (u->postcopy_notifier.notify) {
2119         postcopy_remove_notifier(&u->postcopy_notifier);
2120         u->postcopy_notifier.notify = NULL;
2121     }
2122     u->postcopy_listen = false;
2123     if (u->postcopy_fd.handler) {
2124         postcopy_unregister_shared_ufd(&u->postcopy_fd);
2125         close(u->postcopy_fd.fd);
2126         u->postcopy_fd.handler = NULL;
2127     }
2128     if (u->slave_ioc) {
2129         close_slave_channel(u);
2130     }
2131     g_free(u->region_rb);
2132     u->region_rb = NULL;
2133     g_free(u->region_rb_offset);
2134     u->region_rb_offset = NULL;
2135     u->region_rb_len = 0;
2136     g_free(u);
2137     dev->opaque = NULL;
2138 
2139     return 0;
2140 }
2141 
2142 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2143 {
2144     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2145 
2146     return idx;
2147 }
2148 
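/* Report the memory-slot limit negotiated in vhost_user_backend_init(). */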
2149 static int vhost_user_memslots_limit(struct vhost_dev *dev)
2150 {
2151     struct vhost_user *u = dev->opaque;
2152 
2153     return u->user->memory_slots;
2154 }
2155 
2156 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2157 {
2158     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2159 
2160     return virtio_has_feature(dev->protocol_features,
2161                               VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2162 }
2163 
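/*
 * After migration, ask the backend to announce the guest's new network
 * location by sending a RARP with the given MAC address, unless the
 * guest can announce itself via VIRTIO_NET_F_GUEST_ANNOUNCE.
 */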
2164 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
2165 {
2166     VhostUserMsg msg = { };
2167 
2168     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2169 
2170     /* If the guest supports GUEST_ANNOUNCE, do nothing. */
2171     if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2172         return 0;
2173     }
2174 
2175     /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP. */
2176     if (virtio_has_feature(dev->protocol_features,
2177                            VHOST_USER_PROTOCOL_F_RARP)) {
2178         msg.hdr.request = VHOST_USER_SEND_RARP;
2179         msg.hdr.flags = VHOST_USER_VERSION;
2180         memcpy((char *)&msg.payload.u64, mac_addr, 6);
2181         msg.hdr.size = sizeof(msg.payload.u64);
2182 
2183         return vhost_user_write(dev, &msg, NULL, 0);
2184     }
2185     return -ENOTSUP;
2186 }
2187 
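/*
 * Two adjacent memory sections can only be merged into one vhost
 * memory region if they are backed by the same file descriptor.
 */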
2188 static bool vhost_user_can_merge(struct vhost_dev *dev,
2189                                  uint64_t start1, uint64_t size1,
2190                                  uint64_t start2, uint64_t size2)
2191 {
2192     ram_addr_t offset;
2193     int mfd, rfd;
2194 
2195     (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2196     (void)vhost_user_get_mr_data(start2, &offset, &rfd);
2197 
2198     return mfd == rfd;
2199 }
2200 
2201 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2202 {
2203     VhostUserMsg msg;
2204     bool reply_supported = virtio_has_feature(dev->protocol_features,
2205                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
2206     int ret;
2207 
2208     if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2209         return 0;
2210     }
2211 
2212     msg.hdr.request = VHOST_USER_NET_SET_MTU;
2213     msg.payload.u64 = mtu;
2214     msg.hdr.size = sizeof(msg.payload.u64);
2215     msg.hdr.flags = VHOST_USER_VERSION;
2216     if (reply_supported) {
2217         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2218     }
2219 
2220     ret = vhost_user_write(dev, &msg, NULL, 0);
2221     if (ret < 0) {
2222         return ret;
2223     }
2224 
2225     /* If reply_ack is supported, the slave must ack that the specified MTU is valid. */
2226     if (reply_supported) {
2227         return process_message_reply(dev, &msg);
2228     }
2229 
2230     return 0;
2231 }
2232 
2233 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2234                                             struct vhost_iotlb_msg *imsg)
2235 {
2236     int ret;
2237     VhostUserMsg msg = {
2238         .hdr.request = VHOST_USER_IOTLB_MSG,
2239         .hdr.size = sizeof(msg.payload.iotlb),
2240         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2241         .payload.iotlb = *imsg,
2242     };
2243 
2244     ret = vhost_user_write(dev, &msg, NULL, 0);
2245     if (ret < 0) {
2246         return ret;
2247     }
2248 
2249     return process_message_reply(dev, &msg);
2250 }
2251 
2252 
2253 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
2254 {
2255     /* No-op as the receive channel is not dedicated to IOTLB messages. */
2256 }
2257 
2258 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2259                                  uint32_t config_len, Error **errp)
2260 {
2261     int ret;
2262     VhostUserMsg msg = {
2263         .hdr.request = VHOST_USER_GET_CONFIG,
2264         .hdr.flags = VHOST_USER_VERSION,
2265         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2266     };
2267 
2268     if (!virtio_has_feature(dev->protocol_features,
2269                 VHOST_USER_PROTOCOL_F_CONFIG)) {
2270         error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
2271         return -EINVAL;
2272     }
2273 
2274     assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
2275 
2276     msg.payload.config.offset = 0;
2277     msg.payload.config.size = config_len;
2278     ret = vhost_user_write(dev, &msg, NULL, 0);
2279     if (ret < 0) {
2280         error_setg_errno(errp, -ret, "vhost_get_config failed");
2281         return ret;
2282     }
2283 
2284     ret = vhost_user_read(dev, &msg);
2285     if (ret < 0) {
2286         error_setg_errno(errp, -ret, "vhost_get_config failed");
2287         return ret;
2288     }
2289 
2290     if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2291         error_setg(errp,
2292                    "Received unexpected msg type. Expected %d received %d",
2293                    VHOST_USER_GET_CONFIG, msg.hdr.request);
2294         return -EPROTO;
2295     }
2296 
2297     if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2298         error_setg(errp, "Received bad msg size.");
2299         return -EPROTO;
2300     }
2301 
2302     memcpy(config, msg.payload.config.region, config_len);
2303 
2304     return 0;
2305 }
2306 
2307 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2308                                  uint32_t offset, uint32_t size, uint32_t flags)
2309 {
2310     int ret;
2311     uint8_t *p;
2312     bool reply_supported = virtio_has_feature(dev->protocol_features,
2313                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
2314 
2315     VhostUserMsg msg = {
2316         .hdr.request = VHOST_USER_SET_CONFIG,
2317         .hdr.flags = VHOST_USER_VERSION,
2318         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2319     };
2320 
2321     if (!virtio_has_feature(dev->protocol_features,
2322                 VHOST_USER_PROTOCOL_F_CONFIG)) {
2323         return -ENOTSUP;
2324     }
2325 
2326     if (reply_supported) {
2327         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2328     }
2329 
2330     if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2331         return -EINVAL;
2332     }
2333 
2334     msg.payload.config.offset = offset;
2335     msg.payload.config.size = size;
2336     msg.payload.config.flags = flags;
2337     p = msg.payload.config.region;
2338     memcpy(p, data, size);
2339 
2340     ret = vhost_user_write(dev, &msg, NULL, 0);
2341     if (ret < 0) {
2342         return ret;
2343     }
2344 
2345     if (reply_supported) {
2346         return process_message_reply(dev, &msg);
2347     }
2348 
2349     return 0;
2350 }
2351 
2352 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2353                                             void *session_info,
2354                                             uint64_t *session_id)
2355 {
2356     int ret;
2357     bool crypto_session = virtio_has_feature(dev->protocol_features,
2358                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2359     CryptoDevBackendSymSessionInfo *sess_info = session_info;
2360     VhostUserMsg msg = {
2361         .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2362         .hdr.flags = VHOST_USER_VERSION,
2363         .hdr.size = sizeof(msg.payload.session),
2364     };
2365 
2366     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2367 
2368     if (!crypto_session) {
2369         error_report("vhost-user trying to send unhandled ioctl");
2370         return -ENOTSUP;
2371     }
2372 
2373     memcpy(&msg.payload.session.session_setup_data, sess_info,
2374               sizeof(CryptoDevBackendSymSessionInfo));
2375     if (sess_info->key_len) {
2376         memcpy(&msg.payload.session.key, sess_info->cipher_key,
2377                sess_info->key_len);
2378     }
2379     if (sess_info->auth_key_len > 0) {
2380         memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
2381                sess_info->auth_key_len);
2382     }
2383     ret = vhost_user_write(dev, &msg, NULL, 0);
2384     if (ret < 0) {
2385         error_report("vhost_user_write() return %d, create session failed",
2386                      ret);
2387         return ret;
2388     }
2389 
2390     ret = vhost_user_read(dev, &msg);
2391     if (ret < 0) {
2392         error_report("vhost_user_read() return %d, create session failed",
2393                      ret);
2394         return ret;
2395     }
2396 
2397     if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2398         error_report("Received unexpected msg type. Expected %d received %d",
2399                      VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2400         return -EPROTO;
2401     }
2402 
2403     if (msg.hdr.size != sizeof(msg.payload.session)) {
2404         error_report("Received bad msg size.");
2405         return -EPROTO;
2406     }
2407 
2408     if (msg.payload.session.session_id < 0) {
2409         error_report("Bad session id: %" PRId64 "",
2410                               msg.payload.session.session_id);
2411         return -EINVAL;
2412     }
2413     *session_id = msg.payload.session.session_id;
2414 
2415     return 0;
2416 }
2417 
2418 static int
2419 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2420 {
2421     int ret;
2422     bool crypto_session = virtio_has_feature(dev->protocol_features,
2423                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2424     VhostUserMsg msg = {
2425         .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2426         .hdr.flags = VHOST_USER_VERSION,
2427         .hdr.size = sizeof(msg.payload.u64),
2428     };
2429     msg.payload.u64 = session_id;
2430 
2431     if (!crypto_session) {
2432         error_report("vhost-user trying to send unhandled ioctl");
2433         return -ENOTSUP;
2434     }
2435 
2436     ret = vhost_user_write(dev, &msg, NULL, 0);
2437     if (ret < 0) {
2438         error_report("vhost_user_write() return %d, close session failed",
2439                      ret);
2440         return ret;
2441     }
2442 
2443     return 0;
2444 }
2445 
2446 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2447                                           MemoryRegionSection *section)
2448 {
2449     return memory_region_get_fd(section->mr) >= 0;
2450 }
2451 
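/*
 * Ask the backend for its inflight-I/O tracking area and mmap it, so
 * that in-flight requests can be resubmitted after a backend restart
 * or reconnect.
 */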
2452 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2453                                       uint16_t queue_size,
2454                                       struct vhost_inflight *inflight)
2455 {
2456     void *addr;
2457     int fd;
2458     int ret;
2459     struct vhost_user *u = dev->opaque;
2460     CharBackend *chr = u->user->chr;
2461     VhostUserMsg msg = {
2462         .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2463         .hdr.flags = VHOST_USER_VERSION,
2464         .payload.inflight.num_queues = dev->nvqs,
2465         .payload.inflight.queue_size = queue_size,
2466         .hdr.size = sizeof(msg.payload.inflight),
2467     };
2468 
2469     if (!virtio_has_feature(dev->protocol_features,
2470                             VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2471         return 0;
2472     }
2473 
2474     ret = vhost_user_write(dev, &msg, NULL, 0);
2475     if (ret < 0) {
2476         return ret;
2477     }
2478 
2479     ret = vhost_user_read(dev, &msg);
2480     if (ret < 0) {
2481         return ret;
2482     }
2483 
2484     if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2485         error_report("Received unexpected msg type. "
2486                      "Expected %d received %d",
2487                      VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2488         return -EPROTO;
2489     }
2490 
2491     if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2492         error_report("Received bad msg size.");
2493         return -EPROTO;
2494     }
2495 
2496     if (!msg.payload.inflight.mmap_size) {
2497         return 0;
2498     }
2499 
2500     fd = qemu_chr_fe_get_msgfd(chr);
2501     if (fd < 0) {
2502         error_report("Failed to get mem fd");
2503         return -EIO;
2504     }
2505 
2506     addr = mmap(NULL, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2507                 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2508 
2509     if (addr == MAP_FAILED) {
2510         error_report("Failed to mmap mem fd");
2511         close(fd);
2512         return -EFAULT;
2513     }
2514 
2515     inflight->addr = addr;
2516     inflight->fd = fd;
2517     inflight->size = msg.payload.inflight.mmap_size;
2518     inflight->offset = msg.payload.inflight.mmap_offset;
2519     inflight->queue_size = queue_size;
2520 
2521     return 0;
2522 }
2523 
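/* Hand a previously obtained inflight area back to the backend, e.g. after a restart. */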
2524 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2525                                       struct vhost_inflight *inflight)
2526 {
2527     VhostUserMsg msg = {
2528         .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2529         .hdr.flags = VHOST_USER_VERSION,
2530         .payload.inflight.mmap_size = inflight->size,
2531         .payload.inflight.mmap_offset = inflight->offset,
2532         .payload.inflight.num_queues = dev->nvqs,
2533         .payload.inflight.queue_size = inflight->queue_size,
2534         .hdr.size = sizeof(msg.payload.inflight),
2535     };
2536 
2537     if (!virtio_has_feature(dev->protocol_features,
2538                             VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2539         return 0;
2540     }
2541 
2542     return vhost_user_write(dev, &msg, &inflight->fd, 1);
2543 }
2544 
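/* GPtrArray destroy notify for the per-queue host notifiers. */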
2545 static void vhost_user_state_destroy(gpointer data)
2546 {
2547     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
2548     if (n) {
2549         vhost_user_host_notifier_remove(n, NULL);
2550         object_unparent(OBJECT(&n->mr));
2551         /*
2552          * We can't free until vhost_user_host_notifier_remove has
2553          * done its thing, so schedule the free with RCU.
2554          */
2555         g_free_rcu(n, rcu);
2556     }
2557 }
2558 
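/*
 * One-time initialisation of the VhostUserState shared by all
 * vhost_dev instances of a device. Illustrative usage from a device
 * frontend (a sketch only: names such as s->chardev are hypothetical
 * and error handling is omitted):
 *
 *     VhostUserState *user = g_new0(VhostUserState, 1);
 *     if (!vhost_user_init(user, &s->chardev, errp)) {
 *         g_free(user);
 *         return;
 *     }
 *     ...
 *     vhost_user_cleanup(user);
 *     g_free(user);
 */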
2559 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2560 {
2561     if (user->chr) {
2562         error_setg(errp, "Cannot initialize vhost-user state");
2563         return false;
2564     }
2565     user->chr = chr;
2566     user->memory_slots = 0;
2567     user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
2568                                            &vhost_user_state_destroy);
2569     return true;
2570 }
2571 
2572 void vhost_user_cleanup(VhostUserState *user)
2573 {
2574     if (!user->chr) {
2575         return;
2576     }
2577     memory_region_transaction_begin();
2578     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
2579     memory_region_transaction_commit();
2580     user->chr = NULL;
2581 }
2582 
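/* State carried from vhost_user_async_close() to its bottom half. */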
2584 typedef struct {
2585     vu_async_close_fn cb;
2586     DeviceState *dev;
2587     CharBackend *cd;
2588     struct vhost_dev *vhost;
2589 } VhostAsyncCallback;
2590 
2591 static void vhost_user_async_close_bh(void *opaque)
2592 {
2593     VhostAsyncCallback *data = opaque;
2594     struct vhost_dev *vhost = data->vhost;
2595 
2596     /*
2597      * If the vhost_dev has been cleared in the meantime there is
2598      * nothing left to do as some other path has completed the
2599      * cleanup.
2600      */
2601     if (vhost->vdev) {
2602         data->cb(data->dev);
2603     }
2604 
2605     g_free(data);
2606 }
2607 
2608 /*
2609  * We only schedule the work if the machine is running. If suspended
2610  * we want to keep all the in-flight data as is for migration
2611  * purposes.
2612  */
2613 void vhost_user_async_close(DeviceState *d,
2614                             CharBackend *chardev, struct vhost_dev *vhost,
2615                             vu_async_close_fn cb)
2616 {
2617     if (!runstate_check(RUN_STATE_SHUTDOWN)) {
2618         /*
2619          * A close event may happen during a read/write, but vhost
2620          * code assumes the vhost_dev remains setup, so delay the
2621          * stop & clear.
2622          */
2623         AioContext *ctx = qemu_get_current_aio_context();
2624         VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);
2625 
2626         /* Save data for the callback */
2627         data->cb = cb;
2628         data->dev = d;
2629         data->cd = chardev;
2630         data->vhost = vhost;
2631 
2632         /* Disable any further notifications on the chardev */
2633         qemu_chr_fe_set_handlers(chardev,
2634                                  NULL, NULL, NULL, NULL, NULL, NULL,
2635                                  false);
2636 
2637         aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
2638 
2639         /*
2640          * Move the vhost device to the stopped state. The vhost-user
2641          * device will be cleaned up and disconnected in the BH. This can
2642          * be useful in the vhost migration code: if a disconnect is
2643          * caught there, the general vhost code can get the device state
2644          * without knowing its type (in this case vhost-user).
2645          *
2646          * Note if the vhost device is fully cleared by the time we
2647          * execute the bottom half we won't continue with the cleanup.
2648          */
2649         vhost->started = false;
2650     }
2651 }
2652 
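/*
 * If the backend supports VHOST_USER_PROTOCOL_F_STATUS, mirror the
 * virtio status bits (ACKNOWLEDGE, DRIVER, DRIVER_OK) into the backend
 * when the last queue pair is started.
 */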
2653 static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
2654 {
2655     if (!virtio_has_feature(dev->protocol_features,
2656                             VHOST_USER_PROTOCOL_F_STATUS)) {
2657         return 0;
2658     }
2659 
2660     /* Set the device status only for the last queue pair. */
2661     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2662         return 0;
2663     }
2664 
2665     if (started) {
2666         return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
2667                                           VIRTIO_CONFIG_S_DRIVER |
2668                                           VIRTIO_CONFIG_S_DRIVER_OK);
2669     } else {
2670         return 0;
2671     }
2672 }
2673 
2674 static void vhost_user_reset_status(struct vhost_dev *dev)
2675 {
2676     /* Set the device status only for the last queue pair. */
2677     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2678         return;
2679     }
2680 
2681     if (virtio_has_feature(dev->protocol_features,
2682                            VHOST_USER_PROTOCOL_F_STATUS)) {
2683         vhost_user_set_status(dev, 0);
2684     }
2685 }
2686 
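/* Backend ops invoked by the generic vhost code for vhost-user devices. */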
2687 const VhostOps user_ops = {
2688         .backend_type = VHOST_BACKEND_TYPE_USER,
2689         .vhost_backend_init = vhost_user_backend_init,
2690         .vhost_backend_cleanup = vhost_user_backend_cleanup,
2691         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2692         .vhost_set_log_base = vhost_user_set_log_base,
2693         .vhost_set_mem_table = vhost_user_set_mem_table,
2694         .vhost_set_vring_addr = vhost_user_set_vring_addr,
2695         .vhost_set_vring_endian = vhost_user_set_vring_endian,
2696         .vhost_set_vring_num = vhost_user_set_vring_num,
2697         .vhost_set_vring_base = vhost_user_set_vring_base,
2698         .vhost_get_vring_base = vhost_user_get_vring_base,
2699         .vhost_set_vring_kick = vhost_user_set_vring_kick,
2700         .vhost_set_vring_call = vhost_user_set_vring_call,
2701         .vhost_set_vring_err = vhost_user_set_vring_err,
2702         .vhost_set_features = vhost_user_set_features,
2703         .vhost_get_features = vhost_user_get_features,
2704         .vhost_set_owner = vhost_user_set_owner,
2705         .vhost_reset_device = vhost_user_reset_device,
2706         .vhost_get_vq_index = vhost_user_get_vq_index,
2707         .vhost_set_vring_enable = vhost_user_set_vring_enable,
2708         .vhost_requires_shm_log = vhost_user_requires_shm_log,
2709         .vhost_migration_done = vhost_user_migration_done,
2710         .vhost_backend_can_merge = vhost_user_can_merge,
2711         .vhost_net_set_mtu = vhost_user_net_set_mtu,
2712         .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2713         .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2714         .vhost_get_config = vhost_user_get_config,
2715         .vhost_set_config = vhost_user_set_config,
2716         .vhost_crypto_create_session = vhost_user_crypto_create_session,
2717         .vhost_crypto_close_session = vhost_user_crypto_close_session,
2718         .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2719         .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2720         .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2721         .vhost_dev_start = vhost_user_dev_start,
2722         .vhost_reset_status = vhost_user_reset_status,
2723 };
2724