/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-crypto.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "io/channel-socket.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "sysemu/runstate.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "exec/ramblock.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_BACKEND_MAX_FDS     8

#if defined(TARGET_PPC) || defined(TARGET_PPC64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS

#else
#define VHOST_USER_MAX_RAM_SLOTS 512
#endif

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_BACKEND_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_SET_STATUS = 39,
    VHOST_USER_GET_STATUS = 40,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserBackendRequest {
    VHOST_USER_BACKEND_NONE = 0,
    VHOST_USER_BACKEND_IOTLB_MSG = 1,
    VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_BACKEND_MAX
} VhostUserBackendRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint64_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN        1024

typedef struct VhostUserCryptoSession {
    uint64_t op_code;
    union {
        struct {
            CryptoDevBackendSymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
            uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
        } sym;
        struct {
            CryptoDevBackendAsymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
        } asym;
    } u;

    /* session id for success, -1 on errors */
    int64_t session_id;
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserMemRegMsg mem_reg;
        VhostUserLog log;
        struct vhost_iotlb_msg iotlb;
        VhostUserConfig config;
        VhostUserCryptoSession session;
        VhostUserVringArea area;
        VhostUserInflight inflight;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)
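
/*
 * Wire format recap (an illustrative sketch of the structures above,
 * not an extra definition): every message is the packed 12-byte
 * VhostUserHeader followed by hdr.size bytes of payload, with any
 * file descriptors passed out of band as SCM_RIGHTS ancillary data.
 * For example, a VHOST_USER_SET_VRING_NUM request sizing ring 0 to
 * 256 descriptors would be built as:
 *
 *     msg.hdr.request = VHOST_USER_SET_VRING_NUM;   // 8
 *     msg.hdr.flags = VHOST_USER_VERSION;           // 0x1
 *     msg.hdr.size = sizeof(msg.payload.state);
 *     msg.payload.state =
 *         (struct vhost_vring_state){ .index = 0, .num = 256 };
 */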

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    QIOChannel *backend_ioc;
    GSource *backend_src;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};

static bool ioeventfd_enabled(void)
{
    return !kvm_enabled() || kvm_eventfds_enabled();
}

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        int saved_errno = errno;
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return r < 0 ? -saved_errno : -EIO;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -EPROTO;
    }

    trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);

    return 0;
}

static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    r = vhost_user_read_header(dev, msg);
    if (r < 0) {
        return r;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        return -EPROTO;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            int saved_errno = errno;
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            return r < 0 ? -saved_errno : -EIO;
        }
    }

    return 0;
}

static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    int ret;
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    ret = vhost_user_read(dev, &msg_reply);
    if (ret < 0) {
        return ret;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -EPROTO;
    }

    return msg_reply.payload.u64 ? -EIO : 0;
}
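
/*
 * Typical REPLY_ACK usage (a sketch of the pattern used throughout
 * this file, not additional behaviour): a caller that wants
 * confirmation sets VHOST_USER_NEED_REPLY_MASK before writing and
 * then collects the backend's u64 status via process_message_reply():
 *
 *     msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 *     ret = vhost_user_write(dev, &msg, NULL, 0);
 *     if (ret == 0) {
 *         ret = process_message_reply(dev, &msg); // 0 on ack, -EIO on nak
 *     }
 */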

static bool vhost_user_per_device_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
    case VHOST_USER_RESET_DEVICE:
    case VHOST_USER_ADD_MEM_REG:
    case VHOST_USER_REM_MEM_REG:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * Some devices, like virtio-scsi, are implemented as a single vhost_dev,
     * while others, like virtio-net, contain multiple vhost_devs. For
     * operations such as configuring device memory mappings or issuing device
     * resets, which affect the whole device instead of individual VQs,
     * vhost-user messages should only be sent once.
     *
     * Devices with multiple vhost_devs are given an associated dev->vq_index
     * so per_device requests are only sent if vq_index is 0.
     */
    if (vhost_user_per_device_request(msg->hdr.request)
        && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -EINVAL;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        int saved_errno = errno;
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return ret < 0 ? -saved_errno : -EIO;
    }

    trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);

    return 0;
}

int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    /* Send only once with first queue pair */
    if (dev->vq_index != 0) {
        return 0;
    }

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    ret = vhost_user_write(dev, &msg, fds, fd_num);
    if (ret < 0) {
        return ret;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        ret = vhost_user_read(dev, &msg);
        if (ret < 0) {
            return ret;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -EPROTO;
        }
    }

    return 0;
}
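
/*
 * Background note on the shared dirty log handled above (a sketch,
 * not new behaviour): log->log is an array of uint64_t bitmap words,
 * so the mmap_size sent to the backend is
 * log->size * sizeof(*(log->log)) bytes. With the LOG_SHMFD protocol
 * feature the fd for that region rides along with the message, and
 * the backend is expected to echo VHOST_USER_SET_LOG_BASE back once
 * it has mapped the new log, which is why we block on the reply
 * before assuming logging is active.
 */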
" 457 "Expected %d received %d", 458 VHOST_USER_SET_LOG_BASE, msg.hdr.request); 459 return -EPROTO; 460 } 461 } 462 463 return 0; 464 } 465 466 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset, 467 int *fd) 468 { 469 MemoryRegion *mr; 470 471 assert((uintptr_t)addr == addr); 472 mr = memory_region_from_host((void *)(uintptr_t)addr, offset); 473 *fd = memory_region_get_fd(mr); 474 *offset += mr->ram_block->fd_offset; 475 476 return mr; 477 } 478 479 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst, 480 struct vhost_memory_region *src, 481 uint64_t mmap_offset) 482 { 483 assert(src != NULL && dst != NULL); 484 dst->userspace_addr = src->userspace_addr; 485 dst->memory_size = src->memory_size; 486 dst->guest_phys_addr = src->guest_phys_addr; 487 dst->mmap_offset = mmap_offset; 488 } 489 490 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u, 491 struct vhost_dev *dev, 492 VhostUserMsg *msg, 493 int *fds, size_t *fd_num, 494 bool track_ramblocks) 495 { 496 int i, fd; 497 ram_addr_t offset; 498 MemoryRegion *mr; 499 struct vhost_memory_region *reg; 500 VhostUserMemoryRegion region_buffer; 501 502 msg->hdr.request = VHOST_USER_SET_MEM_TABLE; 503 504 for (i = 0; i < dev->mem->nregions; ++i) { 505 reg = dev->mem->regions + i; 506 507 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd); 508 if (fd > 0) { 509 if (track_ramblocks) { 510 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS); 511 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name, 512 reg->memory_size, 513 reg->guest_phys_addr, 514 reg->userspace_addr, 515 offset); 516 u->region_rb_offset[i] = offset; 517 u->region_rb[i] = mr->ram_block; 518 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) { 519 error_report("Failed preparing vhost-user memory table msg"); 520 return -ENOBUFS; 521 } 522 vhost_user_fill_msg_region(®ion_buffer, reg, offset); 523 msg->payload.memory.regions[*fd_num] = region_buffer; 524 fds[(*fd_num)++] = fd; 525 } else if (track_ramblocks) { 526 u->region_rb_offset[i] = 0; 527 u->region_rb[i] = NULL; 528 } 529 } 530 531 msg->payload.memory.nregions = *fd_num; 532 533 if (!*fd_num) { 534 error_report("Failed initializing vhost-user memory map, " 535 "consider using -object memory-backend-file share=on"); 536 return -EINVAL; 537 } 538 539 msg->hdr.size = sizeof(msg->payload.memory.nregions); 540 msg->hdr.size += sizeof(msg->payload.memory.padding); 541 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion); 542 543 return 0; 544 } 545 546 static inline bool reg_equal(struct vhost_memory_region *shadow_reg, 547 struct vhost_memory_region *vdev_reg) 548 { 549 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr && 550 shadow_reg->userspace_addr == vdev_reg->userspace_addr && 551 shadow_reg->memory_size == vdev_reg->memory_size; 552 } 553 554 static void scrub_shadow_regions(struct vhost_dev *dev, 555 struct scrub_regions *add_reg, 556 int *nr_add_reg, 557 struct scrub_regions *rem_reg, 558 int *nr_rem_reg, uint64_t *shadow_pcb, 559 bool track_ramblocks) 560 { 561 struct vhost_user *u = dev->opaque; 562 bool found[VHOST_USER_MAX_RAM_SLOTS] = {}; 563 struct vhost_memory_region *reg, *shadow_reg; 564 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0; 565 ram_addr_t offset; 566 MemoryRegion *mr; 567 bool matching; 568 569 /* 570 * Find memory regions present in our shadow state which are not in 571 * the device's current memory state. 572 * 573 * Mark regions in both the shadow and device state as "found". 

static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}

static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, NULL, 0);
            if (ret < 0) {
                return ret;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is
         * now safe to remove it from the shadow table.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}

static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, &fd, 1);
            if (ret < 0) {
                return ret;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                ret = vhost_user_read(dev, &msg_reply);
                if (ret < 0) {
                    return ret;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type. "
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -EPROTO;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -EPROTO;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64
                                 ", expected %" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -EPROTO;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}

static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;
    int ret;

    msg->hdr.size = sizeof(msg->payload.mem_reg);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg) {
        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                                  reply_supported);
        if (ret < 0) {
            goto err;
        }
    }

    if (nr_add_reg) {
        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
                               reply_supported, track_ramblocks);
        if (ret < 0) {
            goto err;
        }
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        ret = vhost_user_write(dev, msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return ret;
}
msg type." 926 "Expected %d received %d", __func__, 927 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 928 return -EPROTO; 929 } 930 931 /* 932 * We're using the same structure, just reusing one of the 933 * fields, so it should be the same size. 934 */ 935 if (msg_reply.hdr.size != msg.hdr.size) { 936 error_report("%s: Unexpected size for postcopy reply " 937 "%d vs %d", __func__, msg_reply.hdr.size, 938 msg.hdr.size); 939 return -EPROTO; 940 } 941 942 memset(u->postcopy_client_bases, 0, 943 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS); 944 945 /* 946 * They're in the same order as the regions that were sent 947 * but some of the regions were skipped (above) if they 948 * didn't have fd's 949 */ 950 for (msg_i = 0, region_i = 0; 951 region_i < dev->mem->nregions; 952 region_i++) { 953 if (msg_i < fd_num && 954 msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 955 dev->mem->regions[region_i].guest_phys_addr) { 956 u->postcopy_client_bases[region_i] = 957 msg_reply.payload.memory.regions[msg_i].userspace_addr; 958 trace_vhost_user_set_mem_table_postcopy( 959 msg_reply.payload.memory.regions[msg_i].userspace_addr, 960 msg.payload.memory.regions[msg_i].userspace_addr, 961 msg_i, region_i); 962 msg_i++; 963 } 964 } 965 if (msg_i != fd_num) { 966 error_report("%s: postcopy reply not fully consumed " 967 "%d vs %zd", 968 __func__, msg_i, fd_num); 969 return -EIO; 970 } 971 972 /* 973 * Now we've registered this with the postcopy code, we ack to the 974 * client, because now we're in the position to be able to deal 975 * with any faults it generates. 976 */ 977 /* TODO: Use this for failure cases as well with a bad value. */ 978 msg.hdr.size = sizeof(msg.payload.u64); 979 msg.payload.u64 = 0; /* OK */ 980 ret = vhost_user_write(dev, &msg, NULL, 0); 981 if (ret < 0) { 982 return ret; 983 } 984 } 985 986 return 0; 987 } 988 989 static int vhost_user_set_mem_table(struct vhost_dev *dev, 990 struct vhost_memory *mem) 991 { 992 struct vhost_user *u = dev->opaque; 993 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 994 size_t fd_num = 0; 995 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 996 bool reply_supported = virtio_has_feature(dev->protocol_features, 997 VHOST_USER_PROTOCOL_F_REPLY_ACK); 998 bool config_mem_slots = 999 virtio_has_feature(dev->protocol_features, 1000 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); 1001 int ret; 1002 1003 if (do_postcopy) { 1004 /* 1005 * Postcopy has enough differences that it's best done in it's own 1006 * version 1007 */ 1008 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported, 1009 config_mem_slots); 1010 } 1011 1012 VhostUserMsg msg = { 1013 .hdr.flags = VHOST_USER_VERSION, 1014 }; 1015 1016 if (reply_supported) { 1017 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 1018 } 1019 1020 if (config_mem_slots) { 1021 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false); 1022 if (ret < 0) { 1023 return ret; 1024 } 1025 } else { 1026 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num, 1027 false); 1028 if (ret < 0) { 1029 return ret; 1030 } 1031 1032 ret = vhost_user_write(dev, &msg, fds, fd_num); 1033 if (ret < 0) { 1034 return ret; 1035 } 1036 1037 if (reply_supported) { 1038 return process_message_reply(dev, &msg); 1039 } 1040 } 1041 1042 return 0; 1043 } 1044 1045 static int vhost_user_set_vring_endian(struct vhost_dev *dev, 1046 struct vhost_vring_state *ring) 1047 { 1048 bool cross_endian = virtio_has_feature(dev->protocol_features, 1049 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); 1050 VhostUserMsg msg = 

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}

static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
    assert(n && n->unmap_addr);
    munmap(n->unmap_addr, qemu_real_host_page_size());
    n->unmap_addr = NULL;
}

/*
 * clean-up function for notifier, will finally free the structure
 * under rcu.
 */
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev)
{
    if (n->addr) {
        if (vdev) {
            virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
        }
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
        call_rcu(n, vhost_user_host_notifier_free, rcu);
    }
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -EINVAL;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        int ret;
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = enable,
        };

        ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
        if (ret < 0) {
            /*
             * Restoring the previous state is likely infeasible, as well as
             * proceeding regardless the error, so just bail out and hope for
             * the device-level recovery.
             */
            return ret;
        }
    }

    return 0;
}

static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
                                             int idx)
{
    if (idx >= u->notifiers->len) {
        return NULL;
    }
    return g_ptr_array_index(u->notifiers, idx);
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };
    struct vhost_user *u = dev->opaque;

    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    if (n) {
        vhost_user_host_notifier_remove(n, dev->vdev);
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *ring = msg.payload.state;

    return 0;
}
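
/*
 * Encoding used by the vring fd messages below (recap of the masks
 * defined at the top of this file, with illustrative values):
 * payload.u64 carries the ring index in its low byte and
 * VHOST_USER_VRING_NOFD_MASK (bit 8) when no descriptor accompanies
 * the message, e.g.
 *
 *     index 2 with an eventfd  -> u64 = 0x002, fd in ancillary data
 *     index 2 without an fd    -> u64 = 0x102, no fd attached
 */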
Expected %d received %d", 1183 VHOST_USER_GET_VRING_BASE, msg.hdr.request); 1184 return -EPROTO; 1185 } 1186 1187 if (msg.hdr.size != sizeof(msg.payload.state)) { 1188 error_report("Received bad msg size."); 1189 return -EPROTO; 1190 } 1191 1192 *ring = msg.payload.state; 1193 1194 return 0; 1195 } 1196 1197 static int vhost_set_vring_file(struct vhost_dev *dev, 1198 VhostUserRequest request, 1199 struct vhost_vring_file *file) 1200 { 1201 int fds[VHOST_USER_MAX_RAM_SLOTS]; 1202 size_t fd_num = 0; 1203 VhostUserMsg msg = { 1204 .hdr.request = request, 1205 .hdr.flags = VHOST_USER_VERSION, 1206 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, 1207 .hdr.size = sizeof(msg.payload.u64), 1208 }; 1209 1210 if (ioeventfd_enabled() && file->fd > 0) { 1211 fds[fd_num++] = file->fd; 1212 } else { 1213 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; 1214 } 1215 1216 return vhost_user_write(dev, &msg, fds, fd_num); 1217 } 1218 1219 static int vhost_user_set_vring_kick(struct vhost_dev *dev, 1220 struct vhost_vring_file *file) 1221 { 1222 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); 1223 } 1224 1225 static int vhost_user_set_vring_call(struct vhost_dev *dev, 1226 struct vhost_vring_file *file) 1227 { 1228 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); 1229 } 1230 1231 static int vhost_user_set_vring_err(struct vhost_dev *dev, 1232 struct vhost_vring_file *file) 1233 { 1234 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); 1235 } 1236 1237 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) 1238 { 1239 int ret; 1240 VhostUserMsg msg = { 1241 .hdr.request = request, 1242 .hdr.flags = VHOST_USER_VERSION, 1243 }; 1244 1245 if (vhost_user_per_device_request(request) && dev->vq_index != 0) { 1246 return 0; 1247 } 1248 1249 ret = vhost_user_write(dev, &msg, NULL, 0); 1250 if (ret < 0) { 1251 return ret; 1252 } 1253 1254 ret = vhost_user_read(dev, &msg); 1255 if (ret < 0) { 1256 return ret; 1257 } 1258 1259 if (msg.hdr.request != request) { 1260 error_report("Received unexpected msg type. Expected %d received %d", 1261 request, msg.hdr.request); 1262 return -EPROTO; 1263 } 1264 1265 if (msg.hdr.size != sizeof(msg.payload.u64)) { 1266 error_report("Received bad msg size."); 1267 return -EPROTO; 1268 } 1269 1270 *u64 = msg.payload.u64; 1271 1272 return 0; 1273 } 1274 1275 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) 1276 { 1277 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { 1278 return -EPROTO; 1279 } 1280 1281 return 0; 1282 } 1283 1284 static int enforce_reply(struct vhost_dev *dev, 1285 const VhostUserMsg *msg) 1286 { 1287 uint64_t dummy; 1288 1289 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { 1290 return process_message_reply(dev, msg); 1291 } 1292 1293 /* 1294 * We need to wait for a reply but the backend does not 1295 * support replies for the command we just sent. 1296 * Send VHOST_USER_GET_FEATURES which makes all backends 1297 * send a reply. 

static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);

    if (reply_supported && wait_for_reply) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        return enforce_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
                              bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };
    int ret;

    if (wait_for_reply) {
        bool reply_supported = virtio_has_feature(dev->protocol_features,
                                          VHOST_USER_PROTOCOL_F_REPLY_ACK);
        if (reply_supported) {
            msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
        }
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        return enforce_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
}

static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
{
    uint64_t value;
    int ret;

    ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
    if (ret < 0) {
        return ret;
    }
    *status = value;

    return 0;
}

static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    ret = vhost_user_get_status(dev, &s);
    if (ret < 0) {
        return ret;
    }

    if ((s & status) == status) {
        return 0;
    }
    s |= status;

    return vhost_user_set_status(dev, s);
}

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
    int ret;

    /*
     * We need to include any extra backend only feature bits that
     * might be needed by our device. Currently this includes the
     * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
     * features.
     */
    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
                             features | dev->backend_features,
                             log_enabled);

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        if (!ret) {
            return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        }
    }

    return ret;
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
                              false);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    msg.hdr.request = virtio_has_feature(dev->protocol_features,
                                         VHOST_USER_PROTOCOL_F_RESET_DEVICE)
        ? VHOST_USER_RESET_DEVICE
        : VHOST_USER_RESET_OWNER;

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_backend_handle_config_change(struct vhost_dev *dev)
{
    if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
        return -ENOSYS;
    }

    return dev->config_ops->vhost_dev_config_notifier(dev);
}

/*
 * Fetch or create the notifier for a given idx. Newly created
 * notifiers are added to the pointer array that tracks them.
 */
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
                                                       int idx)
{
    VhostUserHostNotifier *n = NULL;
    if (idx >= u->notifiers->len) {
        g_ptr_array_set_size(u->notifiers, idx + 1);
    }

    n = g_ptr_array_index(u->notifiers, idx);
    if (!n) {
        /*
         * In case notifications arrive out of order,
         * make room for the current index.
         */
        g_ptr_array_remove_index(u->notifiers, idx);
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_insert(u->notifiers, idx, n);
        trace_vhost_user_create_notifier(idx, n);
    }

    return n;
}
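
/*
 * Shape of the VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG handled
 * below (a recap of the protocol, with illustrative semantics):
 * area->u64 holds the queue index in its low byte, plus
 * VHOST_USER_VRING_NOFD_MASK when the notifier is being torn down;
 * otherwise the companion fd is mmap()ed at area->offset for
 * area->size bytes, and area->size must equal the host page size for
 * the mapping to be accepted.
 */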

static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
                                                         VhostUserVringArea *area,
                                                         int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size();
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -EINVAL;
    }

    /*
     * Fetch notifier and invalidate any old data before setting up
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
    vhost_user_host_notifier_remove(n, vdev);

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -EINVAL;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -EFAULT;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    if (!n->mr.ram) { /* Don't init again after suspend. */
        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                          page_size, addr);
    } else {
        n->mr.ram_block->host = addr;
    }
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -ENXIO;
    }

    n->addr = addr;

    return 0;
}

static void close_backend_channel(struct vhost_user *u)
{
    g_source_destroy(u->backend_src);
    g_source_unref(u->backend_src);
    u->backend_src = NULL;
    object_unref(OBJECT(u->backend_ioc));
    u->backend_ioc = NULL;
}

static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
                             gpointer opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    Error *local_err = NULL;
    gboolean rc = G_SOURCE_CONTINUE;
    int ret = 0;
    struct iovec iov;
    g_autofree int *fd = NULL;
    size_t fdsize = 0;
    int i;

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_BACKEND_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
        ret = vhost_user_backend_handle_config_change(dev);
        break;
    case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area,
                                                            fd ? fd[0] : -1);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];

        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_backend_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}

static int vhost_setup_backend_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        return 0;
    }

    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->backend_ioc = ioc;
    u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
                                                  G_IO_IN | G_IO_HUP,
                                                  backend_read, dev, NULL, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        close_backend_channel(u);
    }

    return ret;
}
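
/*
 * Direction note for the channel set up above (a sketch, not extra
 * protocol): sv[0] stays with QEMU and is serviced by backend_read(),
 * while sv[1] travels to the backend inside
 * VHOST_USER_SET_BACKEND_REQ_FD. A backend wanting to signal a config
 * change would then write a VhostUserHeader with request
 * VHOST_USER_BACKEND_CONFIG_CHANGE_MSG on its end of the pair and, if
 * it set NEED_REPLY, read back the u64 status that backend_read()
 * sends.
 */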

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif
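
/*
 * Postcopy handshake order (a summary of the three handlers below,
 * not additional behaviour): 'advise' hands us the backend's
 * userfaultfd and registers it with the postcopy code, 'listen'
 * switches the backend to faulting mode before the guest starts
 * running on the destination, and 'end' tears the shared ufd down
 * once all pages have arrived. Each step is acked by the backend
 * before migration proceeds.
 */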
Expected %d received %d", 1841 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1842 return -EPROTO; 1843 } 1844 1845 if (msg.hdr.size) { 1846 error_setg(errp, "Received bad msg size."); 1847 return -EPROTO; 1848 } 1849 ufd = qemu_chr_fe_get_msgfd(chr); 1850 if (ufd < 0) { 1851 error_setg(errp, "%s: Failed to get ufd", __func__); 1852 return -EIO; 1853 } 1854 qemu_socket_set_nonblock(ufd); 1855 1856 /* register ufd with userfault thread */ 1857 u->postcopy_fd.fd = ufd; 1858 u->postcopy_fd.data = dev; 1859 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1860 u->postcopy_fd.waker = vhost_user_postcopy_waker; 1861 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1862 postcopy_register_shared_ufd(&u->postcopy_fd); 1863 return 0; 1864 #else 1865 error_setg(errp, "Postcopy not supported on non-Linux systems"); 1866 return -ENOSYS; 1867 #endif 1868 } 1869 1870 /* 1871 * Called at the switch to postcopy on reception of the 'listen' command. 1872 */ 1873 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1874 { 1875 struct vhost_user *u = dev->opaque; 1876 int ret; 1877 VhostUserMsg msg = { 1878 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1879 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1880 }; 1881 u->postcopy_listen = true; 1882 1883 trace_vhost_user_postcopy_listen(); 1884 1885 ret = vhost_user_write(dev, &msg, NULL, 0); 1886 if (ret < 0) { 1887 error_setg(errp, "Failed to send postcopy_listen to vhost"); 1888 return ret; 1889 } 1890 1891 ret = process_message_reply(dev, &msg); 1892 if (ret) { 1893 error_setg(errp, "Failed to receive reply to postcopy_listen"); 1894 return ret; 1895 } 1896 1897 return 0; 1898 } 1899 1900 /* 1901 * Called at the end of postcopy 1902 */ 1903 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1904 { 1905 VhostUserMsg msg = { 1906 .hdr.request = VHOST_USER_POSTCOPY_END, 1907 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1908 }; 1909 int ret; 1910 struct vhost_user *u = dev->opaque; 1911 1912 trace_vhost_user_postcopy_end_entry(); 1913 1914 ret = vhost_user_write(dev, &msg, NULL, 0); 1915 if (ret < 0) { 1916 error_setg(errp, "Failed to send postcopy_end to vhost"); 1917 return ret; 1918 } 1919 1920 ret = process_message_reply(dev, &msg); 1921 if (ret) { 1922 error_setg(errp, "Failed to receive reply to postcopy_end"); 1923 return ret; 1924 } 1925 postcopy_unregister_shared_ufd(&u->postcopy_fd); 1926 close(u->postcopy_fd.fd); 1927 u->postcopy_fd.handler = NULL; 1928 1929 trace_vhost_user_postcopy_end_exit(); 1930 1931 return 0; 1932 } 1933 1934 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1935 void *opaque) 1936 { 1937 struct PostcopyNotifyData *pnd = opaque; 1938 struct vhost_user *u = container_of(notifier, struct vhost_user, 1939 postcopy_notifier); 1940 struct vhost_dev *dev = u->dev; 1941 1942 switch (pnd->reason) { 1943 case POSTCOPY_NOTIFY_PROBE: 1944 if (!virtio_has_feature(dev->protocol_features, 1945 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1946 /* TODO: Get the device name into this error somehow */ 1947 error_setg(pnd->errp, 1948 "vhost-user backend not capable of postcopy"); 1949 return -ENOENT; 1950 } 1951 break; 1952 1953 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1954 return vhost_user_postcopy_advise(dev, pnd->errp); 1955 1956 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1957 return vhost_user_postcopy_listen(dev, pnd->errp); 1958 1959 case POSTCOPY_NOTIFY_INBOUND_END: 1960 return vhost_user_postcopy_end(dev, pnd->errp); 1961 1962 default: 

static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
                                   Error **errp)
{
    uint64_t features, ram_slots;
    struct vhost_user *u;
    VhostUserState *vus = (VhostUserState *) opaque;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = vus;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        error_setg_errno(errp, -err, "vhost_backend_init failed");
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        bool supports_f_config = vus->supports_config ||
            (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
        uint64_t protocol_features;

        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /*
         * We will use all the protocol features we support - although
         * we suppress F_CONFIG if we know QEMU's internal code cannot
         * support it.
         */
        protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (supports_f_config) {
            if (!virtio_has_feature(protocol_features,
                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
                error_setg(errp, "vhost-user device expecting "
                           "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user "
                           "backend does not support it.");
                return -EPROTO;
            }
        } else {
            if (virtio_has_feature(protocol_features,
                                   VHOST_USER_PROTOCOL_F_CONFIG)) {
                warn_report("vhost-user backend supports "
                            "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
                protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
            }
        }

        /* final set of protocol features */
        dev->protocol_features = protocol_features;
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /* query the max queues we support if the backend supports multiple queues */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }
        } else {
            dev->max_queues = 1;
        }

        if (dev->num_queues && dev->max_queues < dev->num_queues) {
            error_setg(errp, "The maximum number of queues supported by the "
                       "backend is %" PRIu64, dev->max_queues);
            return -EINVAL;
        }

        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
                 virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_setg(errp, "IOMMU support requires reply-ack and "
                       "backend-req protocol features.");
            return -EINVAL;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }

            if (ram_slots < u->user->memory_slots) {
                error_setg(errp, "The backend specified a max ram slots limit "
                           "of %" PRIu64", when the prior validated limit was "
                           "%d. This limit should never decrease.", ram_slots,
                           u->user->memory_slots);
                return -EINVAL;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
        }
    }

    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    if (dev->vq_index == 0) {
        err = vhost_setup_backend_channel(dev);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}

static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->backend_ioc) {
        close_backend_channel(u);
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = NULL;

    return 0;
}

static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    return u->user->memory_slots;
}

static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}

static int vhost_user_migration_done(struct vhost_dev *dev, char *mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If the guest supports GUEST_ANNOUNCE, do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -ENOTSUP;
}
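
/*
 * A minimal illustration of what VHOST_USER_SEND_RARP carries: the six
 * MAC bytes are packed into the otherwise unused u64 payload. A
 * hypothetical caller (names made up for the example) might do:
 *
 *   uint8_t mac[6] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
 *   vhost_user_migration_done(&net->dev, (char *)mac);
 *
 * The back-end is then expected to broadcast a RARP on the guest's
 * behalf so switches re-learn its location after migration; when the
 * guest negotiated VIRTIO_NET_F_GUEST_ANNOUNCE the announcement comes
 * from the guest itself and the message is skipped.
 */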

static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
    ram_addr_t offset;
    int mfd, rfd;

    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
    (void)vhost_user_get_mr_data(start2, &offset, &rfd);

    return mfd == rfd;
}

static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    int ret;

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    /* If reply_ack is supported, the backend has to ack that the MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}

static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    return process_message_reply(dev, &msg);
}

static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}
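
/*
 * Each IOTLB message sent above is synchronous: NEED_REPLY is always
 * set and the call waits in process_message_reply() for the back-end's
 * ack. A hypothetical caller servicing a translation would fill in a
 * struct vhost_iotlb_msg along these lines (sketch only; iova, size
 * and uaddr stand in for values from the actual translation):
 *
 *   struct vhost_iotlb_msg imsg = {
 *       .iova  = iova,
 *       .size  = size,
 *       .uaddr = uaddr,
 *       .perm  = VHOST_ACCESS_RW,
 *       .type  = VHOST_IOTLB_UPDATE,
 *   };
 *   ret = vhost_user_send_device_iotlb_msg(dev, &imsg);
 */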

static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
        return -EINVAL;
    }

    assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_setg(errp,
                   "Received unexpected msg type. Expected %d, received %d",
                   VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}

static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    int ret;
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -ENOTSUP;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -EINVAL;
    }

    msg.payload.config.offset = offset;
    msg.payload.config.size = size;
    msg.payload.config.flags = flags;
    p = msg.payload.config.region;
    memcpy(p, data, size);

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}
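
/*
 * A sketch of how a device model might push a single field of its
 * virtio config space through the helper above (the field and offset
 * are illustrative; a real device derives them from its own config
 * layout):
 *
 *   uint16_t mtu = cpu_to_le16(1500);
 *   ret = vhost_user_set_config(dev, (uint8_t *)&mtu,
 *                               offsetof(struct virtio_net_config, mtu),
 *                               sizeof(mtu), 0);
 *
 * Reads go the other way: vhost_user_get_config() always asks for
 * config_len bytes starting at offset 0 and copies them back out of
 * the reply's region buffer.
 */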

static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                             VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSessionInfo *backend_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user is trying to send an unhandled ioctl");
        return -ENOTSUP;
    }

    if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) {
        CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.asym.session_setup_data, sess,
               sizeof(CryptoDevBackendAsymSessionInfo));
        if (sess->keylen) {
            keylen = sizeof(msg.payload.session.u.asym.key);
            if (sess->keylen > keylen) {
                error_report("Unsupported asymmetric key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.asym.key, sess->key,
                   sess->keylen);
        }
    } else {
        CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.sym.session_setup_data, sess,
               sizeof(CryptoDevBackendSymSessionInfo));
        if (sess->key_len) {
            keylen = sizeof(msg.payload.session.u.sym.key);
            if (sess->key_len > keylen) {
                error_report("Unsupported cipher key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.key, sess->cipher_key,
                   sess->key_len);
        }

        if (sess->auth_key_len > 0) {
            keylen = sizeof(msg.payload.session.u.sym.auth_key);
            if (sess->auth_key_len > keylen) {
                error_report("Unsupported auth key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key,
                   sess->auth_key_len);
        }
    }

    msg.payload.session.op_code = backend_info->op_code;
    msg.payload.session.session_id = backend_info->session_id;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, create session failed",
                     ret);
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_report("vhost_user_read() returned %d, create session failed",
                     ret);
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d, received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                     msg.payload.session.session_id);
        return -EINVAL;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                             VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user is trying to send an unhandled ioctl");
        return -ENOTSUP;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() returned %d, close session failed",
                     ret);
        return ret;
    }

    return 0;
}

static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
                                          MemoryRegionSection *section)
{
    return memory_region_get_fd(section->mr) >= 0;
}
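
/*
 * vhost_user_mem_section_filter() is the reason vhost-user guests need
 * fd-backed RAM: only regions whose file descriptor can be passed over
 * the socket are forwarded to the back-end. In practice that means
 * starting QEMU with shared, file- or memfd-backed memory, e.g.:
 *
 *   -object memory-backend-memfd,id=mem,size=4G,share=on \
 *   -numa node,memdev=mem
 */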
" 2522 "Expected %d received %d", 2523 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2524 return -EPROTO; 2525 } 2526 2527 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2528 error_report("Received bad msg size."); 2529 return -EPROTO; 2530 } 2531 2532 if (!msg.payload.inflight.mmap_size) { 2533 return 0; 2534 } 2535 2536 fd = qemu_chr_fe_get_msgfd(chr); 2537 if (fd < 0) { 2538 error_report("Failed to get mem fd"); 2539 return -EIO; 2540 } 2541 2542 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2543 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2544 2545 if (addr == MAP_FAILED) { 2546 error_report("Failed to mmap mem fd"); 2547 close(fd); 2548 return -EFAULT; 2549 } 2550 2551 inflight->addr = addr; 2552 inflight->fd = fd; 2553 inflight->size = msg.payload.inflight.mmap_size; 2554 inflight->offset = msg.payload.inflight.mmap_offset; 2555 inflight->queue_size = queue_size; 2556 2557 return 0; 2558 } 2559 2560 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2561 struct vhost_inflight *inflight) 2562 { 2563 VhostUserMsg msg = { 2564 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2565 .hdr.flags = VHOST_USER_VERSION, 2566 .payload.inflight.mmap_size = inflight->size, 2567 .payload.inflight.mmap_offset = inflight->offset, 2568 .payload.inflight.num_queues = dev->nvqs, 2569 .payload.inflight.queue_size = inflight->queue_size, 2570 .hdr.size = sizeof(msg.payload.inflight), 2571 }; 2572 2573 if (!virtio_has_feature(dev->protocol_features, 2574 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2575 return 0; 2576 } 2577 2578 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2579 } 2580 2581 static void vhost_user_state_destroy(gpointer data) 2582 { 2583 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2584 if (n) { 2585 vhost_user_host_notifier_remove(n, NULL); 2586 object_unparent(OBJECT(&n->mr)); 2587 /* 2588 * We can't free until vhost_user_host_notifier_remove has 2589 * done it's thing so schedule the free with RCU. 2590 */ 2591 g_free_rcu(n, rcu); 2592 } 2593 } 2594 2595 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2596 { 2597 if (user->chr) { 2598 error_setg(errp, "Cannot initialize vhost-user state"); 2599 return false; 2600 } 2601 user->chr = chr; 2602 user->memory_slots = 0; 2603 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2604 &vhost_user_state_destroy); 2605 return true; 2606 } 2607 2608 void vhost_user_cleanup(VhostUserState *user) 2609 { 2610 if (!user->chr) { 2611 return; 2612 } 2613 memory_region_transaction_begin(); 2614 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2615 memory_region_transaction_commit(); 2616 user->chr = NULL; 2617 } 2618 2619 2620 typedef struct { 2621 vu_async_close_fn cb; 2622 DeviceState *dev; 2623 CharBackend *cd; 2624 struct vhost_dev *vhost; 2625 } VhostAsyncCallback; 2626 2627 static void vhost_user_async_close_bh(void *opaque) 2628 { 2629 VhostAsyncCallback *data = opaque; 2630 struct vhost_dev *vhost = data->vhost; 2631 2632 /* 2633 * If the vhost_dev has been cleared in the meantime there is 2634 * nothing left to do as some other path has completed the 2635 * cleanup. 2636 */ 2637 if (vhost->vdev) { 2638 data->cb(data->dev); 2639 } 2640 2641 g_free(data); 2642 } 2643 2644 /* 2645 * We only schedule the work if the machine is running. If suspended 2646 * we want to keep all the in-flight data as is for migration 2647 * purposes. 

typedef struct {
    vu_async_close_fn cb;
    DeviceState *dev;
    CharBackend *cd;
    struct vhost_dev *vhost;
} VhostAsyncCallback;

static void vhost_user_async_close_bh(void *opaque)
{
    VhostAsyncCallback *data = opaque;
    struct vhost_dev *vhost = data->vhost;

    /*
     * If the vhost_dev has been cleared in the meantime there is
     * nothing left to do, as some other path has completed the
     * cleanup.
     */
    if (vhost->vdev) {
        data->cb(data->dev);
    }

    g_free(data);
}

/*
 * We only schedule the work if the machine is running. If suspended
 * we want to keep all the in-flight data as is for migration
 * purposes.
 */
void vhost_user_async_close(DeviceState *d,
                            CharBackend *chardev, struct vhost_dev *vhost,
                            vu_async_close_fn cb)
{
    if (!runstate_check(RUN_STATE_SHUTDOWN)) {
        /*
         * A close event may happen during a read/write, but vhost
         * code assumes the vhost_dev remains set up, so delay the
         * stop & clear.
         */
        AioContext *ctx = qemu_get_current_aio_context();
        VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);

        /* Save data for the callback */
        data->cb = cb;
        data->dev = d;
        data->cd = chardev;
        data->vhost = vhost;

        /* Disable any further notifications on the chardev */
        qemu_chr_fe_set_handlers(chardev,
                                 NULL, NULL, NULL, NULL, NULL, NULL,
                                 false);

        aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);

        /*
         * Move the vhost device to the stopped state. The vhost-user
         * device will be cleaned up and disconnected in the BH. This
         * can be useful in the vhost migration code: if a disconnect
         * was caught, the general vhost code can get the device state
         * without knowing its type (in this case vhost-user).
         *
         * Note that if the vhost device is fully cleared by the time
         * we execute the bottom half, we won't continue with the
         * cleanup.
         */
        vhost->started = false;
    }
}

static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
{
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_STATUS)) {
        return 0;
    }

    /* Set device status only for the last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                          VIRTIO_CONFIG_S_DRIVER |
                                          VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        return 0;
    }
}

static void vhost_user_reset_status(struct vhost_dev *dev)
{
    /* Set device status only for the last queue pair */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return;
    }

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        vhost_user_set_status(dev, 0);
    }
}
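
/*
 * The vhost-user implementation of the generic VhostOps interface.
 * Generic vhost code dispatches through this table, so each entry
 * translates one backend operation into the corresponding vhost-user
 * message built by the functions above.
 */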
const VhostOps user_ops = {
    .backend_type = VHOST_BACKEND_TYPE_USER,
    .vhost_backend_init = vhost_user_backend_init,
    .vhost_backend_cleanup = vhost_user_backend_cleanup,
    .vhost_backend_memslots_limit = vhost_user_memslots_limit,
    .vhost_set_log_base = vhost_user_set_log_base,
    .vhost_set_mem_table = vhost_user_set_mem_table,
    .vhost_set_vring_addr = vhost_user_set_vring_addr,
    .vhost_set_vring_endian = vhost_user_set_vring_endian,
    .vhost_set_vring_num = vhost_user_set_vring_num,
    .vhost_set_vring_base = vhost_user_set_vring_base,
    .vhost_get_vring_base = vhost_user_get_vring_base,
    .vhost_set_vring_kick = vhost_user_set_vring_kick,
    .vhost_set_vring_call = vhost_user_set_vring_call,
    .vhost_set_vring_err = vhost_user_set_vring_err,
    .vhost_set_features = vhost_user_set_features,
    .vhost_get_features = vhost_user_get_features,
    .vhost_set_owner = vhost_user_set_owner,
    .vhost_reset_device = vhost_user_reset_device,
    .vhost_get_vq_index = vhost_user_get_vq_index,
    .vhost_set_vring_enable = vhost_user_set_vring_enable,
    .vhost_requires_shm_log = vhost_user_requires_shm_log,
    .vhost_migration_done = vhost_user_migration_done,
    .vhost_backend_can_merge = vhost_user_can_merge,
    .vhost_net_set_mtu = vhost_user_net_set_mtu,
    .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
    .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
    .vhost_get_config = vhost_user_get_config,
    .vhost_set_config = vhost_user_set_config,
    .vhost_crypto_create_session = vhost_user_crypto_create_session,
    .vhost_crypto_close_session = vhost_user_crypto_close_session,
    .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
    .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
    .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
    .vhost_dev_start = vhost_user_dev_start,
    .vhost_reset_status = vhost_user_reset_status,
};
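
/*
 * For reference, a typical way to wire a device to an external
 * vhost-user back-end is via a unix-socket chardev (paths and ids here
 * are examples, shown for a virtio-net device):
 *
 *   -chardev socket,id=char0,path=/tmp/vhost.sock \
 *   -netdev vhost-user,id=net0,chardev=char0 \
 *   -device virtio-net-pci,netdev=net0
 *
 * combined with the shared-memory backing noted earlier, since the
 * back-end maps guest RAM from the fds passed with SET_MEM_TABLE.
 */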