/*
 * vhost-vdpa.c
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "migration/migration.h"
#include "migration/misc.h"
#include "hw/virtio/vhost.h"

/* TODO: need to add multiqueue support here */
typedef struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    NotifierWithReturn migration_state;
    VHostNetState *vhost_net;

    /* Control commands shadow buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;

    /* The device always has SVQ enabled */
    bool always_svq;

    /* The device can isolate CVQ in its own ASID */
    bool cvq_isolated;

    bool started;
} VhostVDPAState;

/*
 * The array is sorted alphabetically in ascending order,
 * with the exception of VHOST_INVALID_FEATURE_BIT,
 * which should always be the last entry.
 */
const int vdpa_feature_bits[] = {
    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
    VIRTIO_NET_F_CTRL_MAC_ADDR,
    VIRTIO_NET_F_CTRL_RX,
    VIRTIO_NET_F_CTRL_RX_EXTRA,
    VIRTIO_NET_F_CTRL_VLAN,
    VIRTIO_NET_F_CTRL_VQ,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_GUEST_USO4,
    VIRTIO_NET_F_GUEST_USO6,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_HOST_USO,
    VIRTIO_NET_F_MQ,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_STATUS,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_RING_F_INDIRECT_DESC,

    /* VHOST_INVALID_FEATURE_BIT should always be the last entry */
    VHOST_INVALID_FEATURE_BIT
};

/** Supported device specific feature bits with SVQ */
static const uint64_t vdpa_svq_device_features =
    BIT_ULL(VIRTIO_NET_F_CSUM) |
    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
    BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
    BIT_ULL(VIRTIO_NET_F_MTU) |
    BIT_ULL(VIRTIO_NET_F_MAC) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
    BIT_ULL(VIRTIO_NET_F_STATUS) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
    BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |
    BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) |
    BIT_ULL(VIRTIO_NET_F_MQ) |
    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
    /* VHOST_F_LOG_ALL is exposed by SVQ */
    BIT_ULL(VHOST_F_LOG_ALL) |
    BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |
    BIT_ULL(VIRTIO_NET_F_RSS) |
    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
    BIT_ULL(VIRTIO_NET_F_STANDBY) |
    BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);

#define VHOST_VDPA_NET_CVQ_ASID 1

VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return s->vhost_net;
}

static size_t vhost_vdpa_net_cvq_cmd_len(void)
{
    /*
     * MAC_TABLE_SET is the ctrl command that produces the longest out
     * buffer. The in buffer is always 1 byte, so it should fit here.
     */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}

static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
}
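
/*
 * For orientation, a worked example of the sizing above (a sketch assuming
 * MAC_TABLE_ENTRIES is 64, as in QEMU's virtio-net, and a 4 KiB host page):
 *
 *   sizeof(struct virtio_net_ctrl_hdr)      =   2  (class + cmd)
 *   2 * sizeof(struct virtio_net_ctrl_mac)  =   8  (two le32 entry counts)
 *   MAC_TABLE_ENTRIES * ETH_ALEN            = 384  (64 addresses * 6 bytes)
 *                                            -----
 *   vhost_vdpa_net_cvq_cmd_len()            = 394 bytes
 *
 * vhost_vdpa_net_cvq_cmd_page_len() then rounds this up to one host page
 * (4096 bytes here), which is the granularity used for the shadow buffer
 * mappings below.
 */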

static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
{
    uint64_t invalid_dev_features =
        features & ~vdpa_svq_device_features &
        /* Transport features are all accepted at this point */
        ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
                         VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);

    if (invalid_dev_features) {
        error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
                   invalid_dev_features);
        return false;
    }

    return vhost_svq_valid_features(features, errp);
}

static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
{
    uint32_t device_id;
    int ret;
    struct vhost_dev *hdev;

    hdev = (struct vhost_dev *)&net->dev;
    ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
    if (device_id != VIRTIO_ID_NET) {
        return -ENOTSUP;
    }
    return ret;
}

static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;
err_check:
    vhost_net_cleanup(net);
    g_free(net);
err_init:
    return -1;
}

static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    /*
     * If a peer NIC is attached, do not clean up anything.
     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
     * when the guest is shutting down.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
        return;
    }
    munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
    munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.index != 0) {
        return;
    }
    qemu_close(s->vhost_vdpa.shared->device_fd);
    g_free(s->vhost_vdpa.shared);
}
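
/*
 * Note on ownership (a reading aid, not new behavior): all net clients of
 * the same device share one VhostVDPAShared, but only the client with
 * vhost_vdpa.index == 0 owns it. For a hypothetical 2-queue-pair device
 * with a control virtqueue, the clients hold indexes 0, 1 and 2; the two
 * munmap() calls above are only meaningful for the CVQ client (the shadow
 * buffers are mapped in net_vhost_vdpa_init() for !is_datapath clients),
 * while the device fd and the shared struct are released exactly once,
 * by index 0.
 */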

/** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */
static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd)
{
    return true;
}

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    return true;
}

static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    uint64_t features = 0;
    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
    features = vhost_net_get_features(s->vhost_net, features);
    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
}

static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

/** Dummy receive in case qemu falls back to userland tap networking */
static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    return size;
}

/** From any vdpa net client, get the netclient of the i-th queue pair */
static VhostVDPAState *vhost_vdpa_net_get_nc_vdpa(VhostVDPAState *s, int i)
{
    NICState *nic = qemu_get_nic(s->nc.peer);
    NetClientState *nc_i = qemu_get_peer(nic->ncs, i);

    return DO_UPCAST(VhostVDPAState, nc, nc_i);
}

static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
{
    return vhost_vdpa_net_get_nc_vdpa(s, 0);
}

static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;
    VirtIONet *n;
    VirtIODevice *vdev;
    int data_queue_pairs, cvq, r;

    /* We are only called on the first data vq and only if x-svq is not set */
    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
        return;
    }

    vdev = v->dev->vdev;
    n = VIRTIO_NET(vdev);
    if (!n->vhost_started) {
        return;
    }

    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
          n->max_ncs - n->max_queue_pairs : 0;
    /*
     * TODO: vhost_net_stop does suspend, get_base and reset. We can be
     * smarter in the future and resume the device if read-only operations
     * between suspend and reset go wrong.
     */
    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);

    /* Start will check the migration state to decide whether to set up SVQ */
    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
    if (unlikely(r < 0)) {
        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
    }
}

static int vdpa_net_migration_state_notifier(NotifierWithReturn *notifier,
                                             MigrationEvent *e, Error **errp)
{
    VhostVDPAState *s = container_of(notifier, VhostVDPAState, migration_state);

    if (e->type == MIG_EVENT_PRECOPY_SETUP) {
        vhost_vdpa_net_log_global_enable(s, true);
    } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
        vhost_vdpa_net_log_global_enable(s, false);
    }
    return 0;
}
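
/*
 * A sketch of the resulting SVQ life cycle (assuming x-svq is off and the
 * device offers all the features SVQ needs):
 *
 *   migration starts   -> MIG_EVENT_PRECOPY_SETUP
 *                         -> log_global_enable(s, true)
 *                            -> restart vhost-net with SVQ, so dirty pages
 *                               can be tracked via VHOST_F_LOG_ALL
 *   migration fails    -> MIG_EVENT_PRECOPY_FAILED
 *                         -> log_global_enable(s, false)
 *                            -> restart vhost-net in passthrough mode
 *
 * On a completed migration the device simply stays stopped on the source,
 * so no explicit "disable" transition is needed there.
 */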

static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;

    migration_add_notifier(&s->migration_state,
                           vdpa_net_migration_state_notifier);
    if (v->shadow_vqs_enabled) {
        v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
                                                   v->shared->iova_range.last);
    }
}

static int vhost_vdpa_net_data_start(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->always_svq ||
        migration_is_setup_or_active(migrate_get_current()->state)) {
        v->shadow_vqs_enabled = true;
    } else {
        v->shadow_vqs_enabled = false;
    }

    if (v->index == 0) {
        v->shared->shadow_data = v->shadow_vqs_enabled;
        vhost_vdpa_net_data_start_first(s);
    }

    return 0;
}

static int vhost_vdpa_net_data_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    bool has_cvq = v->dev->vq_index_end % 2;

    if (has_cvq) {
        return 0;
    }

    for (int i = 0; i < v->dev->nvqs; ++i) {
        vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
    }
    return 0;
}

static void vhost_vdpa_net_client_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_dev *dev;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.index == 0) {
        migration_remove_notifier(&s->migration_state);
    }

    dev = s->vhost_vdpa.dev;
    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
        g_clear_pointer(&s->vhost_vdpa.shared->iova_tree,
                        vhost_iova_tree_delete);
    }
}

static NetClientInfo net_vhost_vdpa_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_data_start,
    .load = vhost_vdpa_net_data_load,
    .stop = vhost_vdpa_net_client_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
    .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
};

static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
                                          Error **errp)
{
    struct vhost_vring_state state = {
        .index = vq_index,
    };
    int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);

    if (unlikely(r < 0)) {
        r = -errno;
        error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
        return r;
    }

    return state.num;
}

static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
                                           unsigned vq_group,
                                           unsigned asid_num)
{
    struct vhost_vring_state asid = {
        .index = vq_group,
        .num = asid_num,
    };
    int r;

    r = ioctl(v->shared->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
    if (unlikely(r < 0)) {
        error_report("Can't set vq group %u asid %u, errno=%d (%s)",
                     asid.index, asid.num, errno, g_strerror(errno));
    }
    return r;
}
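
/*
 * How the two helpers above combine (a sketch; this is what
 * vhost_vdpa_net_cvq_start() does when the device can isolate CVQ):
 *
 *   int64_t group = vhost_vdpa_get_vring_group(fd, last_vq_index, errp);
 *   if (group >= 0) {
 *       vhost_vdpa_set_address_space_id(v, group, VHOST_VDPA_NET_CVQ_ASID);
 *   }
 *
 * Data virtqueues stay in VHOST_VDPA_GUEST_PA_ASID, so guest memory and
 * the shadow CVQ buffers get independent IOTLB mappings.
 */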

static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
    VhostIOVATree *tree = v->shared->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        return;
    }

    r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, map->iova,
                             map->size + 1);
    if (unlikely(r != 0)) {
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, *map);
}

/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                  bool write)
{
    DMAMap map = {};
    int r;

    map.translated_addr = (hwaddr)(uintptr_t)buf;
    map.size = size - 1;
    map.perm = write ? IOMMU_RW : IOMMU_RO;
    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map);
    if (unlikely(r != IOVA_OK)) {
        error_report("Cannot map injected element");
        return r;
    }

    r = vhost_vdpa_dma_map(v->shared, v->address_space_id, map.iova,
                           vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
    if (unlikely(r < 0)) {
        goto dma_map_err;
    }

    return 0;

dma_map_err:
    vhost_iova_tree_remove(v->shared->iova_tree, map);
    return r;
}
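
/*
 * Note the DMAMap size convention used above: DMAMap.size is inclusive
 * (the offset of the last byte), so mapping one 4 KiB shadow page records
 * map.size == 4095, while vhost_vdpa_dma_map()/_unmap() take a real length
 * and therefore use map->size + 1 (or the page length directly). Mixing
 * the two conventions is an easy off-by-one to introduce, which is why
 * both call sites spell it out.
 */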

static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s, *s0;
    struct vhost_vdpa *v;
    int64_t cvq_group;
    int r;
    Error *err = NULL;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    v = &s->vhost_vdpa;

    s0 = vhost_vdpa_net_first_nc_vdpa(s);
    v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled;
    s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;

    if (v->shared->shadow_data) {
        /* SVQ is already configured for all virtqueues */
        goto out;
    }

    /*
     * If we return early in these cases SVQ will not be enabled. The
     * migration will be blocked as long as the vhost-vdpa backend does
     * not offer _F_LOG.
     */
    if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
        return 0;
    }

    if (!s->cvq_isolated) {
        return 0;
    }

    cvq_group = vhost_vdpa_get_vring_group(v->shared->device_fd,
                                           v->dev->vq_index_end - 1,
                                           &err);
    if (unlikely(cvq_group < 0)) {
        error_report_err(err);
        return cvq_group;
    }

    r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
    if (unlikely(r < 0)) {
        return r;
    }

    v->shadow_vqs_enabled = true;
    s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;

out:
    if (!s->vhost_vdpa.shadow_vqs_enabled) {
        return 0;
    }

    /*
     * If another vhost_vdpa already has an iova_tree, reuse it for
     * simplicity, whether CVQ shares ASID with the guest or not, because:
     * - The memory listener needs access to guest's memory addresses
     *   allocated in the IOVA tree.
     * - There should be plenty of IOVA address space for both ASIDs not to
     *   worry about collisions between them. Guest's translations are
     *   still validated with virtio virtqueue_pop so there is no risk for
     *   the guest to access memory that it shouldn't.
     *
     * Allocating an IOVA tree per ASID is doable but it complicates the
     * code and it is not worth it for the moment.
     */
    if (!v->shared->iova_tree) {
        v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
                                                   v->shared->iova_range.last);
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                               vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(r < 0)) {
        return r;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
                               vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(r < 0)) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
    }

    return r;
}

static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
    }

    vhost_vdpa_net_client_stop(nc);
}

static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
                                      const struct iovec *out_sg, size_t out_num,
                                      const struct iovec *in_sg, size_t in_num)
{
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int r;

    r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL);
    if (unlikely(r != 0)) {
        if (unlikely(r == -ENOSPC)) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                          __func__);
        }
    }

    return r;
}

/*
 * Convenience wrapper to poll SVQ for multiple control commands.
 *
 * Caller should hold the BQL when invoking this function, and should
 * collect the answer before the BQL is released, since SVQ may poll on
 * its own afterwards.
 */
static ssize_t vhost_vdpa_net_svq_poll(VhostVDPAState *s, size_t cmds_in_flight)
{
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);

    return vhost_svq_poll(svq, cmds_in_flight);
}

static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s,
                                             struct iovec *out_cursor,
                                             struct iovec *in_cursor)
{
    /* reset the cursor of the output buffer for the device */
    out_cursor->iov_base = s->cvq_cmd_out_buffer;
    out_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();

    /* reset the cursor of the in buffer for the device */
    in_cursor->iov_base = s->status;
    in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
}
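
/*
 * Layout of the shadow buffers while loading state (illustrative, with the
 * first two commands already queued; sizes follow the structs used below):
 *
 *   cvq_cmd_out_buffer: | hdr+MAC addr (8B) | hdr+MQ pairs (4B) | free ... |
 *                                                               ^ out_cursor
 *   status:             | ack (1B) | ack (1B) | free ...                   |
 *                                             ^ in_cursor
 *
 * Each queued command consumes one out slice and one 1-byte ack slice, so
 * `in_cursor->iov_base - (void *)s->status` is exactly the number of
 * commands still in flight, which is what the flush below polls for.
 */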

/*
 * Poll SVQ for multiple pending control commands and check the device's ack.
 *
 * Caller should hold the BQL when invoking this function.
 *
 * @s: The VhostVDPAState
 * @len: The length of the pending status shadow buffer
 */
static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len)
{
    /* device uses a one-byte length ack for each control command */
    ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len);

    if (unlikely(dev_written != len)) {
        return -EIO;
    }

    /* check the device's ack */
    for (int i = 0; i < len; ++i) {
        if (s->status[i] != VIRTIO_NET_OK) {
            return -EIO;
        }
    }
    return 0;
}

static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
                                       struct iovec *out_cursor,
                                       struct iovec *in_cursor, uint8_t class,
                                       uint8_t cmd, const struct iovec *data_sg,
                                       size_t data_num)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = class,
        .cmd = cmd,
    };
    size_t data_size = iov_size(data_sg, data_num), cmd_size;
    struct iovec out, in;
    ssize_t r;
    unsigned dummy_cursor_iov_cnt;
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);

    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
    cmd_size = sizeof(ctrl) + data_size;
    if (vhost_svq_available_slots(svq) < 2 ||
        iov_size(out_cursor, 1) < cmd_size) {
        /*
         * It is time to flush all pending control commands if SVQ is full
         * or the control commands shadow buffers are full.
         *
         * We can poll here since we've had BQL from the time
         * we sent the descriptor.
         */
        r = vhost_vdpa_net_svq_flush(s, in_cursor->iov_base -
                                     (void *)s->status);
        if (unlikely(r < 0)) {
            return r;
        }

        vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor);
    }

    /* pack the CVQ command header */
    iov_from_buf(out_cursor, 1, 0, &ctrl, sizeof(ctrl));
    /* pack the CVQ command's command-specific data */
    iov_to_buf(data_sg, data_num, 0,
               out_cursor->iov_base + sizeof(ctrl), data_size);

    /* extract the required buffer from the cursor for output */
    iov_copy(&out, 1, out_cursor, 1, 0, cmd_size);
    /* extract the required buffer from the cursor for input */
    iov_copy(&in, 1, in_cursor, 1, 0, sizeof(*s->status));

    r = vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1);
    if (unlikely(r < 0)) {
        return r;
    }

    /* iterate the cursors */
    dummy_cursor_iov_cnt = 1;
    iov_discard_front(&out_cursor, &dummy_cursor_iov_cnt, cmd_size);
    dummy_cursor_iov_cnt = 1;
    iov_discard_front(&in_cursor, &dummy_cursor_iov_cnt, sizeof(*s->status));

    return 0;
}

static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
                                   struct iovec *out_cursor,
                                   struct iovec *in_cursor)
{
    if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        const struct iovec data = {
            .iov_base = (void *)n->mac,
            .iov_len = sizeof(n->mac),
        };
        ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                            VIRTIO_NET_CTRL_MAC,
                                            VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                            &data, 1);
        if (unlikely(r < 0)) {
            return r;
        }
    }

    /*
     * According to the VirtIO standard, "The device MUST have an
     * empty MAC filtering table on reset.".
     *
     * Therefore, there is no need to send this CVQ command if the
     * driver also sets an empty MAC filter table, which aligns with
     * the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX) ||
        n->mac_table.in_use == 0) {
        return 0;
    }

    uint32_t uni_entries = n->mac_table.first_multi,
             uni_macs_size = uni_entries * ETH_ALEN,
             mul_entries = n->mac_table.in_use - uni_entries,
             mul_macs_size = mul_entries * ETH_ALEN;
    struct virtio_net_ctrl_mac uni = {
        .entries = cpu_to_le32(uni_entries),
    };
    struct virtio_net_ctrl_mac mul = {
        .entries = cpu_to_le32(mul_entries),
    };
    const struct iovec data[] = {
        {
            .iov_base = &uni,
            .iov_len = sizeof(uni),
        }, {
            .iov_base = n->mac_table.macs,
            .iov_len = uni_macs_size,
        }, {
            .iov_base = &mul,
            .iov_len = sizeof(mul),
        }, {
            .iov_base = &n->mac_table.macs[uni_macs_size],
            .iov_len = mul_macs_size,
        },
    };
    ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_MAC,
                                        VIRTIO_NET_CTRL_MAC_TABLE_SET,
                                        data, ARRAY_SIZE(data));
    if (unlikely(r < 0)) {
        return r;
    }

    return 0;
}
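
/*
 * Wire format produced by the MAC_TABLE_SET command above (the four iovecs
 * are flattened into the out shadow buffer by vhost_vdpa_net_load_cmd()):
 *
 *   struct virtio_net_ctrl_hdr  hdr;      2 bytes, class/cmd
 *   le32                        entries;  unicast count
 *   u8                          macs[entries][ETH_ALEN];
 *   le32                        entries;  multicast count
 *   u8                          macs[entries][ETH_ALEN];
 *
 * n->mac_table.macs stores unicast addresses first (first_multi marks the
 * split), so the two slices can be sent without copying.
 */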

static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n,
                                   struct iovec *out_cursor,
                                   struct iovec *in_cursor, bool do_rss)
{
    struct virtio_net_rss_config cfg = {};
    ssize_t r;
    g_autofree uint16_t *table = NULL;

    /*
     * According to the VirtIO standard, "Initially the device has all hash
     * types disabled and reports only VIRTIO_NET_HASH_REPORT_NONE.".
     *
     * Therefore, there is no need to send this CVQ command if the
     * driver disables all hash types, which aligns with
     * the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (!n->rss_data.enabled ||
        n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) {
        return 0;
    }

    table = g_malloc_n(n->rss_data.indirections_len,
                       sizeof(n->rss_data.indirections_table[0]));
    cfg.hash_types = cpu_to_le32(n->rss_data.hash_types);

    if (do_rss) {
        /*
         * According to the VirtIO standard, "Number of entries in
         * indirection_table is (indirection_table_mask + 1)".
         */
        cfg.indirection_table_mask = cpu_to_le16(n->rss_data.indirections_len -
                                                 1);
        cfg.unclassified_queue = cpu_to_le16(n->rss_data.default_queue);
        for (int i = 0; i < n->rss_data.indirections_len; ++i) {
            table[i] = cpu_to_le16(n->rss_data.indirections_table[i]);
        }
        cfg.max_tx_vq = cpu_to_le16(n->curr_queue_pairs);
    } else {
        /*
         * According to the VirtIO standard, "Field reserved MUST contain
         * zeroes. It is defined to make the structure to match the layout
         * of virtio_net_rss_config structure, defined in 5.1.6.5.7.".
         *
         * Therefore, we need to zero the fields in
         * struct virtio_net_rss_config, which corresponds to the
         * `reserved` field in struct virtio_net_hash_config.
         *
         * Note that all other fields are zeroed at their definitions,
         * except for the `indirection_table` field, where the actual data
         * is stored in the `table` variable to ensure compatibility
         * with the RSS case. Therefore, we need to zero the `table`
         * variable here.
         */
        table[0] = 0;
    }

    /*
     * virtio_net_handle_rss() currently does not restore the hash key
     * length parsed from the CVQ command sent by the guest into
     * n->rss_data, and other code uses the maximum key length, so we
     * employ the maximum key length here as well.
     */
    cfg.hash_key_length = sizeof(n->rss_data.key);

    const struct iovec data[] = {
        {
            .iov_base = &cfg,
            .iov_len = offsetof(struct virtio_net_rss_config,
                                indirection_table),
        }, {
            .iov_base = table,
            .iov_len = n->rss_data.indirections_len *
                       sizeof(n->rss_data.indirections_table[0]),
        }, {
            .iov_base = &cfg.max_tx_vq,
            .iov_len = offsetof(struct virtio_net_rss_config, hash_key_data) -
                       offsetof(struct virtio_net_rss_config, max_tx_vq),
        }, {
            .iov_base = (void *)n->rss_data.key,
            .iov_len = sizeof(n->rss_data.key),
        }
    };

    r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                VIRTIO_NET_CTRL_MQ,
                                do_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG :
                                VIRTIO_NET_CTRL_MQ_HASH_CONFIG,
                                data, ARRAY_SIZE(data));
    if (unlikely(r < 0)) {
        return r;
    }

    return 0;
}
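
/*
 * Why the config above is split into four iovecs: the on-the-wire
 * virtio_net_rss_config has a variable-length indirection table in the
 * middle, so the C struct's fixed one-entry indirection_table placeholder
 * cannot be sent verbatim. The slices are:
 *
 *   [0] cfg up to (but excluding) indirection_table
 *   [1] the real indirection table (a single zero entry for HASH_CONFIG)
 *   [2] cfg from max_tx_vq up to hash_key_data
 *   [3] the hash key
 *
 * Concatenated they form the layout of section 5.1.6.5.7 of the VirtIO
 * standard, for both the RSS and the hash-report variants.
 */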

static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n,
                                  struct iovec *out_cursor,
                                  struct iovec *in_cursor)
{
    struct virtio_net_ctrl_mq mq;
    ssize_t r;

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
        return 0;
    }

    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
    const struct iovec data = {
        .iov_base = &mq,
        .iov_len = sizeof(mq),
    };
    r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                VIRTIO_NET_CTRL_MQ,
                                VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
                                &data, 1);
    if (unlikely(r < 0)) {
        return r;
    }

    if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_RSS)) {
        /* load the receive-side scaling state */
        r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, true);
        if (unlikely(r < 0)) {
            return r;
        }
    } else if (virtio_vdev_has_feature(&n->parent_obj,
                                       VIRTIO_NET_F_HASH_REPORT)) {
        /* load the hash calculation state */
        r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, false);
        if (unlikely(r < 0)) {
            return r;
        }
    }

    return 0;
}

static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
                                        const VirtIONet *n,
                                        struct iovec *out_cursor,
                                        struct iovec *in_cursor)
{
    uint64_t offloads;
    ssize_t r;

    if (!virtio_vdev_has_feature(&n->parent_obj,
                                 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return 0;
    }

    if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
        /*
         * According to the VirtIO standard, "Upon feature negotiation
         * corresponding offload gets enabled to preserve
         * backward compatibility.".
         *
         * Therefore, there is no need to send this CVQ command if the
         * driver also enables all supported offloads, which aligns with
         * the device's defaults.
         *
         * Note that the device's defaults can mismatch the driver's
         * configuration only at live migration.
         */
        return 0;
    }

    offloads = cpu_to_le64(n->curr_guest_offloads);
    const struct iovec data = {
        .iov_base = &offloads,
        .iov_len = sizeof(offloads),
    };
    r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                VIRTIO_NET_CTRL_GUEST_OFFLOADS,
                                VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
                                &data, 1);
    if (unlikely(r < 0)) {
        return r;
    }

    return 0;
}

static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s,
                                       struct iovec *out_cursor,
                                       struct iovec *in_cursor,
                                       uint8_t cmd,
                                       uint8_t on)
{
    const struct iovec data = {
        .iov_base = &on,
        .iov_len = sizeof(on),
    };
    ssize_t r;

    r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                VIRTIO_NET_CTRL_RX, cmd, &data, 1);
    if (unlikely(r < 0)) {
        return r;
    }

    return 0;
}
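
/*
 * Every rx-mode command above is the minimal CVQ command possible; e.g.
 * enabling all-multicast is 3 bytes in the out shadow buffer plus the
 * 1-byte ack:
 *
 *   out: { .class = VIRTIO_NET_CTRL_RX,
 *          .cmd   = VIRTIO_NET_CTRL_RX_ALLMULTI }, followed by u8 on = 1
 *   in:  virtio_net_ctrl_ack (VIRTIO_NET_OK / VIRTIO_NET_ERR)
 */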

static int vhost_vdpa_net_load_rx(VhostVDPAState *s,
                                  const VirtIONet *n,
                                  struct iovec *out_cursor,
                                  struct iovec *in_cursor)
{
    ssize_t r;

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) {
        return 0;
    }

    /*
     * According to virtio_net_reset(), the device turns promiscuous mode
     * on by default.
     *
     * Additionally, according to the VirtIO standard, "Since there are
     * no guarantees, it can use a hash filter or silently switch to
     * allmulti or promiscuous mode if it is given too many addresses.".
     * QEMU marks `n->mac_table.uni_overflow` if the guest sets too many
     * non-multicast MAC addresses, indicating that promiscuous mode
     * should be enabled.
     *
     * Therefore, QEMU should only send this CVQ command if
     * `n->mac_table.uni_overflow` is not marked and `n->promisc` is off,
     * which turns promiscuous mode off, different from the device's
     * defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (!n->mac_table.uni_overflow && !n->promisc) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_PROMISC, 0);
        if (unlikely(r < 0)) {
            return r;
        }
    }

    /*
     * According to virtio_net_reset(), the device turns all-multicast mode
     * off by default.
     *
     * According to the VirtIO standard, "Since there are no guarantees,
     * it can use a hash filter or silently switch to allmulti or
     * promiscuous mode if it is given too many addresses.". QEMU marks
     * `n->mac_table.multi_overflow` if the guest sets too many
     * multicast MAC addresses.
     *
     * Therefore, QEMU should only send this CVQ command if
     * `n->mac_table.multi_overflow` is marked or `n->allmulti` is on,
     * which turns all-multicast mode on, different from the device's
     * defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (n->mac_table.multi_overflow || n->allmulti) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_ALLMULTI, 1);
        if (unlikely(r < 0)) {
            return r;
        }
    }

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX_EXTRA)) {
        return 0;
    }

    /*
     * According to virtio_net_reset(), the device turns all-unicast mode
     * off by default.
     *
     * Therefore, QEMU should only send this CVQ command if the driver
     * sets all-unicast mode on, different from the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (n->alluni) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_ALLUNI, 1);
        if (r < 0) {
            return r;
        }
    }

    /*
     * According to virtio_net_reset(), the device turns non-multicast mode
     * off by default.
     *
     * Therefore, QEMU should only send this CVQ command if the driver
     * sets non-multicast mode on, different from the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (n->nomulti) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_NOMULTI, 1);
        if (r < 0) {
            return r;
        }
    }

    /*
     * According to virtio_net_reset(), the device turns non-unicast mode
     * off by default.
     *
     * Therefore, QEMU should only send this CVQ command if the driver
     * sets non-unicast mode on, different from the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (n->nouni) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_NOUNI, 1);
        if (r < 0) {
            return r;
        }
    }

    /*
     * According to virtio_net_reset(), the device turns non-broadcast mode
     * off by default.
     *
     * Therefore, QEMU should only send this CVQ command if the driver
     * sets non-broadcast mode on, different from the device's defaults.
     *
     * Note that the device's defaults can mismatch the driver's
     * configuration only at live migration.
     */
    if (n->nobcast) {
        r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_RX_NOBCAST, 1);
        if (r < 0) {
            return r;
        }
    }

    return 0;
}

static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s,
                                           const VirtIONet *n,
                                           struct iovec *out_cursor,
                                           struct iovec *in_cursor,
                                           uint16_t vid)
{
    const struct iovec data = {
        .iov_base = &vid,
        .iov_len = sizeof(vid),
    };
    ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
                                        VIRTIO_NET_CTRL_VLAN,
                                        VIRTIO_NET_CTRL_VLAN_ADD,
                                        &data, 1);
    if (unlikely(r < 0)) {
        return r;
    }

    return 0;
}

static int vhost_vdpa_net_load_vlan(VhostVDPAState *s,
                                    const VirtIONet *n,
                                    struct iovec *out_cursor,
                                    struct iovec *in_cursor)
{
    int r;

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) {
        return 0;
    }

    for (int i = 0; i < MAX_VLAN >> 5; i++) {
        for (int j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                r = vhost_vdpa_net_load_single_vlan(s, n, out_cursor,
                                                    in_cursor, (i << 5) + j);
                if (unlikely(r != 0)) {
                    return r;
                }
            }
        }
    }

    return 0;
}
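
/*
 * n->vlans is a bitmap packed into 32-bit words, so the loop above
 * reconstructs each VLAN id as (i << 5) + j. A quick worked example:
 * VLAN id 100 lives in word i = 100 / 32 = 3 at bit j = 100 % 32 = 4,
 * and 3 * 32 + 4 = 100 is what gets sent in VIRTIO_NET_CTRL_VLAN_ADD.
 * With MAX_VLAN = 4096 that is at most 128 words to scan, and a command
 * is only queued for bits that are actually set.
 */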

static int vhost_vdpa_net_cvq_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    const VirtIONet *n;
    int r;
    struct iovec out_cursor, in_cursor;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    vhost_vdpa_set_vring_ready(v, v->dev->vq_index);

    if (v->shadow_vqs_enabled) {
        n = VIRTIO_NET(v->dev->vdev);
        vhost_vdpa_net_load_cursor_reset(s, &out_cursor, &in_cursor);
        r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
        if (unlikely(r < 0)) {
            return r;
        }
        r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
        if (unlikely(r)) {
            return r;
        }
        r = vhost_vdpa_net_load_offloads(s, n, &out_cursor, &in_cursor);
        if (unlikely(r)) {
            return r;
        }
        r = vhost_vdpa_net_load_rx(s, n, &out_cursor, &in_cursor);
        if (unlikely(r)) {
            return r;
        }
        r = vhost_vdpa_net_load_vlan(s, n, &out_cursor, &in_cursor);
        if (unlikely(r)) {
            return r;
        }

        /*
         * We need to poll and check all pending device's used buffers.
         *
         * We can poll here since we've had BQL from the time
         * we sent the descriptor.
         */
        r = vhost_vdpa_net_svq_flush(s, in_cursor.iov_base - (void *)s->status);
        if (unlikely(r)) {
            return r;
        }
    }

    for (int i = 0; i < v->dev->vq_index; ++i) {
        vhost_vdpa_set_vring_ready(v, i);
    }

    return 0;
}
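
/*
 * Ordering note for the enables above (e.g. for a 2-queue-pair device the
 * CVQ vq_index is 4): the CVQ is made ready first so the state-restore
 * commands can be processed, and only then are the data vqs 0..3 enabled.
 * The intent, as this reader understands it, is that enabling the data
 * queues before the MAC/MQ/RSS state is restored could let the device
 * receive packets into a stale configuration.
 */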

static NetClientInfo net_vhost_vdpa_cvq_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_cvq_start,
    .load = vhost_vdpa_net_cvq_load,
    .stop = vhost_vdpa_net_cvq_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
    .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
};

/*
 * Forward the excessive VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command to
 * the vdpa device.
 *
 * Considering that QEMU cannot send the entire filter table to the
 * vdpa device, it should send the VIRTIO_NET_CTRL_RX_PROMISC CVQ
 * command to enable promiscuous mode to receive all packets,
 * according to the VirtIO standard, "Since there are no guarantees,
 * it can use a hash filter or silently switch to allmulti or
 * promiscuous mode if it is given too many addresses.".
 *
 * Since QEMU ignores MAC addresses beyond `MAC_TABLE_ENTRIES` and
 * marks `n->mac_table.x_overflow` accordingly, it should have
 * the same effect on the device model to receive
 * (`MAC_TABLE_ENTRIES` + 1) or more non-multicast MAC addresses.
 * The same applies to multicast MAC addresses.
 *
 * Therefore, QEMU can provide the device model with a fake
 * VIRTIO_NET_CTRL_MAC_TABLE_SET command with (`MAC_TABLE_ENTRIES` + 1)
 * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast
 * MAC addresses. This ensures that the device model marks
 * `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`,
 * allowing all packets to be received, which aligns with the
 * state of the vdpa device.
 */
static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
                                                       VirtQueueElement *elem,
                                                       struct iovec *out,
                                                       const struct iovec *in)
{
    struct virtio_net_ctrl_mac mac_data, *mac_ptr;
    struct virtio_net_ctrl_hdr *hdr_ptr;
    uint32_t cursor;
    ssize_t r;
    uint8_t on = 1;

    /* parse the non-multicast MAC address entries from the CVQ command */
    cursor = sizeof(*hdr_ptr);
    r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
                   &mac_data, sizeof(mac_data));
    if (unlikely(r != sizeof(mac_data))) {
        /*
         * If the CVQ command is invalid, we should simulate the vdpa
         * device rejecting the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
         */
        *s->status = VIRTIO_NET_ERR;
        return sizeof(*s->status);
    }
    cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;

    /* parse the multicast MAC address entries from the CVQ command */
    r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
                   &mac_data, sizeof(mac_data));
    if (r != sizeof(mac_data)) {
        /*
         * If the CVQ command is invalid, we should simulate the vdpa
         * device rejecting the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
         */
        *s->status = VIRTIO_NET_ERR;
        return sizeof(*s->status);
    }
    cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;

    /* validate the CVQ command */
    if (iov_size(elem->out_sg, elem->out_num) != cursor) {
        /*
         * If the CVQ command is invalid, we should simulate the vdpa
         * device rejecting the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
         */
        *s->status = VIRTIO_NET_ERR;
        return sizeof(*s->status);
    }

    /*
     * According to the VirtIO standard, "Since there are no guarantees,
     * it can use a hash filter or silently switch to allmulti or
     * promiscuous mode if it is given too many addresses.".
     *
     * Therefore, considering that QEMU is unable to send the entire
     * filter table to the vdpa device, it should send the
     * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode.
     */
    hdr_ptr = out->iov_base;
    out->iov_len = sizeof(*hdr_ptr) + sizeof(on);

    hdr_ptr->class = VIRTIO_NET_CTRL_RX;
    hdr_ptr->cmd = VIRTIO_NET_CTRL_RX_PROMISC;
    iov_from_buf(out, 1, sizeof(*hdr_ptr), &on, sizeof(on));
    r = vhost_vdpa_net_cvq_add(s, out, 1, in, 1);
    if (unlikely(r < 0)) {
        return r;
    }

    /*
     * We can poll here since we've had BQL from the time
     * we sent the descriptor.
     */
    r = vhost_vdpa_net_svq_poll(s, 1);
    if (unlikely(r < sizeof(*s->status))) {
        return r;
    }
    if (*s->status != VIRTIO_NET_OK) {
        return sizeof(*s->status);
    }

    /*
     * QEMU should also send a fake VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ
     * command to the device model, including (`MAC_TABLE_ENTRIES` + 1)
     * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1)
     * multicast MAC addresses.
     *
     * By doing so, the device model can mark `n->mac_table.uni_overflow`
     * and `n->mac_table.multi_overflow`, enabling all packets to be
     * received, which aligns with the state of the vdpa device.
     */
    cursor = 0;
    uint32_t fake_uni_entries = MAC_TABLE_ENTRIES + 1,
             fake_mul_entries = MAC_TABLE_ENTRIES + 1,
             fake_cvq_size = sizeof(struct virtio_net_ctrl_hdr) +
                             sizeof(mac_data) + fake_uni_entries * ETH_ALEN +
                             sizeof(mac_data) + fake_mul_entries * ETH_ALEN;

    assert(fake_cvq_size < vhost_vdpa_net_cvq_cmd_page_len());
    out->iov_len = fake_cvq_size;

    /* pack the header for the fake CVQ command */
    hdr_ptr = out->iov_base + cursor;
    hdr_ptr->class = VIRTIO_NET_CTRL_MAC;
    hdr_ptr->cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
    cursor += sizeof(*hdr_ptr);

    /*
     * Pack the non-multicast MAC addresses part for the fake CVQ command.
     *
     * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
     * addresses provided in the CVQ command. Therefore, only the entries
     * field needs to be prepared in the CVQ command.
     */
    mac_ptr = out->iov_base + cursor;
    mac_ptr->entries = cpu_to_le32(fake_uni_entries);
    cursor += sizeof(*mac_ptr) + fake_uni_entries * ETH_ALEN;

    /*
     * Pack the multicast MAC addresses part for the fake CVQ command.
     *
     * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
     * addresses provided in the CVQ command. Therefore, only the entries
     * field needs to be prepared in the CVQ command.
     */
    mac_ptr = out->iov_base + cursor;
    mac_ptr->entries = cpu_to_le32(fake_mul_entries);

    /*
     * Simulate that QEMU has polled a vdpa device used buffer
     * for the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
     */
    return sizeof(*s->status);
}
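
/*
 * Size check for the fake command above (a sketch assuming
 * MAC_TABLE_ENTRIES is 64 and ETH_ALEN is 6, as in QEMU's headers):
 *
 *   hdr                      =   2 bytes
 *   uni entries (le32)       =   4 bytes
 *   65 * 6 unicast MACs      = 390 bytes
 *   mul entries (le32)       =   4 bytes
 *   65 * 6 multicast MACs    = 390 bytes
 *                              ---------
 *   fake_cvq_size            = 790 bytes
 *
 * comfortably below the one-page shadow buffer, which is what the
 * assert() above guards.
 */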

/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs.
 */
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                            VirtQueueElement *elem,
                                            void *opaque)
{
    VhostVDPAState *s = opaque;
    size_t in_len;
    const struct virtio_net_ctrl_hdr *ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    /* Out buffer sent to both the vdpa device and the device model */
    struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
    };
    /* in buffer used for the device model */
    const struct iovec model_in = {
        .iov_base = &status,
        .iov_len = sizeof(status),
    };
    /* in buffer used for the vdpa device */
    const struct iovec vdpa_in = {
        .iov_base = s->status,
        .iov_len = sizeof(*s->status),
    };
    ssize_t dev_written = -EINVAL;

    out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                             s->cvq_cmd_out_buffer,
                             vhost_vdpa_net_cvq_cmd_page_len());

    ctrl = s->cvq_cmd_out_buffer;
    if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) {
        /*
         * Guest announce capability is emulated by qemu, so don't forward
         * to the device.
         */
        dev_written = sizeof(status);
        *s->status = VIRTIO_NET_OK;
    } else if (unlikely(ctrl->class == VIRTIO_NET_CTRL_MAC &&
                        ctrl->cmd == VIRTIO_NET_CTRL_MAC_TABLE_SET &&
                        iov_size(elem->out_sg, elem->out_num) > out.iov_len)) {
        /*
         * Due to the size limitation of the out buffer sent to the vdpa
         * device, which is determined by vhost_vdpa_net_cvq_cmd_page_len(),
         * excessive MAC addresses set by the driver for the filter table
         * can cause truncation of the CVQ command in QEMU. As a result,
         * the vdpa device rejects the flawed CVQ command.
         *
         * Therefore, QEMU must handle this situation instead of sending
         * the CVQ command directly.
         */
        dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem,
                                                                  &out,
                                                                  &vdpa_in);
        if (unlikely(dev_written < 0)) {
            goto out;
        }
    } else {
        ssize_t r;

        r = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1);
        if (unlikely(r < 0)) {
            dev_written = r;
            goto out;
        }

        /*
         * We can poll here since we've had BQL from the time
         * we sent the descriptor.
         */
        dev_written = vhost_vdpa_net_svq_poll(s, 1);
    }

    if (unlikely(dev_written < sizeof(status))) {
        error_report("Insufficient written data (%zd)", dev_written);
        goto out;
    }

    if (*s->status != VIRTIO_NET_OK) {
        goto out;
    }

    status = VIRTIO_NET_ERR;
    virtio_net_handle_ctrl_iov(svq->vdev, &model_in, 1, &out, 1);
    if (status != VIRTIO_NET_OK) {
        error_report("Bad CVQ processing in model");
    }

out:
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    if (unlikely(in_len < sizeof(status))) {
        error_report("Bad device CVQ written length");
    }
    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
    /*
     * `elem` belongs to vhost_vdpa_net_handle_ctrl_avail() only when
     * the function successfully forwards the CVQ command, indicated
     * by a non-negative value of `dev_written`. Otherwise, it still
     * belongs to SVQ.
     * This function should only free `elem` when it owns it.
     */
    if (dev_written >= 0) {
        g_free(elem);
    }
    return dev_written < 0 ? dev_written : 0;
}

static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
};

/**
 * Probe if CVQ is isolated
 *
 * @device_fd: The vdpa device fd
 * @features: Features offered by the device.
 * @cvq_index: The control vq index
 *
 * Returns <0 in case of failure, 0 if false and 1 if true.
 */
static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
                                          int cvq_index, Error **errp)
{
    uint64_t backend_features;
    int64_t cvq_group;
    uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
                     VIRTIO_CONFIG_S_DRIVER;
    int r;

    ERRP_GUARD();

    r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
    if (unlikely(r < 0)) {
        error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
        return r;
    }

    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
        return 0;
    }

    r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set device status");
        goto out;
    }

    r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set features");
        goto out;
    }

    status |= VIRTIO_CONFIG_S_FEATURES_OK;
    r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set device status");
        goto out;
    }

    cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
    if (unlikely(cvq_group < 0)) {
        if (cvq_group != -ENOTSUP) {
            r = cvq_group;
            goto out;
        }

        /*
         * The kernel reports VHOST_BACKEND_F_IOTLB_ASID if the vdpa
         * frontend supports ASID even if the parent driver does not.
         * The CVQ cannot be isolated in this case.
         */
        error_free(*errp);
        *errp = NULL;
        r = 0;
        goto out;
    }

    for (int i = 0; i < cvq_index; ++i) {
        int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);

        if (unlikely(group < 0)) {
            r = group;
            goto out;
        }

        if (group == (int64_t)cvq_group) {
            r = 0;
            goto out;
        }
    }

    r = 1;

out:
    status = 0;
    ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
    return r;
}
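
/*
 * The probe above walks enough of the virtio initialization handshake for
 * the vring group queries to be meaningful, then always resets the device:
 *
 *   1. status = ACKNOWLEDGE | DRIVER
 *   2. VHOST_SET_FEATURES with the device's own offer (so feature
 *      negotiation cannot fail)
 *   3. status |= FEATURES_OK
 *   4. VHOST_VDPA_GET_VRING_GROUP for the CVQ and every data vq;
 *      CVQ counts as isolated iff no data vq shares its group
 *   5. status = 0 (reset), leaving the device clean for the real start
 */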

static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                           const char *device,
                                           const char *name,
                                           int vdpa_device_fd,
                                           int queue_pair_index,
                                           int nvqs,
                                           bool is_datapath,
                                           bool svq,
                                           struct vhost_vdpa_iova_range iova_range,
                                           uint64_t features,
                                           VhostVDPAShared *shared,
                                           Error **errp)
{
    NetClientState *nc = NULL;
    VhostVDPAState *s;
    int ret = 0;
    int cvq_isolated = 0;

    assert(name);

    if (is_datapath) {
        nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                 name);
    } else {
        cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
                                                      queue_pair_index * 2,
                                                      errp);
        if (unlikely(cvq_isolated < 0)) {
            return NULL;
        }

        nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                         device, name);
    }
    qemu_set_info_str(nc, TYPE_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    s->vhost_vdpa.index = queue_pair_index;
    s->always_svq = svq;
    s->migration_state.notify = NULL;
    s->vhost_vdpa.shadow_vqs_enabled = svq;
    if (queue_pair_index == 0) {
        vhost_vdpa_net_valid_svq_features(features,
                                          &s->vhost_vdpa.migration_blocker);
        s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1);
        s->vhost_vdpa.shared->device_fd = vdpa_device_fd;
        s->vhost_vdpa.shared->iova_range = iova_range;
        s->vhost_vdpa.shared->shadow_data = svq;
    } else if (!is_datapath) {
        s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
                                     PROT_READ | PROT_WRITE,
                                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
                         PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                         -1, 0);

        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;
        s->cvq_isolated = cvq_isolated;
    }
    if (queue_pair_index != 0) {
        s->vhost_vdpa.shared = shared;
    }

    ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
    if (ret) {
        qemu_del_net_client(nc);
        return NULL;
    }

    return nc;
}

static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
{
    int ret = ioctl(fd, VHOST_GET_FEATURES, features);

    if (unlikely(ret < 0)) {
        error_setg_errno(errp, errno,
                         "Failed to query features from vhost-vDPA device");
    }
    return ret;
}

static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
                                          int *has_cvq, Error **errp)
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config = NULL;
    __virtio16 *max_queue_pairs;
    int ret;

    if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
        *has_cvq = 1;
    } else {
        *has_cvq = 0;
    }

    if (features & (1 << VIRTIO_NET_F_MQ)) {
        config = g_malloc0(config_size + sizeof(*max_queue_pairs));
        config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
        config->len = sizeof(*max_queue_pairs);

        ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
        if (ret) {
            error_setg(errp, "Failed to get config from vhost-vDPA device");
            return -ret;
        }

        max_queue_pairs = (__virtio16 *)&config->buf;

        return lduw_le_p(max_queue_pairs);
    }

    return 1;
}
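
/*
 * Typical usage from the QEMU command line (for reference; the exact
 * device node name depends on the host):
 *
 *   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0
 *   -device virtio-net-pci,netdev=vdpa0
 *
 * or, passing a pre-opened fd (e.g. from a management daemon):
 *
 *   -netdev type=vhost-vdpa,vhostfd=<fd>,id=vdpa0
 *
 * With x-svq=on all virtqueues are shadowed from the start instead of
 * only during migration.
 */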
range failed: %s", 1821 strerror(-r)); 1822 goto err; 1823 } 1824 1825 if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) { 1826 goto err; 1827 } 1828 1829 ncs = g_malloc0(sizeof(*ncs) * queue_pairs); 1830 1831 for (i = 0; i < queue_pairs; i++) { 1832 VhostVDPAShared *shared = NULL; 1833 1834 if (i) { 1835 shared = DO_UPCAST(VhostVDPAState, nc, ncs[0])->vhost_vdpa.shared; 1836 } 1837 ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, 1838 vdpa_device_fd, i, 2, true, opts->x_svq, 1839 iova_range, features, shared, errp); 1840 if (!ncs[i]) 1841 goto err; 1842 } 1843 1844 if (has_cvq) { 1845 VhostVDPAState *s0 = DO_UPCAST(VhostVDPAState, nc, ncs[0]); 1846 VhostVDPAShared *shared = s0->vhost_vdpa.shared; 1847 1848 nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, 1849 vdpa_device_fd, i, 1, false, 1850 opts->x_svq, iova_range, features, shared, 1851 errp); 1852 if (!nc) 1853 goto err; 1854 } 1855 1856 return 0; 1857 1858 err: 1859 if (i) { 1860 for (i--; i >= 0; i--) { 1861 qemu_del_net_client(ncs[i]); 1862 } 1863 } 1864 1865 qemu_close(vdpa_device_fd); 1866 1867 return -1; 1868 } 1869