1 /*
2  * vhost-vdpa.c
3  *
4  * Copyright(c) 2017-2018 Intel Corporation.
5  * Copyright(c) 2020 Red Hat, Inc.
6  *
7  * This work is licensed under the terms of the GNU GPL, version 2 or later.
8  * See the COPYING file in the top-level directory.
9  *
10  */
11 
12 #include "qemu/osdep.h"
13 #include "clients.h"
14 #include "hw/virtio/virtio-net.h"
15 #include "net/vhost_net.h"
16 #include "net/vhost-vdpa.h"
17 #include "hw/virtio/vhost-vdpa.h"
18 #include "qemu/config-file.h"
19 #include "qemu/error-report.h"
20 #include "qemu/log.h"
21 #include "qemu/memalign.h"
22 #include "qemu/option.h"
23 #include "qapi/error.h"
24 #include <linux/vhost.h>
25 #include <sys/ioctl.h>
26 #include <err.h>
27 #include "standard-headers/linux/virtio_net.h"
28 #include "monitor/monitor.h"
29 #include "migration/migration.h"
30 #include "migration/misc.h"
31 #include "hw/virtio/vhost.h"
32 
33 /* TODO: add multiqueue support here */
34 typedef struct VhostVDPAState {
35     NetClientState nc;
36     struct vhost_vdpa vhost_vdpa;
37     NotifierWithReturn migration_state;
38     VHostNetState *vhost_net;
39 
40     /* Control commands shadow buffers */
41     void *cvq_cmd_out_buffer;
42     virtio_net_ctrl_ack *status;
43 
44     /* The device always has SVQ enabled */
45     bool always_svq;
46 
47     /* The device can isolate CVQ in its own ASID */
48     bool cvq_isolated;
49 
50     bool started;
51 } VhostVDPAState;
52 
53 /*
54  * The array is sorted alphabetically in ascending order,
55  * with the exception of VHOST_INVALID_FEATURE_BIT,
56  * which should always be the last entry.
57  */
58 const int vdpa_feature_bits[] = {
59     VIRTIO_F_ANY_LAYOUT,
60     VIRTIO_F_IOMMU_PLATFORM,
61     VIRTIO_F_NOTIFY_ON_EMPTY,
62     VIRTIO_F_RING_PACKED,
63     VIRTIO_F_RING_RESET,
64     VIRTIO_F_VERSION_1,
65     VIRTIO_NET_F_CSUM,
66     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
67     VIRTIO_NET_F_CTRL_MAC_ADDR,
68     VIRTIO_NET_F_CTRL_RX,
69     VIRTIO_NET_F_CTRL_RX_EXTRA,
70     VIRTIO_NET_F_CTRL_VLAN,
71     VIRTIO_NET_F_CTRL_VQ,
72     VIRTIO_NET_F_GSO,
73     VIRTIO_NET_F_GUEST_CSUM,
74     VIRTIO_NET_F_GUEST_ECN,
75     VIRTIO_NET_F_GUEST_TSO4,
76     VIRTIO_NET_F_GUEST_TSO6,
77     VIRTIO_NET_F_GUEST_UFO,
78     VIRTIO_NET_F_GUEST_USO4,
79     VIRTIO_NET_F_GUEST_USO6,
80     VIRTIO_NET_F_HASH_REPORT,
81     VIRTIO_NET_F_HOST_ECN,
82     VIRTIO_NET_F_HOST_TSO4,
83     VIRTIO_NET_F_HOST_TSO6,
84     VIRTIO_NET_F_HOST_UFO,
85     VIRTIO_NET_F_HOST_USO,
86     VIRTIO_NET_F_MQ,
87     VIRTIO_NET_F_MRG_RXBUF,
88     VIRTIO_NET_F_MTU,
89     VIRTIO_NET_F_RSS,
90     VIRTIO_NET_F_STATUS,
91     VIRTIO_RING_F_EVENT_IDX,
92     VIRTIO_RING_F_INDIRECT_DESC,
93 
94     /* VHOST_INVALID_FEATURE_BIT should always be the last entry */
95     VHOST_INVALID_FEATURE_BIT
96 };
97 
98 /** Supported device-specific feature bits with SVQ */
99 static const uint64_t vdpa_svq_device_features =
100     BIT_ULL(VIRTIO_NET_F_CSUM) |
101     BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
102     BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
103     BIT_ULL(VIRTIO_NET_F_MTU) |
104     BIT_ULL(VIRTIO_NET_F_MAC) |
105     BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
106     BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
107     BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
108     BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
109     BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
110     BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
111     BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
112     BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
113     BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
114     BIT_ULL(VIRTIO_NET_F_STATUS) |
115     BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
116     BIT_ULL(VIRTIO_NET_F_CTRL_RX) |
117     BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |
118     BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) |
119     BIT_ULL(VIRTIO_NET_F_MQ) |
120     BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
121     BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
122     /* VHOST_F_LOG_ALL is exposed by SVQ */
123     BIT_ULL(VHOST_F_LOG_ALL) |
124     BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |
125     BIT_ULL(VIRTIO_NET_F_RSS) |
126     BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
127     BIT_ULL(VIRTIO_NET_F_STANDBY) |
128     BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
129 
130 #define VHOST_VDPA_NET_CVQ_ASID 1
131 
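/** Return the vhost_net state associated with a vhost-vdpa net client */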
132 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
133 {
134     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
135     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
136     return s->vhost_net;
137 }
138 
139 static size_t vhost_vdpa_net_cvq_cmd_len(void)
140 {
141     /*
142      * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
143      * The in buffer is always 1 byte, so it fits here.
144      */
145     return sizeof(struct virtio_net_ctrl_hdr) +
146            2 * sizeof(struct virtio_net_ctrl_mac) +
147            MAC_TABLE_ENTRIES * ETH_ALEN;
148 }
149 
150 static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
151 {
152     return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
153 }
154 
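/** Check that all device-specific features offered can be handled by SVQ */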
155 static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
156 {
157     uint64_t invalid_dev_features =
158         features & ~vdpa_svq_device_features &
159         /* Transport features are all accepted at this point */
160         ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
161                          VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
162 
163     if (invalid_dev_features) {
164         error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
165                    invalid_dev_features);
166         return false;
167     }
168 
169     return vhost_svq_valid_features(features, errp);
170 }
171 
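/** Verify that the vdpa device exposes a network device (VIRTIO_ID_NET) */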
172 static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
173 {
174     uint32_t device_id;
175     int ret;
176     struct vhost_dev *hdev;
177 
178     hdev = (struct vhost_dev *)&net->dev;
179     ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
180     if (device_id != VIRTIO_ID_NET) {
181         return -ENOTSUP;
182     }
183     return ret;
184 }
185 
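/** Initialize vhost_net for this client's queue pairs and check the device id */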
186 static int vhost_vdpa_add(NetClientState *ncs, void *be,
187                           int queue_pair_index, int nvqs)
188 {
189     VhostNetOptions options;
190     struct vhost_net *net = NULL;
191     VhostVDPAState *s;
192     int ret;
193 
194     options.backend_type = VHOST_BACKEND_TYPE_VDPA;
195     assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
196     s = DO_UPCAST(VhostVDPAState, nc, ncs);
197     options.net_backend = ncs;
198     options.opaque      = be;
199     options.busyloop_timeout = 0;
200     options.nvqs = nvqs;
201 
202     net = vhost_net_init(&options);
203     if (!net) {
204         error_report("failed to init vhost_net for queue");
205         goto err_init;
206     }
207     s->vhost_net = net;
208     ret = vhost_vdpa_net_check_device_id(net);
209     if (ret) {
210         goto err_check;
211     }
212     return 0;
213 err_check:
214     vhost_net_cleanup(net);
215     g_free(net);
216 err_init:
217     return -1;
218 }
219 
220 static void vhost_vdpa_cleanup(NetClientState *nc)
221 {
222     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
223 
224     /*
225      * If a peer NIC is attached, do not cleanup anything.
226      * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
227      * when the guest is shutting down.
228      */
229     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
230         return;
231     }
232     munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
233     munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
234     if (s->vhost_net) {
235         vhost_net_cleanup(s->vhost_net);
236         g_free(s->vhost_net);
237         s->vhost_net = NULL;
238     }
239     if (s->vhost_vdpa.index != 0) {
240         return;
241     }
242     qemu_close(s->vhost_vdpa.shared->device_fd);
243     g_free(s->vhost_vdpa.shared);
244 }
245 
246 /** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend  */
247 static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd)
248 {
249     return true;
250 }
251 
252 static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
253 {
254     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
255 
256     return true;
257 }
258 
259 static bool vhost_vdpa_has_ufo(NetClientState *nc)
260 {
261     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
262     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
263     uint64_t features = 0;
264     features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
265     features = vhost_net_get_features(s->vhost_net, features);
266     return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
267 
268 }
269 
270 static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
271                                        Error **errp)
272 {
273     const char *driver = object_class_get_name(oc);
274 
275     if (!g_str_has_prefix(driver, "virtio-net-")) {
276         error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
277         return false;
278     }
279 
280     return true;
281 }
282 
283 /** Dummy receive in case qemu falls back to userland tap networking */
284 static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
285                                   size_t size)
286 {
287     return size;
288 }
289 
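/*
 * Restart vhost_net so the data virtqueues are brought up with (or without)
 * SVQ, which provides the dirty page logging needed for live migration.
 */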
290 static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
291 {
292     struct vhost_vdpa *v = &s->vhost_vdpa;
293     VirtIONet *n;
294     VirtIODevice *vdev;
295     int data_queue_pairs, cvq, r;
296 
297     /* We are only called on the first data vq and only if x-svq is not set */
298     if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
299         return;
300     }
301 
302     vdev = v->dev->vdev;
303     n = VIRTIO_NET(vdev);
304     if (!n->vhost_started) {
305         return;
306     }
307 
308     data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
309     cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
310                                   n->max_ncs - n->max_queue_pairs : 0;
311     /*
312      * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
313      * in the future and resume the device if read-only operations between
314      * suspend and reset go wrong.
315      */
316     vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
317 
318     /* Start will check migration setup_or_active to decide whether to configure SVQ */
319     r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
320     if (unlikely(r < 0)) {
321         error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
322     }
323 }
324 
325 static int vdpa_net_migration_state_notifier(NotifierWithReturn *notifier,
326                                              void *data, Error **errp)
327 {
328     MigrationState *migration = data;
329     VhostVDPAState *s = container_of(notifier, VhostVDPAState,
330                                      migration_state);
331 
332     if (migration_in_setup(migration)) {
333         vhost_vdpa_net_log_global_enable(s, true);
334     } else if (migration_has_failed(migration)) {
335         vhost_vdpa_net_log_global_enable(s, false);
336     }
337     return 0;
338 }
339 
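/*
 * Per-device setup done on the first data vq: register the migration notifier
 * and, when SVQ is enabled, allocate the shared IOVA tree.
 */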
340 static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
341 {
342     struct vhost_vdpa *v = &s->vhost_vdpa;
343 
344     migration_add_notifier(&s->migration_state,
345                            vdpa_net_migration_state_notifier);
346     if (v->shadow_vqs_enabled) {
347         v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
348                                                    v->shared->iova_range.last);
349     }
350 }
351 
352 static int vhost_vdpa_net_data_start(NetClientState *nc)
353 {
354     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
355     struct vhost_vdpa *v = &s->vhost_vdpa;
356 
357     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
358 
359     if (s->always_svq ||
360         migration_is_setup_or_active(migrate_get_current()->state)) {
361         v->shadow_vqs_enabled = true;
362     } else {
363         v->shadow_vqs_enabled = false;
364     }
365 
366     if (v->index == 0) {
367         v->shared->shadow_data = v->shadow_vqs_enabled;
368         vhost_vdpa_net_data_start_first(s);
369         return 0;
370     }
371 
372     return 0;
373 }
374 
375 static int vhost_vdpa_net_data_load(NetClientState *nc)
376 {
377     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
378     struct vhost_vdpa *v = &s->vhost_vdpa;
379     bool has_cvq = v->dev->vq_index_end % 2;
380 
381     if (has_cvq) {
382         return 0;
383     }
384 
385     for (int i = 0; i < v->dev->nvqs; ++i) {
386         vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index);
387     }
388     return 0;
389 }
390 
391 static void vhost_vdpa_net_client_stop(NetClientState *nc)
392 {
393     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
394     struct vhost_dev *dev;
395 
396     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
397 
398     if (s->vhost_vdpa.index == 0) {
399         migration_remove_notifier(&s->migration_state);
400     }
401 
402     dev = s->vhost_vdpa.dev;
403     if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
404         g_clear_pointer(&s->vhost_vdpa.shared->iova_tree,
405                         vhost_iova_tree_delete);
406     }
407 }
408 
409 static NetClientInfo net_vhost_vdpa_info = {
410         .type = NET_CLIENT_DRIVER_VHOST_VDPA,
411         .size = sizeof(VhostVDPAState),
412         .receive = vhost_vdpa_receive,
413         .start = vhost_vdpa_net_data_start,
414         .load = vhost_vdpa_net_data_load,
415         .stop = vhost_vdpa_net_client_stop,
416         .cleanup = vhost_vdpa_cleanup,
417         .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
418         .has_ufo = vhost_vdpa_has_ufo,
419         .check_peer_type = vhost_vdpa_check_peer_type,
420         .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
421 };
422 
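/** Return the group the given virtqueue belongs to, or a negative errno on failure */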
423 static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
424                                           Error **errp)
425 {
426     struct vhost_vring_state state = {
427         .index = vq_index,
428     };
429     int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
430 
431     if (unlikely(r < 0)) {
432         r = -errno;
433         error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
434         return r;
435     }
436 
437     return state.num;
438 }
439 
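/** Bind a virtqueue group to the given address space id */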
440 static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
441                                            unsigned vq_group,
442                                            unsigned asid_num)
443 {
444     struct vhost_vring_state asid = {
445         .index = vq_group,
446         .num = asid_num,
447     };
448     int r;
449 
450     r = ioctl(v->shared->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
451     if (unlikely(r < 0)) {
452         error_report("Can't set vq group %u asid %u, errno=%d (%s)",
453                      asid.index, asid.num, errno, g_strerror(errno));
454     }
455     return r;
456 }
457 
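/** Unmap a shadow CVQ buffer from the device and remove it from the IOVA tree */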
458 static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
459 {
460     VhostIOVATree *tree = v->shared->iova_tree;
461     DMAMap needle = {
462         /*
463          * No need to specify size or to look for more translations since
464          * this contiguous chunk was allocated by us.
465          */
466         .translated_addr = (hwaddr)(uintptr_t)addr,
467     };
468     const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
469     int r;
470 
471     if (unlikely(!map)) {
472         error_report("Cannot locate expected map");
473         return;
474     }
475 
476     r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, map->iova,
477                              map->size + 1);
478     if (unlikely(r != 0)) {
479         error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
480     }
481 
482     vhost_iova_tree_remove(tree, *map);
483 }
484 
485 /** Map CVQ buffer. */
486 static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
487                                   bool write)
488 {
489     DMAMap map = {};
490     int r;
491 
492     map.translated_addr = (hwaddr)(uintptr_t)buf;
493     map.size = size - 1;
494     map.perm = write ? IOMMU_RW : IOMMU_RO;
495     r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map);
496     if (unlikely(r != IOVA_OK)) {
497         error_report("Cannot map injected element");
498         return r;
499     }
500 
501     r = vhost_vdpa_dma_map(v->shared, v->address_space_id, map.iova,
502                            vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
503     if (unlikely(r < 0)) {
504         goto dma_map_err;
505     }
506 
507     return 0;
508 
509 dma_map_err:
510     vhost_iova_tree_remove(v->shared->iova_tree, map);
511     return r;
512 }
513 
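/*
 * Start the CVQ net client: decide whether CVQ runs on SVQ and in its own
 * ASID, and map the shadow control buffers into the device.
 */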
514 static int vhost_vdpa_net_cvq_start(NetClientState *nc)
515 {
516     VhostVDPAState *s;
517     struct vhost_vdpa *v;
518     int64_t cvq_group;
519     int r;
520     Error *err = NULL;
521 
522     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
523 
524     s = DO_UPCAST(VhostVDPAState, nc, nc);
525     v = &s->vhost_vdpa;
526 
527     v->shadow_vqs_enabled = v->shared->shadow_data;
528     s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
529 
530     if (v->shared->shadow_data) {
531         /* SVQ is already configured for all virtqueues */
532         goto out;
533     }
534 
535     /*
536      * If we return early in these cases, SVQ will not be enabled. Migration
537      * will be blocked as long as the vhost-vdpa backend does not offer _F_LOG.
538      */
539     if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
540         return 0;
541     }
542 
543     if (!s->cvq_isolated) {
544         return 0;
545     }
546 
547     cvq_group = vhost_vdpa_get_vring_group(v->shared->device_fd,
548                                            v->dev->vq_index_end - 1,
549                                            &err);
550     if (unlikely(cvq_group < 0)) {
551         error_report_err(err);
552         return cvq_group;
553     }
554 
555     r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
556     if (unlikely(r < 0)) {
557         return r;
558     }
559 
560     v->shadow_vqs_enabled = true;
561     s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
562 
563 out:
564     if (!s->vhost_vdpa.shadow_vqs_enabled) {
565         return 0;
566     }
567 
568     /*
569      * If another vhost_vdpa already has an iova_tree, reuse it for simplicity,
570      * whether CVQ shares the ASID with the guest or not, because:
571      * - The memory listener needs access to the guest's memory addresses
572      *   allocated in the IOVA tree.
573      * - There should be plenty of IOVA address space for both ASIDs, so there
574      *   is no need to worry about collisions between them.  The guest's
575      *   translations are still validated with virtio virtqueue_pop, so there
576      *   is no risk of the guest accessing memory that it shouldn't.
577      *
578      * Allocating an iova tree per ASID is doable, but it complicates the code
579      * and is not worth it for the moment.
580      */
581     if (!v->shared->iova_tree) {
582         v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first,
583                                                    v->shared->iova_range.last);
584     }
585 
586     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
587                                vhost_vdpa_net_cvq_cmd_page_len(), false);
588     if (unlikely(r < 0)) {
589         return r;
590     }
591 
592     r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
593                                vhost_vdpa_net_cvq_cmd_page_len(), true);
594     if (unlikely(r < 0)) {
595         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
596     }
597 
598     return r;
599 }
600 
601 static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
602 {
603     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
604 
605     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
606 
607     if (s->vhost_vdpa.shadow_vqs_enabled) {
608         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
609         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
610     }
611 
612     vhost_vdpa_net_client_stop(nc);
613 }
614 
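/** Add a control command to the shadow CVQ; completion is polled separately */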
615 static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
616                                     const struct iovec *out_sg, size_t out_num,
617                                     const struct iovec *in_sg, size_t in_num)
618 {
619     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
620     int r;
621 
622     r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL);
623     if (unlikely(r != 0)) {
624         if (unlikely(r == -ENOSPC)) {
625             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
626                           __func__);
627         }
628     }
629 
630     return r;
631 }
632 
633 /*
634  * Convenience wrapper to poll SVQ for multiple control commands.
635  *
636  * The caller should hold the BQL when invoking this function, and should take
637  * the answer before SVQ polls by itself once the BQL is released.
638  */
639 static ssize_t vhost_vdpa_net_svq_poll(VhostVDPAState *s, size_t cmds_in_flight)
640 {
641     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
642     return vhost_svq_poll(svq, cmds_in_flight);
643 }
644 
645 static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s,
646                                              struct iovec *out_cursor,
647                                              struct iovec *in_cursor)
648 {
649     /* reset the cursor of the output buffer for the device */
650     out_cursor->iov_base = s->cvq_cmd_out_buffer;
651     out_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
652 
653     /* reset the cursor of the in buffer for the device */
654     in_cursor->iov_base = s->status;
655     in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len();
656 }
657 
658 /*
659  * Poll SVQ for multiple pending control commands and check the device's ack.
660  *
661  * Caller should hold the BQL when invoking this function.
662  *
663  * @s: The VhostVDPAState
664  * @len: The length of the pending status shadow buffer
665  */
666 static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len)
667 {
668     /* device uses a one-byte length ack for each control command */
669     ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len);
670     if (unlikely(dev_written != len)) {
671         return -EIO;
672     }
673 
674     /* check the device's ack */
675     for (int i = 0; i < len; ++i) {
676         if (s->status[i] != VIRTIO_NET_OK) {
677             return -EIO;
678         }
679     }
680     return 0;
681 }
682 
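/*
 * Queue a control command through the shadow CVQ, flushing the pending
 * commands first if SVQ or the shadow buffers are full, and advance the
 * out/in cursors past the newly added command.
 */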
683 static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s,
684                                        struct iovec *out_cursor,
685                                        struct iovec *in_cursor, uint8_t class,
686                                        uint8_t cmd, const struct iovec *data_sg,
687                                        size_t data_num)
688 {
689     const struct virtio_net_ctrl_hdr ctrl = {
690         .class = class,
691         .cmd = cmd,
692     };
693     size_t data_size = iov_size(data_sg, data_num), cmd_size;
694     struct iovec out, in;
695     ssize_t r;
696     unsigned dummy_cursor_iov_cnt;
697     VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
698 
699     assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
700     cmd_size = sizeof(ctrl) + data_size;
701     if (vhost_svq_available_slots(svq) < 2 ||
702         iov_size(out_cursor, 1) < cmd_size) {
703         /*
704          * It is time to flush all pending control commands if SVQ is full
705          * or the control command shadow buffers are full.
706          *
707          * We can poll here since we've had BQL from the time
708          * we sent the descriptor.
709          */
710         r = vhost_vdpa_net_svq_flush(s, in_cursor->iov_base -
711                                      (void *)s->status);
712         if (unlikely(r < 0)) {
713             return r;
714         }
715 
716         vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor);
717     }
718 
719     /* pack the CVQ command header */
720     iov_from_buf(out_cursor, 1, 0, &ctrl, sizeof(ctrl));
721     /* pack the CVQ command's command-specific data */
722     iov_to_buf(data_sg, data_num, 0,
723                out_cursor->iov_base + sizeof(ctrl), data_size);
724 
725     /* extract the required buffer from the cursor for output */
726     iov_copy(&out, 1, out_cursor, 1, 0, cmd_size);
727     /* extract the required buffer from the cursor for input */
728     iov_copy(&in, 1, in_cursor, 1, 0, sizeof(*s->status));
729 
730     r = vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1);
731     if (unlikely(r < 0)) {
732         return r;
733     }
734 
735     /* iterate the cursors */
736     dummy_cursor_iov_cnt = 1;
737     iov_discard_front(&out_cursor, &dummy_cursor_iov_cnt, cmd_size);
738     dummy_cursor_iov_cnt = 1;
739     iov_discard_front(&in_cursor, &dummy_cursor_iov_cnt, sizeof(*s->status));
740 
741     return 0;
742 }
743 
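/** Restore the MAC address and, if needed, the MAC filter table through CVQ */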
744 static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
745                                    struct iovec *out_cursor,
746                                    struct iovec *in_cursor)
747 {
748     if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
749         const struct iovec data = {
750             .iov_base = (void *)n->mac,
751             .iov_len = sizeof(n->mac),
752         };
753         ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
754                                             VIRTIO_NET_CTRL_MAC,
755                                             VIRTIO_NET_CTRL_MAC_ADDR_SET,
756                                             &data, 1);
757         if (unlikely(r < 0)) {
758             return r;
759         }
760     }
761 
762     /*
763      * According to VirtIO standard, "The device MUST have an
764      * empty MAC filtering table on reset.".
765      *
766      * Therefore, there is no need to send this CVQ command if the
767      * driver also sets an empty MAC filter table, which aligns with
768      * the device's defaults.
769      *
770      * Note that the device's defaults can mismatch the driver's
771      * configuration only at live migration.
772      */
773     if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX) ||
774         n->mac_table.in_use == 0) {
775         return 0;
776     }
777 
778     uint32_t uni_entries = n->mac_table.first_multi,
779              uni_macs_size = uni_entries * ETH_ALEN,
780              mul_entries = n->mac_table.in_use - uni_entries,
781              mul_macs_size = mul_entries * ETH_ALEN;
782     struct virtio_net_ctrl_mac uni = {
783         .entries = cpu_to_le32(uni_entries),
784     };
785     struct virtio_net_ctrl_mac mul = {
786         .entries = cpu_to_le32(mul_entries),
787     };
788     const struct iovec data[] = {
789         {
790             .iov_base = &uni,
791             .iov_len = sizeof(uni),
792         }, {
793             .iov_base = n->mac_table.macs,
794             .iov_len = uni_macs_size,
795         }, {
796             .iov_base = &mul,
797             .iov_len = sizeof(mul),
798         }, {
799             .iov_base = &n->mac_table.macs[uni_macs_size],
800             .iov_len = mul_macs_size,
801         },
802     };
803     ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
804                                         VIRTIO_NET_CTRL_MAC,
805                                         VIRTIO_NET_CTRL_MAC_TABLE_SET,
806                                         data, ARRAY_SIZE(data));
807     if (unlikely(r < 0)) {
808         return r;
809     }
810 
811     return 0;
812 }
813 
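/** Restore the RSS (do_rss) or hash-report configuration through CVQ */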
814 static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n,
815                                    struct iovec *out_cursor,
816                                    struct iovec *in_cursor, bool do_rss)
817 {
818     struct virtio_net_rss_config cfg = {};
819     ssize_t r;
820     g_autofree uint16_t *table = NULL;
821 
822     /*
823      * According to VirtIO standard, "Initially the device has all hash
824      * types disabled and reports only VIRTIO_NET_HASH_REPORT_NONE.".
825      *
826      * Therefore, there is no need to send this CVQ command if the
827      * driver disables all hash types, which aligns with
828      * the device's defaults.
829      *
830      * Note that the device's defaults can mismatch the driver's
831      * configuration only at live migration.
832      */
833     if (!n->rss_data.enabled ||
834         n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) {
835         return 0;
836     }
837 
838     table = g_malloc_n(n->rss_data.indirections_len,
839                        sizeof(n->rss_data.indirections_table[0]));
840     cfg.hash_types = cpu_to_le32(n->rss_data.hash_types);
841 
842     if (do_rss) {
843         /*
844          * According to VirtIO standard, "Number of entries in indirection_table
845          * is (indirection_table_mask + 1)".
846          */
847         cfg.indirection_table_mask = cpu_to_le16(n->rss_data.indirections_len -
848                                                  1);
849         cfg.unclassified_queue = cpu_to_le16(n->rss_data.default_queue);
850         for (int i = 0; i < n->rss_data.indirections_len; ++i) {
851             table[i] = cpu_to_le16(n->rss_data.indirections_table[i]);
852         }
853         cfg.max_tx_vq = cpu_to_le16(n->curr_queue_pairs);
854     } else {
855         /*
856          * According to VirtIO standard, "Field reserved MUST contain zeroes.
857          * It is defined to make the structure match the layout of the
858          * virtio_net_rss_config structure, defined in 5.1.6.5.7.".
859          *
860          * Therefore, we need to zero the fields in
861          * struct virtio_net_rss_config, which correspond to the
862          * `reserved` field in struct virtio_net_hash_config.
863          *
864          * Note that all other fields are zeroed at their definitions,
865          * except for the `indirection_table` field, where the actual data
866          * is stored in the `table` variable to ensure compatibility
867          * with RSS case. Therefore, we need to zero the `table` variable here.
868          */
869         table[0] = 0;
870     }
871 
872     /*
873      * Considering that virtio_net_handle_rss() currently does not restore
874      * the hash key length parsed from the CVQ command sent from the guest
875      * into n->rss_data and uses the maximum key length in other code,
876      * we also employ the maximum key length here.
877      */
878     cfg.hash_key_length = sizeof(n->rss_data.key);
879 
880     const struct iovec data[] = {
881         {
882             .iov_base = &cfg,
883             .iov_len = offsetof(struct virtio_net_rss_config,
884                                 indirection_table),
885         }, {
886             .iov_base = table,
887             .iov_len = n->rss_data.indirections_len *
888                        sizeof(n->rss_data.indirections_table[0]),
889         }, {
890             .iov_base = &cfg.max_tx_vq,
891             .iov_len = offsetof(struct virtio_net_rss_config, hash_key_data) -
892                        offsetof(struct virtio_net_rss_config, max_tx_vq),
893         }, {
894             .iov_base = (void *)n->rss_data.key,
895             .iov_len = sizeof(n->rss_data.key),
896         }
897     };
898 
899     r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
900                                 VIRTIO_NET_CTRL_MQ,
901                                 do_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG :
902                                 VIRTIO_NET_CTRL_MQ_HASH_CONFIG,
903                                 data, ARRAY_SIZE(data));
904     if (unlikely(r < 0)) {
905         return r;
906     }
907 
908     return 0;
909 }
910 
911 static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
912                                   const VirtIONet *n,
913                                   struct iovec *out_cursor,
914                                   struct iovec *in_cursor)
915 {
916     struct virtio_net_ctrl_mq mq;
917     ssize_t r;
918 
919     if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
920         return 0;
921     }
922 
923     mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
924     const struct iovec data = {
925         .iov_base = &mq,
926         .iov_len = sizeof(mq),
927     };
928     r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
929                                 VIRTIO_NET_CTRL_MQ,
930                                 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
931                                 &data, 1);
932     if (unlikely(r < 0)) {
933         return r;
934     }
935 
936     if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_RSS)) {
937         /* load the receive-side scaling state */
938         r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, true);
939         if (unlikely(r < 0)) {
940             return r;
941         }
942     } else if (virtio_vdev_has_feature(&n->parent_obj,
943                                        VIRTIO_NET_F_HASH_REPORT)) {
944         /* load the hash calculation state */
945         r = vhost_vdpa_net_load_rss(s, n, out_cursor, in_cursor, false);
946         if (unlikely(r < 0)) {
947             return r;
948         }
949     }
950 
951     return 0;
952 }
953 
954 static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
955                                         const VirtIONet *n,
956                                         struct iovec *out_cursor,
957                                         struct iovec *in_cursor)
958 {
959     uint64_t offloads;
960     ssize_t r;
961 
962     if (!virtio_vdev_has_feature(&n->parent_obj,
963                                  VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
964         return 0;
965     }
966 
967     if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
968         /*
969          * According to VirtIO standard, "Upon feature negotiation
970          * corresponding offload gets enabled to preserve
971          * backward compatibility.".
972          *
973          * Therefore, there is no need to send this CVQ command if the
974          * driver also enables all supported offloads, which aligns with
975          * the device's defaults.
976          *
977          * Note that the device's defaults can mismatch the driver's
978          * configuration only at live migration.
979          */
980         return 0;
981     }
982 
983     offloads = cpu_to_le64(n->curr_guest_offloads);
984     const struct iovec data = {
985         .iov_base = &offloads,
986         .iov_len = sizeof(offloads),
987     };
988     r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
989                                 VIRTIO_NET_CTRL_GUEST_OFFLOADS,
990                                 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
991                                 &data, 1);
992     if (unlikely(r < 0)) {
993         return r;
994     }
995 
996     return 0;
997 }
998 
999 static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s,
1000                                        struct iovec *out_cursor,
1001                                        struct iovec *in_cursor,
1002                                        uint8_t cmd,
1003                                        uint8_t on)
1004 {
1005     const struct iovec data = {
1006         .iov_base = &on,
1007         .iov_len = sizeof(on),
1008     };
1009     ssize_t r;
1010 
1011     r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
1012                                 VIRTIO_NET_CTRL_RX, cmd, &data, 1);
1013     if (unlikely(r < 0)) {
1014         return r;
1015     }
1016 
1017     return 0;
1018 }
1019 
1020 static int vhost_vdpa_net_load_rx(VhostVDPAState *s,
1021                                   const VirtIONet *n,
1022                                   struct iovec *out_cursor,
1023                                   struct iovec *in_cursor)
1024 {
1025     ssize_t r;
1026 
1027     if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) {
1028         return 0;
1029     }
1030 
1031     /*
1032      * According to virtio_net_reset(), device turns promiscuous mode
1033      * on by default.
1034      *
1035      * Additionally, according to VirtIO standard, "Since there are
1036      * no guarantees, it can use a hash filter or silently switch to
1037      * allmulti or promiscuous mode if it is given too many addresses.".
1038      * QEMU marks `n->mac_table.uni_overflow` if guest sets too many
1039      * non-multicast MAC addresses, indicating that promiscuous mode
1040      * should be enabled.
1041      *
1042      * Therefore, QEMU should only send this CVQ command if the
1043      * `n->mac_table.uni_overflow` is not marked and `n->promisc` is off,
1044      * which sets promiscuous mode off, different from the device's defaults.
1045      *
1046      * Note that the device's defaults can mismatch the driver's
1047      * configuration only at live migration.
1048      */
1049     if (!n->mac_table.uni_overflow && !n->promisc) {
1050         r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1051                                         VIRTIO_NET_CTRL_RX_PROMISC, 0);
1052         if (unlikely(r < 0)) {
1053             return r;
1054         }
1055     }
1056 
1057     /*
1058      * According to virtio_net_reset(), device turns all-multicast mode
1059      * off by default.
1060      *
1061      * According to VirtIO standard, "Since there are no guarantees,
1062      * it can use a hash filter or silently switch to allmulti or
1063      * promiscuous mode if it is given too many addresses.". QEMU marks
1064      * `n->mac_table.multi_overflow` if guest sets too many
1065      * non-multicast MAC addresses.
1066      *
1067      * Therefore, QEMU should only send this CVQ command if the
1068      * `n->mac_table.multi_overflow` is marked or `n->allmulti` is on,
1069      * which sets all-multicast mode on, different from the device's defaults.
1070      *
1071      * Note that the device's defaults can mismatch the driver's
1072      * configuration only at live migration.
1073      */
1074     if (n->mac_table.multi_overflow || n->allmulti) {
1075         r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1076                                         VIRTIO_NET_CTRL_RX_ALLMULTI, 1);
1077         if (unlikely(r < 0)) {
1078             return r;
1079         }
1080     }
1081 
1082     if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX_EXTRA)) {
1083         return 0;
1084     }
1085 
1086     /*
1087      * According to virtio_net_reset(), device turns all-unicast mode
1088      * off by default.
1089      *
1090      * Therefore, QEMU should only send this CVQ command if the driver
1091      * sets all-unicast mode on, different from the device's defaults.
1092      *
1093      * Note that the device's defaults can mismatch the driver's
1094      * configuration only at live migration.
1095      */
1096     if (n->alluni) {
1097         r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1098                                         VIRTIO_NET_CTRL_RX_ALLUNI, 1);
1099         if (r < 0) {
1100             return r;
1101         }
1102     }
1103 
1104     /*
1105      * According to virtio_net_reset(), device turns non-multicast mode
1106      * off by default.
1107      *
1108      * Therefore, QEMU should only send this CVQ command if the driver
1109      * sets non-multicast mode on, different from the device's defaults.
1110      *
1111      * Note that the device's defaults can mismatch the driver's
1112      * configuration only at live migration.
1113      */
1114     if (n->nomulti) {
1115         r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1116                                         VIRTIO_NET_CTRL_RX_NOMULTI, 1);
1117         if (r < 0) {
1118             return r;
1119         }
1120     }
1121 
1122     /*
1123      * According to virtio_net_reset(), device turns non-unicast mode
1124      * off by default.
1125      *
1126      * Therefore, QEMU should only send this CVQ command if the driver
1127      * sets non-unicast mode on, different from the device's defaults.
1128      *
1129      * Note that the device's defaults can mismatch the driver's
1130      * configuration only at live migration.
1131      */
1132     if (n->nouni) {
1133         r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1134                                         VIRTIO_NET_CTRL_RX_NOUNI, 1);
1135         if (r < 0) {
1136             return r;
1137         }
1138     }
1139 
1140     /*
1141      * According to virtio_net_reset(), device turns non-broadcast mode
1142      * off by default.
1143      *
1144      * Therefore, QEMU should only send this CVQ command if the driver
1145      * sets non-broadcast mode on, different from the device's defaults.
1146      *
1147      * Note that the device's defaults can mismatch the driver's
1148      * configuration only at live migration.
1149      */
1150     if (n->nobcast) {
1151         r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor,
1152                                         VIRTIO_NET_CTRL_RX_NOBCAST, 1);
1153         if (r < 0) {
1154             return r;
1155         }
1156     }
1157 
1158     return 0;
1159 }
1160 
1161 static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s,
1162                                            const VirtIONet *n,
1163                                            struct iovec *out_cursor,
1164                                            struct iovec *in_cursor,
1165                                            uint16_t vid)
1166 {
1167     const struct iovec data = {
1168         .iov_base = &vid,
1169         .iov_len = sizeof(vid),
1170     };
1171     ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor,
1172                                         VIRTIO_NET_CTRL_VLAN,
1173                                         VIRTIO_NET_CTRL_VLAN_ADD,
1174                                         &data, 1);
1175     if (unlikely(r < 0)) {
1176         return r;
1177     }
1178 
1179     return 0;
1180 }
1181 
1182 static int vhost_vdpa_net_load_vlan(VhostVDPAState *s,
1183                                     const VirtIONet *n,
1184                                     struct iovec *out_cursor,
1185                                     struct iovec *in_cursor)
1186 {
1187     int r;
1188 
1189     if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_VLAN)) {
1190         return 0;
1191     }
1192 
1193     for (int i = 0; i < MAX_VLAN >> 5; i++) {
1194         for (int j = 0; n->vlans[i] && j <= 0x1f; j++) {
1195             if (n->vlans[i] & (1U << j)) {
1196                 r = vhost_vdpa_net_load_single_vlan(s, n, out_cursor,
1197                                                     in_cursor, (i << 5) + j);
1198                 if (unlikely(r != 0)) {
1199                     return r;
1200                 }
1201             }
1202         }
1203     }
1204 
1205     return 0;
1206 }
1207 
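/*
 * Restore the device state (MAC, filter table, MQ/RSS, offloads, rx mode and
 * VLANs) through CVQ commands when SVQ is enabled, then mark the data vrings
 * as ready.
 */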
1208 static int vhost_vdpa_net_cvq_load(NetClientState *nc)
1209 {
1210     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
1211     struct vhost_vdpa *v = &s->vhost_vdpa;
1212     const VirtIONet *n;
1213     int r;
1214     struct iovec out_cursor, in_cursor;
1215 
1216     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1217 
1218     vhost_vdpa_set_vring_ready(v, v->dev->vq_index);
1219 
1220     if (v->shadow_vqs_enabled) {
1221         n = VIRTIO_NET(v->dev->vdev);
1222         vhost_vdpa_net_load_cursor_reset(s, &out_cursor, &in_cursor);
1223         r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
1224         if (unlikely(r < 0)) {
1225             return r;
1226         }
1227         r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
1228         if (unlikely(r)) {
1229             return r;
1230         }
1231         r = vhost_vdpa_net_load_offloads(s, n, &out_cursor, &in_cursor);
1232         if (unlikely(r)) {
1233             return r;
1234         }
1235         r = vhost_vdpa_net_load_rx(s, n, &out_cursor, &in_cursor);
1236         if (unlikely(r)) {
1237             return r;
1238         }
1239         r = vhost_vdpa_net_load_vlan(s, n, &out_cursor, &in_cursor);
1240         if (unlikely(r)) {
1241             return r;
1242         }
1243 
1244         /*
1245          * We need to poll and check all of the device's pending used buffers.
1246          *
1247          * We can poll here since we've had BQL from the time
1248          * we sent the descriptor.
1249          */
1250         r = vhost_vdpa_net_svq_flush(s, in_cursor.iov_base - (void *)s->status);
1251         if (unlikely(r)) {
1252             return r;
1253         }
1254     }
1255 
1256     for (int i = 0; i < v->dev->vq_index; ++i) {
1257         vhost_vdpa_set_vring_ready(v, i);
1258     }
1259 
1260     return 0;
1261 }
1262 
1263 static NetClientInfo net_vhost_vdpa_cvq_info = {
1264     .type = NET_CLIENT_DRIVER_VHOST_VDPA,
1265     .size = sizeof(VhostVDPAState),
1266     .receive = vhost_vdpa_receive,
1267     .start = vhost_vdpa_net_cvq_start,
1268     .load = vhost_vdpa_net_cvq_load,
1269     .stop = vhost_vdpa_net_cvq_stop,
1270     .cleanup = vhost_vdpa_cleanup,
1271     .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
1272     .has_ufo = vhost_vdpa_has_ufo,
1273     .check_peer_type = vhost_vdpa_check_peer_type,
1274     .set_steering_ebpf = vhost_vdpa_set_steering_ebpf,
1275 };
1276 
1277 /*
1278  * Forward the excessive VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command to
1279  * vdpa device.
1280  *
1281  * Considering that QEMU cannot send the entire filter table to the
1282  * vdpa device, it should send the VIRTIO_NET_CTRL_RX_PROMISC CVQ
1283  * command to enable promiscuous mode to receive all packets,
1284  * according to VirtIO standard, "Since there are no guarantees,
1285  * it can use a hash filter or silently switch to allmulti or
1286  * promiscuous mode if it is given too many addresses.".
1287  *
1288  * Since QEMU ignores MAC addresses beyond `MAC_TABLE_ENTRIES` and
1289  * marks `n->mac_table.x_overflow` accordingly, it should have
1290  * the same effect on the device model to receive
1291  * (`MAC_TABLE_ENTRIES` + 1) or more non-multicast MAC addresses.
1292  * The same applies to multicast MAC addresses.
1293  *
1294  * Therefore, QEMU can provide the device model with a fake
1295  * VIRTIO_NET_CTRL_MAC_TABLE_SET command with (`MAC_TABLE_ENTRIES` + 1)
1296  * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast
1297  * MAC addresses. This ensures that the device model marks
1298  * `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`,
1299  * allowing all packets to be received, which aligns with the
1300  * state of the vdpa device.
1301  */
1302 static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s,
1303                                                        VirtQueueElement *elem,
1304                                                        struct iovec *out,
1305                                                        const struct iovec *in)
1306 {
1307     struct virtio_net_ctrl_mac mac_data, *mac_ptr;
1308     struct virtio_net_ctrl_hdr *hdr_ptr;
1309     uint32_t cursor;
1310     ssize_t r;
1311     uint8_t on = 1;
1312 
1313     /* parse the non-multicast MAC address entries from CVQ command */
1314     cursor = sizeof(*hdr_ptr);
1315     r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
1316                    &mac_data, sizeof(mac_data));
1317     if (unlikely(r != sizeof(mac_data))) {
1318         /*
1319          * If the CVQ command is invalid, we should simulate the vdpa device
1320          * rejecting the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
1321          */
1322         *s->status = VIRTIO_NET_ERR;
1323         return sizeof(*s->status);
1324     }
1325     cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
1326 
1327     /* parse the multicast MAC address entries from CVQ command */
1328     r = iov_to_buf(elem->out_sg, elem->out_num, cursor,
1329                    &mac_data, sizeof(mac_data));
1330     if (r != sizeof(mac_data)) {
1331         /*
1332          * If the CVQ command is invalid, we should simulate the vdpa device
1333          * rejecting the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
1334          */
1335         *s->status = VIRTIO_NET_ERR;
1336         return sizeof(*s->status);
1337     }
1338     cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN;
1339 
1340     /* validate the CVQ command */
1341     if (iov_size(elem->out_sg, elem->out_num) != cursor) {
1342         /*
1343          * If the CVQ command is invalid, we should simulate the vdpa device
1344          * rejecting the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
1345          */
1346         *s->status = VIRTIO_NET_ERR;
1347         return sizeof(*s->status);
1348     }
1349 
1350     /*
1351      * According to VirtIO standard, "Since there are no guarantees,
1352      * it can use a hash filter or silently switch to allmulti or
1353      * promiscuous mode if it is given too many addresses.".
1354      *
1355      * Therefore, considering that QEMU is unable to send the entire
1356      * filter table to the vdpa device, it should send the
1357      * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode
1358      */
1359     hdr_ptr = out->iov_base;
1360     out->iov_len = sizeof(*hdr_ptr) + sizeof(on);
1361 
1362     hdr_ptr->class = VIRTIO_NET_CTRL_RX;
1363     hdr_ptr->cmd = VIRTIO_NET_CTRL_RX_PROMISC;
1364     iov_from_buf(out, 1, sizeof(*hdr_ptr), &on, sizeof(on));
1365     r = vhost_vdpa_net_cvq_add(s, out, 1, in, 1);
1366     if (unlikely(r < 0)) {
1367         return r;
1368     }
1369 
1370     /*
1371      * We can poll here since we've had BQL from the time
1372      * we sent the descriptor.
1373      */
1374     r = vhost_vdpa_net_svq_poll(s, 1);
1375     if (unlikely(r < sizeof(*s->status))) {
1376         return r;
1377     }
1378     if (*s->status != VIRTIO_NET_OK) {
1379         return sizeof(*s->status);
1380     }
1381 
1382     /*
1383      * QEMU should also send a fake VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ
1384      * command to the device model, including (`MAC_TABLE_ENTRIES` + 1)
1385      * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1)
1386      * multicast MAC addresses.
1387      *
1388      * By doing so, the device model can mark `n->mac_table.uni_overflow`
1389      * and `n->mac_table.multi_overflow`, enabling all packets to be
1390      * received, which aligns with the state of the vdpa device.
1391      */
1392     cursor = 0;
1393     uint32_t fake_uni_entries = MAC_TABLE_ENTRIES + 1,
1394              fake_mul_entries = MAC_TABLE_ENTRIES + 1,
1395              fake_cvq_size = sizeof(struct virtio_net_ctrl_hdr) +
1396                              sizeof(mac_data) + fake_uni_entries * ETH_ALEN +
1397                              sizeof(mac_data) + fake_mul_entries * ETH_ALEN;
1398 
1399     assert(fake_cvq_size < vhost_vdpa_net_cvq_cmd_page_len());
1400     out->iov_len = fake_cvq_size;
1401 
1402     /* pack the header for fake CVQ command */
1403     hdr_ptr = out->iov_base + cursor;
1404     hdr_ptr->class = VIRTIO_NET_CTRL_MAC;
1405     hdr_ptr->cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
1406     cursor += sizeof(*hdr_ptr);
1407 
1408     /*
1409      * Pack the non-multicast MAC addresses part for fake CVQ command.
1410      *
1411      * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
1412      * addresses provided in the CVQ command. Therefore, only the entries
1413      * field needs to be prepared in the CVQ command.
1414      */
1415     mac_ptr = out->iov_base + cursor;
1416     mac_ptr->entries = cpu_to_le32(fake_uni_entries);
1417     cursor += sizeof(*mac_ptr) + fake_uni_entries * ETH_ALEN;
1418 
1419     /*
1420      * Pack the multicast MAC addresses part for fake CVQ command.
1421      *
1422      * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC
1423      * addresses provided in the CVQ command. Therefore, only the entries
1424      * field needs to be prepared in the CVQ command.
1425      */
1426     mac_ptr = out->iov_base + cursor;
1427     mac_ptr->entries = cpu_to_le32(fake_mul_entries);
1428 
1429     /*
1430      * Simulate QEMU polling a vdpa device used buffer
1431      * for the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command.
1432      */
1433     return sizeof(*s->status);
1434 }
1435 
1436 /**
1437  * Validate and copy control virtqueue commands.
1438  *
1439  * Following QEMU guidelines, we offer a copy of the buffers to the device to
1440  * prevent TOCTOU bugs.
1441  */
1442 static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
1443                                             VirtQueueElement *elem,
1444                                             void *opaque)
1445 {
1446     VhostVDPAState *s = opaque;
1447     size_t in_len;
1448     const struct virtio_net_ctrl_hdr *ctrl;
1449     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1450     /* Out buffer sent to both the vdpa device and the device model */
1451     struct iovec out = {
1452         .iov_base = s->cvq_cmd_out_buffer,
1453     };
1454     /* in buffer used for device model */
1455     const struct iovec model_in = {
1456         .iov_base = &status,
1457         .iov_len = sizeof(status),
1458     };
1459     /* in buffer used for vdpa device */
1460     const struct iovec vdpa_in = {
1461         .iov_base = s->status,
1462         .iov_len = sizeof(*s->status),
1463     };
1464     ssize_t dev_written = -EINVAL;
1465 
1466     out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
1467                              s->cvq_cmd_out_buffer,
1468                              vhost_vdpa_net_cvq_cmd_page_len());
1469 
1470     ctrl = s->cvq_cmd_out_buffer;
1471     if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) {
1472         /*
1473          * Guest announce capability is emulated by qemu, so don't forward to
1474          * the device.
1475          */
1476         dev_written = sizeof(status);
1477         *s->status = VIRTIO_NET_OK;
1478     } else if (unlikely(ctrl->class == VIRTIO_NET_CTRL_MAC &&
1479                         ctrl->cmd == VIRTIO_NET_CTRL_MAC_TABLE_SET &&
1480                         iov_size(elem->out_sg, elem->out_num) > out.iov_len)) {
1481         /*
1482          * Due to the size limitation of the out buffer sent to the vdpa device,
1483          * which is determined by vhost_vdpa_net_cvq_cmd_page_len(), excessive
1484          * MAC addresses set by the driver for the filter table can cause
1485          * truncation of the CVQ command in QEMU. As a result, the vdpa device
1486          * rejects the flawed CVQ command.
1487          *
1488          * Therefore, QEMU must handle this situation instead of sending
1489          * the CVQ command directly.
1490          */
1491         dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem,
1492                                                             &out, &vdpa_in);
1493         if (unlikely(dev_written < 0)) {
1494             goto out;
1495         }
1496     } else {
1497         ssize_t r;
1498         r = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1);
1499         if (unlikely(r < 0)) {
1500             dev_written = r;
1501             goto out;
1502         }
1503 
1504         /*
1505          * We can poll here since we've had BQL from the time
1506          * we sent the descriptor.
1507          */
1508         dev_written = vhost_vdpa_net_svq_poll(s, 1);
1509     }
1510 
1511     if (unlikely(dev_written < sizeof(status))) {
1512         error_report("Insufficient written data (%zu)", dev_written);
1513         goto out;
1514     }
1515 
1516     if (*s->status != VIRTIO_NET_OK) {
1517         goto out;
1518     }
1519 
1520     status = VIRTIO_NET_ERR;
1521     virtio_net_handle_ctrl_iov(svq->vdev, &model_in, 1, &out, 1);
1522     if (status != VIRTIO_NET_OK) {
1523         error_report("Bad CVQ processing in model");
1524     }
1525 
1526 out:
1527     in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
1528                           sizeof(status));
1529     if (unlikely(in_len < sizeof(status))) {
1530         error_report("Bad device CVQ written length");
1531     }
1532     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
1533     /*
1534      * `elem` belongs to vhost_vdpa_net_handle_ctrl_avail() only when
1535      * the function successfully forwards the CVQ command, indicated
1536      * by a non-negative value of `dev_written`. Otherwise, it still
1537      * belongs to SVQ.
1538      * This function should free `elem` only when it owns it.
1539      */
1540     if (dev_written >= 0) {
1541         g_free(elem);
1542     }
1543     return dev_written < 0 ? dev_written : 0;
1544 }
1545 
1546 static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
1547     .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
1548 };
1549 
1550 /**
1551  * Probe if CVQ is isolated
1552  *
1553  * @device_fd         The vdpa device fd.
1554  * @features          Features offered by the device.
1555  * @cvq_index         Index of the device's control virtqueue.
      * @errp              Error object pointer, set on failure.
1556  *
1557  * Returns <0 on failure, 0 if the CVQ is not isolated and 1 if it is.
1558  */
1559 static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
1560                                           int cvq_index, Error **errp)
1561 {
1562     uint64_t backend_features;
1563     int64_t cvq_group;
1564     uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
1565                      VIRTIO_CONFIG_S_DRIVER;
1566     int r;
1567 
1568     ERRP_GUARD();
1569 
1570     r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
1571     if (unlikely(r < 0)) {
1572         error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
1573         return r;
1574     }
1575 
1576     if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
1577         return 0;
1578     }
1579 
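         /*
          * Bring the device to FEATURES_OK before querying the vring groups:
          * the virtqueue-to-group mapping may depend on the negotiated
          * features.
          */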
1580     r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1581     if (unlikely(r)) {
1582         error_setg_errno(errp, errno, "Cannot set device status");
1583         goto out;
1584     }
1585 
1586     r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
1587     if (unlikely(r)) {
1588         error_setg_errno(errp, errno, "Cannot set features");
1589         goto out;
1590     }
1591 
1592     status |= VIRTIO_CONFIG_S_FEATURES_OK;
1593     r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1594     if (unlikely(r)) {
1595         error_setg_errno(errp, errno, "Cannot set device status");
1596         goto out;
1597     }
1598 
1599     cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
1600     if (unlikely(cvq_group < 0)) {
1601         if (cvq_group != -ENOTSUP) {
1602             r = cvq_group;
1603             goto out;
1604         }
1605 
1606         /*
1607          * The kernel reports VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
1608          * supports ASID even if the parent driver does not.  The CVQ cannot be
1609          * isolated in this case.
1610          */
1611         error_free(*errp);
1612         *errp = NULL;
1613         r = 0;
1614         goto out;
1615     }
1616 
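         /*
          * The CVQ is isolated only if no data virtqueue shares its group, so
          * compare the CVQ group against the group of every data virtqueue.
          */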
1617     for (int i = 0; i < cvq_index; ++i) {
1618         int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
1619         if (unlikely(group < 0)) {
1620             r = group;
1621             goto out;
1622         }
1623 
1624         if (group == (int64_t)cvq_group) {
1625             r = 0;
1626             goto out;
1627         }
1628     }
1629 
1630     r = 1;
1631 
1632 out:
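         /* Reset the device regardless of the probe result */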
1633     status = 0;
1634     ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
1635     return r;
1636 }
1637 
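     /**
      * Create a net client for one vhost-vdpa queue pair or for the control vq
      *
      * The client for queue pair 0 allocates the VhostVDPAShared state; later
      * clients reuse it through @shared.  The control client additionally
      * probes CVQ isolation and maps the shadow buffers used to bounce CVQ
      * commands to the device.
      *
      * Returns the new net client, or NULL on failure with @errp set.
      */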
1638 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
1639                                        const char *device,
1640                                        const char *name,
1641                                        int vdpa_device_fd,
1642                                        int queue_pair_index,
1643                                        int nvqs,
1644                                        bool is_datapath,
1645                                        bool svq,
1646                                        struct vhost_vdpa_iova_range iova_range,
1647                                        uint64_t features,
1648                                        VhostVDPAShared *shared,
1649                                        Error **errp)
1650 {
1651     NetClientState *nc = NULL;
1652     VhostVDPAState *s;
1653     int ret = 0;
1654     int cvq_isolated = 0;
1655     assert(name);
1656 
1657     if (is_datapath) {
1658         nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
1659                                  name);
1660     } else {
1661         cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
1662                                                       queue_pair_index * 2,
1663                                                       errp);
1664         if (unlikely(cvq_isolated < 0)) {
1665             return NULL;
1666         }
1667 
1668         nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
1669                                          device, name);
1670     }
1671     qemu_set_info_str(nc, TYPE_VHOST_VDPA);
1672     s = DO_UPCAST(VhostVDPAState, nc, nc);
1673 
1674     s->vhost_vdpa.index = queue_pair_index;
1675     s->always_svq = svq;
1676     s->migration_state.notify = NULL;
1677     s->vhost_vdpa.shadow_vqs_enabled = svq;
1678     if (queue_pair_index == 0) {
1679         vhost_vdpa_net_valid_svq_features(features,
1680                                           &s->vhost_vdpa.migration_blocker);
1681         s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1);
1682         s->vhost_vdpa.shared->device_fd = vdpa_device_fd;
1683         s->vhost_vdpa.shared->iova_range = iova_range;
1684         s->vhost_vdpa.shared->shadow_data = svq;
1685     } else if (!is_datapath) {
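             /*
              * Map the bounce buffers used by the shadow CVQ: one for the
              * command forwarded to the device and one for the returned ack
              * status.
              */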
1686         s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
1687                                      PROT_READ | PROT_WRITE,
1688                                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1689         s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
1690                          PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
1691                          -1, 0);
1692 
1693         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
1694         s->vhost_vdpa.shadow_vq_ops_opaque = s;
1695         s->cvq_isolated = cvq_isolated;
1696     }
1697     if (queue_pair_index != 0) {
1698         s->vhost_vdpa.shared = shared;
1699     }
1700 
1701     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
1702     if (ret) {
1703         qemu_del_net_client(nc);
1704         return NULL;
1705     }
1706 
1707     return nc;
1708 }
1709 
1710 static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
1711 {
1712     int ret = ioctl(fd, VHOST_GET_FEATURES, features);
1713     if (unlikely(ret < 0)) {
1714         error_setg_errno(errp, errno,
1715                          "Failed to query features from vhost-vDPA device");
1716     }
1717     return ret;
1718 }
1719 
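     /**
      * Query the number of queue pairs and the presence of a control vq
      *
      * When VIRTIO_NET_F_MQ is offered, max_virtqueue_pairs is read from the
      * device config space; otherwise a single queue pair is assumed.  @has_cvq
      * reports whether VIRTIO_NET_F_CTRL_VQ is offered.
      */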
1720 static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
1721                                           int *has_cvq, Error **errp)
1722 {
1723     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
1724     g_autofree struct vhost_vdpa_config *config = NULL;
1725     __virtio16 *max_queue_pairs;
1726     int ret;
1727 
1728     if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
1729         *has_cvq = 1;
1730     } else {
1731         *has_cvq = 0;
1732     }
1733 
1734     if (features & (1 << VIRTIO_NET_F_MQ)) {
1735         config = g_malloc0(config_size + sizeof(*max_queue_pairs));
1736         config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
1737         config->len = sizeof(*max_queue_pairs);
1738 
1739         ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
1740         if (ret) {
1741             error_setg(errp, "Failed to get config from vhost-vDPA device");
1742             return ret;
1743         }
1744 
1745         max_queue_pairs = (__virtio16 *)&config->buf;
1746 
1747         return lduw_le_p(max_queue_pairs);
1748     }
1749 
1750     return 1;
1751 }
1752 
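     /*
      * Entry point for "-netdev vhost-vdpa": open the vdpa device given by
      * vhostdev= (or take the fd passed via vhostfd=), query its features and
      * IOVA range, and create one net client per data queue pair plus one for
      * the control virtqueue, if present.
      */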
1753 int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
1754                         NetClientState *peer, Error **errp)
1755 {
1756     const NetdevVhostVDPAOptions *opts;
1757     uint64_t features;
1758     int vdpa_device_fd;
1759     g_autofree NetClientState **ncs = NULL;
1760     struct vhost_vdpa_iova_range iova_range;
1761     NetClientState *nc;
1762     int queue_pairs, r, i = 0, has_cvq = 0;
1763 
1764     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1765     opts = &netdev->u.vhost_vdpa;
1766     if (!opts->vhostdev && !opts->vhostfd) {
1767         error_setg(errp,
1768                    "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
1769         return -1;
1770     }
1771 
1772     if (opts->vhostdev && opts->vhostfd) {
1773         error_setg(errp,
1774                    "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
1775         return -1;
1776     }
1777 
1778     if (opts->vhostdev) {
1779         vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
1780         if (vdpa_device_fd == -1) {
1781             return -errno;
1782         }
1783     } else {
1784         /* has_vhostfd */
1785         vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
1786         if (vdpa_device_fd == -1) {
1787             error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
1788             return -1;
1789         }
1790     }
1791 
1792     r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
1793     if (unlikely(r < 0)) {
1794         goto err;
1795     }
1796 
1797     queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
1798                                                  &has_cvq, errp);
1799     if (queue_pairs < 0) {
1800         qemu_close(vdpa_device_fd);
1801         return queue_pairs;
1802     }
1803 
1804     r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
1805     if (unlikely(r < 0)) {
1806         error_setg(errp, "vhost-vdpa: get iova range failed: %s",
1807                    strerror(-r));
1808         goto err;
1809     }
1810 
1811     if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
1812         goto err;
1813     }
1814 
1815     ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
1816 
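         /*
          * Create one net client per data queue pair; all of them share the
          * VhostVDPAShared state allocated by the first client.
          */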
1817     for (i = 0; i < queue_pairs; i++) {
1818         VhostVDPAShared *shared = NULL;
1819 
1820         if (i) {
1821             shared = DO_UPCAST(VhostVDPAState, nc, ncs[0])->vhost_vdpa.shared;
1822         }
1823         ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
1824                                      vdpa_device_fd, i, 2, true, opts->x_svq,
1825                                      iova_range, features, shared, errp);
1826         if (!ncs[i]) {
1827             goto err;
             }
1828     }
1829 
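         /* The control virtqueue, if present, gets its own client with one vq */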
1830     if (has_cvq) {
1831         VhostVDPAState *s0 = DO_UPCAST(VhostVDPAState, nc, ncs[0]);
1832         VhostVDPAShared *shared = s0->vhost_vdpa.shared;
1833 
1834         nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
1835                                  vdpa_device_fd, i, 1, false,
1836                                  opts->x_svq, iova_range, features, shared,
1837                                  errp);
1838         if (!nc) {
1839             goto err;
             }
1840     }
1841 
1842     return 0;
1843 
1844 err:
1845     if (i) {
1846         for (i--; i >= 0; i--) {
1847             qemu_del_net_client(ncs[i]);
1848         }
1849     }
1850 
1851     qemu_close(vdpa_device_fd);
1852 
1853     return -1;
1854 }
1855