xref: /qemu/hw/net/vhost_net.c (revision 66997c42)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 #include "net/vhost-vdpa.h"
21 
22 #include "standard-headers/linux/vhost_types.h"
23 #include "hw/virtio/virtio-net.h"
24 #include "net/vhost_net.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "qemu/main-loop.h"
28 
29 #include <sys/socket.h>
30 #include <net/if.h>
31 #include <netinet/in.h>
32 
33 
34 #include "standard-headers/linux/virtio_ring.h"
35 #include "hw/virtio/vhost.h"
36 #include "hw/virtio/virtio-bus.h"
37 #include "linux-headers/linux/vhost.h"
38 
39 
40 /* Features supported by host kernel. */
41 static const int kernel_feature_bits[] = {
42     VIRTIO_F_NOTIFY_ON_EMPTY,
43     VIRTIO_RING_F_INDIRECT_DESC,
44     VIRTIO_RING_F_EVENT_IDX,
45     VIRTIO_NET_F_MRG_RXBUF,
46     VIRTIO_F_VERSION_1,
47     VIRTIO_NET_F_MTU,
48     VIRTIO_F_IOMMU_PLATFORM,
49     VIRTIO_F_RING_PACKED,
50     VIRTIO_F_RING_RESET,
51     VIRTIO_NET_F_HASH_REPORT,
52     VHOST_INVALID_FEATURE_BIT
53 };
54 
55 /* Features supported by others. */
56 static const int user_feature_bits[] = {
57     VIRTIO_F_NOTIFY_ON_EMPTY,
58     VIRTIO_RING_F_INDIRECT_DESC,
59     VIRTIO_RING_F_EVENT_IDX,
60 
61     VIRTIO_F_ANY_LAYOUT,
62     VIRTIO_F_VERSION_1,
63     VIRTIO_NET_F_CSUM,
64     VIRTIO_NET_F_GUEST_CSUM,
65     VIRTIO_NET_F_GSO,
66     VIRTIO_NET_F_GUEST_TSO4,
67     VIRTIO_NET_F_GUEST_TSO6,
68     VIRTIO_NET_F_GUEST_ECN,
69     VIRTIO_NET_F_GUEST_UFO,
70     VIRTIO_NET_F_HOST_TSO4,
71     VIRTIO_NET_F_HOST_TSO6,
72     VIRTIO_NET_F_HOST_ECN,
73     VIRTIO_NET_F_HOST_UFO,
74     VIRTIO_NET_F_MRG_RXBUF,
75     VIRTIO_NET_F_MTU,
76     VIRTIO_F_IOMMU_PLATFORM,
77     VIRTIO_F_RING_PACKED,
78     VIRTIO_F_RING_RESET,
79     VIRTIO_NET_F_RSS,
80     VIRTIO_NET_F_HASH_REPORT,
81 
82     /* This bit implies RARP isn't sent by QEMU out of band */
83     VIRTIO_NET_F_GUEST_ANNOUNCE,
84 
85     VIRTIO_NET_F_MQ,
86 
87     VHOST_INVALID_FEATURE_BIT
88 };
89 
90 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
91 {
92     const int *feature_bits = 0;
93 
94     switch (net->nc->info->type) {
95     case NET_CLIENT_DRIVER_TAP:
96         feature_bits = kernel_feature_bits;
97         break;
98     case NET_CLIENT_DRIVER_VHOST_USER:
99         feature_bits = user_feature_bits;
100         break;
101 #ifdef CONFIG_VHOST_NET_VDPA
102     case NET_CLIENT_DRIVER_VHOST_VDPA:
103         feature_bits = vdpa_feature_bits;
104         break;
105 #endif
106     default:
107         error_report("Feature bits not defined for this type: %d",
108                 net->nc->info->type);
109         break;
110     }
111 
112     return feature_bits;
113 }
114 
115 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
116 {
117     return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
118             features);
119 }
120 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
121                          uint32_t config_len)
122 {
123     return vhost_dev_get_config(&net->dev, config, config_len, NULL);
124 }
125 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
126                          uint32_t offset, uint32_t size, uint32_t flags)
127 {
128     return vhost_dev_set_config(&net->dev, data, offset, size, flags);
129 }
130 
131 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
132 {
133     net->dev.acked_features = net->dev.backend_features;
134     vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
135 }
136 
137 uint64_t vhost_net_get_max_queues(VHostNetState *net)
138 {
139     return net->dev.max_queues;
140 }
141 
142 uint64_t vhost_net_get_acked_features(VHostNetState *net)
143 {
144     return net->dev.acked_features;
145 }
146 
147 static int vhost_net_get_fd(NetClientState *backend)
148 {
149     switch (backend->info->type) {
150     case NET_CLIENT_DRIVER_TAP:
151         return tap_get_fd(backend);
152     default:
153         fprintf(stderr, "vhost-net requires tap backend\n");
154         return -ENOSYS;
155     }
156 }
157 
158 struct vhost_net *vhost_net_init(VhostNetOptions *options)
159 {
160     int r;
161     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
162     struct vhost_net *net = g_new0(struct vhost_net, 1);
163     uint64_t features = 0;
164     Error *local_err = NULL;
165 
166     if (!options->net_backend) {
167         fprintf(stderr, "vhost-net requires net backend to be setup\n");
168         goto fail;
169     }
170     net->nc = options->net_backend;
171     net->dev.nvqs = options->nvqs;
172 
173     net->dev.max_queues = 1;
174     net->dev.vqs = net->vqs;
175 
176     if (backend_kernel) {
177         r = vhost_net_get_fd(options->net_backend);
178         if (r < 0) {
179             goto fail;
180         }
181         net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
182             ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
183         net->backend = r;
184         net->dev.protocol_features = 0;
185     } else {
186         net->dev.backend_features = 0;
187         net->dev.protocol_features = 0;
188         net->backend = -1;
189 
190         /* vhost-user needs vq_index to initiate a specific queue pair */
191         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
192     }
193 
194     r = vhost_dev_init(&net->dev, options->opaque,
195                        options->backend_type, options->busyloop_timeout,
196                        &local_err);
197     if (r < 0) {
198         error_report_err(local_err);
199         goto fail;
200     }
201     if (backend_kernel) {
202         if (!qemu_has_vnet_hdr_len(options->net_backend,
203                                sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
204             net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
205         }
206         if (~net->dev.features & net->dev.backend_features) {
207             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
208                    " for backend\n",
209                    (uint64_t)(~net->dev.features & net->dev.backend_features));
210             goto fail;
211         }
212     }
213 
214     /* Set sane init value. Override when guest acks. */
215 #ifdef CONFIG_VHOST_NET_USER
216     if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
217         features = vhost_user_get_acked_features(net->nc);
218         if (~net->dev.features & features) {
219             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
220                     " for backend\n",
221                     (uint64_t)(~net->dev.features & features));
222             goto fail;
223         }
224     }
225 #endif
226 
227     vhost_net_ack_features(net, features);
228 
229     return net;
230 
231 fail:
232     vhost_dev_cleanup(&net->dev);
233     g_free(net);
234     return NULL;
235 }
236 
237 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
238                                    int vq_index_end)
239 {
240     net->dev.vq_index = vq_index;
241     net->dev.vq_index_end = vq_index_end;
242 }
243 
244 static int vhost_net_start_one(struct vhost_net *net,
245                                VirtIODevice *dev)
246 {
247     struct vhost_vring_file file = { };
248     int r;
249 
250     if (net->nc->info->start) {
251         r = net->nc->info->start(net->nc);
252         if (r < 0) {
253             return r;
254         }
255     }
256 
257     r = vhost_dev_enable_notifiers(&net->dev, dev);
258     if (r < 0) {
259         goto fail_notifiers;
260     }
261 
262     r = vhost_dev_start(&net->dev, dev, false);
263     if (r < 0) {
264         goto fail_start;
265     }
266 
267     if (net->nc->info->poll) {
268         net->nc->info->poll(net->nc, false);
269     }
270 
271     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
272         qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
273         file.fd = net->backend;
274         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
275             if (!virtio_queue_enabled(dev, net->dev.vq_index +
276                                       file.index)) {
277                 /* Queue might not be ready for start */
278                 continue;
279             }
280             r = vhost_net_set_backend(&net->dev, &file);
281             if (r < 0) {
282                 r = -errno;
283                 goto fail;
284             }
285         }
286     }
287 
288     if (net->nc->info->load) {
289         r = net->nc->info->load(net->nc);
290         if (r < 0) {
291             goto fail;
292         }
293     }
294     return 0;
295 fail:
296     file.fd = -1;
297     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
298         while (file.index-- > 0) {
299             if (!virtio_queue_enabled(dev, net->dev.vq_index +
300                                       file.index)) {
301                 /* Queue might not be ready for start */
302                 continue;
303             }
304             int r = vhost_net_set_backend(&net->dev, &file);
305             assert(r >= 0);
306         }
307     }
308     if (net->nc->info->poll) {
309         net->nc->info->poll(net->nc, true);
310     }
311     vhost_dev_stop(&net->dev, dev, false);
312 fail_start:
313     vhost_dev_disable_notifiers(&net->dev, dev);
314 fail_notifiers:
315     return r;
316 }
317 
318 static void vhost_net_stop_one(struct vhost_net *net,
319                                VirtIODevice *dev)
320 {
321     struct vhost_vring_file file = { .fd = -1 };
322 
323     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
324         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
325             int r = vhost_net_set_backend(&net->dev, &file);
326             assert(r >= 0);
327         }
328     }
329     if (net->nc->info->poll) {
330         net->nc->info->poll(net->nc, true);
331     }
332     vhost_dev_stop(&net->dev, dev, false);
333     if (net->nc->info->stop) {
334         net->nc->info->stop(net->nc);
335     }
336     vhost_dev_disable_notifiers(&net->dev, dev);
337 }
338 
339 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
340                     int data_queue_pairs, int cvq)
341 {
342     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
343     VirtioBusState *vbus = VIRTIO_BUS(qbus);
344     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
345     int total_notifiers = data_queue_pairs * 2 + cvq;
346     VirtIONet *n = VIRTIO_NET(dev);
347     int nvhosts = data_queue_pairs + cvq;
348     struct vhost_net *net;
349     int r, e, i, index_end = data_queue_pairs * 2;
350     NetClientState *peer;
351 
352     if (cvq) {
353         index_end += 1;
354     }
355 
356     if (!k->set_guest_notifiers) {
357         error_report("binding does not support guest notifiers");
358         return -ENOSYS;
359     }
360 
361     for (i = 0; i < nvhosts; i++) {
362 
363         if (i < data_queue_pairs) {
364             peer = qemu_get_peer(ncs, i);
365         } else { /* Control Virtqueue */
366             peer = qemu_get_peer(ncs, n->max_queue_pairs);
367         }
368 
369         net = get_vhost_net(peer);
370         vhost_net_set_vq_index(net, i * 2, index_end);
371 
372         /* Suppress the masking guest notifiers on vhost user
373          * because vhost user doesn't interrupt masking/unmasking
374          * properly.
375          */
376         if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
377             dev->use_guest_notifier_mask = false;
378         }
379      }
380 
381     r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
382     if (r < 0) {
383         error_report("Error binding guest notifier: %d", -r);
384         goto err;
385     }
386 
387     for (i = 0; i < nvhosts; i++) {
388         if (i < data_queue_pairs) {
389             peer = qemu_get_peer(ncs, i);
390         } else {
391             peer = qemu_get_peer(ncs, n->max_queue_pairs);
392         }
393 
394         if (peer->vring_enable) {
395             /* restore vring enable state */
396             r = vhost_set_vring_enable(peer, peer->vring_enable);
397 
398             if (r < 0) {
399                 goto err_start;
400             }
401         }
402 
403         r = vhost_net_start_one(get_vhost_net(peer), dev);
404         if (r < 0) {
405             goto err_start;
406         }
407     }
408 
409     return 0;
410 
411 err_start:
412     while (--i >= 0) {
413         peer = qemu_get_peer(ncs, i < data_queue_pairs ?
414                                   i : n->max_queue_pairs);
415         vhost_net_stop_one(get_vhost_net(peer), dev);
416     }
417     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
418     if (e < 0) {
419         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
420         fflush(stderr);
421     }
422 err:
423     return r;
424 }
425 
426 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
427                     int data_queue_pairs, int cvq)
428 {
429     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
430     VirtioBusState *vbus = VIRTIO_BUS(qbus);
431     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
432     VirtIONet *n = VIRTIO_NET(dev);
433     NetClientState *peer;
434     int total_notifiers = data_queue_pairs * 2 + cvq;
435     int nvhosts = data_queue_pairs + cvq;
436     int i, r;
437 
438     for (i = 0; i < nvhosts; i++) {
439         if (i < data_queue_pairs) {
440             peer = qemu_get_peer(ncs, i);
441         } else {
442             peer = qemu_get_peer(ncs, n->max_queue_pairs);
443         }
444         vhost_net_stop_one(get_vhost_net(peer), dev);
445     }
446 
447     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
448     if (r < 0) {
449         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
450         fflush(stderr);
451     }
452     assert(r >= 0);
453 }
454 
455 void vhost_net_cleanup(struct vhost_net *net)
456 {
457     vhost_dev_cleanup(&net->dev);
458 }
459 
460 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
461 {
462     const VhostOps *vhost_ops = net->dev.vhost_ops;
463 
464     assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
465     assert(vhost_ops->vhost_migration_done);
466 
467     return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
468 }
469 
470 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
471 {
472     return vhost_virtqueue_pending(&net->dev, idx);
473 }
474 
475 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
476                               int idx, bool mask)
477 {
478     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
479 }
480 
481 VHostNetState *get_vhost_net(NetClientState *nc)
482 {
483     VHostNetState *vhost_net = 0;
484 
485     if (!nc) {
486         return 0;
487     }
488 
489     switch (nc->info->type) {
490     case NET_CLIENT_DRIVER_TAP:
491         vhost_net = tap_get_vhost_net(nc);
492         break;
493 #ifdef CONFIG_VHOST_NET_USER
494     case NET_CLIENT_DRIVER_VHOST_USER:
495         vhost_net = vhost_user_get_vhost_net(nc);
496         assert(vhost_net);
497         break;
498 #endif
499 #ifdef CONFIG_VHOST_NET_VDPA
500     case NET_CLIENT_DRIVER_VHOST_VDPA:
501         vhost_net = vhost_vdpa_get_vhost_net(nc);
502         assert(vhost_net);
503         break;
504 #endif
505     default:
506         break;
507     }
508 
509     return vhost_net;
510 }
511 
512 int vhost_set_vring_enable(NetClientState *nc, int enable)
513 {
514     VHostNetState *net = get_vhost_net(nc);
515     const VhostOps *vhost_ops = net->dev.vhost_ops;
516 
517     nc->vring_enable = enable;
518 
519     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
520         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
521     }
522 
523     return 0;
524 }
525 
526 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
527 {
528     const VhostOps *vhost_ops = net->dev.vhost_ops;
529 
530     if (!vhost_ops->vhost_net_set_mtu) {
531         return 0;
532     }
533 
534     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
535 }
536 
537 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
538                                int vq_index)
539 {
540     VHostNetState *net = get_vhost_net(nc->peer);
541     const VhostOps *vhost_ops = net->dev.vhost_ops;
542     struct vhost_vring_file file = { .fd = -1 };
543     int idx;
544 
545     /* should only be called after backend is connected */
546     assert(vhost_ops);
547 
548     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
549 
550     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
551         file.index = idx;
552         int r = vhost_net_set_backend(&net->dev, &file);
553         assert(r >= 0);
554     }
555 
556     vhost_virtqueue_stop(&net->dev,
557                          vdev,
558                          net->dev.vqs + idx,
559                          net->dev.vq_index + idx);
560 }
561 
562 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
563                                 int vq_index)
564 {
565     VHostNetState *net = get_vhost_net(nc->peer);
566     const VhostOps *vhost_ops = net->dev.vhost_ops;
567     struct vhost_vring_file file = { };
568     int idx, r;
569 
570     if (!net->dev.started) {
571         return -EBUSY;
572     }
573 
574     /* should only be called after backend is connected */
575     assert(vhost_ops);
576 
577     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
578 
579     r = vhost_virtqueue_start(&net->dev,
580                               vdev,
581                               net->dev.vqs + idx,
582                               net->dev.vq_index + idx);
583     if (r < 0) {
584         goto err_start;
585     }
586 
587     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
588         file.index = idx;
589         file.fd = net->backend;
590         r = vhost_net_set_backend(&net->dev, &file);
591         if (r < 0) {
592             r = -errno;
593             goto err_start;
594         }
595     }
596 
597     return 0;
598 
599 err_start:
600     error_report("Error when restarting the queue.");
601 
602     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
603         file.fd = VHOST_FILE_UNBIND;
604         file.index = idx;
605         int r = vhost_net_set_backend(&net->dev, &file);
606         assert(r >= 0);
607     }
608 
609     vhost_dev_stop(&net->dev, vdev, false);
610 
611     return r;
612 }
613