xref: /qemu/hw/net/vhost_net.c (revision 6170d09c)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 #include "net/vhost-vdpa.h"
21 
22 #include "standard-headers/linux/vhost_types.h"
23 #include "hw/virtio/virtio-net.h"
24 #include "net/vhost_net.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "qemu/main-loop.h"
28 
29 #include <sys/socket.h>
30 #include <net/if.h>
31 #include <netinet/in.h>
32 
33 
34 #include "standard-headers/linux/virtio_ring.h"
35 #include "hw/virtio/vhost.h"
36 #include "hw/virtio/virtio-bus.h"
37 #include "linux-headers/linux/vhost.h"
38 
39 
40 /* Features supported by host kernel. */
41 static const int kernel_feature_bits[] = {
42     VIRTIO_F_NOTIFY_ON_EMPTY,
43     VIRTIO_RING_F_INDIRECT_DESC,
44     VIRTIO_RING_F_EVENT_IDX,
45     VIRTIO_NET_F_MRG_RXBUF,
46     VIRTIO_F_VERSION_1,
47     VIRTIO_NET_F_MTU,
48     VIRTIO_F_IOMMU_PLATFORM,
49     VIRTIO_F_RING_PACKED,
50     VIRTIO_F_RING_RESET,
51     VIRTIO_NET_F_HASH_REPORT,
52     VHOST_INVALID_FEATURE_BIT
53 };
54 
55 /* Features supported by others. */
56 static const int user_feature_bits[] = {
57     VIRTIO_F_NOTIFY_ON_EMPTY,
58     VIRTIO_RING_F_INDIRECT_DESC,
59     VIRTIO_RING_F_EVENT_IDX,
60 
61     VIRTIO_F_ANY_LAYOUT,
62     VIRTIO_F_VERSION_1,
63     VIRTIO_NET_F_CSUM,
64     VIRTIO_NET_F_GUEST_CSUM,
65     VIRTIO_NET_F_GSO,
66     VIRTIO_NET_F_GUEST_TSO4,
67     VIRTIO_NET_F_GUEST_TSO6,
68     VIRTIO_NET_F_GUEST_ECN,
69     VIRTIO_NET_F_GUEST_UFO,
70     VIRTIO_NET_F_HOST_TSO4,
71     VIRTIO_NET_F_HOST_TSO6,
72     VIRTIO_NET_F_HOST_ECN,
73     VIRTIO_NET_F_HOST_UFO,
74     VIRTIO_NET_F_MRG_RXBUF,
75     VIRTIO_NET_F_MTU,
76     VIRTIO_F_IOMMU_PLATFORM,
77     VIRTIO_F_RING_PACKED,
78     VIRTIO_F_RING_RESET,
79     VIRTIO_NET_F_RSS,
80     VIRTIO_NET_F_HASH_REPORT,
81 
82     /* This bit implies RARP isn't sent by QEMU out of band */
83     VIRTIO_NET_F_GUEST_ANNOUNCE,
84 
85     VIRTIO_NET_F_MQ,
86 
87     VHOST_INVALID_FEATURE_BIT
88 };
89 
90 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
91 {
92     const int *feature_bits = 0;
93 
94     switch (net->nc->info->type) {
95     case NET_CLIENT_DRIVER_TAP:
96         feature_bits = kernel_feature_bits;
97         break;
98     case NET_CLIENT_DRIVER_VHOST_USER:
99         feature_bits = user_feature_bits;
100         break;
101 #ifdef CONFIG_VHOST_NET_VDPA
102     case NET_CLIENT_DRIVER_VHOST_VDPA:
103         feature_bits = vdpa_feature_bits;
104         break;
105 #endif
106     default:
107         error_report("Feature bits not defined for this type: %d",
108                 net->nc->info->type);
109         break;
110     }
111 
112     return feature_bits;
113 }
114 
115 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
116 {
117     return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
118             features);
119 }
120 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
121                          uint32_t config_len)
122 {
123     return vhost_dev_get_config(&net->dev, config, config_len, NULL);
124 }
125 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
126                          uint32_t offset, uint32_t size, uint32_t flags)
127 {
128     return vhost_dev_set_config(&net->dev, data, offset, size, flags);
129 }
130 
131 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
132 {
133     net->dev.acked_features = net->dev.backend_features;
134     vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
135 }
136 
137 uint64_t vhost_net_get_max_queues(VHostNetState *net)
138 {
139     return net->dev.max_queues;
140 }
141 
142 uint64_t vhost_net_get_acked_features(VHostNetState *net)
143 {
144     return net->dev.acked_features;
145 }
146 
147 void vhost_net_save_acked_features(NetClientState *nc)
148 {
149 #ifdef CONFIG_VHOST_NET_USER
150     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
151         vhost_user_save_acked_features(nc);
152     }
153 #endif
154 }
155 
156 static int vhost_net_get_fd(NetClientState *backend)
157 {
158     switch (backend->info->type) {
159     case NET_CLIENT_DRIVER_TAP:
160         return tap_get_fd(backend);
161     default:
162         fprintf(stderr, "vhost-net requires tap backend\n");
163         return -ENOSYS;
164     }
165 }
166 
167 struct vhost_net *vhost_net_init(VhostNetOptions *options)
168 {
169     int r;
170     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
171     struct vhost_net *net = g_new0(struct vhost_net, 1);
172     uint64_t features = 0;
173     Error *local_err = NULL;
174 
175     if (!options->net_backend) {
176         fprintf(stderr, "vhost-net requires net backend to be setup\n");
177         goto fail;
178     }
179     net->nc = options->net_backend;
180     net->dev.nvqs = options->nvqs;
181 
182     net->dev.max_queues = 1;
183     net->dev.vqs = net->vqs;
184 
185     if (backend_kernel) {
186         r = vhost_net_get_fd(options->net_backend);
187         if (r < 0) {
188             goto fail;
189         }
190         net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
191             ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
192         net->backend = r;
193         net->dev.protocol_features = 0;
194     } else {
195         net->dev.backend_features = 0;
196         net->dev.protocol_features = 0;
197         net->backend = -1;
198 
199         /* vhost-user needs vq_index to initiate a specific queue pair */
200         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
201     }
202 
203     r = vhost_dev_init(&net->dev, options->opaque,
204                        options->backend_type, options->busyloop_timeout,
205                        &local_err);
206     if (r < 0) {
207         error_report_err(local_err);
208         goto fail;
209     }
210     if (backend_kernel) {
211         if (!qemu_has_vnet_hdr_len(options->net_backend,
212                                sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
213             net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
214         }
215         if (~net->dev.features & net->dev.backend_features) {
216             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
217                    " for backend\n",
218                    (uint64_t)(~net->dev.features & net->dev.backend_features));
219             goto fail;
220         }
221     }
222 
223     /* Set sane init value. Override when guest acks. */
224 #ifdef CONFIG_VHOST_NET_USER
225     if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
226         features = vhost_user_get_acked_features(net->nc);
227         if (~net->dev.features & features) {
228             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
229                     " for backend\n",
230                     (uint64_t)(~net->dev.features & features));
231             goto fail;
232         }
233     }
234 #endif
235 
236     vhost_net_ack_features(net, features);
237 
238     return net;
239 
240 fail:
241     vhost_dev_cleanup(&net->dev);
242     g_free(net);
243     return NULL;
244 }
245 
246 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
247                                    int vq_index_end)
248 {
249     net->dev.vq_index = vq_index;
250     net->dev.vq_index_end = vq_index_end;
251 }
252 
253 static int vhost_net_start_one(struct vhost_net *net,
254                                VirtIODevice *dev)
255 {
256     struct vhost_vring_file file = { };
257     int r;
258 
259     if (net->nc->info->start) {
260         r = net->nc->info->start(net->nc);
261         if (r < 0) {
262             return r;
263         }
264     }
265 
266     r = vhost_dev_enable_notifiers(&net->dev, dev);
267     if (r < 0) {
268         goto fail_notifiers;
269     }
270 
271     r = vhost_dev_start(&net->dev, dev, false);
272     if (r < 0) {
273         goto fail_start;
274     }
275 
276     if (net->nc->info->poll) {
277         net->nc->info->poll(net->nc, false);
278     }
279 
280     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
281         qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
282         file.fd = net->backend;
283         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
284             if (!virtio_queue_enabled(dev, net->dev.vq_index +
285                                       file.index)) {
286                 /* Queue might not be ready for start */
287                 continue;
288             }
289             r = vhost_net_set_backend(&net->dev, &file);
290             if (r < 0) {
291                 r = -errno;
292                 goto fail;
293             }
294         }
295     }
296 
297     if (net->nc->info->load) {
298         r = net->nc->info->load(net->nc);
299         if (r < 0) {
300             goto fail;
301         }
302     }
303     return 0;
304 fail:
305     file.fd = -1;
306     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
307         while (file.index-- > 0) {
308             if (!virtio_queue_enabled(dev, net->dev.vq_index +
309                                       file.index)) {
310                 /* Queue might not be ready for start */
311                 continue;
312             }
313             int r = vhost_net_set_backend(&net->dev, &file);
314             assert(r >= 0);
315         }
316     }
317     if (net->nc->info->poll) {
318         net->nc->info->poll(net->nc, true);
319     }
320     vhost_dev_stop(&net->dev, dev, false);
321 fail_start:
322     vhost_dev_disable_notifiers(&net->dev, dev);
323 fail_notifiers:
324     return r;
325 }
326 
327 static void vhost_net_stop_one(struct vhost_net *net,
328                                VirtIODevice *dev)
329 {
330     struct vhost_vring_file file = { .fd = -1 };
331 
332     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
333         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
334             int r = vhost_net_set_backend(&net->dev, &file);
335             assert(r >= 0);
336         }
337     }
338     if (net->nc->info->poll) {
339         net->nc->info->poll(net->nc, true);
340     }
341     vhost_dev_stop(&net->dev, dev, false);
342     if (net->nc->info->stop) {
343         net->nc->info->stop(net->nc);
344     }
345     vhost_dev_disable_notifiers(&net->dev, dev);
346 }
347 
348 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
349                     int data_queue_pairs, int cvq)
350 {
351     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
352     VirtioBusState *vbus = VIRTIO_BUS(qbus);
353     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
354     int total_notifiers = data_queue_pairs * 2 + cvq;
355     VirtIONet *n = VIRTIO_NET(dev);
356     int nvhosts = data_queue_pairs + cvq;
357     struct vhost_net *net;
358     int r, e, i, index_end = data_queue_pairs * 2;
359     NetClientState *peer;
360 
361     if (cvq) {
362         index_end += 1;
363     }
364 
365     if (!k->set_guest_notifiers) {
366         error_report("binding does not support guest notifiers");
367         return -ENOSYS;
368     }
369 
370     for (i = 0; i < nvhosts; i++) {
371 
372         if (i < data_queue_pairs) {
373             peer = qemu_get_peer(ncs, i);
374         } else { /* Control Virtqueue */
375             peer = qemu_get_peer(ncs, n->max_queue_pairs);
376         }
377 
378         net = get_vhost_net(peer);
379         vhost_net_set_vq_index(net, i * 2, index_end);
380 
381         /* Suppress the masking guest notifiers on vhost user
382          * because vhost user doesn't interrupt masking/unmasking
383          * properly.
384          */
385         if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
386             dev->use_guest_notifier_mask = false;
387         }
388      }
389 
390     r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
391     if (r < 0) {
392         error_report("Error binding guest notifier: %d", -r);
393         goto err;
394     }
395 
396     for (i = 0; i < nvhosts; i++) {
397         if (i < data_queue_pairs) {
398             peer = qemu_get_peer(ncs, i);
399         } else {
400             peer = qemu_get_peer(ncs, n->max_queue_pairs);
401         }
402 
403         if (peer->vring_enable) {
404             /* restore vring enable state */
405             r = vhost_set_vring_enable(peer, peer->vring_enable);
406 
407             if (r < 0) {
408                 goto err_start;
409             }
410         }
411 
412         r = vhost_net_start_one(get_vhost_net(peer), dev);
413         if (r < 0) {
414             goto err_start;
415         }
416     }
417 
418     return 0;
419 
420 err_start:
421     while (--i >= 0) {
422         peer = qemu_get_peer(ncs, i < data_queue_pairs ?
423                                   i : n->max_queue_pairs);
424         vhost_net_stop_one(get_vhost_net(peer), dev);
425     }
426     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
427     if (e < 0) {
428         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
429         fflush(stderr);
430     }
431 err:
432     return r;
433 }
434 
435 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
436                     int data_queue_pairs, int cvq)
437 {
438     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
439     VirtioBusState *vbus = VIRTIO_BUS(qbus);
440     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
441     VirtIONet *n = VIRTIO_NET(dev);
442     NetClientState *peer;
443     int total_notifiers = data_queue_pairs * 2 + cvq;
444     int nvhosts = data_queue_pairs + cvq;
445     int i, r;
446 
447     for (i = 0; i < nvhosts; i++) {
448         if (i < data_queue_pairs) {
449             peer = qemu_get_peer(ncs, i);
450         } else {
451             peer = qemu_get_peer(ncs, n->max_queue_pairs);
452         }
453         vhost_net_stop_one(get_vhost_net(peer), dev);
454     }
455 
456     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
457     if (r < 0) {
458         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
459         fflush(stderr);
460     }
461     assert(r >= 0);
462 }
463 
464 void vhost_net_cleanup(struct vhost_net *net)
465 {
466     vhost_dev_cleanup(&net->dev);
467 }
468 
469 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
470 {
471     const VhostOps *vhost_ops = net->dev.vhost_ops;
472 
473     assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
474     assert(vhost_ops->vhost_migration_done);
475 
476     return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
477 }
478 
479 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
480 {
481     return vhost_virtqueue_pending(&net->dev, idx);
482 }
483 
484 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
485                               int idx, bool mask)
486 {
487     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
488 }
489 
490 bool vhost_net_config_pending(VHostNetState *net)
491 {
492     return vhost_config_pending(&net->dev);
493 }
494 
495 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
496 {
497     vhost_config_mask(&net->dev, dev, mask);
498 }
499 VHostNetState *get_vhost_net(NetClientState *nc)
500 {
501     VHostNetState *vhost_net = 0;
502 
503     if (!nc) {
504         return 0;
505     }
506 
507     switch (nc->info->type) {
508     case NET_CLIENT_DRIVER_TAP:
509         vhost_net = tap_get_vhost_net(nc);
510         /*
511          * tap_get_vhost_net() can return NULL if a tap net-device backend is
512          * created with 'vhost=off' option, 'vhostforce=off' or no vhost or
513          * vhostforce or vhostfd options at all. Please see net_init_tap_one().
514          * Hence, we omit the assertion here.
515          */
516         break;
517 #ifdef CONFIG_VHOST_NET_USER
518     case NET_CLIENT_DRIVER_VHOST_USER:
519         vhost_net = vhost_user_get_vhost_net(nc);
520         assert(vhost_net);
521         break;
522 #endif
523 #ifdef CONFIG_VHOST_NET_VDPA
524     case NET_CLIENT_DRIVER_VHOST_VDPA:
525         vhost_net = vhost_vdpa_get_vhost_net(nc);
526         assert(vhost_net);
527         break;
528 #endif
529     default:
530         break;
531     }
532 
533     return vhost_net;
534 }
535 
536 int vhost_set_vring_enable(NetClientState *nc, int enable)
537 {
538     VHostNetState *net = get_vhost_net(nc);
539     const VhostOps *vhost_ops = net->dev.vhost_ops;
540 
541     nc->vring_enable = enable;
542 
543     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
544         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
545     }
546 
547     return 0;
548 }
549 
550 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
551 {
552     const VhostOps *vhost_ops = net->dev.vhost_ops;
553 
554     if (!vhost_ops->vhost_net_set_mtu) {
555         return 0;
556     }
557 
558     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
559 }
560 
561 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
562                                int vq_index)
563 {
564     VHostNetState *net = get_vhost_net(nc->peer);
565     const VhostOps *vhost_ops = net->dev.vhost_ops;
566     struct vhost_vring_file file = { .fd = -1 };
567     int idx;
568 
569     /* should only be called after backend is connected */
570     assert(vhost_ops);
571 
572     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
573 
574     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
575         file.index = idx;
576         int r = vhost_net_set_backend(&net->dev, &file);
577         assert(r >= 0);
578     }
579 
580     vhost_virtqueue_stop(&net->dev,
581                          vdev,
582                          net->dev.vqs + idx,
583                          net->dev.vq_index + idx);
584 }
585 
586 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
587                                 int vq_index)
588 {
589     VHostNetState *net = get_vhost_net(nc->peer);
590     const VhostOps *vhost_ops = net->dev.vhost_ops;
591     struct vhost_vring_file file = { };
592     int idx, r;
593 
594     if (!net->dev.started) {
595         return -EBUSY;
596     }
597 
598     /* should only be called after backend is connected */
599     assert(vhost_ops);
600 
601     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
602 
603     r = vhost_virtqueue_start(&net->dev,
604                               vdev,
605                               net->dev.vqs + idx,
606                               net->dev.vq_index + idx);
607     if (r < 0) {
608         goto err_start;
609     }
610 
611     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
612         file.index = idx;
613         file.fd = net->backend;
614         r = vhost_net_set_backend(&net->dev, &file);
615         if (r < 0) {
616             r = -errno;
617             goto err_start;
618         }
619     }
620 
621     return 0;
622 
623 err_start:
624     error_report("Error when restarting the queue.");
625 
626     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
627         file.fd = VHOST_FILE_UNBIND;
628         file.index = idx;
629         int r = vhost_net_set_backend(&net->dev, &file);
630         assert(r >= 0);
631     }
632 
633     vhost_dev_stop(&net->dev, vdev, false);
634 
635     return r;
636 }
637