xref: /qemu/hw/net/vhost_net.c (revision 03e18810)
1 /*
2  * vhost-net support
3  *
4  * Copyright Red Hat, Inc. 2010
5  *
6  * Authors:
7  *  Michael S. Tsirkin <mst@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15 
16 #include "qemu/osdep.h"
17 #include "net/net.h"
18 #include "net/tap.h"
19 #include "net/vhost-user.h"
20 
21 #include "hw/virtio/virtio-net.h"
22 #include "net/vhost_net.h"
23 #include "qemu/error-report.h"
24 
25 
26 #ifdef CONFIG_VHOST_NET
27 #include <linux/vhost.h>
28 #include <sys/socket.h>
29 #include <linux/kvm.h>
30 #include <netpacket/packet.h>
31 #include <net/ethernet.h>
32 #include <net/if.h>
33 #include <netinet/in.h>
34 
35 
36 #include "standard-headers/linux/virtio_ring.h"
37 #include "hw/virtio/vhost.h"
38 #include "hw/virtio/virtio-bus.h"
39 #include "hw/virtio/virtio-access.h"
40 
/*
 * State kept for one vhost-net instance.  vhost_net_init() fills this in
 * for the net client passed via VhostNetOptions.
 */
struct vhost_net {
    /* Generic vhost device state; dev.vqs is pointed at vqs[] below. */
    struct vhost_dev dev;
    /* Backing storage for the two virtqueues (dev.nvqs is set to 2). */
    struct vhost_virtqueue vqs[2];
    /* fd returned by vhost_net_get_fd() for the kernel backend, else -1. */
    int backend;
    /* Net client this instance was created for (options->net_backend). */
    NetClientState *nc;
};
47 
/*
 * Features supported by host kernel.
 *
 * This table is selected by vhost_net_get_feature_bits() for TAP net
 * clients.  The list is terminated by VHOST_INVALID_FEATURE_BIT.
 */
static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VHOST_INVALID_FEATURE_BIT
};
57 
/*
 * Features supported by others.
 *
 * This table is selected by vhost_net_get_feature_bits() for vhost-user
 * net clients.  The list is terminated by VHOST_INVALID_FEATURE_BIT.
 */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT
};
86 
87 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
88 {
89     const int *feature_bits = 0;
90 
91     switch (net->nc->info->type) {
92     case NET_CLIENT_OPTIONS_KIND_TAP:
93         feature_bits = kernel_feature_bits;
94         break;
95     case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
96         feature_bits = user_feature_bits;
97         break;
98     default:
99         error_report("Feature bits not defined for this type: %d",
100                 net->nc->info->type);
101         break;
102     }
103 
104     return feature_bits;
105 }
106 
107 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
108 {
109     return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
110             features);
111 }
112 
113 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
114 {
115     net->dev.acked_features = net->dev.backend_features;
116     vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
117 }
118 
119 uint64_t vhost_net_get_max_queues(VHostNetState *net)
120 {
121     return net->dev.max_queues;
122 }
123 
124 static int vhost_net_get_fd(NetClientState *backend)
125 {
126     switch (backend->info->type) {
127     case NET_CLIENT_OPTIONS_KIND_TAP:
128         return tap_get_fd(backend);
129     default:
130         fprintf(stderr, "vhost-net requires tap backend\n");
131         return -EBADFD;
132     }
133 }
134 
135 struct vhost_net *vhost_net_init(VhostNetOptions *options)
136 {
137     int r;
138     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
139     struct vhost_net *net = g_malloc(sizeof *net);
140 
141     if (!options->net_backend) {
142         fprintf(stderr, "vhost-net requires net backend to be setup\n");
143         goto fail;
144     }
145     net->nc = options->net_backend;
146 
147     net->dev.max_queues = 1;
148     net->dev.nvqs = 2;
149     net->dev.vqs = net->vqs;
150 
151     if (backend_kernel) {
152         r = vhost_net_get_fd(options->net_backend);
153         if (r < 0) {
154             goto fail;
155         }
156         net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
157             ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
158         net->backend = r;
159         net->dev.protocol_features = 0;
160     } else {
161         net->dev.backend_features = 0;
162         net->dev.protocol_features = 0;
163         net->backend = -1;
164 
165         /* vhost-user needs vq_index to initiate a specific queue pair */
166         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
167     }
168 
169     r = vhost_dev_init(&net->dev, options->opaque,
170                        options->backend_type);
171     if (r < 0) {
172         goto fail;
173     }
174     if (backend_kernel) {
175         if (!qemu_has_vnet_hdr_len(options->net_backend,
176                                sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
177             net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
178         }
179         if (~net->dev.features & net->dev.backend_features) {
180             fprintf(stderr, "vhost lacks feature mask %" PRIu64
181                    " for backend\n",
182                    (uint64_t)(~net->dev.features & net->dev.backend_features));
183             vhost_dev_cleanup(&net->dev);
184             goto fail;
185         }
186     }
187     /* Set sane init value. Override when guest acks. */
188     vhost_net_ack_features(net, 0);
189     return net;
190 fail:
191     g_free(net);
192     return NULL;
193 }
194 
195 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index)
196 {
197     net->dev.vq_index = vq_index;
198 }
199 
200 static int vhost_net_set_vnet_endian(VirtIODevice *dev, NetClientState *peer,
201                                      bool set)
202 {
203     int r = 0;
204 
205     if (virtio_vdev_has_feature(dev, VIRTIO_F_VERSION_1) ||
206         (virtio_legacy_is_cross_endian(dev) && !virtio_is_big_endian(dev))) {
207         r = qemu_set_vnet_le(peer, set);
208         if (r) {
209             error_report("backend does not support LE vnet headers");
210         }
211     } else if (virtio_legacy_is_cross_endian(dev)) {
212         r = qemu_set_vnet_be(peer, set);
213         if (r) {
214             error_report("backend does not support BE vnet headers");
215         }
216     }
217 
218     return r;
219 }
220 
221 static int vhost_net_start_one(struct vhost_net *net,
222                                VirtIODevice *dev)
223 {
224     struct vhost_vring_file file = { };
225     int r;
226 
227     net->dev.nvqs = 2;
228     net->dev.vqs = net->vqs;
229 
230     r = vhost_dev_enable_notifiers(&net->dev, dev);
231     if (r < 0) {
232         goto fail_notifiers;
233     }
234 
235     r = vhost_dev_start(&net->dev, dev);
236     if (r < 0) {
237         goto fail_start;
238     }
239 
240     if (net->nc->info->poll) {
241         net->nc->info->poll(net->nc, false);
242     }
243 
244     if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
245         qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
246         file.fd = net->backend;
247         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
248             const VhostOps *vhost_ops = net->dev.vhost_ops;
249             r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
250             if (r < 0) {
251                 r = -errno;
252                 goto fail;
253             }
254         }
255     }
256     return 0;
257 fail:
258     file.fd = -1;
259     if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
260         while (file.index-- > 0) {
261             const VhostOps *vhost_ops = net->dev.vhost_ops;
262             int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
263             assert(r >= 0);
264         }
265     }
266     if (net->nc->info->poll) {
267         net->nc->info->poll(net->nc, true);
268     }
269     vhost_dev_stop(&net->dev, dev);
270 fail_start:
271     vhost_dev_disable_notifiers(&net->dev, dev);
272 fail_notifiers:
273     return r;
274 }
275 
276 static void vhost_net_stop_one(struct vhost_net *net,
277                                VirtIODevice *dev)
278 {
279     struct vhost_vring_file file = { .fd = -1 };
280 
281     if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
282         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
283             const VhostOps *vhost_ops = net->dev.vhost_ops;
284             int r = vhost_ops->vhost_net_set_backend(&net->dev, &file);
285             assert(r >= 0);
286         }
287     }
288     if (net->nc->info->poll) {
289         net->nc->info->poll(net->nc, true);
290     }
291     vhost_dev_stop(&net->dev, dev);
292     vhost_dev_disable_notifiers(&net->dev, dev);
293 }
294 
295 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
296                     int total_queues)
297 {
298     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
299     VirtioBusState *vbus = VIRTIO_BUS(qbus);
300     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
301     int r, e, i, j;
302 
303     if (!k->set_guest_notifiers) {
304         error_report("binding does not support guest notifiers");
305         return -ENOSYS;
306     }
307 
308     for (j = 0; j < total_queues; j++) {
309         r = vhost_net_set_vnet_endian(dev, ncs[j].peer, true);
310         if (r < 0) {
311             goto err_endian;
312         }
313         vhost_net_set_vq_index(get_vhost_net(ncs[j].peer), j * 2);
314     }
315 
316     r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
317     if (r < 0) {
318         error_report("Error binding guest notifier: %d", -r);
319         goto err_endian;
320     }
321 
322     for (i = 0; i < total_queues; i++) {
323         r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev);
324 
325         if (r < 0) {
326             goto err_start;
327         }
328     }
329 
330     return 0;
331 
332 err_start:
333     while (--i >= 0) {
334         vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
335     }
336     e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
337     if (e < 0) {
338         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
339         fflush(stderr);
340     }
341 err_endian:
342     while (--j >= 0) {
343         vhost_net_set_vnet_endian(dev, ncs[j].peer, false);
344     }
345     return r;
346 }
347 
348 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
349                     int total_queues)
350 {
351     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
352     VirtioBusState *vbus = VIRTIO_BUS(qbus);
353     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
354     int i, r;
355 
356     for (i = 0; i < total_queues; i++) {
357         vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
358     }
359 
360     r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
361     if (r < 0) {
362         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
363         fflush(stderr);
364     }
365     assert(r >= 0);
366 
367     assert(vhost_net_set_vnet_endian(dev, ncs[0].peer, false) >= 0);
368 }
369 
370 void vhost_net_cleanup(struct vhost_net *net)
371 {
372     vhost_dev_cleanup(&net->dev);
373     g_free(net);
374 }
375 
376 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
377 {
378     const VhostOps *vhost_ops = net->dev.vhost_ops;
379     int r = -1;
380 
381     if (vhost_ops->vhost_migration_done) {
382         r = vhost_ops->vhost_migration_done(&net->dev, mac_addr);
383     }
384 
385     return r;
386 }
387 
388 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
389 {
390     return vhost_virtqueue_pending(&net->dev, idx);
391 }
392 
393 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
394                               int idx, bool mask)
395 {
396     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
397 }
398 
399 VHostNetState *get_vhost_net(NetClientState *nc)
400 {
401     VHostNetState *vhost_net = 0;
402 
403     if (!nc) {
404         return 0;
405     }
406 
407     switch (nc->info->type) {
408     case NET_CLIENT_OPTIONS_KIND_TAP:
409         vhost_net = tap_get_vhost_net(nc);
410         break;
411     case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
412         vhost_net = vhost_user_get_vhost_net(nc);
413         break;
414     default:
415         break;
416     }
417 
418     return vhost_net;
419 }
420 
421 int vhost_set_vring_enable(NetClientState *nc, int enable)
422 {
423     VHostNetState *net = get_vhost_net(nc);
424     const VhostOps *vhost_ops = net->dev.vhost_ops;
425 
426     if (vhost_ops->vhost_set_vring_enable) {
427         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
428     }
429 
430     return 0;
431 }
432 
433 #else
/* Stub used when CONFIG_VHOST_NET is not defined: always reports 1. */
uint64_t vhost_net_get_max_queues(VHostNetState *net)
{
    return 1;
}
438 
/*
 * Stub used when CONFIG_VHOST_NET is not defined: reports that vhost-net
 * is unavailable and returns NULL.
 */
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    error_report("vhost-net support is not compiled in");
    return NULL;
}
444 
/* Stub used when CONFIG_VHOST_NET is not defined: always fails with -ENOSYS. */
int vhost_net_start(VirtIODevice *dev,
                    NetClientState *ncs,
                    int total_queues)
{
    return -ENOSYS;
}
/* Stub used when CONFIG_VHOST_NET is not defined: does nothing. */
void vhost_net_stop(VirtIODevice *dev,
                    NetClientState *ncs,
                    int total_queues)
{
}
456 
/* Stub used when CONFIG_VHOST_NET is not defined: does nothing. */
void vhost_net_cleanup(struct vhost_net *net)
{
}
460 
/*
 * Stub used when CONFIG_VHOST_NET is not defined: returns @features
 * unchanged.
 */
uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
{
    return features;
}
/* Stub used when CONFIG_VHOST_NET is not defined: does nothing. */
void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
}
468 
/* Stub used when CONFIG_VHOST_NET is not defined: always returns false. */
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
    return false;
}
473 
/* Stub used when CONFIG_VHOST_NET is not defined: does nothing. */
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
                              int idx, bool mask)
{
}
478 
/* Stub used when CONFIG_VHOST_NET is not defined: always returns -1. */
int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
{
    return -1;
}
483 
484 VHostNetState *get_vhost_net(NetClientState *nc)
485 {
486     return 0;
487 }
488 
/* Stub used when CONFIG_VHOST_NET is not defined: always returns 0. */
int vhost_set_vring_enable(NetClientState *nc, int enable)
{
    return 0;
}
493 #endif
494