xref: /qemu/hw/net/virtio-net.c (revision b64b7ed8)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci_device.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49 
50 #define VIRTIO_NET_VM_VERSION    11
51 
52 /* previously fixed value */
53 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
54 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
55 
56 /* for now, only allow larger queue sizes; with virtio-1, the guest can downsize */
57 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
58 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
59 
60 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
61 
62 #define VIRTIO_NET_TCP_FLAG         0x3F
63 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
64 
65 /* IPv4 max payload, 16 bits in the header */
66 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
67 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
68 
69 /* header length value in an IP header without options */
70 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
71 
72 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
73 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
74 
75 /* Purge coalesced packets timer interval. This value affects performance
76    a lot and should be tuned carefully: '300000' (300us) is the recommended
77    value to pass the WHQL test, while '50000' can gain 2x netperf throughput
78    with tso/gso/gro 'off'. */
79 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
80 
81 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
82                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
83                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
84                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
90 
91 static const VirtIOFeature feature_sizes[] = {
92     {.flags = 1ULL << VIRTIO_NET_F_MAC,
93      .end = endof(struct virtio_net_config, mac)},
94     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
95      .end = endof(struct virtio_net_config, status)},
96     {.flags = 1ULL << VIRTIO_NET_F_MQ,
97      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
98     {.flags = 1ULL << VIRTIO_NET_F_MTU,
99      .end = endof(struct virtio_net_config, mtu)},
100     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
101      .end = endof(struct virtio_net_config, duplex)},
102     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
103      .end = endof(struct virtio_net_config, supported_hash_types)},
104     {}
105 };
106 
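/*
 * The guest-visible config size depends on the negotiated features:
 * virtio_get_config_size() picks the largest .end among the entries whose
 * feature bit is set, clamped to [min_size, max_size]. For example, with
 * only VIRTIO_NET_F_MAC and VIRTIO_NET_F_STATUS negotiated, the config
 * space ends at endof(struct virtio_net_config, status).
 */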
107 static const VirtIOConfigSizeParams cfg_size_params = {
108     .min_size = endof(struct virtio_net_config, mac),
109     .max_size = sizeof(struct virtio_net_config),
110     .feature_sizes = feature_sizes
111 };
112 
113 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
114 {
115     VirtIONet *n = qemu_get_nic_opaque(nc);
116 
117     return &n->vqs[nc->queue_index];
118 }
119 
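/*
 * Virtqueues come in (rx, tx) pairs: vq 0 is rx0, vq 1 is tx0, vq 2 is
 * rx1, and so on, so the queue pair index is the vq index divided by two.
 * For example, vq index 5 (tx2) belongs to queue pair 2.
 */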
120 static int vq2q(int queue_index)
121 {
122     return queue_index / 2;
123 }
124 
125 static void flush_or_purge_queued_packets(NetClientState *nc)
126 {
127     if (!nc->peer) {
128         return;
129     }
130 
131     qemu_flush_or_purge_queued_packets(nc->peer, true);
132     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
133 }
134 
135 /* TODO
136  * - we could suppress RX interrupt if we were so inclined.
137  */
138 
139 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
140 {
141     VirtIONet *n = VIRTIO_NET(vdev);
142     struct virtio_net_config netcfg;
143     NetClientState *nc = qemu_get_queue(n->nic);
144     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
145 
146     int ret = 0;
147     memset(&netcfg, 0, sizeof(struct virtio_net_config));
148     virtio_stw_p(vdev, &netcfg.status, n->status);
149     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
150     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
151     memcpy(netcfg.mac, n->mac, ETH_ALEN);
152     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
153     netcfg.duplex = n->net_conf.duplex;
154     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
155     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
156                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
157                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
158     virtio_stl_p(vdev, &netcfg.supported_hash_types,
159                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
160     memcpy(config, &netcfg, n->config_size);
161 
162     /*
163      * Is this VDPA? No peer means not VDPA: there's no way to
164      * disconnect/reconnect a VDPA peer.
165      */
166     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
167         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
168                                    n->config_size);
169         if (ret == -1) {
170             return;
171         }
172 
173         /*
174          * Some NIC/kernel combinations present 0 as the mac address.  As that
175          * is not a legal address, try to proceed with the address from the
176          * QEMU command line in the hope that the address has been configured
177          * correctly elsewhere - just not reported by the device.
178          */
179         if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
180             info_report("Zero hardware mac address detected. Ignoring.");
181             memcpy(netcfg.mac, n->mac, ETH_ALEN);
182         }
183 
184         netcfg.status |= virtio_tswap16(vdev,
185                                         n->status & VIRTIO_NET_S_ANNOUNCE);
186         memcpy(config, &netcfg, n->config_size);
187     }
188 }
189 
190 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
191 {
192     VirtIONet *n = VIRTIO_NET(vdev);
193     struct virtio_net_config netcfg = {};
194     NetClientState *nc = qemu_get_queue(n->nic);
195 
196     memcpy(&netcfg, config, n->config_size);
197 
198     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
199         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
200         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
201         memcpy(n->mac, netcfg.mac, ETH_ALEN);
202         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
203     }
204 
205     /*
206      * Is this VDPA? No peer means not VDPA: there's no way to
207      * disconnect/reconnect a VDPA peer.
208      */
209     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
210         vhost_net_set_config(get_vhost_net(nc->peer),
211                              (uint8_t *)&netcfg, 0, n->config_size,
212                              VHOST_SET_CONFIG_TYPE_FRONTEND);
213     }
214 }
215 
216 static bool virtio_net_started(VirtIONet *n, uint8_t status)
217 {
218     VirtIODevice *vdev = VIRTIO_DEVICE(n);
219     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
220         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
221 }
222 
223 static void virtio_net_announce_notify(VirtIONet *net)
224 {
225     VirtIODevice *vdev = VIRTIO_DEVICE(net);
226     trace_virtio_net_announce_notify();
227 
228     net->status |= VIRTIO_NET_S_ANNOUNCE;
229     virtio_notify_config(vdev);
230 }
231 
232 static void virtio_net_announce_timer(void *opaque)
233 {
234     VirtIONet *n = opaque;
235     trace_virtio_net_announce_timer(n->announce_timer.round);
236 
237     n->announce_timer.round--;
238     virtio_net_announce_notify(n);
239 }
240 
241 static void virtio_net_announce(NetClientState *nc)
242 {
243     VirtIONet *n = qemu_get_nic_opaque(nc);
244     VirtIODevice *vdev = VIRTIO_DEVICE(n);
245 
246     /*
247  * Make sure the virtio migration announcement timer isn't running.
248  * If it is, let it trigger the announcement so that we do not cause
249      * confusion.
250      */
251     if (n->announce_timer.round) {
252         return;
253     }
254 
255     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
256         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
257         virtio_net_announce_notify(n);
258     }
259 }
260 
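/*
 * The NIC owns max_ncs NetClientStates: the first max_queue_pairs back the
 * data queues, so with VIRTIO_NET_F_CTRL_VQ the remainder below (cvq) is
 * the number of control virtqueues handed to vhost_net_start().
 */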
261 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
262 {
263     VirtIODevice *vdev = VIRTIO_DEVICE(n);
264     NetClientState *nc = qemu_get_queue(n->nic);
265     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
266     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
267               n->max_ncs - n->max_queue_pairs : 0;
268 
269     if (!get_vhost_net(nc->peer)) {
270         return;
271     }
272 
273     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
274         !!n->vhost_started) {
275         return;
276     }
277     if (!n->vhost_started) {
278         int r, i;
279 
280         if (n->needs_vnet_hdr_swap) {
281             error_report("backend does not support %s vnet headers; "
282                          "falling back on userspace virtio",
283                          virtio_is_big_endian(vdev) ? "BE" : "LE");
284             return;
285         }
286 
287         /* Any packets outstanding? Purge them to avoid touching rings
288          * when vhost is running.
289          */
290         for (i = 0; i < queue_pairs; i++) {
291             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
292 
293             /* Purge both directions: TX and RX. */
294             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
295             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
296         }
297 
298         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
299             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
300             if (r < 0) {
301                 error_report("%u byte MTU is not supported by the backend",
302                              n->net_conf.mtu);
303 
304                 return;
305             }
306         }
307 
308         n->vhost_started = 1;
309         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
310         if (r < 0) {
311             error_report("unable to start vhost net: %d: "
312                          "falling back on userspace virtio", -r);
313             n->vhost_started = 0;
314         }
315     } else {
316         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
317         n->vhost_started = 0;
318     }
319 }
320 
321 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
322                                           NetClientState *peer,
323                                           bool enable)
324 {
325     if (virtio_is_big_endian(vdev)) {
326         return qemu_set_vnet_be(peer, enable);
327     } else {
328         return qemu_set_vnet_le(peer, enable);
329     }
330 }
331 
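/*
 * Returns true when the backend could not be switched to the guest's
 * endianness, i.e. the device model itself must byte-swap vnet headers
 * (recorded as needs_vnet_hdr_swap by virtio_net_vnet_endian_status()).
 */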
332 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
333                                        int queue_pairs, bool enable)
334 {
335     int i;
336 
337     for (i = 0; i < queue_pairs; i++) {
338         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
339             enable) {
340             while (--i >= 0) {
341                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
342             }
343 
344             return true;
345         }
346     }
347 
348     return false;
349 }
350 
351 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
352 {
353     VirtIODevice *vdev = VIRTIO_DEVICE(n);
354     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
355 
356     if (virtio_net_started(n, status)) {
357         /* Before using the device, we tell the network backend about the
358          * endianness to use when parsing vnet headers. If the backend
359      * can't do it, we fall back to fixing the headers in the core
360          * virtio-net code.
361          */
362         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
363                                                             queue_pairs, true);
364     } else if (virtio_net_started(n, vdev->status)) {
365         /* After using the device, we need to reset the network backend to
366          * the default (guest native endianness), otherwise the guest may
367          * lose network connectivity if it is rebooted into a different
368          * endianness.
369          */
370         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
371     }
372 }
373 
374 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
375 {
376     unsigned int dropped = virtqueue_drop_all(vq);
377     if (dropped) {
378         virtio_notify(vdev, vq);
379     }
380 }
381 
382 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
383 {
384     VirtIONet *n = VIRTIO_NET(vdev);
385     VirtIONetQueue *q;
386     int i;
387     uint8_t queue_status;
388 
389     virtio_net_vnet_endian_status(n, status);
390     virtio_net_vhost_status(n, status);
391 
392     for (i = 0; i < n->max_queue_pairs; i++) {
393         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
394         bool queue_started;
395         q = &n->vqs[i];
396 
397         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
398             queue_status = 0;
399         } else {
400             queue_status = status;
401         }
402         queue_started =
403             virtio_net_started(n, queue_status) && !n->vhost_started;
404 
405         if (queue_started) {
406             qemu_flush_queued_packets(ncs);
407         }
408 
409         if (!q->tx_waiting) {
410             continue;
411         }
412 
413         if (queue_started) {
414             if (q->tx_timer) {
415                 timer_mod(q->tx_timer,
416                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
417             } else {
418                 qemu_bh_schedule(q->tx_bh);
419             }
420         } else {
421             if (q->tx_timer) {
422                 timer_del(q->tx_timer);
423             } else {
424                 qemu_bh_cancel(q->tx_bh);
425             }
426             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
427                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
428                 vdev->vm_running) {
429                 /* if tx is waiting, we likely have some packets in the
430                  * tx queue and notification is disabled */
431                 q->tx_waiting = 0;
432                 virtio_queue_set_notification(q->tx_vq, 1);
433                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
434             }
435         }
436     }
437 }
438 
439 static void virtio_net_set_link_status(NetClientState *nc)
440 {
441     VirtIONet *n = qemu_get_nic_opaque(nc);
442     VirtIODevice *vdev = VIRTIO_DEVICE(n);
443     uint16_t old_status = n->status;
444 
445     if (nc->link_down)
446         n->status &= ~VIRTIO_NET_S_LINK_UP;
447     else
448         n->status |= VIRTIO_NET_S_LINK_UP;
449 
450     if (n->status != old_status)
451         virtio_notify_config(vdev);
452 
453     virtio_net_set_status(vdev, vdev->status);
454 }
455 
456 static void rxfilter_notify(NetClientState *nc)
457 {
458     VirtIONet *n = qemu_get_nic_opaque(nc);
459 
460     if (nc->rxfilter_notify_enabled) {
461         char *path = object_get_canonical_path(OBJECT(n->qdev));
462         qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
463         g_free(path);
464 
465         /* disable event notification to avoid event flooding */
466         nc->rxfilter_notify_enabled = 0;
467     }
468 }
469 
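/*
 * n->vlans is a MAX_VLAN-bit bitmap stored as 32-bit words: word i, bit j
 * stands for VLAN id (i << 5) + j, matching the ADD/DEL arithmetic in
 * virtio_net_handle_vlan_table().
 */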
470 static intList *get_vlan_table(VirtIONet *n)
471 {
472     intList *list;
473     int i, j;
474 
475     list = NULL;
476     for (i = 0; i < MAX_VLAN >> 5; i++) {
477         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
478             if (n->vlans[i] & (1U << j)) {
479                 QAPI_LIST_PREPEND(list, (i << 5) + j);
480             }
481         }
482     }
483 
484     return list;
485 }
486 
487 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
488 {
489     VirtIONet *n = qemu_get_nic_opaque(nc);
490     VirtIODevice *vdev = VIRTIO_DEVICE(n);
491     RxFilterInfo *info;
492     strList *str_list;
493     int i;
494 
495     info = g_malloc0(sizeof(*info));
496     info->name = g_strdup(nc->name);
497     info->promiscuous = n->promisc;
498 
499     if (n->nouni) {
500         info->unicast = RX_STATE_NONE;
501     } else if (n->alluni) {
502         info->unicast = RX_STATE_ALL;
503     } else {
504         info->unicast = RX_STATE_NORMAL;
505     }
506 
507     if (n->nomulti) {
508         info->multicast = RX_STATE_NONE;
509     } else if (n->allmulti) {
510         info->multicast = RX_STATE_ALL;
511     } else {
512         info->multicast = RX_STATE_NORMAL;
513     }
514 
515     info->broadcast_allowed = !n->nobcast;
516     info->multicast_overflow = n->mac_table.multi_overflow;
517     info->unicast_overflow = n->mac_table.uni_overflow;
518 
519     info->main_mac = qemu_mac_strdup_printf(n->mac);
520 
521     str_list = NULL;
522     for (i = 0; i < n->mac_table.first_multi; i++) {
523         QAPI_LIST_PREPEND(str_list,
524                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
525     }
526     info->unicast_table = str_list;
527 
528     str_list = NULL;
529     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
530         QAPI_LIST_PREPEND(str_list,
531                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
532     }
533     info->multicast_table = str_list;
534     info->vlan_table = get_vlan_table(n);
535 
536     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
537         info->vlan = RX_STATE_ALL;
538     } else if (!info->vlan_table) {
539         info->vlan = RX_STATE_NONE;
540     } else {
541         info->vlan = RX_STATE_NORMAL;
542     }
543 
544     /* enable event notification after query */
545     nc->rxfilter_notify_enabled = 1;
546 
547     return info;
548 }
549 
550 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
551 {
552     VirtIONet *n = VIRTIO_NET(vdev);
553     NetClientState *nc;
554 
555     /* validate queue_index and skip for cvq */
556     if (queue_index >= n->max_queue_pairs * 2) {
557         return;
558     }
559 
560     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
561 
562     if (!nc->peer) {
563         return;
564     }
565 
566     if (get_vhost_net(nc->peer) &&
567         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
568         vhost_net_virtqueue_reset(vdev, nc, queue_index);
569     }
570 
571     flush_or_purge_queued_packets(nc);
572 }
573 
574 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
575 {
576     VirtIONet *n = VIRTIO_NET(vdev);
577     NetClientState *nc;
578     int r;
579 
580     /* validate queue_index and skip for cvq */
581     if (queue_index >= n->max_queue_pairs * 2) {
582         return;
583     }
584 
585     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
586 
587     if (!nc->peer || !vdev->vhost_started) {
588         return;
589     }
590 
591     if (get_vhost_net(nc->peer) &&
592         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
593         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
594         if (r < 0) {
595             error_report("unable to restart vhost net virtqueue %d "
596                             "when resetting the queue", queue_index);
597         }
598     }
599 }
600 
601 static void virtio_net_reset(VirtIODevice *vdev)
602 {
603     VirtIONet *n = VIRTIO_NET(vdev);
604     int i;
605 
606     /* Reset back to compatibility mode */
607     n->promisc = 1;
608     n->allmulti = 0;
609     n->alluni = 0;
610     n->nomulti = 0;
611     n->nouni = 0;
612     n->nobcast = 0;
613     /* multiqueue is disabled by default */
614     n->curr_queue_pairs = 1;
615     timer_del(n->announce_timer.tm);
616     n->announce_timer.round = 0;
617     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
618 
619     /* Flush any MAC and VLAN filter table state */
620     n->mac_table.in_use = 0;
621     n->mac_table.first_multi = 0;
622     n->mac_table.multi_overflow = 0;
623     n->mac_table.uni_overflow = 0;
624     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
625     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
626     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
627     memset(n->vlans, 0, MAX_VLAN >> 3);
628 
629     /* Flush any async TX */
630     for (i = 0; i < n->max_queue_pairs; i++) {
631         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
632     }
633 }
634 
635 static void peer_test_vnet_hdr(VirtIONet *n)
636 {
637     NetClientState *nc = qemu_get_queue(n->nic);
638     if (!nc->peer) {
639         return;
640     }
641 
642     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
643 }
644 
645 static int peer_has_vnet_hdr(VirtIONet *n)
646 {
647     return n->has_vnet_hdr;
648 }
649 
650 static int peer_has_ufo(VirtIONet *n)
651 {
652     if (!peer_has_vnet_hdr(n))
653         return 0;
654 
655     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
656 
657     return n->has_ufo;
658 }
659 
660 static int peer_has_uso(VirtIONet *n)
661 {
662     if (!peer_has_vnet_hdr(n)) {
663         return 0;
664     }
665 
666     return qemu_has_uso(qemu_get_queue(n->nic)->peer);
667 }
668 
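/*
 * Guest header sizes: struct virtio_net_hdr is 10 bytes,
 * virtio_net_hdr_mrg_rxbuf appends num_buffers (12 bytes), and
 * virtio_net_hdr_v1_hash appends the hash report fields (20 bytes).
 * With VIRTIO_F_VERSION_1 the mergeable layout is used even when
 * VIRTIO_NET_F_MRG_RXBUF itself was not negotiated.
 */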
669 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
670                                        int version_1, int hash_report)
671 {
672     int i;
673     NetClientState *nc;
674 
675     n->mergeable_rx_bufs = mergeable_rx_bufs;
676 
677     /*
678      * Note: when extending the vnet header, please make sure to
679      * change the vnet header copying logic in virtio_net_flush_tx()
680      * as well.
681      */
682     if (version_1) {
683         n->guest_hdr_len = hash_report ?
684             sizeof(struct virtio_net_hdr_v1_hash) :
685             sizeof(struct virtio_net_hdr_mrg_rxbuf);
686         n->rss_data.populate_hash = !!hash_report;
687     } else {
688         n->guest_hdr_len = n->mergeable_rx_bufs ?
689             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
690             sizeof(struct virtio_net_hdr);
691     }
692 
693     for (i = 0; i < n->max_queue_pairs; i++) {
694         nc = qemu_get_subqueue(n->nic, i);
695 
696         if (peer_has_vnet_hdr(n) &&
697             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
698             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
699             n->host_hdr_len = n->guest_hdr_len;
700         }
701     }
702 }
703 
704 static int virtio_net_max_tx_queue_size(VirtIONet *n)
705 {
706     NetClientState *peer = n->nic_conf.peers.ncs[0];
707 
708     /*
709      * Backends other than vhost-user or vhost-vdpa don't support max queue
710      * size.
711      */
712     if (!peer) {
713         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
714     }
715 
716     switch (peer->info->type) {
717     case NET_CLIENT_DRIVER_VHOST_USER:
718     case NET_CLIENT_DRIVER_VHOST_VDPA:
719         return VIRTQUEUE_MAX_SIZE;
720     default:
721         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
722     }
723 }
724 
725 static int peer_attach(VirtIONet *n, int index)
726 {
727     NetClientState *nc = qemu_get_subqueue(n->nic, index);
728 
729     if (!nc->peer) {
730         return 0;
731     }
732 
733     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
734         vhost_set_vring_enable(nc->peer, 1);
735     }
736 
737     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
738         return 0;
739     }
740 
741     if (n->max_queue_pairs == 1) {
742         return 0;
743     }
744 
745     return tap_enable(nc->peer);
746 }
747 
748 static int peer_detach(VirtIONet *n, int index)
749 {
750     NetClientState *nc = qemu_get_subqueue(n->nic, index);
751 
752     if (!nc->peer) {
753         return 0;
754     }
755 
756     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
757         vhost_set_vring_enable(nc->peer, 0);
758     }
759 
760     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
761         return 0;
762     }
763 
764     return tap_disable(nc->peer);
765 }
766 
767 static void virtio_net_set_queue_pairs(VirtIONet *n)
768 {
769     int i;
770     int r;
771 
772     if (n->nic->peer_deleted) {
773         return;
774     }
775 
776     for (i = 0; i < n->max_queue_pairs; i++) {
777         if (i < n->curr_queue_pairs) {
778             r = peer_attach(n, i);
779             assert(!r);
780         } else {
781             r = peer_detach(n, i);
782             assert(!r);
783         }
784     }
785 }
786 
787 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
788 
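/*
 * Device-side feature negotiation: start from the full host feature set
 * and clear whatever the peer backend cannot actually provide, so that
 * the guest never negotiates an offload QEMU would have to emulate.
 */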
789 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
790                                         Error **errp)
791 {
792     VirtIONet *n = VIRTIO_NET(vdev);
793     NetClientState *nc = qemu_get_queue(n->nic);
794 
795     /* First, sync all features that virtio-net could possibly support */
796     features |= n->host_features;
797 
798     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
799 
800     if (!peer_has_vnet_hdr(n)) {
801         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
802         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
803         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
804         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
805 
806         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
807         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
808         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
809         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
810 
811         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
812         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
813         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
814 
815         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
816     }
817 
818     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
819         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
820         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
821     }
822 
823     if (!peer_has_uso(n)) {
824         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
825         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
826         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
827     }
828 
829     if (!get_vhost_net(nc->peer)) {
830         return features;
831     }
832 
833     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
834         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
835     }
836     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
837     vdev->backend_features = features;
838 
839     if (n->mtu_bypass_backend &&
840             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
841         features |= (1ULL << VIRTIO_NET_F_MTU);
842     }
843 
844     /*
845      * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
846      * the backend having enabled it. This happens in the vDPA case.
847      *
848      * Make sure the feature set is not incoherent, as the driver could refuse
849      * to start.
850      *
851      * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
852      * helping the guest notify its new location even with vDPA devices that
853      * do not support it.
854      */
855     if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
856         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
857     }
858 
859     return features;
860 }
861 
862 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
863 {
864     uint64_t features = 0;
865 
866     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
867      * but also these: */
868     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
869     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
870     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
871     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
872     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
873 
874     return features;
875 }
876 
877 static void virtio_net_apply_guest_offloads(VirtIONet *n)
878 {
879     qemu_set_offload(qemu_get_queue(n->nic)->peer,
880             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
881             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
882             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
883             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
884             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
885             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
886             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
887 }
888 
889 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
890 {
891     static const uint64_t guest_offloads_mask =
892         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
893         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
894         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
895         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
896         (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
897         (1ULL << VIRTIO_NET_F_GUEST_USO4) |
898         (1ULL << VIRTIO_NET_F_GUEST_USO6);
899 
900     return guest_offloads_mask & features;
901 }
902 
903 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
904 {
905     VirtIODevice *vdev = VIRTIO_DEVICE(n);
906     return virtio_net_guest_offloads_by_features(vdev->guest_features);
907 }
908 
909 typedef struct {
910     VirtIONet *n;
911     DeviceState *dev;
912 } FailoverDevice;
913 
914 /**
915  * qbus walk callback that looks for the failover primary device
916  *
917  * @dev: device to check for a matching failover_pair_id
918  * @opaque: FailoverDevice to fill in when the primary is found
919  * Returns: 1 to stop the walk once the primary is found, 0 otherwise
920  */
921 static int failover_set_primary(DeviceState *dev, void *opaque)
922 {
923     FailoverDevice *fdev = opaque;
924     PCIDevice *pci_dev = (PCIDevice *)
925         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
926 
927     if (!pci_dev) {
928         return 0;
929     }
930 
931     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
932         fdev->dev = dev;
933         return 1;
934     }
935 
936     return 0;
937 }
938 
939 /**
940  * Find the primary device for this failover virtio-net
941  *
942  * @n: VirtIONet device
943  * Returns: the primary DeviceState, or NULL if it is not found
944  */
945 static DeviceState *failover_find_primary_device(VirtIONet *n)
946 {
947     FailoverDevice fdev = {
948         .n = n,
949     };
950 
951     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
952                        NULL, NULL, &fdev);
953     return fdev.dev;
954 }
955 
956 static void failover_add_primary(VirtIONet *n, Error **errp)
957 {
958     Error *err = NULL;
959     DeviceState *dev = failover_find_primary_device(n);
960 
961     if (dev) {
962         return;
963     }
964 
965     if (!n->primary_opts) {
966         error_setg(errp, "Primary device not found");
967         error_append_hint(errp, "Virtio-net failover will not work. Make "
968                           "sure the primary device has the parameter"
969                           " failover_pair_id=%s\n", n->netclient_name);
970         return;
971     }
972 
973     dev = qdev_device_add_from_qdict(n->primary_opts,
974                                      n->primary_opts_from_json,
975                                      &err);
976     if (err) {
977         qobject_unref(n->primary_opts);
978         n->primary_opts = NULL;
979     } else {
980         object_unref(OBJECT(dev));
981     }
982     error_propagate(errp, err);
983 }
984 
985 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
986 {
987     VirtIONet *n = VIRTIO_NET(vdev);
988     Error *err = NULL;
989     int i;
990 
991     if (n->mtu_bypass_backend &&
992             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
993         features &= ~(1ULL << VIRTIO_NET_F_MTU);
994     }
995 
996     virtio_net_set_multiqueue(n,
997                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
998                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
999 
1000     virtio_net_set_mrg_rx_bufs(n,
1001                                virtio_has_feature(features,
1002                                                   VIRTIO_NET_F_MRG_RXBUF),
1003                                virtio_has_feature(features,
1004                                                   VIRTIO_F_VERSION_1),
1005                                virtio_has_feature(features,
1006                                                   VIRTIO_NET_F_HASH_REPORT));
1007 
1008     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
1009         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
1010     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
1011         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
1012     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
1013 
1014     if (n->has_vnet_hdr) {
1015         n->curr_guest_offloads =
1016             virtio_net_guest_offloads_by_features(features);
1017         virtio_net_apply_guest_offloads(n);
1018     }
1019 
1020     for (i = 0; i < n->max_queue_pairs; i++) {
1021         NetClientState *nc = qemu_get_subqueue(n->nic, i);
1022 
1023         if (!get_vhost_net(nc->peer)) {
1024             continue;
1025         }
1026         vhost_net_ack_features(get_vhost_net(nc->peer), features);
1027 
1028         /*
1029          * keep acked_features in NetVhostUserState up-to-date so it
1030          * doesn't miss any features configured by the guest virtio driver.
1031          */
1032         vhost_net_save_acked_features(nc->peer);
1033     }
1034 
1035     if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1036         memset(n->vlans, 0xff, MAX_VLAN >> 3);
1037     }
1038 
1039     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1040         qapi_event_send_failover_negotiated(n->netclient_name);
1041         qatomic_set(&n->failover_primary_hidden, false);
1042         failover_add_primary(n, &err);
1043         if (err) {
1044             if (!qtest_enabled()) {
1045                 warn_report_err(err);
1046             } else {
1047                 error_free(err);
1048             }
1049         }
1050     }
1051 }
1052 
1053 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1054                                      struct iovec *iov, unsigned int iov_cnt)
1055 {
1056     uint8_t on;
1057     size_t s;
1058     NetClientState *nc = qemu_get_queue(n->nic);
1059 
1060     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1061     if (s != sizeof(on)) {
1062         return VIRTIO_NET_ERR;
1063     }
1064 
1065     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1066         n->promisc = on;
1067     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1068         n->allmulti = on;
1069     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1070         n->alluni = on;
1071     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1072         n->nomulti = on;
1073     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1074         n->nouni = on;
1075     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1076         n->nobcast = on;
1077     } else {
1078         return VIRTIO_NET_ERR;
1079     }
1080 
1081     rxfilter_notify(nc);
1082 
1083     return VIRTIO_NET_OK;
1084 }
1085 
1086 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1087                                      struct iovec *iov, unsigned int iov_cnt)
1088 {
1089     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1090     uint64_t offloads;
1091     size_t s;
1092 
1093     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1094         return VIRTIO_NET_ERR;
1095     }
1096 
1097     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1098     if (s != sizeof(offloads)) {
1099         return VIRTIO_NET_ERR;
1100     }
1101 
1102     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1103         uint64_t supported_offloads;
1104 
1105         offloads = virtio_ldq_p(vdev, &offloads);
1106 
1107         if (!n->has_vnet_hdr) {
1108             return VIRTIO_NET_ERR;
1109         }
1110 
1111         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1112             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1113         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1114             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1115         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1116 
1117         supported_offloads = virtio_net_supported_guest_offloads(n);
1118         if (offloads & ~supported_offloads) {
1119             return VIRTIO_NET_ERR;
1120         }
1121 
1122         n->curr_guest_offloads = offloads;
1123         virtio_net_apply_guest_offloads(n);
1124 
1125         return VIRTIO_NET_OK;
1126     } else {
1127         return VIRTIO_NET_ERR;
1128     }
1129 }
1130 
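/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks
 * back to back, unicast first and then multicast, each one a 32-bit entry
 * count followed by that many 6-byte MAC addresses.
 */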
1131 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1132                                  struct iovec *iov, unsigned int iov_cnt)
1133 {
1134     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1135     struct virtio_net_ctrl_mac mac_data;
1136     size_t s;
1137     NetClientState *nc = qemu_get_queue(n->nic);
1138 
1139     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1140         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1141             return VIRTIO_NET_ERR;
1142         }
1143         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1144         assert(s == sizeof(n->mac));
1145         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1146         rxfilter_notify(nc);
1147 
1148         return VIRTIO_NET_OK;
1149     }
1150 
1151     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1152         return VIRTIO_NET_ERR;
1153     }
1154 
1155     int in_use = 0;
1156     int first_multi = 0;
1157     uint8_t uni_overflow = 0;
1158     uint8_t multi_overflow = 0;
1159     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1160 
1161     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1162                    sizeof(mac_data.entries));
1163     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1164     if (s != sizeof(mac_data.entries)) {
1165         goto error;
1166     }
1167     iov_discard_front(&iov, &iov_cnt, s);
1168 
1169     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1170         goto error;
1171     }
1172 
1173     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1174         s = iov_to_buf(iov, iov_cnt, 0, macs,
1175                        mac_data.entries * ETH_ALEN);
1176         if (s != mac_data.entries * ETH_ALEN) {
1177             goto error;
1178         }
1179         in_use += mac_data.entries;
1180     } else {
1181         uni_overflow = 1;
1182     }
1183 
1184     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1185 
1186     first_multi = in_use;
1187 
1188     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1189                    sizeof(mac_data.entries));
1190     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1191     if (s != sizeof(mac_data.entries)) {
1192         goto error;
1193     }
1194 
1195     iov_discard_front(&iov, &iov_cnt, s);
1196 
1197     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1198         goto error;
1199     }
1200 
1201     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1202         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1203                        mac_data.entries * ETH_ALEN);
1204         if (s != mac_data.entries * ETH_ALEN) {
1205             goto error;
1206         }
1207         in_use += mac_data.entries;
1208     } else {
1209         multi_overflow = 1;
1210     }
1211 
1212     n->mac_table.in_use = in_use;
1213     n->mac_table.first_multi = first_multi;
1214     n->mac_table.uni_overflow = uni_overflow;
1215     n->mac_table.multi_overflow = multi_overflow;
1216     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1217     g_free(macs);
1218     rxfilter_notify(nc);
1219 
1220     return VIRTIO_NET_OK;
1221 
1222 error:
1223     g_free(macs);
1224     return VIRTIO_NET_ERR;
1225 }
1226 
1227 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1228                                         struct iovec *iov, unsigned int iov_cnt)
1229 {
1230     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1231     uint16_t vid;
1232     size_t s;
1233     NetClientState *nc = qemu_get_queue(n->nic);
1234 
1235     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1236     vid = virtio_lduw_p(vdev, &vid);
1237     if (s != sizeof(vid)) {
1238         return VIRTIO_NET_ERR;
1239     }
1240 
1241     if (vid >= MAX_VLAN)
1242         return VIRTIO_NET_ERR;
1243 
1244     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1245         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1246     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1247         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1248     else
1249         return VIRTIO_NET_ERR;
1250 
1251     rxfilter_notify(nc);
1252 
1253     return VIRTIO_NET_OK;
1254 }
1255 
1256 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1257                                       struct iovec *iov, unsigned int iov_cnt)
1258 {
1259     trace_virtio_net_handle_announce(n->announce_timer.round);
1260     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1261         n->status & VIRTIO_NET_S_ANNOUNCE) {
1262         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1263         if (n->announce_timer.round) {
1264             qemu_announce_timer_step(&n->announce_timer);
1265         }
1266         return VIRTIO_NET_OK;
1267     } else {
1268         return VIRTIO_NET_ERR;
1269     }
1270 }
1271 
1272 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1273 
1274 static void virtio_net_disable_rss(VirtIONet *n)
1275 {
1276     if (n->rss_data.enabled) {
1277         trace_virtio_net_rss_disable();
1278     }
1279     n->rss_data.enabled = false;
1280 
1281     virtio_net_detach_epbf_rss(n);
1282 }
1283 
1284 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1285 {
1286     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1287     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1288         return false;
1289     }
1290 
1291     return nc->info->set_steering_ebpf(nc, prog_fd);
1292 }
1293 
1294 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1295                                    struct EBPFRSSConfig *config)
1296 {
1297     config->redirect = data->redirect;
1298     config->populate_hash = data->populate_hash;
1299     config->hash_types = data->hash_types;
1300     config->indirections_len = data->indirections_len;
1301     config->default_queue = data->default_queue;
1302 }
1303 
1304 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1305 {
1306     struct EBPFRSSConfig config = {};
1307 
1308     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1309         return false;
1310     }
1311 
1312     rss_data_to_rss_config(&n->rss_data, &config);
1313 
1314     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1315                           n->rss_data.indirections_table, n->rss_data.key)) {
1316         return false;
1317     }
1318 
1319     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1320         return false;
1321     }
1322 
1323     return true;
1324 }
1325 
1326 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1327 {
1328     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1329 }
1330 
1331 static bool virtio_net_load_ebpf(VirtIONet *n)
1332 {
1333     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1334         /* backend doesn't support steering ebpf */
1335         return false;
1336     }
1337 
1338     return ebpf_rss_load(&n->ebpf_rss);
1339 }
1340 
1341 static void virtio_net_unload_ebpf(VirtIONet *n)
1342 {
1343     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1344     ebpf_rss_unload(&n->ebpf_rss);
1345 }
1346 
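/*
 * Common parser for VIRTIO_NET_CTRL_MQ_RSS_CONFIG (do_rss == true) and
 * VIRTIO_NET_CTRL_MQ_HASH_CONFIG: reads a struct virtio_net_rss_config
 * (hash types, indirection table and default queue, then key length and
 * hash key) and returns the number of queue pairs, or 0 on error.
 */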
1347 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1348                                       struct iovec *iov,
1349                                       unsigned int iov_cnt,
1350                                       bool do_rss)
1351 {
1352     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1353     struct virtio_net_rss_config cfg;
1354     size_t s, offset = 0, size_get;
1355     uint16_t queue_pairs, i;
1356     struct {
1357         uint16_t us;
1358         uint8_t b;
1359     } QEMU_PACKED temp;
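    /*
     * temp mirrors the fixed tail of struct virtio_net_rss_config that
     * follows the variable-length indirection table: max_tx_vq (le16)
     * and hash_key_length (u8), fetched with a single iov_to_buf() below.
     */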
1360     const char *err_msg = "";
1361     uint32_t err_value = 0;
1362 
1363     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1364         err_msg = "RSS is not negotiated";
1365         goto error;
1366     }
1367     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1368         err_msg = "Hash report is not negotiated";
1369         goto error;
1370     }
1371     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1372     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1373     if (s != size_get) {
1374         err_msg = "Short command buffer";
1375         err_value = (uint32_t)s;
1376         goto error;
1377     }
1378     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1379     n->rss_data.indirections_len =
1380         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1381     n->rss_data.indirections_len++;
1382     if (!do_rss) {
1383         n->rss_data.indirections_len = 1;
1384     }
1385     if (!is_power_of_2(n->rss_data.indirections_len)) {
1386         err_msg = "Invalid size of indirection table";
1387         err_value = n->rss_data.indirections_len;
1388         goto error;
1389     }
1390     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1391         err_msg = "Too large indirection table";
1392         err_value = n->rss_data.indirections_len;
1393         goto error;
1394     }
1395     n->rss_data.default_queue = do_rss ?
1396         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1397     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1398         err_msg = "Invalid default queue";
1399         err_value = n->rss_data.default_queue;
1400         goto error;
1401     }
1402     offset += size_get;
1403     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1404     g_free(n->rss_data.indirections_table);
1405     n->rss_data.indirections_table = g_malloc(size_get);
1406     if (!n->rss_data.indirections_table) {
1407         err_msg = "Can't allocate indirections table";
1408         err_value = n->rss_data.indirections_len;
1409         goto error;
1410     }
1411     s = iov_to_buf(iov, iov_cnt, offset,
1412                    n->rss_data.indirections_table, size_get);
1413     if (s != size_get) {
1414         err_msg = "Short indirection table buffer";
1415         err_value = (uint32_t)s;
1416         goto error;
1417     }
1418     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1419         uint16_t val = n->rss_data.indirections_table[i];
1420         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1421     }
1422     offset += size_get;
1423     size_get = sizeof(temp);
1424     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1425     if (s != size_get) {
1426         err_msg = "Can't get queue_pairs";
1427         err_value = (uint32_t)s;
1428         goto error;
1429     }
1430     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1431     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1432         err_msg = "Invalid number of queue_pairs";
1433         err_value = queue_pairs;
1434         goto error;
1435     }
1436     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1437         err_msg = "Invalid key size";
1438         err_value = temp.b;
1439         goto error;
1440     }
1441     if (!temp.b && n->rss_data.hash_types) {
1442         err_msg = "No key provided";
1443         err_value = 0;
1444         goto error;
1445     }
1446     if (!temp.b && !n->rss_data.hash_types) {
1447         virtio_net_disable_rss(n);
1448         return queue_pairs;
1449     }
1450     offset += size_get;
1451     size_get = temp.b;
1452     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1453     if (s != size_get) {
1454         err_msg = "Can't get key buffer";
1455         err_value = (uint32_t)s;
1456         goto error;
1457     }
1458     n->rss_data.enabled = true;
1459 
1460     if (!n->rss_data.populate_hash) {
1461         if (!virtio_net_attach_epbf_rss(n)) {
1462             /* eBPF must be loaded for vhost */
1463             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1464                 warn_report("Can't load eBPF RSS for vhost");
1465                 goto error;
1466             }
1467             /* fall back to software RSS */
1468             warn_report("Can't load eBPF RSS - falling back to software RSS");
1469             n->rss_data.enabled_software_rss = true;
1470         }
1471     } else {
1472         /* use software RSS for hash populating and detach eBPF if it
1473          * was loaded before */
1474         virtio_net_detach_epbf_rss(n);
1475         n->rss_data.enabled_software_rss = true;
1476     }
1477 
1478     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1479                                 n->rss_data.indirections_len,
1480                                 temp.b);
1481     return queue_pairs;
1482 error:
1483     trace_virtio_net_rss_error(err_msg, err_value);
1484     virtio_net_disable_rss(n);
1485     return 0;
1486 }
1487 
1488 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1489                                 struct iovec *iov, unsigned int iov_cnt)
1490 {
1491     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1492     uint16_t queue_pairs;
1493     NetClientState *nc = qemu_get_queue(n->nic);
1494 
1495     virtio_net_disable_rss(n);
1496     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1497         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1498         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1499     }
1500     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1501         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1502     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1503         struct virtio_net_ctrl_mq mq;
1504         size_t s;
1505         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1506             return VIRTIO_NET_ERR;
1507         }
1508         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1509         if (s != sizeof(mq)) {
1510             return VIRTIO_NET_ERR;
1511         }
1512         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1513 
1514     } else {
1515         return VIRTIO_NET_ERR;
1516     }
1517 
1518     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1519         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1520         queue_pairs > n->max_queue_pairs ||
1521         !n->multiqueue) {
1522         return VIRTIO_NET_ERR;
1523     }
1524 
1525     n->curr_queue_pairs = queue_pairs;
1526     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1527         /*
1528          * Avoid updating the backend for a vdpa device: We're only interested
1529          * in updating the device model queues.
1530          */
1531         return VIRTIO_NET_OK;
1532     }
1533     /* stop the backend before changing the number of queue_pairs to avoid handling a
1534      * disabled queue */
1535     virtio_net_set_status(vdev, vdev->status);
1536     virtio_net_set_queue_pairs(n);
1537 
1538     return VIRTIO_NET_OK;
1539 }
1540 
1541 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1542                                   const struct iovec *in_sg, unsigned in_num,
1543                                   const struct iovec *out_sg,
1544                                   unsigned out_num)
1545 {
1546     VirtIONet *n = VIRTIO_NET(vdev);
1547     struct virtio_net_ctrl_hdr ctrl;
1548     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1549     size_t s;
1550     struct iovec *iov, *iov2;
1551 
1552     if (iov_size(in_sg, in_num) < sizeof(status) ||
1553         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1554         virtio_error(vdev, "virtio-net ctrl missing headers");
1555         return 0;
1556     }
1557 
1558     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1559     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1560     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1561     if (s != sizeof(ctrl)) {
1562         status = VIRTIO_NET_ERR;
1563     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1564         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1565     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1566         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1567     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1568         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1569     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1570         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1571     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1572         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1573     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1574         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1575     }
1576 
1577     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1578     assert(s == sizeof(status));
1579 
1580     g_free(iov2);
1581     return sizeof(status);
1582 }
1583 
1584 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1585 {
1586     VirtQueueElement *elem;
1587 
1588     for (;;) {
1589         size_t written;
1590         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1591         if (!elem) {
1592             break;
1593         }
1594 
1595         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1596                                              elem->out_sg, elem->out_num);
1597         if (written > 0) {
1598             virtqueue_push(vq, elem, written);
1599             virtio_notify(vdev, vq);
1600             g_free(elem);
1601         } else {
1602             virtqueue_detach_element(vq, elem, 0);
1603             g_free(elem);
1604             break;
1605         }
1606     }
1607 }
1608 
1609 /* RX */
1610 
1611 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1612 {
1613     VirtIONet *n = VIRTIO_NET(vdev);
1614     int queue_index = vq2q(virtio_get_queue_index(vq));
1615 
1616     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1617 }
1618 
1619 static bool virtio_net_can_receive(NetClientState *nc)
1620 {
1621     VirtIONet *n = qemu_get_nic_opaque(nc);
1622     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1623     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1624 
1625     if (!vdev->vm_running) {
1626         return false;
1627     }
1628 
1629     if (nc->queue_index >= n->curr_queue_pairs) {
1630         return false;
1631     }
1632 
1633     if (!virtio_queue_ready(q->rx_vq) ||
1634         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1635         return false;
1636     }
1637 
1638     return true;
1639 }
1640 
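/*
 * Returns 1 if the RX virtqueue can hold a packet of 'bufsize' bytes.
 * If not, guest notifications are re-enabled and the check is repeated,
 * to close the race against the guest adding buffers concurrently.
 */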
1641 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1642 {
1643     VirtIONet *n = q->n;
1644     if (virtio_queue_empty(q->rx_vq) ||
1645         (n->mergeable_rx_bufs &&
1646          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1647         virtio_queue_set_notification(q->rx_vq, 1);
1648 
1649         /* To avoid a race condition where the guest has made some buffers
1650          * available after the above check but before notification was
1651          * enabled, check for available buffers again.
1652          */
1653         if (virtio_queue_empty(q->rx_vq) ||
1654             (n->mergeable_rx_bufs &&
1655              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1656             return 0;
1657         }
1658     }
1659 
1660     virtio_queue_set_notification(q->rx_vq, 0);
1661     return 1;
1662 }
1663 
1664 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1665 {
1666     virtio_tswap16s(vdev, &hdr->hdr_len);
1667     virtio_tswap16s(vdev, &hdr->gso_size);
1668     virtio_tswap16s(vdev, &hdr->csum_start);
1669     virtio_tswap16s(vdev, &hdr->csum_offset);
1670 }
1671 
1672 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1673  * it never finds out that the packets don't have valid checksums.  This
1674  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1675  * fix this with Xen but it hasn't appeared in an upstream release of
1676  * dhclient yet.
1677  *
1678  * To avoid breaking existing guests, we catch udp packets and add
1679  * checksums.  This is terrible but it's better than hacking the guest
1680  * kernels.
1681  *
1682  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1683  * we should provide a mechanism to disable it to avoid polluting the host
1684  * cache.
1685  */
1686 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1687                                         uint8_t *buf, size_t size)
1688 {
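    /*
     * The byte offsets below assume an untagged Ethernet frame carrying
     * an IPv4 packet with no IP options: the ethertype sits at bytes
     * 12-13, the IP protocol at 14 + 9 = 23, and the UDP source port at
     * 14 + 20 = 34-35.
     */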
1689     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1690         (size > 27 && size < 1500) && /* normal sized MTU */
1691         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1692         (buf[23] == 17) && /* ip.protocol == UDP */
1693         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1694         net_checksum_calculate(buf, size, CSUM_UDP);
1695         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1696     }
1697 }
1698 
1699 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1700                            const void *buf, size_t size)
1701 {
1702     if (n->has_vnet_hdr) {
1703         /* FIXME this cast is evil */
1704         void *wbuf = (void *)buf;
1705         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1706                                     size - n->host_hdr_len);
1707 
1708         if (n->needs_vnet_hdr_swap) {
1709             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1710         }
1711         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1712     } else {
1713         struct virtio_net_hdr hdr = {
1714             .flags = 0,
1715             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1716         };
1717         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1718     }
1719 }
1720 
1721 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1722 {
1723     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1724     static const uint8_t vlan[] = {0x81, 0x00};
1725     uint8_t *ptr = (uint8_t *)buf;
1726     int i;
1727 
1728     if (n->promisc)
1729         return 1;
1730 
1731     ptr += n->host_hdr_len;
1732 
1733     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1734         int vid = lduw_be_p(ptr + 14) & 0xfff;
1735         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1736             return 0;
1737     }
1738 
1739     if (ptr[0] & 1) { /* multicast */
1740         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1741             return !n->nobcast;
1742         } else if (n->nomulti) {
1743             return 0;
1744         } else if (n->allmulti || n->mac_table.multi_overflow) {
1745             return 1;
1746         }
1747 
1748         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1749             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1750                 return 1;
1751             }
1752         }
1753     } else { /* unicast */
1754         if (n->nouni) {
1755             return 0;
1756         } else if (n->alluni || n->mac_table.uni_overflow) {
1757             return 1;
1758         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1759             return 1;
1760         }
1761 
1762         for (i = 0; i < n->mac_table.first_multi; i++) {
1763             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1764                 return 1;
1765             }
1766         }
1767     }
1768 
1769     return 0;
1770 }
1771 
1772 static uint8_t virtio_net_get_hash_type(bool hasip4,
1773                                         bool hasip6,
1774                                         EthL4HdrProto l4hdr_proto,
1775                                         uint32_t types)
1776 {
1777     if (hasip4) {
1778         switch (l4hdr_proto) {
1779         case ETH_L4_HDR_PROTO_TCP:
1780             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1781                 return NetPktRssIpV4Tcp;
1782             }
1783             break;
1784 
1785         case ETH_L4_HDR_PROTO_UDP:
1786             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1787                 return NetPktRssIpV4Udp;
1788             }
1789             break;
1790 
1791         default:
1792             break;
1793         }
1794 
1795         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1796             return NetPktRssIpV4;
1797         }
1798     } else if (hasip6) {
1799         switch (l4hdr_proto) {
1800         case ETH_L4_HDR_PROTO_TCP:
1801             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1802                 return NetPktRssIpV6TcpEx;
1803             }
1804             if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1805                 return NetPktRssIpV6Tcp;
1806             }
1807             break;
1808 
1809         case ETH_L4_HDR_PROTO_UDP:
1810             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1811                 return NetPktRssIpV6UdpEx;
1812             }
1813             if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1814                 return NetPktRssIpV6Udp;
1815             }
1816             break;
1817 
1818         default:
1819             break;
1820         }
1821 
1822         if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1823             return NetPktRssIpV6Ex;
1824         }
1825         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1826             return NetPktRssIpV6;
1827         }
1828     }
1829     return 0xff;
1830 }
1831 
1832 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1833                                    uint32_t hash)
1834 {
1835     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1836     hdr->hash_value = hash;
1837     hdr->hash_report = report;
1838 }
1839 
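/*
 * Calculate the RSS hash for the packet and, if requested, store it in
 * the virtio header.  Returns the queue index the packet should be
 * redirected to, or -1 if it should stay on the current queue.
 */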
1840 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1841                                   size_t size)
1842 {
1843     VirtIONet *n = qemu_get_nic_opaque(nc);
1844     unsigned int index = nc->queue_index, new_index = index;
1845     struct NetRxPkt *pkt = n->rx_pkt;
1846     uint8_t net_hash_type;
1847     uint32_t hash;
1848     bool hasip4, hasip6;
1849     EthL4HdrProto l4hdr_proto;
1850     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1851         VIRTIO_NET_HASH_REPORT_IPv4,
1852         VIRTIO_NET_HASH_REPORT_TCPv4,
1853         VIRTIO_NET_HASH_REPORT_TCPv6,
1854         VIRTIO_NET_HASH_REPORT_IPv6,
1855         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1856         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1857         VIRTIO_NET_HASH_REPORT_UDPv4,
1858         VIRTIO_NET_HASH_REPORT_UDPv6,
1859         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1860     };
1861     struct iovec iov = {
1862         .iov_base = (void *)buf,
1863         .iov_len = size
1864     };
1865 
1866     net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1867     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1868     net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1869                                              n->rss_data.hash_types);
1870     if (net_hash_type > NetPktRssIpV6UdpEx) {
1871         if (n->rss_data.populate_hash) {
1872             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1873         }
1874         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1875     }
1876 
1877     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1878 
1879     if (n->rss_data.populate_hash) {
1880         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1881     }
1882 
1883     if (n->rss_data.redirect) {
1884         new_index = hash & (n->rss_data.indirections_len - 1);
1885         new_index = n->rss_data.indirections_table[new_index];
1886     }
1887 
1888     return (index == new_index) ? -1 : new_index;
1889 }
1890 
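/*
 * Copy one incoming packet into the guest's RX virtqueue.  With
 * mergeable RX buffers the packet may span several descriptor chains;
 * the number of chains used is written back into the num_buffers field
 * of the first header once the copy is complete.
 */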
1891 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1892                                       size_t size, bool no_rss)
1893 {
1894     VirtIONet *n = qemu_get_nic_opaque(nc);
1895     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1896     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1897     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1898     size_t lens[VIRTQUEUE_MAX_SIZE];
1899     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1900     struct virtio_net_hdr_mrg_rxbuf mhdr;
1901     unsigned mhdr_cnt = 0;
1902     size_t offset, i, guest_offset, j;
1903     ssize_t err;
1904 
1905     if (!virtio_net_can_receive(nc)) {
1906         return -1;
1907     }
1908 
1909     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1910         int index = virtio_net_process_rss(nc, buf, size);
1911         if (index >= 0) {
1912             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1913             return virtio_net_receive_rcu(nc2, buf, size, true);
1914         }
1915     }
1916 
1917     /* hdr_len refers to the header we supply to the guest */
1918     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1919         return 0;
1920     }
1921 
1922     if (!receive_filter(n, buf, size))
1923         return size;
1924 
1925     offset = i = 0;
1926 
1927     while (offset < size) {
1928         VirtQueueElement *elem;
1929         int len, total;
1930         const struct iovec *sg;
1931 
1932         total = 0;
1933 
1934         if (i == VIRTQUEUE_MAX_SIZE) {
1935             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1936             err = size;
1937             goto err;
1938         }
1939 
1940         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1941         if (!elem) {
1942             if (i) {
1943                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1944                              "i %zd mergeable %d offset %zd, size %zd, "
1945                              "guest hdr len %zd, host hdr len %zd "
1946                              "guest features 0x%" PRIx64,
1947                              i, n->mergeable_rx_bufs, offset, size,
1948                              n->guest_hdr_len, n->host_hdr_len,
1949                              vdev->guest_features);
1950             }
1951             err = -1;
1952             goto err;
1953         }
1954 
1955         if (elem->in_num < 1) {
1956             virtio_error(vdev,
1957                          "virtio-net receive queue contains no in buffers");
1958             virtqueue_detach_element(q->rx_vq, elem, 0);
1959             g_free(elem);
1960             err = -1;
1961             goto err;
1962         }
1963 
1964         sg = elem->in_sg;
1965         if (i == 0) {
1966             assert(offset == 0);
1967             if (n->mergeable_rx_bufs) {
1968                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1969                                     sg, elem->in_num,
1970                                     offsetof(typeof(mhdr), num_buffers),
1971                                     sizeof(mhdr.num_buffers));
1972             }
1973 
1974             receive_header(n, sg, elem->in_num, buf, size);
1975             if (n->rss_data.populate_hash) {
1976                 offset = sizeof(mhdr);
1977                 iov_from_buf(sg, elem->in_num, offset,
1978                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1979             }
1980             offset = n->host_hdr_len;
1981             total += n->guest_hdr_len;
1982             guest_offset = n->guest_hdr_len;
1983         } else {
1984             guest_offset = 0;
1985         }
1986 
1987         /* copy in packet.  ugh */
1988         len = iov_from_buf(sg, elem->in_num, guest_offset,
1989                            buf + offset, size - offset);
1990         total += len;
1991         offset += len;
1992         /* If buffers can't be merged, at this point we
1993          * must have consumed the complete packet.
1994          * Otherwise, drop it. */
1995         if (!n->mergeable_rx_bufs && offset < size) {
1996             virtqueue_unpop(q->rx_vq, elem, total);
1997             g_free(elem);
1998             err = size;
1999             goto err;
2000         }
2001 
2002         /* Mark the page as dirty in the guest memory bitmap */
2003         if (vdev->lm_logging_ctrl == LM_ENABLE) {
2004             uint64_t chunk = elem->in_addr[i] / VHOST_LOG_CHUNK;
2005             /* Get chunk index */
2006             BitmapMemoryRegionCaches *caches = qatomic_rcu_read(&vdev->caches);
2007             uint64_t index = chunk / 8;
2008             uint64_t shift = chunk % 8;
2009             uint8_t val = 0;
2010             address_space_read_cached(&caches->bitmap, index, &val,
2011                                       sizeof(val));
2012             val |= 1 << shift;
2013             address_space_write_cached(&caches->bitmap, index, &val,
2014                                        sizeof(val));
2015             address_space_cache_invalidate(&caches->bitmap, index, sizeof(val));
2016         }
2017 
2018         elems[i] = elem;
2019         lens[i] = total;
2020         i++;
2021     }
2022 
2023     if (mhdr_cnt) {
2024         virtio_stw_p(vdev, &mhdr.num_buffers, i);
2025         iov_from_buf(mhdr_sg, mhdr_cnt,
2026                      0,
2027                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
2028     }
2029 
2030     for (j = 0; j < i; j++) {
2031         /* signal other side */
2032         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2033         g_free(elems[j]);
2034     }
2035 
2036     virtqueue_flush(q->rx_vq, i);
2037     virtio_notify(vdev, q->rx_vq);
2038 
2039     return size;
2040 
2041 err:
2042     for (j = 0; j < i; j++) {
2043         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2044         g_free(elems[j]);
2045     }
2046 
2047     return err;
2048 }
2049 
2050 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2051                                   size_t size)
2052 {
2053     RCU_READ_LOCK_GUARD();
2054 
2055     return virtio_net_receive_rcu(nc, buf, size, false);
2056 }
2057 
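/*
 * RSC (Receive Segment Coalescing): merge consecutive TCP segments of
 * the same flow into a single larger segment before delivering it to
 * the guest, and report the segment/dup-ack counts through the rsc
 * fields of the virtio header.  Separate chains are kept per protocol
 * (IPv4 and IPv6).
 */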
2058 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2059                                          const uint8_t *buf,
2060                                          VirtioNetRscUnit *unit)
2061 {
2062     uint16_t ip_hdrlen;
2063     struct ip_header *ip;
2064 
2065     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2066                               + sizeof(struct eth_header));
2067     unit->ip = (void *)ip;
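    /* IHL is expressed in 32-bit words; convert it to bytes */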
2068     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2069     unit->ip_plen = &ip->ip_len;
2070     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
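    /*
     * The TCP data offset lives in the top 4 bits of th_offset_flags, in
     * 32-bit words: (x & 0xF000) >> 12 gives words, << 2 gives bytes,
     * hence the combined >> 10.
     */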
2071     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2072     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2073 }
2074 
2075 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2076                                          const uint8_t *buf,
2077                                          VirtioNetRscUnit *unit)
2078 {
2079     struct ip6_header *ip6;
2080 
2081     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2082                                  + sizeof(struct eth_header));
2083     unit->ip = ip6;
2084     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2085     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2086                                         + sizeof(struct ip6_header));
2087     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2088 
2089     /* Unlike IPv4, the IPv6 payload length field does not include the
2090        IP header itself */
2091     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2092 }
2093 
2094 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2095                                        VirtioNetRscSeg *seg)
2096 {
2097     int ret;
2098     struct virtio_net_hdr_v1 *h;
2099 
2100     h = (struct virtio_net_hdr_v1 *)seg->buf;
2101     h->flags = 0;
2102     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2103 
2104     if (seg->is_coalesced) {
2105         h->rsc.segments = seg->packets;
2106         h->rsc.dup_acks = seg->dup_ack;
2107         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2108         if (chain->proto == ETH_P_IP) {
2109             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2110         } else {
2111             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2112         }
2113     }
2114 
2115     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2116     QTAILQ_REMOVE(&chain->buffers, seg, next);
2117     g_free(seg->buf);
2118     g_free(seg);
2119 
2120     return ret;
2121 }
2122 
2123 static void virtio_net_rsc_purge(void *opq)
2124 {
2125     VirtioNetRscSeg *seg, *rn;
2126     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2127 
2128     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2129         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2130             chain->stat.purge_failed++;
2131             continue;
2132         }
2133     }
2134 
2135     chain->stat.timer++;
2136     if (!QTAILQ_EMPTY(&chain->buffers)) {
2137         timer_mod(chain->drain_timer,
2138               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2139     }
2140 }
2141 
2142 static void virtio_net_rsc_cleanup(VirtIONet *n)
2143 {
2144     VirtioNetRscChain *chain, *rn_chain;
2145     VirtioNetRscSeg *seg, *rn_seg;
2146 
2147     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2148         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2149             QTAILQ_REMOVE(&chain->buffers, seg, next);
2150             g_free(seg->buf);
2151             g_free(seg);
2152         }
2153 
2154         timer_free(chain->drain_timer);
2155         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2156         g_free(chain);
2157     }
2158 }
2159 
2160 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2161                                      NetClientState *nc,
2162                                      const uint8_t *buf, size_t size)
2163 {
2164     uint16_t hdr_len;
2165     VirtioNetRscSeg *seg;
2166 
2167     hdr_len = chain->n->guest_hdr_len;
2168     seg = g_new(VirtioNetRscSeg, 1);
2169     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2170         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2171     memcpy(seg->buf, buf, size);
2172     seg->size = size;
2173     seg->packets = 1;
2174     seg->dup_ack = 0;
2175     seg->is_coalesced = 0;
2176     seg->nc = nc;
2177 
2178     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2179     chain->stat.cache++;
2180 
2181     switch (chain->proto) {
2182     case ETH_P_IP:
2183         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2184         break;
2185     case ETH_P_IPV6:
2186         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2187         break;
2188     default:
2189         g_assert_not_reached();
2190     }
2191 }
2192 
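/*
 * Decide what to do with a segment that carries no new payload relative
 * to the cached one: a duplicate ack, a window update, or a pure ack.
 */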
2193 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2194                                          VirtioNetRscSeg *seg,
2195                                          const uint8_t *buf,
2196                                          struct tcp_header *n_tcp,
2197                                          struct tcp_header *o_tcp)
2198 {
2199     uint32_t nack, oack;
2200     uint16_t nwin, owin;
2201 
2202     nack = htonl(n_tcp->th_ack);
2203     nwin = htons(n_tcp->th_win);
2204     oack = htonl(o_tcp->th_ack);
2205     owin = htons(o_tcp->th_win);
2206 
2207     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2208         chain->stat.ack_out_of_win++;
2209         return RSC_FINAL;
2210     } else if (nack == oack) {
2211         /* duplicate ack or window probe */
2212         if (nwin == owin) {
2213             /* duplicate ack: bump the dup-ack count (WHQL expects up to 1) */
2214             chain->stat.dup_ack++;
2215             return RSC_FINAL;
2216         } else {
2217             /* Coalesce window update */
2218             o_tcp->th_win = n_tcp->th_win;
2219             chain->stat.win_update++;
2220             return RSC_COALESCE;
2221         }
2222     } else {
2223             /* pure ack, go to 'C', finalize */
2224         chain->stat.pure_ack++;
2225         return RSC_FINAL;
2226     }
2227 }
2228 
2229 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2230                                             VirtioNetRscSeg *seg,
2231                                             const uint8_t *buf,
2232                                             VirtioNetRscUnit *n_unit)
2233 {
2234     void *data;
2235     uint16_t o_ip_len;
2236     uint32_t nseq, oseq;
2237     VirtioNetRscUnit *o_unit;
2238 
2239     o_unit = &seg->unit;
2240     o_ip_len = htons(*o_unit->ip_plen);
2241     nseq = htonl(n_unit->tcp->th_seq);
2242     oseq = htonl(o_unit->tcp->th_seq);
2243 
2244     /* out of order or retransmitted. */
2245     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2246         chain->stat.data_out_of_win++;
2247         return RSC_FINAL;
2248     }
2249 
2250     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2251     if (nseq == oseq) {
2252         if ((o_unit->payload == 0) && n_unit->payload) {
2253             /* From no payload to payload: the normal case, not a dup ack etc. */
2254             chain->stat.data_after_pure_ack++;
2255             goto coalesce;
2256         } else {
2257             return virtio_net_rsc_handle_ack(chain, seg, buf,
2258                                              n_unit->tcp, o_unit->tcp);
2259         }
2260     } else if ((nseq - oseq) != o_unit->payload) {
2261         /* Not a consistent packet, out of order */
2262         chain->stat.data_out_of_order++;
2263         return RSC_FINAL;
2264     } else {
2265 coalesce:
2266         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2267             chain->stat.over_size++;
2268             return RSC_FINAL;
2269         }
2270 
2271         /* The data is in order; the payload length field differs between
2272            v4 and v6, so use its value to update and record the new data len */
2273         o_unit->payload += n_unit->payload; /* update new data len */
2274 
2275         /* update field in ip header */
2276         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2277 
2278         /* Propagate the 'PUSH' flag: the whql test guide says 'PUSH' can be
2279            coalesced for a Windows guest, while this may change the behavior
2280            for a Linux guest (only if it uses the RSC feature). */
2281         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2282 
2283         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2284         o_unit->tcp->th_win = n_unit->tcp->th_win;
2285 
2286         memmove(seg->buf + seg->size, data, n_unit->payload);
2287         seg->size += n_unit->payload;
2288         seg->packets++;
2289         chain->stat.coalesced++;
2290         return RSC_COALESCE;
2291     }
2292 }
2293 
2294 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2295                                         VirtioNetRscSeg *seg,
2296                                         const uint8_t *buf, size_t size,
2297                                         VirtioNetRscUnit *unit)
2298 {
2299     struct ip_header *ip1, *ip2;
2300 
2301     ip1 = (struct ip_header *)(unit->ip);
2302     ip2 = (struct ip_header *)(seg->unit.ip);
2303     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2304         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2305         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2306         chain->stat.no_match++;
2307         return RSC_NO_MATCH;
2308     }
2309 
2310     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2311 }
2312 
2313 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2314                                         VirtioNetRscSeg *seg,
2315                                         const uint8_t *buf, size_t size,
2316                                         VirtioNetRscUnit *unit)
2317 {
2318     struct ip6_header *ip1, *ip2;
2319 
2320     ip1 = (struct ip6_header *)(unit->ip);
2321     ip2 = (struct ip6_header *)(seg->unit.ip);
2322     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2323         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2324         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2325         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2326             chain->stat.no_match++;
2327             return RSC_NO_MATCH;
2328     }
2329 
2330     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2331 }
2332 
2333 /* Packets with 'SYN' bypass coalescing; packets with other control flags
2334  * are sent only after draining the chain, to prevent out-of-order delivery */
2335 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2336                                          struct tcp_header *tcp)
2337 {
2338     uint16_t tcp_hdr;
2339     uint16_t tcp_flag;
2340 
2341     tcp_flag = htons(tcp->th_offset_flags);
2342     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2343     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2344     if (tcp_flag & TH_SYN) {
2345         chain->stat.tcp_syn++;
2346         return RSC_BYPASS;
2347     }
2348 
2349     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2350         chain->stat.tcp_ctrl_drain++;
2351         return RSC_FINAL;
2352     }
2353 
2354     if (tcp_hdr > sizeof(struct tcp_header)) {
2355         chain->stat.tcp_all_opt++;
2356         return RSC_FINAL;
2357     }
2358 
2359     return RSC_CANDIDATE;
2360 }
2361 
2362 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2363                                          NetClientState *nc,
2364                                          const uint8_t *buf, size_t size,
2365                                          VirtioNetRscUnit *unit)
2366 {
2367     int ret;
2368     VirtioNetRscSeg *seg, *nseg;
2369 
2370     if (QTAILQ_EMPTY(&chain->buffers)) {
2371         chain->stat.empty_cache++;
2372         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2373         timer_mod(chain->drain_timer,
2374               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2375         return size;
2376     }
2377 
2378     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2379         if (chain->proto == ETH_P_IP) {
2380             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2381         } else {
2382             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2383         }
2384 
2385         if (ret == RSC_FINAL) {
2386             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2387                 /* Send failed */
2388                 chain->stat.final_failed++;
2389                 return 0;
2390             }
2391 
2392             /* Send current packet */
2393             return virtio_net_do_receive(nc, buf, size);
2394         } else if (ret == RSC_NO_MATCH) {
2395             continue;
2396         } else {
2397             /* Coalesced; set the flag so the IPv4 checksum is recalculated */
2398             seg->is_coalesced = 1;
2399             return size;
2400         }
2401     }
2402 
2403     chain->stat.no_match_cache++;
2404     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2405     return size;
2406 }
2407 
2408 /* Drain a connection's buffered data, to avoid out-of-order segments */
2409 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2410                                         NetClientState *nc,
2411                                         const uint8_t *buf, size_t size,
2412                                         uint16_t ip_start, uint16_t ip_size,
2413                                         uint16_t tcp_port)
2414 {
2415     VirtioNetRscSeg *seg, *nseg;
2416     uint32_t ppair1, ppair2;
2417 
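    /*
     * The TCP source and destination ports are adjacent 16-bit fields,
     * so both can be compared with a single 32-bit load at 'tcp_port'.
     */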
2418     ppair1 = *(uint32_t *)(buf + tcp_port);
2419     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2420         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2421         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2422             || (ppair1 != ppair2)) {
2423             continue;
2424         }
2425         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2426             chain->stat.drain_failed++;
2427         }
2428 
2429         break;
2430     }
2431 
2432     return virtio_net_do_receive(nc, buf, size);
2433 }
2434 
2435 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2436                                             struct ip_header *ip,
2437                                             const uint8_t *buf, size_t size)
2438 {
2439     uint16_t ip_len;
2440 
2441     /* Not an ipv4 packet */
2442     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2443         chain->stat.ip_option++;
2444         return RSC_BYPASS;
2445     }
2446 
2447     /* Don't handle packets with ip option */
2448     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2449         chain->stat.ip_option++;
2450         return RSC_BYPASS;
2451     }
2452 
2453     if (ip->ip_p != IPPROTO_TCP) {
2454         chain->stat.bypass_not_tcp++;
2455         return RSC_BYPASS;
2456     }
2457 
2458     /* Don't handle packets with ip fragment */
2459     if (!(htons(ip->ip_off) & IP_DF)) {
2460         chain->stat.ip_frag++;
2461         return RSC_BYPASS;
2462     }
2463 
2464     /* Don't handle packets with ecn flag */
2465     if (IPTOS_ECN(ip->ip_tos)) {
2466         chain->stat.ip_ecn++;
2467         return RSC_BYPASS;
2468     }
2469 
2470     ip_len = htons(ip->ip_len);
2471     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2472         || ip_len > (size - chain->n->guest_hdr_len -
2473                      sizeof(struct eth_header))) {
2474         chain->stat.ip_hacked++;
2475         return RSC_BYPASS;
2476     }
2477 
2478     return RSC_CANDIDATE;
2479 }
2480 
2481 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2482                                       NetClientState *nc,
2483                                       const uint8_t *buf, size_t size)
2484 {
2485     int32_t ret;
2486     uint16_t hdr_len;
2487     VirtioNetRscUnit unit;
2488 
2489     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2490 
2491     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2492         + sizeof(struct tcp_header))) {
2493         chain->stat.bypass_not_tcp++;
2494         return virtio_net_do_receive(nc, buf, size);
2495     }
2496 
2497     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2498     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2499         != RSC_CANDIDATE) {
2500         return virtio_net_do_receive(nc, buf, size);
2501     }
2502 
2503     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2504     if (ret == RSC_BYPASS) {
2505         return virtio_net_do_receive(nc, buf, size);
2506     } else if (ret == RSC_FINAL) {
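        /*
         * ip_start points at the IPv4 source address (12 bytes into the
         * IP header); saddr + daddr together span VIRTIO_NET_IP4_ADDR_SIZE
         * bytes, and the TCP ports follow the 20-byte IP header.
         */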
2507         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2508                 ((hdr_len + sizeof(struct eth_header)) + 12),
2509                 VIRTIO_NET_IP4_ADDR_SIZE,
2510                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2511     }
2512 
2513     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2514 }
2515 
2516 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2517                                             struct ip6_header *ip6,
2518                                             const uint8_t *buf, size_t size)
2519 {
2520     uint16_t ip_len;
2521 
2522     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2523         != IP_HEADER_VERSION_6) {
2524         return RSC_BYPASS;
2525     }
2526 
2527     /* Both options and the protocol are checked by this one test */
2528     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2529         chain->stat.bypass_not_tcp++;
2530         return RSC_BYPASS;
2531     }
2532 
2533     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2534     if (ip_len < sizeof(struct tcp_header) ||
2535         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2536                   - sizeof(struct ip6_header))) {
2537         chain->stat.ip_hacked++;
2538         return RSC_BYPASS;
2539     }
2540 
2541     /* Don't handle packets with ecn flag */
2542     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2543         chain->stat.ip_ecn++;
2544         return RSC_BYPASS;
2545     }
2546 
2547     return RSC_CANDIDATE;
2548 }
2549 
2550 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2551                                       const uint8_t *buf, size_t size)
2552 {
2553     int32_t ret;
2554     uint16_t hdr_len;
2555     VirtioNetRscChain *chain;
2556     VirtioNetRscUnit unit;
2557 
2558     chain = opq;
2559     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2560 
2561     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2562         + sizeof(struct tcp_header))) {
2563         return virtio_net_do_receive(nc, buf, size);
2564     }
2565 
2566     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2567     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2568                                                  unit.ip, buf, size)) {
2569         return virtio_net_do_receive(nc, buf, size);
2570     }
2571 
2572     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2573     if (ret == RSC_BYPASS) {
2574         return virtio_net_do_receive(nc, buf, size);
2575     } else if (ret == RSC_FINAL) {
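        /*
         * For IPv6 the source address starts 8 bytes into the IP header
         * and saddr + daddr span VIRTIO_NET_IP6_ADDR_SIZE bytes; the TCP
         * ports follow the fixed 40-byte IPv6 header.
         */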
2576         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2577                 ((hdr_len + sizeof(struct eth_header)) + 8),
2578                 VIRTIO_NET_IP6_ADDR_SIZE,
2579                 hdr_len + sizeof(struct eth_header)
2580                 + sizeof(struct ip6_header));
2581     }
2582 
2583     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2584 }
2585 
2586 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2587                                                       NetClientState *nc,
2588                                                       uint16_t proto)
2589 {
2590     VirtioNetRscChain *chain;
2591 
2592     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2593         return NULL;
2594     }
2595 
2596     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2597         if (chain->proto == proto) {
2598             return chain;
2599         }
2600     }
2601 
2602     chain = g_malloc(sizeof(*chain));
2603     chain->n = n;
2604     chain->proto = proto;
2605     if (proto == (uint16_t)ETH_P_IP) {
2606         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2607         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2608     } else {
2609         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2610         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2611     }
2612     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2613                                       virtio_net_rsc_purge, chain);
2614     memset(&chain->stat, 0, sizeof(chain->stat));
2615 
2616     QTAILQ_INIT(&chain->buffers);
2617     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2618 
2619     return chain;
2620 }
2621 
2622 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2623                                       const uint8_t *buf,
2624                                       size_t size)
2625 {
2626     uint16_t proto;
2627     VirtioNetRscChain *chain;
2628     struct eth_header *eth;
2629     VirtIONet *n;
2630 
2631     n = qemu_get_nic_opaque(nc);
2632     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2633         return virtio_net_do_receive(nc, buf, size);
2634     }
2635 
2636     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2637     proto = htons(eth->h_proto);
2638 
2639     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2640     if (chain) {
2641         chain->stat.received++;
2642         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2643             return virtio_net_rsc_receive4(chain, nc, buf, size);
2644         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2645             return virtio_net_rsc_receive6(chain, nc, buf, size);
2646         }
2647     }
2648     return virtio_net_do_receive(nc, buf, size);
2649 }
2650 
2651 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2652                                   size_t size)
2653 {
2654     VirtIONet *n = qemu_get_nic_opaque(nc);
2655     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2656         return virtio_net_rsc_receive(nc, buf, size);
2657     } else {
2658         return virtio_net_do_receive(nc, buf, size);
2659     }
2660 }
2661 
2662 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2663 
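/*
 * Completion callback for an asynchronous transmit: push the element
 * that was pending in async_tx back to the guest and resume flushing
 * the TX queue.
 */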
2664 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2665 {
2666     VirtIONet *n = qemu_get_nic_opaque(nc);
2667     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2668     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2669     int ret;
2670 
2671     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2672     virtio_notify(vdev, q->tx_vq);
2673 
2674     g_free(q->async_tx.elem);
2675     q->async_tx.elem = NULL;
2676 
2677     virtio_queue_set_notification(q->tx_vq, 1);
2678     ret = virtio_net_flush_tx(q);
2679     if (ret >= n->tx_burst) {
2680         /*
2681          * the flush has been stopped by tx_burst:
2682          * we will not receive a notification for the
2683          * remaining part, so re-schedule
2684          */
2685         virtio_queue_set_notification(q->tx_vq, 0);
2686         if (q->tx_bh) {
2687             qemu_bh_schedule(q->tx_bh);
2688         } else {
2689             timer_mod(q->tx_timer,
2690                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2691         }
2692         q->tx_waiting = 1;
2693     }
2694 }
2695 
2696 /* TX */
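/*
 * Flush up to tx_burst packets from the TX virtqueue.  Returns the
 * number of packets sent, -EBUSY if a send went asynchronous (the
 * completion callback resumes the flush), or -EINVAL if the device hit
 * a fatal error.
 */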
2697 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2698 {
2699     VirtIONet *n = q->n;
2700     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2701     VirtQueueElement *elem;
2702     int32_t num_packets = 0;
2703     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2704     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2705         return num_packets;
2706     }
2707 
2708     if (q->async_tx.elem) {
2709         virtio_queue_set_notification(q->tx_vq, 0);
2710         return num_packets;
2711     }
2712 
2713     for (;;) {
2714         ssize_t ret;
2715         unsigned int out_num;
2716         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2717         struct virtio_net_hdr_v1_hash vhdr;
2718 
2719         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2720         if (!elem) {
2721             break;
2722         }
2723 
2724         out_num = elem->out_num;
2725         out_sg = elem->out_sg;
2726         if (out_num < 1) {
2727             virtio_error(vdev, "virtio-net header not in first element");
2728             virtqueue_detach_element(q->tx_vq, elem, 0);
2729             g_free(elem);
2730             return -EINVAL;
2731         }
2732 
2733         if (n->has_vnet_hdr) {
2734             if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) <
2735                 n->guest_hdr_len) {
2736                 virtio_error(vdev, "virtio-net header incorrect");
2737                 virtqueue_detach_element(q->tx_vq, elem, 0);
2738                 g_free(elem);
2739                 return -EINVAL;
2740             }
2741             if (n->needs_vnet_hdr_swap) {
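                /*
                 * Substitute a byte-swapped copy of the header: sg2[0]
                 * points at the local vhdr, and the remaining entries
                 * alias the guest buffers past the header.
                 */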
2742                 virtio_net_hdr_swap(vdev, (void *) &vhdr);
2743                 sg2[0].iov_base = &vhdr;
2744                 sg2[0].iov_len = n->guest_hdr_len;
2745                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2746                                    out_sg, out_num,
2747                                    n->guest_hdr_len, -1);
2748                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2749                     goto drop;
2750                 }
2751                 out_num += 1;
2752                 out_sg = sg2;
2753             }
2754         }
2755         /*
2756          * If the host wants to see the guest header as is, we can
2757          * pass it on unchanged. Otherwise, copy just the parts
2758          * that the host is interested in.
2759          */
2760         assert(n->host_hdr_len <= n->guest_hdr_len);
2761         if (n->host_hdr_len != n->guest_hdr_len) {
2762             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2763                                        out_sg, out_num,
2764                                        0, n->host_hdr_len);
2765             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2766                              out_sg, out_num,
2767                              n->guest_hdr_len, -1);
2768             out_num = sg_num;
2769             out_sg = sg;
2770         }
2771 
2772         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2773                                       out_sg, out_num, virtio_net_tx_complete);
2774         if (ret == 0) {
2775             virtio_queue_set_notification(q->tx_vq, 0);
2776             q->async_tx.elem = elem;
2777             return -EBUSY;
2778         }
2779 
2780 drop:
2781         virtqueue_push(q->tx_vq, elem, 0);
2782         virtio_notify(vdev, q->tx_vq);
2783         g_free(elem);
2784 
2785         if (++num_packets >= n->tx_burst) {
2786             break;
2787         }
2788     }
2789     return num_packets;
2790 }
2791 
2792 static void virtio_net_tx_timer(void *opaque);
2793 
2794 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2795 {
2796     VirtIONet *n = VIRTIO_NET(vdev);
2797     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2798 
2799     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2800         virtio_net_drop_tx_queue_data(vdev, vq);
2801         return;
2802     }
2803 
2804     /* This happens when the device was stopped but the VCPU wasn't. */
2805     if (!vdev->vm_running) {
2806         q->tx_waiting = 1;
2807         return;
2808     }
2809 
2810     if (q->tx_waiting) {
2811         /* We already have queued packets, immediately flush */
2812         timer_del(q->tx_timer);
2813         virtio_net_tx_timer(q);
2814     } else {
2815         /* re-arm timer to flush it (and more) on next tick */
2816         timer_mod(q->tx_timer,
2817                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2818         q->tx_waiting = 1;
2819         virtio_queue_set_notification(vq, 0);
2820     }
2821 }
2822 
2823 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2824 {
2825     VirtIONet *n = VIRTIO_NET(vdev);
2826     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2827 
2828     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2829         virtio_net_drop_tx_queue_data(vdev, vq);
2830         return;
2831     }
2832 
2833     if (unlikely(q->tx_waiting)) {
2834         return;
2835     }
2836     q->tx_waiting = 1;
2837     /* This happens when the device was stopped but the VCPU wasn't. */
2838     if (!vdev->vm_running) {
2839         return;
2840     }
2841     virtio_queue_set_notification(vq, 0);
2842     qemu_bh_schedule(q->tx_bh);
2843 }
2844 
2845 static void virtio_net_tx_timer(void *opaque)
2846 {
2847     VirtIONetQueue *q = opaque;
2848     VirtIONet *n = q->n;
2849     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2850     int ret;
2851 
2852     /* This happens when the device was stopped but the timer wasn't. */
2853     if (!vdev->vm_running) {
2854         /* Make sure tx waiting is set, so we'll run when restarted. */
2855         assert(q->tx_waiting);
2856         return;
2857     }
2858 
2859     q->tx_waiting = 0;
2860 
2861     /* Just in case the driver is not ready for more */
2862     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2863         return;
2864     }
2865 
2866     ret = virtio_net_flush_tx(q);
2867     if (ret == -EBUSY || ret == -EINVAL) {
2868         return;
2869     }
2870     /*
2871      * If we flush a full burst of packets, assume there are
2872      * more coming and immediately rearm
2873      */
2874     if (ret >= n->tx_burst) {
2875         q->tx_waiting = 1;
2876         timer_mod(q->tx_timer,
2877                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2878         return;
2879     }
2880     /*
2881      * If less than a full burst, re-enable notification and flush
2882      * anything that may have come in while we weren't looking.  If
2883      * we find something, assume the guest is still active and rearm
2884      */
2885     virtio_queue_set_notification(q->tx_vq, 1);
2886     ret = virtio_net_flush_tx(q);
2887     if (ret > 0) {
2888         virtio_queue_set_notification(q->tx_vq, 0);
2889         q->tx_waiting = 1;
2890         timer_mod(q->tx_timer,
2891                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2892     }
2893 }
2894 
2895 static void virtio_net_tx_bh(void *opaque)
2896 {
2897     VirtIONetQueue *q = opaque;
2898     VirtIONet *n = q->n;
2899     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2900     int32_t ret;
2901 
2902     /* This happens when the device was stopped but the BH wasn't. */
2903     if (!vdev->vm_running) {
2904         /* Make sure tx waiting is set, so we'll run when restarted. */
2905         assert(q->tx_waiting);
2906         return;
2907     }
2908 
2909     q->tx_waiting = 0;
2910 
2911     /* Just in case the driver is not ready for more */
2912     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2913         return;
2914     }
2915 
2916     ret = virtio_net_flush_tx(q);
2917     if (ret == -EBUSY || ret == -EINVAL) {
2918         return; /* Notification re-enable handled by tx_complete or device
2919                  * broken */
2920     }
2921 
2922     /* If we flush a full burst of packets, assume there are
2923      * more coming and immediately reschedule */
2924     if (ret >= n->tx_burst) {
2925         qemu_bh_schedule(q->tx_bh);
2926         q->tx_waiting = 1;
2927         return;
2928     }
2929 
2930     /* If less than a full burst, re-enable notification and flush
2931      * anything that may have come in while we weren't looking.  If
2932      * we find something, assume the guest is still active and reschedule */
2933     virtio_queue_set_notification(q->tx_vq, 1);
2934     ret = virtio_net_flush_tx(q);
2935     if (ret == -EINVAL) {
2936         return;
2937     } else if (ret > 0) {
2938         virtio_queue_set_notification(q->tx_vq, 0);
2939         qemu_bh_schedule(q->tx_bh);
2940         q->tx_waiting = 1;
2941     }
2942 }
2943 
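/*
 * Create the RX/TX virtqueue pair for one queue index.  TX completion
 * runs either from a timer (tx=timer) or from a bottom half (the
 * default), matching the handlers installed here.
 */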
2944 static void virtio_net_add_queue(VirtIONet *n, int index)
2945 {
2946     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2947 
2948     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2949                                            virtio_net_handle_rx);
2950 
2951     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2952         n->vqs[index].tx_vq =
2953             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2954                              virtio_net_handle_tx_timer);
2955         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2956                                               virtio_net_tx_timer,
2957                                               &n->vqs[index]);
2958     } else {
2959         n->vqs[index].tx_vq =
2960             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2961                              virtio_net_handle_tx_bh);
2962         n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2963                                                   &DEVICE(vdev)->mem_reentrancy_guard);
2964     }
2965 
2966     n->vqs[index].tx_waiting = 0;
2967     n->vqs[index].n = n;
2968 }
2969 
2970 static void virtio_net_del_queue(VirtIONet *n, int index)
2971 {
2972     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2973     VirtIONetQueue *q = &n->vqs[index];
2974     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2975 
2976     qemu_purge_queued_packets(nc);
2977 
2978     virtio_del_queue(vdev, index * 2);
2979     if (q->tx_timer) {
2980         timer_free(q->tx_timer);
2981         q->tx_timer = NULL;
2982     } else {
2983         qemu_bh_delete(q->tx_bh);
2984         q->tx_bh = NULL;
2985     }
2986     q->tx_waiting = 0;
2987     virtio_del_queue(vdev, index * 2 + 1);
2988 }
2989 
2990 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2991 {
2992     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2993     int old_num_queues = virtio_get_num_queues(vdev);
2994     int new_num_queues = new_max_queue_pairs * 2 + 1;
2995     int i;
2996 
2997     assert(old_num_queues >= 3);
2998     assert(old_num_queues % 2 == 1);
2999 
3000     if (old_num_queues == new_num_queues) {
3001         return;
3002     }
3003 
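    /*
     * Virtqueues are laid out as rx0, tx0, rx1, tx1, ..., ctrl: queue
     * pair i owns indexes 2 * i and 2 * i + 1, with the control queue
     * always last.
     */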
3004     /*
3005      * We always need to remove and add ctrl vq if
3006      * old_num_queues != new_num_queues. Remove ctrl_vq first,
3007      * and then we only enter one of the following two loops.
3008      */
3009     virtio_del_queue(vdev, old_num_queues - 1);
3010 
3011     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
3012         /* new_num_queues < old_num_queues */
3013         virtio_net_del_queue(n, i / 2);
3014     }
3015 
3016     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3017         /* new_num_queues > old_num_queues */
3018         virtio_net_add_queue(n, i / 2);
3019     }
3020 
3021     /* add ctrl_vq last */
3022     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3023 }
3024 
3025 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3026 {
3027     int max = multiqueue ? n->max_queue_pairs : 1;
3028 
3029     n->multiqueue = multiqueue;
3030     virtio_net_change_num_queue_pairs(n, max);
3031 
3032     virtio_net_set_queue_pairs(n);
3033 }
3034 
3035 static int virtio_net_post_load_device(void *opaque, int version_id)
3036 {
3037     VirtIONet *n = opaque;
3038     VirtIODevice *vdev = VIRTIO_DEVICE(n);
3039     int i, link_down;
3040 
3041     trace_virtio_net_post_load_device();
3042     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3043                                virtio_vdev_has_feature(vdev,
3044                                                        VIRTIO_F_VERSION_1),
3045                                virtio_vdev_has_feature(vdev,
3046                                                        VIRTIO_NET_F_HASH_REPORT));
3047 
3048     /* MAC_TABLE_ENTRIES may be different from the saved image */
3049     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3050         n->mac_table.in_use = 0;
3051     }
3052 
3053     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3054         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3055     }
3056 
3057     /*
3058      * curr_guest_offloads will be later overwritten by the
3059      * virtio_set_features_nocheck call done from the virtio_load.
3060      * Here we make sure it is preserved and restored accordingly
3061      * in the virtio_net_post_load_virtio callback.
3062      */
3063     n->saved_guest_offloads = n->curr_guest_offloads;
3064 
3065     virtio_net_set_queue_pairs(n);
3066 
3067     /* Find the first multicast entry in the saved MAC filter */
3068     for (i = 0; i < n->mac_table.in_use; i++) {
3069         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3070             break;
3071         }
3072     }
3073     n->mac_table.first_multi = i;
3074 
3075     /* nc.link_down can't be migrated, so infer link_down from the
3076      * link status bit in n->status */
3077     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3078     for (i = 0; i < n->max_queue_pairs; i++) {
3079         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3080     }
3081 
3082     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3083         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3084         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3085                                   QEMU_CLOCK_VIRTUAL,
3086                                   virtio_net_announce_timer, n);
3087         if (n->announce_timer.round) {
3088             timer_mod(n->announce_timer.tm,
3089                       qemu_clock_get_ms(n->announce_timer.type));
3090         } else {
3091             qemu_announce_timer_del(&n->announce_timer, false);
3092         }
3093     }
3094 
3095     if (n->rss_data.enabled) {
3096         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3097         if (!n->rss_data.populate_hash) {
3098             if (!virtio_net_attach_epbf_rss(n)) {
3099                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3100                     warn_report("Can't post-load eBPF RSS for vhost");
3101                 } else {
3102                     warn_report("Can't post-load eBPF RSS - "
3103                                 "fallback to software RSS");
3104                     n->rss_data.enabled_software_rss = true;
3105                 }
3106             }
3107         }
3108 
3109         trace_virtio_net_rss_enable(n->rss_data.hash_types,
3110                                     n->rss_data.indirections_len,
3111                                     sizeof(n->rss_data.key));
3112     } else {
3113         trace_virtio_net_rss_disable();
3114     }
3115     return 0;
3116 }
3117 
3118 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3119 {
3120     VirtIONet *n = VIRTIO_NET(vdev);
3121     /*
3122      * The actual needed state is now in saved_guest_offloads,
3123      * see virtio_net_post_load_device for detail.
3124      * Restore it back and apply the desired offloads.
3125      */
3126     n->curr_guest_offloads = n->saved_guest_offloads;
3127     if (peer_has_vnet_hdr(n)) {
3128         virtio_net_apply_guest_offloads(n);
3129     }
3130 
3131     return 0;
3132 }
3133 
3134 /* tx_waiting field of a VirtIONetQueue */
3135 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3136     .name = "virtio-net-queue-tx_waiting",
3137     .fields = (const VMStateField[]) {
3138         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3139         VMSTATE_END_OF_LIST()
3140     },
3141 };
3142 
3143 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3144 {
3145     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3146 }
3147 
3148 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3149 {
3150     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3151                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3152 }
3153 
3154 static bool mac_table_fits(void *opaque, int version_id)
3155 {
3156     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3157 }
3158 
3159 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3160 {
3161     return !mac_table_fits(opaque, version_id);
3162 }
3163 
3164 /* This temporary type is shared by all the WITH_TMP methods
3165  * although only some fields are used by each.
3166  */
3167 struct VirtIONetMigTmp {
3168     VirtIONet      *parent;
3169     VirtIONetQueue *vqs_1;
3170     uint16_t        curr_queue_pairs_1;
3171     uint8_t         has_ufo;
3172     uint32_t        has_vnet_hdr;
3173 };
3174 
3175 /* The 2nd and subsequent tx_waiting flags are loaded later than
3176  * the 1st entry in the queue_pairs, and only if there's more than
3177  * one entry.  We use the tmp mechanism to compute a temporary
3178  * pointer and count, and also to validate the count.
3179  */
3180 
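/*
 * For example (illustrative values): with curr_queue_pairs == 3 the
 * main device section migrates vqs[0].tx_waiting and this tmp section
 * migrates vqs[1] and vqs[2], i.e. the helpers below set up
 *
 *     tmp->vqs_1              = &tmp->parent->vqs[1];
 *     tmp->curr_queue_pairs_1 = 2;
 */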
3181 static int virtio_net_tx_waiting_pre_save(void *opaque)
3182 {
3183     struct VirtIONetMigTmp *tmp = opaque;
3184 
3185     tmp->vqs_1 = tmp->parent->vqs + 1;
3186     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3187     if (tmp->parent->curr_queue_pairs == 0) {
3188         tmp->curr_queue_pairs_1 = 0;
3189     }
3190 
3191     return 0;
3192 }
3193 
3194 static int virtio_net_tx_waiting_pre_load(void *opaque)
3195 {
3196     struct VirtIONetMigTmp *tmp = opaque;
3197 
3198     /* Reuse the pointer setup from save */
3199     virtio_net_tx_waiting_pre_save(opaque);
3200 
3201     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3202         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3203             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3204 
3205         return -EINVAL;
3206     }
3207 
3208     return 0; /* all good */
3209 }
3210 
3211 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3212     .name      = "virtio-net-tx_waiting",
3213     .pre_load  = virtio_net_tx_waiting_pre_load,
3214     .pre_save  = virtio_net_tx_waiting_pre_save,
3215     .fields    = (const VMStateField[]) {
3216         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3217                                      curr_queue_pairs_1,
3218                                      vmstate_virtio_net_queue_tx_waiting,
3219                                      struct VirtIONetQueue),
3220         VMSTATE_END_OF_LIST()
3221     },
3222 };
3223 
3224 /* The 'has_ufo' flag is only tested: if the incoming stream has the
3225  * flag set, we need to check that we have UFO support too.
3226  */
3227 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3228 {
3229     struct VirtIONetMigTmp *tmp = opaque;
3230 
3231     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3232         error_report("virtio-net: saved image requires TUN_F_UFO support");
3233         return -EINVAL;
3234     }
3235 
3236     return 0;
3237 }
3238 
3239 static int virtio_net_ufo_pre_save(void *opaque)
3240 {
3241     struct VirtIONetMigTmp *tmp = opaque;
3242 
3243     tmp->has_ufo = tmp->parent->has_ufo;
3244 
3245     return 0;
3246 }
3247 
3248 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3249     .name      = "virtio-net-ufo",
3250     .post_load = virtio_net_ufo_post_load,
3251     .pre_save  = virtio_net_ufo_pre_save,
3252     .fields    = (const VMStateField[]) {
3253         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3254         VMSTATE_END_OF_LIST()
3255     },
3256 };
3257 
3258 /* The 'has_vnet_hdr' flag is only tested: if the incoming stream has
3259  * the flag set, we need to check that we have vnet header support too.
3260  */
3261 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3262 {
3263     struct VirtIONetMigTmp *tmp = opaque;
3264 
3265     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3266         error_report("virtio-net: saved image requires vnet_hdr=on");
3267         return -EINVAL;
3268     }
3269 
3270     return 0;
3271 }
3272 
3273 static int virtio_net_vnet_pre_save(void *opaque)
3274 {
3275     struct VirtIONetMigTmp *tmp = opaque;
3276 
3277     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3278 
3279     return 0;
3280 }
3281 
3282 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3283     .name      = "virtio-net-vnet",
3284     .post_load = virtio_net_vnet_post_load,
3285     .pre_save  = virtio_net_vnet_pre_save,
3286     .fields    = (const VMStateField[]) {
3287         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3288         VMSTATE_END_OF_LIST()
3289     },
3290 };
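
/*
 * Both the -ufo and -vnet tmp sections above follow the same pattern:
 * the flag travels in the stream purely so the destination can verify
 * it has the matching capability; nothing is written back into
 * VirtIONet on load.
 */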
3291 
3292 static bool virtio_net_rss_needed(void *opaque)
3293 {
3294     return VIRTIO_NET(opaque)->rss_data.enabled;
3295 }
3296 
3297 static const VMStateDescription vmstate_virtio_net_rss = {
3298     .name      = "virtio-net-device/rss",
3299     .version_id = 1,
3300     .minimum_version_id = 1,
3301     .needed = virtio_net_rss_needed,
3302     .fields = (const VMStateField[]) {
3303         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3304         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3305         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3306         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3307         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3308         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3309         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3310                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3311         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3312                                     rss_data.indirections_len, 0,
3313                                     vmstate_info_uint16, uint16_t),
3314         VMSTATE_END_OF_LIST()
3315     },
3316 };
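
/*
 * Being a subsection gated by .needed, the RSS state above is only put
 * on the wire while rss_data.enabled is set, so streams from sources
 * that never enabled RSS keep loading unchanged.
 */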
3317 
3318 static const VMStateDescription vmstate_virtio_net_device = {
3319     .name = "virtio-net-device",
3320     .version_id = VIRTIO_NET_VM_VERSION,
3321     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3322     .post_load = virtio_net_post_load_device,
3323     .fields = (const VMStateField[]) {
3324         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3325         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3326                                vmstate_virtio_net_queue_tx_waiting,
3327                                VirtIONetQueue),
3328         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3329         VMSTATE_UINT16(status, VirtIONet),
3330         VMSTATE_UINT8(promisc, VirtIONet),
3331         VMSTATE_UINT8(allmulti, VirtIONet),
3332         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3333 
3334         /* Guarded pair: if it fits we load it, else we throw it away
3335          * - can happen if the source has a larger MAC table; post-load
3336          * sets flags in this case.
3337          */
3338         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3339                                  0, mac_table_fits, mac_table.in_use,
3340                                  ETH_ALEN),
3341         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3342                                      mac_table.in_use, ETH_ALEN),
3343 
3344         /* Note: this is an array of uint32_t that has always been saved
3345          * as a raw buffer, so mind the endianness; it's really a bitmap
3346          * built out of those uint32_t words.
3347          */
3348         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3349         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3350                          vmstate_virtio_net_has_vnet),
3351         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3352         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3353         VMSTATE_UINT8(alluni, VirtIONet),
3354         VMSTATE_UINT8(nomulti, VirtIONet),
3355         VMSTATE_UINT8(nouni, VirtIONet),
3356         VMSTATE_UINT8(nobcast, VirtIONet),
3357         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3358                          vmstate_virtio_net_has_ufo),
3359         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3360                             vmstate_info_uint16_equal, uint16_t),
3361         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3362         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3363                          vmstate_virtio_net_tx_waiting),
3364         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3365                             has_ctrl_guest_offloads),
3366         VMSTATE_END_OF_LIST()
3367     },
3368     .subsections = (const VMStateDescription * const []) {
3369         &vmstate_virtio_net_rss,
3370         NULL
3371     }
3372 };
3373 
3374 static NetClientInfo net_virtio_info = {
3375     .type = NET_CLIENT_DRIVER_NIC,
3376     .size = sizeof(NICState),
3377     .can_receive = virtio_net_can_receive,
3378     .receive = virtio_net_receive,
3379     .link_status_changed = virtio_net_set_link_status,
3380     .query_rx_filter = virtio_net_query_rxfilter,
3381     .announce = virtio_net_announce,
3382 };
3383 
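/*
 * Virtqueue index layout, for reference (vq2q(idx) == idx / 2):
 *   without VIRTIO_NET_F_MQ:   0 = rx0, 1 = tx0, 2 = ctrl
 *   with MQ and N queue pairs: 2*i = rx(i), 2*i + 1 = tx(i), 2*N = ctrl
 * which is why idx == 2 needs the special-casing below when MQ is off.
 */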
3384 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3385 {
3386     VirtIONet *n = VIRTIO_NET(vdev);
3387     NetClientState *nc;
3388     assert(n->vhost_started);
3389     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3390         /* Must guard against an invalid feature set and a bogus queue
3391          * index being set by a malicious guest or slipping in through a
3392          * buggy migration stream.
3393          */
3394         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3395             qemu_log_mask(LOG_GUEST_ERROR,
3396                           "%s: bogus vq index ignored\n", __func__);
3397             return false;
3398         }
3399         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3400     } else {
3401         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3402     }
3403     /*
3404      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3405      * the index reserved for it.  If the backend does not support the
3406      * config interrupt, vhost_net_config_pending() returns false.
3407      */
3408 
3409     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3410         return vhost_net_config_pending(get_vhost_net(nc->peer));
3411     }
3412     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3413 }
3414 
3415 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3416                                            bool mask)
3417 {
3418     VirtIONet *n = VIRTIO_NET(vdev);
3419     NetClientState *nc;
3420     assert(n->vhost_started);
3421     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3422         /* Must guard against an invalid feature set and a bogus queue
3423          * index being set by a malicious guest or slipping in through a
3424          * buggy migration stream.
3425          */
3426         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3427             qemu_log_mask(LOG_GUEST_ERROR,
3428                           "%s: bogus vq index ignored\n", __func__);
3429             return;
3430         }
3431         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3432     } else {
3433         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3434     }
3435     /*
3436      * Check for the config interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3437      * the index reserved for it.  If the backend does not support the
3438      * config interrupt, vhost_net_config_mask() simply returns.
3439      */
3440 
3441     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3442         vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3443         return;
3444     }
3445     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3446 }
3447 
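/*
 * Note: host_features is passed by value, so adding VIRTIO_NET_F_MAC
 * here only widens the config size calculation (the config space must
 * always cover the mac field); the feature bits actually offered to
 * the guest are left untouched.
 */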
3448 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3449 {
3450     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3451 
3452     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3453 }
3454 
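/*
 * Called by transports (e.g. virtio-net-pci) before the device is
 * realized; virtio_net_device_realize() prefers these names over the
 * typename/dev->id defaults when creating the NIC.
 */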
3455 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3456                                    const char *type)
3457 {
3458     /*
3459      * The name can be NULL; the netclient name will then be type.x.
3460      */
3461     assert(type != NULL);
3462 
3463     g_free(n->netclient_name);
3464     g_free(n->netclient_type);
3465     n->netclient_name = g_strdup(name);
3466     n->netclient_type = g_strdup(type);
3467 }
3468 
3469 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3470 {
3471     HotplugHandler *hotplug_ctrl;
3472     PCIDevice *pci_dev;
3473     Error *err = NULL;
3474 
3475     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3476     if (hotplug_ctrl) {
3477         pci_dev = PCI_DEVICE(dev);
3478         pci_dev->partially_hotplugged = true;
3479         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3480         if (err) {
3481             error_report_err(err);
3482             return false;
3483         }
3484     } else {
3485         return false;
3486     }
3487     return true;
3488 }
3489 
3490 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3491                                     Error **errp)
3492 {
3493     Error *err = NULL;
3494     HotplugHandler *hotplug_ctrl;
3495     PCIDevice *pdev = PCI_DEVICE(dev);
3496     BusState *primary_bus;
3497 
3498     if (!pdev->partially_hotplugged) {
3499         return true;
3500     }
3501     primary_bus = dev->parent_bus;
3502     if (!primary_bus) {
3503         error_setg(errp, "virtio_net: couldn't find primary bus");
3504         return false;
3505     }
3506     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3507     qatomic_set(&n->failover_primary_hidden, false);
3508     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3509     if (hotplug_ctrl) {
3510         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3511         if (err) {
3512             goto out;
3513         }
3514         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3515     }
3516     pdev->partially_hotplugged = false;
3517 
3518 out:
3519     error_propagate(errp, err);
3520     return !err;
3521 }
3522 
3523 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationEvent *e)
3524 {
3525     bool should_be_hidden;
3526     Error *err = NULL;
3527     DeviceState *dev = failover_find_primary_device(n);
3528 
3529     if (!dev) {
3530         return;
3531     }
3532 
3533     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3534 
3535     if (e->type == MIG_EVENT_PRECOPY_SETUP && !should_be_hidden) {
3536         if (failover_unplug_primary(n, dev)) {
3537             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3538             qapi_event_send_unplug_primary(dev->id);
3539             qatomic_set(&n->failover_primary_hidden, true);
3540         } else {
3541             warn_report("couldn't unplug primary device");
3542         }
3543     } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
3544         /* We already unplugged the device; let's plug it back */
3545         if (!failover_replug_primary(n, dev, &err)) {
3546             if (err) {
3547                 error_report_err(err);
3548             }
3549         }
3550     }
3551 }
3552 
3553 static int virtio_net_migration_state_notifier(NotifierWithReturn *notifier,
3554                                                MigrationEvent *e, Error **errp)
3555 {
3556     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3557     virtio_net_handle_migration_primary(n, e);
3558     return 0;
3559 }
3560 
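/*
 * Illustrative command line for the failover pairing handled below
 * (names are examples): the standby virtio-net device is created with
 * failover=on and the primary references it by id, which is matched
 * against n->netclient_name here:
 *
 *   -device virtio-net-pci,netdev=hostnet0,id=net0,failover=on
 *   -device vfio-pci,host=5e:00.2,id=hostdev0,failover_pair_id=net0
 */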
3561 static bool failover_hide_primary_device(DeviceListener *listener,
3562                                          const QDict *device_opts,
3563                                          bool from_json,
3564                                          Error **errp)
3565 {
3566     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3567     const char *standby_id;
3568 
3569     if (!device_opts) {
3570         return false;
3571     }
3572 
3573     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3574         return false;
3575     }
3576 
3577     if (!qdict_haskey(device_opts, "id")) {
3578         error_setg(errp, "Device with failover_pair_id needs to have an id");
3579         return false;
3580     }
3581 
3582     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3583     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3584         return false;
3585     }
3586 
3587     /*
3588      * The hide helper can be called several times for a given device.
3589      * Check that there is only one primary per virtio-net device,
3590      * but don't clone the qdict again if it's called for the
3591      * same device.
3592      */
3593     if (n->primary_opts) {
3594         const char *old, *new;
3595         /* devices with failover_pair_id always have an id */
3596         old = qdict_get_str(n->primary_opts, "id");
3597         new = qdict_get_str(device_opts, "id");
3598         if (strcmp(old, new) != 0) {
3599             error_setg(errp, "Cannot attach more than one primary device to "
3600                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3601             return false;
3602         }
3603     } else {
3604         n->primary_opts = qdict_clone_shallow(device_opts);
3605         n->primary_opts_from_json = from_json;
3606     }
3607 
3608     /* failover_primary_hidden is set during feature negotiation */
3609     return qatomic_read(&n->failover_primary_hidden);
3610 }
3611 
3612 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3613 {
3614     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3615     VirtIONet *n = VIRTIO_NET(dev);
3616     NetClientState *nc;
3617     int i;
3618 
3619     if (n->net_conf.mtu) {
3620         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3621     }
3622 
3623     if (n->net_conf.duplex_str) {
3624         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3625             n->net_conf.duplex = DUPLEX_HALF;
3626         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3627             n->net_conf.duplex = DUPLEX_FULL;
3628         } else {
3629             error_setg(errp, "'duplex' must be 'half' or 'full'");
3630             return;
3631         }
3632         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3633     } else {
3634         n->net_conf.duplex = DUPLEX_UNKNOWN;
3635     }
3636 
3637     if (n->net_conf.speed < SPEED_UNKNOWN) {
3638         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3639         return;
3640     }
3641     if (n->net_conf.speed >= 0) {
3642         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3643     }
3644 
3645     if (n->failover) {
3646         n->primary_listener.hide_device = failover_hide_primary_device;
3647         qatomic_set(&n->failover_primary_hidden, true);
3648         device_listener_register(&n->primary_listener);
3649         migration_add_notifier(&n->migration_state,
3650                                virtio_net_migration_state_notifier);
3651         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3652     }
3653 
3654     virtio_net_set_config_size(n, n->host_features);
3655     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3656 
3657     /*
3658      * The lower limit on the RX queue size is kept at its historical
3659      * value.  Guests that want a smaller ring can always resize it
3660      * without help from us (using virtio 1 and up).
3661      */
3662     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3663         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3664         !is_power_of_2(n->net_conf.rx_queue_size)) {
3665         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3666                    "must be a power of 2 between %d and %d.",
3667                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3668                    VIRTQUEUE_MAX_SIZE);
3669         virtio_cleanup(vdev);
3670         return;
3671     }
3672 
3673     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3674         n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3675         !is_power_of_2(n->net_conf.tx_queue_size)) {
3676         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3677                    "must be a power of 2 between %d and %d",
3678                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3679                    virtio_net_max_tx_queue_size(n));
3680         virtio_cleanup(vdev);
3681         return;
3682     }
3683 
3684     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3685 
3686     /*
3687      * Figure out the number of datapath queue pairs, since the backend
3688      * could provide the control queue via the peers as well.
3689      */
3690     if (n->nic_conf.peers.queues) {
3691         for (i = 0; i < n->max_ncs; i++) {
3692             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3693                 ++n->max_queue_pairs;
3694             }
3695         }
3696     }
3697     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3698 
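    /*
     * Each queue pair consumes an rx and a tx virtqueue, and the control
     * queue needs one more, hence the 2 * n + 1 bound checked below.
     */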
3699     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3700         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3701                    "must be a positive integer less than %d.",
3702                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3703         virtio_cleanup(vdev);
3704         return;
3705     }
3706     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3707     n->curr_queue_pairs = 1;
3708     n->tx_timeout = n->net_conf.txtimer;
3709 
3710     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3711                        && strcmp(n->net_conf.tx, "bh")) {
3712         warn_report("virtio-net: "
3713                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3714                     n->net_conf.tx);
3715         error_printf("Defaulting to \"bh\"");
3716     }
3717 
3718     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3719                                     n->net_conf.tx_queue_size);
3720 
3721     for (i = 0; i < n->max_queue_pairs; i++) {
3722         virtio_net_add_queue(n, i);
3723     }
3724 
3725     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3726     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3727     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3728     n->status = VIRTIO_NET_S_LINK_UP;
3729     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3730                               QEMU_CLOCK_VIRTUAL,
3731                               virtio_net_announce_timer, n);
3732     n->announce_timer.round = 0;
3733 
3734     if (n->netclient_type) {
3735         /*
3736          * This happens when virtio_net_set_netclient_name has been called.
3737          */
3738         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3739                               n->netclient_type, n->netclient_name,
3740                               &dev->mem_reentrancy_guard, n);
3741     } else {
3742         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3743                               object_get_typename(OBJECT(dev)), dev->id,
3744                               &dev->mem_reentrancy_guard, n);
3745     }
3746 
3747     for (i = 0; i < n->max_queue_pairs; i++) {
3748         n->nic->ncs[i].do_not_pad = true;
3749     }
3750 
3751     peer_test_vnet_hdr(n);
3752     if (peer_has_vnet_hdr(n)) {
3753         for (i = 0; i < n->max_queue_pairs; i++) {
3754             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3755         }
3756         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3757     } else {
3758         n->host_hdr_len = 0;
3759     }
3760 
3761     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3762 
3763     n->vqs[0].tx_waiting = 0;
3764     n->tx_burst = n->net_conf.txburst;
3765     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3766     n->promisc = 1; /* for compatibility */
3767 
3768     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3769 
3770     n->vlans = g_malloc0(MAX_VLAN >> 3);
3771 
3772     nc = qemu_get_queue(n->nic);
3773     nc->rxfilter_notify_enabled = 1;
3774 
3775     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3776         struct virtio_net_config netcfg = {};
3777         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3778         vhost_net_set_config(get_vhost_net(nc->peer),
3779             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3780     }
3781     QTAILQ_INIT(&n->rsc_chains);
3782     n->qdev = dev;
3783 
3784     net_rx_pkt_init(&n->rx_pkt);
3785 
3786     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3787         virtio_net_load_ebpf(n);
3788     }
3789 }
3790 
3791 static void virtio_net_device_unrealize(DeviceState *dev)
3792 {
3793     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3794     VirtIONet *n = VIRTIO_NET(dev);
3795     int i, max_queue_pairs;
3796 
3797     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3798         virtio_net_unload_ebpf(n);
3799     }
3800 
3801     /* This will stop the vhost backend if appropriate. */
3802     virtio_net_set_status(vdev, 0);
3803 
3804     g_free(n->netclient_name);
3805     n->netclient_name = NULL;
3806     g_free(n->netclient_type);
3807     n->netclient_type = NULL;
3808 
3809     g_free(n->mac_table.macs);
3810     g_free(n->vlans);
3811 
3812     if (n->failover) {
3813         qobject_unref(n->primary_opts);
3814         device_listener_unregister(&n->primary_listener);
3815         migration_remove_notifier(&n->migration_state);
3816     } else {
3817         assert(n->primary_opts == NULL);
3818     }
3819 
3820     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3821     for (i = 0; i < max_queue_pairs; i++) {
3822         virtio_net_del_queue(n, i);
3823     }
3824     /* also delete the control vq */
3825     virtio_del_queue(vdev, max_queue_pairs * 2);
3826     qemu_announce_timer_del(&n->announce_timer, false);
3827     g_free(n->vqs);
3828     qemu_del_nic(n->nic);
3829     virtio_net_rsc_cleanup(n);
3830     g_free(n->rss_data.indirections_table);
3831     net_rx_pkt_uninit(n->rx_pkt);
3832     virtio_cleanup(vdev);
3833 }
3834 
3835 static void virtio_net_instance_init(Object *obj)
3836 {
3837     VirtIONet *n = VIRTIO_NET(obj);
3838 
3839     /*
3840      * The default config_size is sizeof(struct virtio_net_config).
3841      * Can be overridden with virtio_net_set_config_size.
3842      */
3843     n->config_size = sizeof(struct virtio_net_config);
3844     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3845                                   "bootindex", "/ethernet-phy@0",
3846                                   DEVICE(n));
3847 
3848     ebpf_rss_init(&n->ebpf_rss);
3849 }
3850 
3851 static int virtio_net_pre_save(void *opaque)
3852 {
3853     VirtIONet *n = opaque;
3854 
3855     /* At this point, the backend must be stopped, otherwise
3856      * it might keep writing to memory. */
3857     assert(!n->vhost_started);
3858 
3859     return 0;
3860 }
3861 
3862 static bool primary_unplug_pending(void *opaque)
3863 {
3864     DeviceState *dev = opaque;
3865     DeviceState *primary;
3866     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3867     VirtIONet *n = VIRTIO_NET(vdev);
3868 
3869     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3870         return false;
3871     }
3872     primary = failover_find_primary_device(n);
3873     return primary ? primary->pending_deleted_event : false;
3874 }
3875 
3876 static bool dev_unplug_pending(void *opaque)
3877 {
3878     DeviceState *dev = opaque;
3879     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3880 
3881     return vdc->primary_unplug_pending(dev);
3882 }
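
/*
 * dev_unplug_pending is wired up as vmstate_virtio_net.dev_unplug_pending
 * below; migration of this device is held back while the failover
 * primary's hot-unplug is still pending.
 */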
3883 
3884 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3885 {
3886     VirtIONet *n = VIRTIO_NET(vdev);
3887     NetClientState *nc = qemu_get_queue(n->nic);
3888     struct vhost_net *net = get_vhost_net(nc->peer);
3889     return &net->dev;
3890 }
3891 
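/*
 * Outer wrapper vmstate: VMSTATE_VIRTIO_DEVICE hands off to the virtio
 * core, which in turn migrates vmstate_virtio_net_device through
 * vdc->vmsd (set in virtio_net_class_init below).
 */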
3892 static const VMStateDescription vmstate_virtio_net = {
3893     .name = "virtio-net",
3894     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3895     .version_id = VIRTIO_NET_VM_VERSION,
3896     .fields = (const VMStateField[]) {
3897         VMSTATE_VIRTIO_DEVICE,
3898         VMSTATE_END_OF_LIST()
3899     },
3900     .pre_save = virtio_net_pre_save,
3901     .dev_unplug_pending = dev_unplug_pending,
3902 };
3903 
3904 static Property virtio_net_properties[] = {
3905     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3906                     VIRTIO_NET_F_CSUM, true),
3907     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3908                     VIRTIO_NET_F_GUEST_CSUM, true),
3909     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3910     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3911                     VIRTIO_NET_F_GUEST_TSO4, true),
3912     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3913                     VIRTIO_NET_F_GUEST_TSO6, true),
3914     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3915                     VIRTIO_NET_F_GUEST_ECN, true),
3916     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3917                     VIRTIO_NET_F_GUEST_UFO, true),
3918     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3919                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3920     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3921                     VIRTIO_NET_F_HOST_TSO4, true),
3922     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3923                     VIRTIO_NET_F_HOST_TSO6, true),
3924     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3925                     VIRTIO_NET_F_HOST_ECN, true),
3926     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3927                     VIRTIO_NET_F_HOST_UFO, true),
3928     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3929                     VIRTIO_NET_F_MRG_RXBUF, true),
3930     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3931                     VIRTIO_NET_F_STATUS, true),
3932     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3933                     VIRTIO_NET_F_CTRL_VQ, true),
3934     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3935                     VIRTIO_NET_F_CTRL_RX, true),
3936     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3937                     VIRTIO_NET_F_CTRL_VLAN, true),
3938     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3939                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3940     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3941                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3942     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3943                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3944     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3945     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3946                     VIRTIO_NET_F_RSS, false),
3947     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3948                     VIRTIO_NET_F_HASH_REPORT, false),
3949     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3950                     VIRTIO_NET_F_RSC_EXT, false),
3951     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3952                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3953     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3954     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3955                        TX_TIMER_INTERVAL),
3956     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3957     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3958     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3959                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3960     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3961                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3962     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3963     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3964                      true),
3965     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3966     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3967     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3968     DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
3969                       VIRTIO_NET_F_GUEST_USO4, true),
3970     DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
3971                       VIRTIO_NET_F_GUEST_USO6, true),
3972     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
3973                       VIRTIO_NET_F_HOST_USO, true),
3974     DEFINE_PROP_END_OF_LIST(),
3975 };
3976 
3977 static void virtio_net_class_init(ObjectClass *klass, void *data)
3978 {
3979     DeviceClass *dc = DEVICE_CLASS(klass);
3980     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3981 
3982     device_class_set_props(dc, virtio_net_properties);
3983     dc->vmsd = &vmstate_virtio_net;
3984     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3985     vdc->realize = virtio_net_device_realize;
3986     vdc->unrealize = virtio_net_device_unrealize;
3987     vdc->get_config = virtio_net_get_config;
3988     vdc->set_config = virtio_net_set_config;
3989     vdc->get_features = virtio_net_get_features;
3990     vdc->set_features = virtio_net_set_features;
3991     vdc->bad_features = virtio_net_bad_features;
3992     vdc->reset = virtio_net_reset;
3993     vdc->queue_reset = virtio_net_queue_reset;
3994     vdc->queue_enable = virtio_net_queue_enable;
3995     vdc->set_status = virtio_net_set_status;
3996     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3997     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3998     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3999     vdc->post_load = virtio_net_post_load_virtio;
4000     vdc->vmsd = &vmstate_virtio_net_device;
4001     vdc->primary_unplug_pending = primary_unplug_pending;
4002     vdc->get_vhost = virtio_net_get_vhost;
4003     vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
4004 }
4005 
4006 static const TypeInfo virtio_net_info = {
4007     .name = TYPE_VIRTIO_NET,
4008     .parent = TYPE_VIRTIO_DEVICE,
4009     .instance_size = sizeof(VirtIONet),
4010     .instance_init = virtio_net_instance_init,
4011     .class_init = virtio_net_class_init,
4012 };
4013 
4014 static void virtio_register_types(void)
4015 {
4016     type_register_static(&virtio_net_info);
4017 }
4018 
4019 type_init(virtio_register_types)
4020