xref: /qemu/hw/net/virtio-net.c (revision ca61e750)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
49 
50 #define VIRTIO_NET_VM_VERSION    11
51 
52 #define MAC_TABLE_ENTRIES    64
53 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
54 
55 /* previously fixed value */
56 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
57 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
58 
59 /* for now, only allow larger queue sizes; with virtio-1, the guest can downsize */
60 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
61 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
62 
63 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
64 
65 #define VIRTIO_NET_TCP_FLAG         0x3F
66 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
67 
68 /* IPv4 max payload, 16 bits in the header */
69 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
70 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
71 
72 /* IHL value for an IPv4 header without options, in 32-bit words */
73 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
74 
75 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
76 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
77 
78 /* Purge coalesced packets timer interval. This value affects performance
79    significantly and should be tuned carefully: '300000' (300us) is the
80    recommended value to pass the WHQL test, while '50000' can gain 2x netperf
81    throughput with tso/gso/gro 'off'. */
82 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
83 
84 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
85                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
86                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
87                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
88                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
89                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
90                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
91                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
92                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
93 
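/*
 * Each entry maps a feature bit to the end offset of the last
 * virtio_net_config field that the feature exposes; the table is used to
 * size the device config space from the negotiated feature set.
 */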
94 static const VirtIOFeature feature_sizes[] = {
95     {.flags = 1ULL << VIRTIO_NET_F_MAC,
96      .end = endof(struct virtio_net_config, mac)},
97     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
98      .end = endof(struct virtio_net_config, status)},
99     {.flags = 1ULL << VIRTIO_NET_F_MQ,
100      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
101     {.flags = 1ULL << VIRTIO_NET_F_MTU,
102      .end = endof(struct virtio_net_config, mtu)},
103     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
104      .end = endof(struct virtio_net_config, duplex)},
105     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
106      .end = endof(struct virtio_net_config, supported_hash_types)},
107     {}
108 };
109 
110 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
111 {
112     VirtIONet *n = qemu_get_nic_opaque(nc);
113 
114     return &n->vqs[nc->queue_index];
115 }
116 
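/*
 * Virtqueues come in RX/TX pairs: vq 2*N is the RX queue and vq 2*N+1 the
 * TX queue of pair N, so dividing the vq index by two recovers the pair.
 */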
117 static int vq2q(int queue_index)
118 {
119     return queue_index / 2;
120 }
121 
122 /* TODO
123  * - we could suppress RX interrupt if we were so inclined.
124  */
125 
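/*
 * Fill in the guest-visible config space. When the peer is vhost-vdpa the
 * values reported by the backend take precedence, except that an all-zero
 * backend MAC is ignored in favour of the QEMU-configured address.
 */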
126 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
127 {
128     VirtIONet *n = VIRTIO_NET(vdev);
129     struct virtio_net_config netcfg;
130     NetClientState *nc = qemu_get_queue(n->nic);
131     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
132 
133     int ret = 0;
134     memset(&netcfg, 0, sizeof(struct virtio_net_config));
135     virtio_stw_p(vdev, &netcfg.status, n->status);
136     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
137     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
138     memcpy(netcfg.mac, n->mac, ETH_ALEN);
139     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
140     netcfg.duplex = n->net_conf.duplex;
141     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
142     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
143                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
144                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
145     virtio_stl_p(vdev, &netcfg.supported_hash_types,
146                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
147     memcpy(config, &netcfg, n->config_size);
148 
149     /*
150      * Is this VDPA? No peer means not VDPA: there's no way to
151      * disconnect/reconnect a VDPA peer.
152      */
153     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
154         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
155                                    n->config_size);
156         if (ret != -1) {
157             /*
158              * Some NIC/kernel combinations present 0 as the mac address.  As
159              * that is not a legal address, try to proceed with the
160              * address from the QEMU command line in the hope that the
161              * address has been configured correctly elsewhere - just not
162              * reported by the device.
163              */
164             if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
165                 info_report("Zero hardware mac address detected. Ignoring.");
166                 memcpy(netcfg.mac, n->mac, ETH_ALEN);
167             }
168             memcpy(config, &netcfg, n->config_size);
169         }
170     }
171 }
172 
173 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
174 {
175     VirtIONet *n = VIRTIO_NET(vdev);
176     struct virtio_net_config netcfg = {};
177     NetClientState *nc = qemu_get_queue(n->nic);
178 
179     memcpy(&netcfg, config, n->config_size);
180 
181     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
182         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
183         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
184         memcpy(n->mac, netcfg.mac, ETH_ALEN);
185         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
186     }
187 
188     /*
189      * Is this VDPA? No peer means not VDPA: there's no way to
190      * disconnect/reconnect a VDPA peer.
191      */
192     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
193         vhost_net_set_config(get_vhost_net(nc->peer),
194                              (uint8_t *)&netcfg, 0, n->config_size,
195                              VHOST_SET_CONFIG_TYPE_MASTER);
196     }
197 }
198 
199 static bool virtio_net_started(VirtIONet *n, uint8_t status)
200 {
201     VirtIODevice *vdev = VIRTIO_DEVICE(n);
202     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
203         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
204 }
205 
206 static void virtio_net_announce_notify(VirtIONet *net)
207 {
208     VirtIODevice *vdev = VIRTIO_DEVICE(net);
209     trace_virtio_net_announce_notify();
210 
211     net->status |= VIRTIO_NET_S_ANNOUNCE;
212     virtio_notify_config(vdev);
213 }
214 
215 static void virtio_net_announce_timer(void *opaque)
216 {
217     VirtIONet *n = opaque;
218     trace_virtio_net_announce_timer(n->announce_timer.round);
219 
220     n->announce_timer.round--;
221     virtio_net_announce_notify(n);
222 }
223 
224 static void virtio_net_announce(NetClientState *nc)
225 {
226     VirtIONet *n = qemu_get_nic_opaque(nc);
227     VirtIODevice *vdev = VIRTIO_DEVICE(n);
228 
229     /*
230      * Make sure the virtio migration announcement timer isn't running.
231      * If it is, let it trigger the announcement instead, so that we do
232      * not cause confusion.
233      */
234     if (n->announce_timer.round) {
235         return;
236     }
237 
238     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
239         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
240         virtio_net_announce_notify(n);
241     }
242 }
243 
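/*
 * Start or stop vhost to match the requested status. Packets still queued
 * on the userspace path are purged first, so that nothing touches the
 * rings once vhost owns them.
 */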
244 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
245 {
246     VirtIODevice *vdev = VIRTIO_DEVICE(n);
247     NetClientState *nc = qemu_get_queue(n->nic);
248     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
249     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
250               n->max_ncs - n->max_queue_pairs : 0;
251 
252     if (!get_vhost_net(nc->peer)) {
253         return;
254     }
255 
256     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
257         !!n->vhost_started) {
258         return;
259     }
260     if (!n->vhost_started) {
261         int r, i;
262 
263         if (n->needs_vnet_hdr_swap) {
264             error_report("backend does not support %s vnet headers; "
265                          "falling back on userspace virtio",
266                          virtio_is_big_endian(vdev) ? "BE" : "LE");
267             return;
268         }
269 
270         /* Any packets outstanding? Purge them to avoid touching rings
271          * when vhost is running.
272          */
273         for (i = 0; i < queue_pairs; i++) {
274             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
275 
276             /* Purge both directions: TX and RX. */
277             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
278             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
279         }
280 
281         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
282             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
283             if (r < 0) {
284                 error_report("%u byte MTU not supported by the backend",
285                              n->net_conf.mtu);
286 
287                 return;
288             }
289         }
290 
291         n->vhost_started = 1;
292         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
293         if (r < 0) {
294             error_report("unable to start vhost net: %d: "
295                          "falling back on userspace virtio", -r);
296             n->vhost_started = 0;
297         }
298     } else {
299         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
300         n->vhost_started = 0;
301     }
302 }
303 
304 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
305                                           NetClientState *peer,
306                                           bool enable)
307 {
308     if (virtio_is_big_endian(vdev)) {
309         return qemu_set_vnet_be(peer, enable);
310     } else {
311         return qemu_set_vnet_le(peer, enable);
312     }
313 }
314 
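/*
 * Ask every peer to parse vnet headers with the guest's endianness.
 * Returns true if a backend refused while enabling, meaning QEMU itself
 * has to byte-swap the headers (needs_vnet_hdr_swap).
 */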
315 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
316                                        int queue_pairs, bool enable)
317 {
318     int i;
319 
320     for (i = 0; i < queue_pairs; i++) {
321         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
322             enable) {
323             while (--i >= 0) {
324                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
325             }
326 
327             return true;
328         }
329     }
330 
331     return false;
332 }
333 
334 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
335 {
336     VirtIODevice *vdev = VIRTIO_DEVICE(n);
337     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
338 
339     if (virtio_net_started(n, status)) {
340         /* Before using the device, we tell the network backend about the
341          * endianness to use when parsing vnet headers. If the backend
342          * can't do it, we fallback onto fixing the headers in the core
343          * can't do it, we fall back to fixing the headers in the core
344          */
345         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
346                                                             queue_pairs, true);
347     } else if (virtio_net_started(n, vdev->status)) {
348         /* After using the device, we need to reset the network backend to
349          * the default (guest native endianness), otherwise the guest may
350          * lose network connectivity if it is rebooted into a different
351          * endianness.
352          */
353         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
354     }
355 }
356 
357 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
358 {
359     unsigned int dropped = virtqueue_drop_all(vq);
360     if (dropped) {
361         virtio_notify(vdev, vq);
362     }
363 }
364 
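/*
 * Apply a status change to every queue. Queues beyond curr_queue_pairs
 * (or any queue but the first when multiqueue is off) are treated as if
 * the driver were not ready, which stops their TX timer or bottom half.
 */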
365 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
366 {
367     VirtIONet *n = VIRTIO_NET(vdev);
368     VirtIONetQueue *q;
369     int i;
370     uint8_t queue_status;
371 
372     virtio_net_vnet_endian_status(n, status);
373     virtio_net_vhost_status(n, status);
374 
375     for (i = 0; i < n->max_queue_pairs; i++) {
376         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
377         bool queue_started;
378         q = &n->vqs[i];
379 
380         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
381             queue_status = 0;
382         } else {
383             queue_status = status;
384         }
385         queue_started =
386             virtio_net_started(n, queue_status) && !n->vhost_started;
387 
388         if (queue_started) {
389             qemu_flush_queued_packets(ncs);
390         }
391 
392         if (!q->tx_waiting) {
393             continue;
394         }
395 
396         if (queue_started) {
397             if (q->tx_timer) {
398                 timer_mod(q->tx_timer,
399                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
400             } else {
401                 qemu_bh_schedule(q->tx_bh);
402             }
403         } else {
404             if (q->tx_timer) {
405                 timer_del(q->tx_timer);
406             } else {
407                 qemu_bh_cancel(q->tx_bh);
408             }
409             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
410                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
411                 vdev->vm_running) {
412                 /* If tx is waiting, we likely have some packets in the
413                  * tx queue and notification disabled. */
414                 q->tx_waiting = 0;
415                 virtio_queue_set_notification(q->tx_vq, 1);
416                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
417             }
418         }
419     }
420 }
421 
422 static void virtio_net_set_link_status(NetClientState *nc)
423 {
424     VirtIONet *n = qemu_get_nic_opaque(nc);
425     VirtIODevice *vdev = VIRTIO_DEVICE(n);
426     uint16_t old_status = n->status;
427 
428     if (nc->link_down)
429         n->status &= ~VIRTIO_NET_S_LINK_UP;
430     else
431         n->status |= VIRTIO_NET_S_LINK_UP;
432 
433     if (n->status != old_status)
434         virtio_notify_config(vdev);
435 
436     virtio_net_set_status(vdev, vdev->status);
437 }
438 
439 static void rxfilter_notify(NetClientState *nc)
440 {
441     VirtIONet *n = qemu_get_nic_opaque(nc);
442 
443     if (nc->rxfilter_notify_enabled) {
444         char *path = object_get_canonical_path(OBJECT(n->qdev));
445         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
446                                               n->netclient_name, path);
447         g_free(path);
448 
449         /* disable event notification to avoid events flooding */
450         nc->rxfilter_notify_enabled = 0;
451     }
452 }
453 
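/*
 * The VLAN filter is a MAX_VLAN-bit bitmap held in 32-bit words: word i,
 * bit j corresponds to VLAN id (i << 5) + j.
 */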
454 static intList *get_vlan_table(VirtIONet *n)
455 {
456     intList *list;
457     int i, j;
458 
459     list = NULL;
460     for (i = 0; i < MAX_VLAN >> 5; i++) {
461         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
462             if (n->vlans[i] & (1U << j)) {
463                 QAPI_LIST_PREPEND(list, (i << 5) + j);
464             }
465         }
466     }
467 
468     return list;
469 }
470 
471 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
472 {
473     VirtIONet *n = qemu_get_nic_opaque(nc);
474     VirtIODevice *vdev = VIRTIO_DEVICE(n);
475     RxFilterInfo *info;
476     strList *str_list;
477     int i;
478 
479     info = g_malloc0(sizeof(*info));
480     info->name = g_strdup(nc->name);
481     info->promiscuous = n->promisc;
482 
483     if (n->nouni) {
484         info->unicast = RX_STATE_NONE;
485     } else if (n->alluni) {
486         info->unicast = RX_STATE_ALL;
487     } else {
488         info->unicast = RX_STATE_NORMAL;
489     }
490 
491     if (n->nomulti) {
492         info->multicast = RX_STATE_NONE;
493     } else if (n->allmulti) {
494         info->multicast = RX_STATE_ALL;
495     } else {
496         info->multicast = RX_STATE_NORMAL;
497     }
498 
499     info->broadcast_allowed = !n->nobcast;
500     info->multicast_overflow = n->mac_table.multi_overflow;
501     info->unicast_overflow = n->mac_table.uni_overflow;
502 
503     info->main_mac = qemu_mac_strdup_printf(n->mac);
504 
505     str_list = NULL;
506     for (i = 0; i < n->mac_table.first_multi; i++) {
507         QAPI_LIST_PREPEND(str_list,
508                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
509     }
510     info->unicast_table = str_list;
511 
512     str_list = NULL;
513     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
514         QAPI_LIST_PREPEND(str_list,
515                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
516     }
517     info->multicast_table = str_list;
518     info->vlan_table = get_vlan_table(n);
519 
520     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
521         info->vlan = RX_STATE_ALL;
522     } else if (!info->vlan_table) {
523         info->vlan = RX_STATE_NONE;
524     } else {
525         info->vlan = RX_STATE_NORMAL;
526     }
527 
528     /* enable event notification after query */
529     nc->rxfilter_notify_enabled = 1;
530 
531     return info;
532 }
533 
534 static void virtio_net_reset(VirtIODevice *vdev)
535 {
536     VirtIONet *n = VIRTIO_NET(vdev);
537     int i;
538 
539     /* Reset back to compatibility mode */
540     n->promisc = 1;
541     n->allmulti = 0;
542     n->alluni = 0;
543     n->nomulti = 0;
544     n->nouni = 0;
545     n->nobcast = 0;
546     /* multiqueue is disabled by default */
547     n->curr_queue_pairs = 1;
548     timer_del(n->announce_timer.tm);
549     n->announce_timer.round = 0;
550     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
551 
552     /* Flush any MAC and VLAN filter table state */
553     n->mac_table.in_use = 0;
554     n->mac_table.first_multi = 0;
555     n->mac_table.multi_overflow = 0;
556     n->mac_table.uni_overflow = 0;
557     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
558     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
559     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
560     memset(n->vlans, 0, MAX_VLAN >> 3);
561 
562     /* Flush any async TX */
563     for (i = 0; i < n->max_queue_pairs; i++) {
564         NetClientState *nc = qemu_get_subqueue(n->nic, i);
565 
566         if (nc->peer) {
567             qemu_flush_or_purge_queued_packets(nc->peer, true);
568             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
569         }
570     }
571 }
572 
573 static void peer_test_vnet_hdr(VirtIONet *n)
574 {
575     NetClientState *nc = qemu_get_queue(n->nic);
576     if (!nc->peer) {
577         return;
578     }
579 
580     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
581 }
582 
583 static int peer_has_vnet_hdr(VirtIONet *n)
584 {
585     return n->has_vnet_hdr;
586 }
587 
588 static int peer_has_ufo(VirtIONet *n)
589 {
590     if (!peer_has_vnet_hdr(n))
591         return 0;
592 
593     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
594 
595     return n->has_ufo;
596 }
597 
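/*
 * Select the guest header layout: virtio 1.0 devices always use the
 * 12-byte mergeable header (or virtio_net_hdr_v1_hash when hash reports
 * are negotiated), while legacy devices use the 10-byte virtio_net_hdr
 * unless mergeable RX buffers were negotiated.
 */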
598 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
599                                        int version_1, int hash_report)
600 {
601     int i;
602     NetClientState *nc;
603 
604     n->mergeable_rx_bufs = mergeable_rx_bufs;
605 
606     if (version_1) {
607         n->guest_hdr_len = hash_report ?
608             sizeof(struct virtio_net_hdr_v1_hash) :
609             sizeof(struct virtio_net_hdr_mrg_rxbuf);
610         n->rss_data.populate_hash = !!hash_report;
611     } else {
612         n->guest_hdr_len = n->mergeable_rx_bufs ?
613             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
614             sizeof(struct virtio_net_hdr);
615     }
616 
617     for (i = 0; i < n->max_queue_pairs; i++) {
618         nc = qemu_get_subqueue(n->nic, i);
619 
620         if (peer_has_vnet_hdr(n) &&
621             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
622             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
623             n->host_hdr_len = n->guest_hdr_len;
624         }
625     }
626 }
627 
628 static int virtio_net_max_tx_queue_size(VirtIONet *n)
629 {
630     NetClientState *peer = n->nic_conf.peers.ncs[0];
631 
632     /*
633      * Backends other than vhost-user or vhost-vdpa don't support max queue
634      * size.
635      */
636     if (!peer) {
637         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
638     }
639 
640     switch (peer->info->type) {
641     case NET_CLIENT_DRIVER_VHOST_USER:
642     case NET_CLIENT_DRIVER_VHOST_VDPA:
643         return VIRTQUEUE_MAX_SIZE;
644     default:
645         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
646     }
647 }
648 
649 static int peer_attach(VirtIONet *n, int index)
650 {
651     NetClientState *nc = qemu_get_subqueue(n->nic, index);
652 
653     if (!nc->peer) {
654         return 0;
655     }
656 
657     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
658         vhost_set_vring_enable(nc->peer, 1);
659     }
660 
661     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
662         return 0;
663     }
664 
665     if (n->max_queue_pairs == 1) {
666         return 0;
667     }
668 
669     return tap_enable(nc->peer);
670 }
671 
672 static int peer_detach(VirtIONet *n, int index)
673 {
674     NetClientState *nc = qemu_get_subqueue(n->nic, index);
675 
676     if (!nc->peer) {
677         return 0;
678     }
679 
680     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
681         vhost_set_vring_enable(nc->peer, 0);
682     }
683 
684     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
685         return 0;
686     }
687 
688     return tap_disable(nc->peer);
689 }
690 
691 static void virtio_net_set_queue_pairs(VirtIONet *n)
692 {
693     int i;
694     int r;
695 
696     if (n->nic->peer_deleted) {
697         return;
698     }
699 
700     for (i = 0; i < n->max_queue_pairs; i++) {
701         if (i < n->curr_queue_pairs) {
702             r = peer_attach(n, i);
703             assert(!r);
704         } else {
705             r = peer_detach(n, i);
706             assert(!r);
707         }
708     }
709 }
710 
711 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
712 
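/*
 * Trim the advertised feature set down to what the peer can deliver:
 * everything that needs a vnet header is dropped without one, UFO needs
 * backend support, and vhost backends get the final say through
 * vhost_net_get_features().
 */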
713 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
714                                         Error **errp)
715 {
716     VirtIONet *n = VIRTIO_NET(vdev);
717     NetClientState *nc = qemu_get_queue(n->nic);
718 
719     /* First, sync all of the features that virtio-net may support */
720     features |= n->host_features;
721 
722     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
723 
724     if (!peer_has_vnet_hdr(n)) {
725         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
726         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
727         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
728         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
729 
730         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
731         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
732         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
733         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
734 
735         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
736     }
737 
738     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
739         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
740         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
741     }
742 
743     if (!get_vhost_net(nc->peer)) {
744         return features;
745     }
746 
747     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
748         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
749     }
750     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
751     vdev->backend_features = features;
752 
753     if (n->mtu_bypass_backend &&
754             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
755         features |= (1ULL << VIRTIO_NET_F_MTU);
756     }
757 
758     return features;
759 }
760 
761 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
762 {
763     uint64_t features = 0;
764 
765     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
766      * but also these: */
767     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
768     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
769     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
770     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
771     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
772 
773     return features;
774 }
775 
776 static void virtio_net_apply_guest_offloads(VirtIONet *n)
777 {
778     qemu_set_offload(qemu_get_queue(n->nic)->peer,
779             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
780             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
781             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
782             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
783             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
784 }
785 
786 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
787 {
788     static const uint64_t guest_offloads_mask =
789         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
790         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
791         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
792         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
793         (1ULL << VIRTIO_NET_F_GUEST_UFO);
794 
795     return guest_offloads_mask & features;
796 }
797 
798 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
799 {
800     VirtIODevice *vdev = VIRTIO_DEVICE(n);
801     return virtio_net_guest_offloads_by_features(vdev->guest_features);
802 }
803 
804 typedef struct {
805     VirtIONet *n;
806     DeviceState *dev;
807 } FailoverDevice;
808 
809 /**
810  * Set the failover primary device
811  *
812  * @dev: device being visited during the bus walk
813  * @opaque: FailoverDevice to fill in when the primary is found
814  * Returns: 1 to stop the walk once the primary device is found
815  */
816 static int failover_set_primary(DeviceState *dev, void *opaque)
817 {
818     FailoverDevice *fdev = opaque;
819     PCIDevice *pci_dev = (PCIDevice *)
820         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
821 
822     if (!pci_dev) {
823         return 0;
824     }
825 
826     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
827         fdev->dev = dev;
828         return 1;
829     }
830 
831     return 0;
832 }
833 
834 /**
835  * Find the primary device for this failover virtio-net
836  *
837  * @n: VirtIONet device
838  * Returns: the primary DeviceState, or NULL if it was not found
839  */
840 static DeviceState *failover_find_primary_device(VirtIONet *n)
841 {
842     FailoverDevice fdev = {
843         .n = n,
844     };
845 
846     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
847                        NULL, NULL, &fdev);
848     return fdev.dev;
849 }
850 
851 static void failover_add_primary(VirtIONet *n, Error **errp)
852 {
853     Error *err = NULL;
854     DeviceState *dev = failover_find_primary_device(n);
855 
856     if (dev) {
857         return;
858     }
859 
860     if (!n->primary_opts) {
861         error_setg(errp, "Primary device not found");
862         error_append_hint(errp, "Virtio-net failover will not work. Make "
863                           "sure primary device has parameter"
864                           " failover_pair_id=%s\n", n->netclient_name);
865         return;
866     }
867 
868     dev = qdev_device_add_from_qdict(n->primary_opts,
869                                      n->primary_opts_from_json,
870                                      &err);
871     if (err) {
872         qobject_unref(n->primary_opts);
873         n->primary_opts = NULL;
874     } else {
875         object_unref(OBJECT(dev));
876     }
877     error_propagate(errp, err);
878 }
879 
880 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
881 {
882     VirtIONet *n = VIRTIO_NET(vdev);
883     Error *err = NULL;
884     int i;
885 
886     if (n->mtu_bypass_backend &&
887             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
888         features &= ~(1ULL << VIRTIO_NET_F_MTU);
889     }
890 
891     virtio_net_set_multiqueue(n,
892                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
893                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
894 
895     virtio_net_set_mrg_rx_bufs(n,
896                                virtio_has_feature(features,
897                                                   VIRTIO_NET_F_MRG_RXBUF),
898                                virtio_has_feature(features,
899                                                   VIRTIO_F_VERSION_1),
900                                virtio_has_feature(features,
901                                                   VIRTIO_NET_F_HASH_REPORT));
902 
903     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
904         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
905     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
906         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
907     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
908 
909     if (n->has_vnet_hdr) {
910         n->curr_guest_offloads =
911             virtio_net_guest_offloads_by_features(features);
912         virtio_net_apply_guest_offloads(n);
913     }
914 
915     for (i = 0; i < n->max_queue_pairs; i++) {
916         NetClientState *nc = qemu_get_subqueue(n->nic, i);
917 
918         if (!get_vhost_net(nc->peer)) {
919             continue;
920         }
921         vhost_net_ack_features(get_vhost_net(nc->peer), features);
922     }
923 
924     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
925         memset(n->vlans, 0, MAX_VLAN >> 3);
926     } else {
927         memset(n->vlans, 0xff, MAX_VLAN >> 3);
928     }
929 
930     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
931         qapi_event_send_failover_negotiated(n->netclient_name);
932         qatomic_set(&n->failover_primary_hidden, false);
933         failover_add_primary(n, &err);
934         if (err) {
935             if (!qtest_enabled()) {
936                 warn_report_err(err);
937             } else {
938                 error_free(err);
939             }
940         }
941     }
942 }
943 
944 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
945                                      struct iovec *iov, unsigned int iov_cnt)
946 {
947     uint8_t on;
948     size_t s;
949     NetClientState *nc = qemu_get_queue(n->nic);
950 
951     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
952     if (s != sizeof(on)) {
953         return VIRTIO_NET_ERR;
954     }
955 
956     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
957         n->promisc = on;
958     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
959         n->allmulti = on;
960     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
961         n->alluni = on;
962     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
963         n->nomulti = on;
964     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
965         n->nouni = on;
966     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
967         n->nobcast = on;
968     } else {
969         return VIRTIO_NET_ERR;
970     }
971 
972     rxfilter_notify(nc);
973 
974     return VIRTIO_NET_OK;
975 }
976 
977 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
978                                      struct iovec *iov, unsigned int iov_cnt)
979 {
980     VirtIODevice *vdev = VIRTIO_DEVICE(n);
981     uint64_t offloads;
982     size_t s;
983 
984     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
985         return VIRTIO_NET_ERR;
986     }
987 
988     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
989     if (s != sizeof(offloads)) {
990         return VIRTIO_NET_ERR;
991     }
992 
993     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
994         uint64_t supported_offloads;
995 
996         offloads = virtio_ldq_p(vdev, &offloads);
997 
998         if (!n->has_vnet_hdr) {
999             return VIRTIO_NET_ERR;
1000         }
1001 
1002         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1003             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1004         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1005             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1006         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1007 
1008         supported_offloads = virtio_net_supported_guest_offloads(n);
1009         if (offloads & ~supported_offloads) {
1010             return VIRTIO_NET_ERR;
1011         }
1012 
1013         n->curr_guest_offloads = offloads;
1014         virtio_net_apply_guest_offloads(n);
1015 
1016         return VIRTIO_NET_OK;
1017     } else {
1018         return VIRTIO_NET_ERR;
1019     }
1020 }
1021 
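/*
 * VIRTIO_NET_CTRL_MAC_TABLE_SET carries two virtio_net_ctrl_mac blocks
 * back to back, unicast entries first and multicast entries second. Each
 * block is an entry count followed by that many 6-byte MACs; a block that
 * does not fit into the table only raises the matching overflow flag.
 */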
1022 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1023                                  struct iovec *iov, unsigned int iov_cnt)
1024 {
1025     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1026     struct virtio_net_ctrl_mac mac_data;
1027     size_t s;
1028     NetClientState *nc = qemu_get_queue(n->nic);
1029 
1030     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1031         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1032             return VIRTIO_NET_ERR;
1033         }
1034         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1035         assert(s == sizeof(n->mac));
1036         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1037         rxfilter_notify(nc);
1038 
1039         return VIRTIO_NET_OK;
1040     }
1041 
1042     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1043         return VIRTIO_NET_ERR;
1044     }
1045 
1046     int in_use = 0;
1047     int first_multi = 0;
1048     uint8_t uni_overflow = 0;
1049     uint8_t multi_overflow = 0;
1050     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1051 
1052     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1053                    sizeof(mac_data.entries));
1054     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1055     if (s != sizeof(mac_data.entries)) {
1056         goto error;
1057     }
1058     iov_discard_front(&iov, &iov_cnt, s);
1059 
1060     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1061         goto error;
1062     }
1063 
1064     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1065         s = iov_to_buf(iov, iov_cnt, 0, macs,
1066                        mac_data.entries * ETH_ALEN);
1067         if (s != mac_data.entries * ETH_ALEN) {
1068             goto error;
1069         }
1070         in_use += mac_data.entries;
1071     } else {
1072         uni_overflow = 1;
1073     }
1074 
1075     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1076 
1077     first_multi = in_use;
1078 
1079     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1080                    sizeof(mac_data.entries));
1081     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1082     if (s != sizeof(mac_data.entries)) {
1083         goto error;
1084     }
1085 
1086     iov_discard_front(&iov, &iov_cnt, s);
1087 
1088     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1089         goto error;
1090     }
1091 
1092     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1093         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1094                        mac_data.entries * ETH_ALEN);
1095         if (s != mac_data.entries * ETH_ALEN) {
1096             goto error;
1097         }
1098         in_use += mac_data.entries;
1099     } else {
1100         multi_overflow = 1;
1101     }
1102 
1103     n->mac_table.in_use = in_use;
1104     n->mac_table.first_multi = first_multi;
1105     n->mac_table.uni_overflow = uni_overflow;
1106     n->mac_table.multi_overflow = multi_overflow;
1107     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1108     g_free(macs);
1109     rxfilter_notify(nc);
1110 
1111     return VIRTIO_NET_OK;
1112 
1113 error:
1114     g_free(macs);
1115     return VIRTIO_NET_ERR;
1116 }
1117 
1118 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1119                                         struct iovec *iov, unsigned int iov_cnt)
1120 {
1121     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1122     uint16_t vid;
1123     size_t s;
1124     NetClientState *nc = qemu_get_queue(n->nic);
1125 
1126     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1127     vid = virtio_lduw_p(vdev, &vid);
1128     if (s != sizeof(vid)) {
1129         return VIRTIO_NET_ERR;
1130     }
1131 
1132     if (vid >= MAX_VLAN)
1133         return VIRTIO_NET_ERR;
1134 
1135     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1136         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1137     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1138         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1139     else
1140         return VIRTIO_NET_ERR;
1141 
1142     rxfilter_notify(nc);
1143 
1144     return VIRTIO_NET_OK;
1145 }
1146 
1147 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1148                                       struct iovec *iov, unsigned int iov_cnt)
1149 {
1150     trace_virtio_net_handle_announce(n->announce_timer.round);
1151     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1152         n->status & VIRTIO_NET_S_ANNOUNCE) {
1153         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1154         if (n->announce_timer.round) {
1155             qemu_announce_timer_step(&n->announce_timer);
1156         }
1157         return VIRTIO_NET_OK;
1158     } else {
1159         return VIRTIO_NET_ERR;
1160     }
1161 }
1162 
1163 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1164 
1165 static void virtio_net_disable_rss(VirtIONet *n)
1166 {
1167     if (n->rss_data.enabled) {
1168         trace_virtio_net_rss_disable();
1169     }
1170     n->rss_data.enabled = false;
1171 
1172     virtio_net_detach_epbf_rss(n);
1173 }
1174 
1175 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1176 {
1177     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1178     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1179         return false;
1180     }
1181 
1182     return nc->info->set_steering_ebpf(nc, prog_fd);
1183 }
1184 
1185 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1186                                    struct EBPFRSSConfig *config)
1187 {
1188     config->redirect = data->redirect;
1189     config->populate_hash = data->populate_hash;
1190     config->hash_types = data->hash_types;
1191     config->indirections_len = data->indirections_len;
1192     config->default_queue = data->default_queue;
1193 }
1194 
1195 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1196 {
1197     struct EBPFRSSConfig config = {};
1198 
1199     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1200         return false;
1201     }
1202 
1203     rss_data_to_rss_config(&n->rss_data, &config);
1204 
1205     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1206                           n->rss_data.indirections_table, n->rss_data.key)) {
1207         return false;
1208     }
1209 
1210     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1211         return false;
1212     }
1213 
1214     return true;
1215 }
1216 
1217 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1218 {
1219     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1220 }
1221 
1222 static bool virtio_net_load_ebpf(VirtIONet *n)
1223 {
1224     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1225         /* backend doesn't support steering ebpf */
1226         return false;
1227     }
1228 
1229     return ebpf_rss_load(&n->ebpf_rss);
1230 }
1231 
1232 static void virtio_net_unload_ebpf(VirtIONet *n)
1233 {
1234     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1235     ebpf_rss_unload(&n->ebpf_rss);
1236 }
1237 
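/*
 * Parse a virtio_net_rss_config command, either for full RSS (do_rss) or
 * for hash reporting only. The buffer is consumed in three chunks: the
 * fixed fields up to the indirection table, then indirections_len 16-bit
 * table entries, then the max_tx_vq/key-length pair and the key itself.
 * Returns the number of queue pairs to use, or 0 on error.
 */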
1238 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1239                                       struct iovec *iov,
1240                                       unsigned int iov_cnt,
1241                                       bool do_rss)
1242 {
1243     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1244     struct virtio_net_rss_config cfg;
1245     size_t s, offset = 0, size_get;
1246     uint16_t queue_pairs, i;
1247     struct {
1248         uint16_t us;
1249         uint8_t b;
1250     } QEMU_PACKED temp;
1251     const char *err_msg = "";
1252     uint32_t err_value = 0;
1253 
1254     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1255         err_msg = "RSS is not negotiated";
1256         goto error;
1257     }
1258     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1259         err_msg = "Hash report is not negotiated";
1260         goto error;
1261     }
1262     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1263     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1264     if (s != size_get) {
1265         err_msg = "Short command buffer";
1266         err_value = (uint32_t)s;
1267         goto error;
1268     }
1269     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1270     n->rss_data.indirections_len =
1271         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1272     n->rss_data.indirections_len++;
1273     if (!do_rss) {
1274         n->rss_data.indirections_len = 1;
1275     }
1276     if (!is_power_of_2(n->rss_data.indirections_len)) {
1277         err_msg = "Invalid size of indirection table";
1278         err_value = n->rss_data.indirections_len;
1279         goto error;
1280     }
1281     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1282         err_msg = "Too large indirection table";
1283         err_value = n->rss_data.indirections_len;
1284         goto error;
1285     }
1286     n->rss_data.default_queue = do_rss ?
1287         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1288     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1289         err_msg = "Invalid default queue";
1290         err_value = n->rss_data.default_queue;
1291         goto error;
1292     }
1293     offset += size_get;
1294     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1295     g_free(n->rss_data.indirections_table);
1296     n->rss_data.indirections_table = g_malloc(size_get);
1297     if (!n->rss_data.indirections_table) {
1298         err_msg = "Can't allocate indirections table";
1299         err_value = n->rss_data.indirections_len;
1300         goto error;
1301     }
1302     s = iov_to_buf(iov, iov_cnt, offset,
1303                    n->rss_data.indirections_table, size_get);
1304     if (s != size_get) {
1305         err_msg = "Short indirection table buffer";
1306         err_value = (uint32_t)s;
1307         goto error;
1308     }
1309     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1310         uint16_t val = n->rss_data.indirections_table[i];
1311         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1312     }
1313     offset += size_get;
1314     size_get = sizeof(temp);
1315     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1316     if (s != size_get) {
1317         err_msg = "Can't get queue_pairs";
1318         err_value = (uint32_t)s;
1319         goto error;
1320     }
1321     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1322     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1323         err_msg = "Invalid number of queue_pairs";
1324         err_value = queue_pairs;
1325         goto error;
1326     }
1327     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1328         err_msg = "Invalid key size";
1329         err_value = temp.b;
1330         goto error;
1331     }
1332     if (!temp.b && n->rss_data.hash_types) {
1333         err_msg = "No key provided";
1334         err_value = 0;
1335         goto error;
1336     }
1337     if (!temp.b && !n->rss_data.hash_types) {
1338         virtio_net_disable_rss(n);
1339         return queue_pairs;
1340     }
1341     offset += size_get;
1342     size_get = temp.b;
1343     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1344     if (s != size_get) {
1345         err_msg = "Can't get key buffer";
1346         err_value = (uint32_t)s;
1347         goto error;
1348     }
1349     n->rss_data.enabled = true;
1350 
1351     if (!n->rss_data.populate_hash) {
1352         if (!virtio_net_attach_epbf_rss(n)) {
1353             /* EBPF must be loaded for vhost */
1354             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1355                 warn_report("Can't load eBPF RSS for vhost");
1356                 goto error;
1357             }
1358             /* fall back to software RSS */
1359             warn_report("Can't load eBPF RSS - falling back to software RSS");
1360             n->rss_data.enabled_software_rss = true;
1361         }
1362     } else {
1363         /* Use software RSS for hash populating and detach eBPF
1364          * if it was loaded before. */
1365         virtio_net_detach_epbf_rss(n);
1366         n->rss_data.enabled_software_rss = true;
1367     }
1368 
1369     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1370                                 n->rss_data.indirections_len,
1371                                 temp.b);
1372     return queue_pairs;
1373 error:
1374     trace_virtio_net_rss_error(err_msg, err_value);
1375     virtio_net_disable_rss(n);
1376     return 0;
1377 }
1378 
1379 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1380                                 struct iovec *iov, unsigned int iov_cnt)
1381 {
1382     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1383     uint16_t queue_pairs;
1384     NetClientState *nc = qemu_get_queue(n->nic);
1385 
1386     virtio_net_disable_rss(n);
1387     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1388         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1389         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1390     }
1391     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1392         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1393     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1394         struct virtio_net_ctrl_mq mq;
1395         size_t s;
1396         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1397             return VIRTIO_NET_ERR;
1398         }
1399         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1400         if (s != sizeof(mq)) {
1401             return VIRTIO_NET_ERR;
1402         }
1403         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1404 
1405     } else {
1406         return VIRTIO_NET_ERR;
1407     }
1408 
1409     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1410         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1411         queue_pairs > n->max_queue_pairs ||
1412         !n->multiqueue) {
1413         return VIRTIO_NET_ERR;
1414     }
1415 
1416     /* Avoid changing the number of queue_pairs for vdpa devices in
1417      * the userspace handler. A future fix is needed to handle the mq
1418      * change in the userspace handler with vhost-vdpa. Let's disable
1419      * the mq handling from userspace for now and only allow it to be
1420      * done through the kernel. Ripples may be seen when falling back
1421      * to userspace, but without this the QEMU process would crash on
1422      * a recursive entry to virtio_net_set_status().
1423      */
1424     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1425         return VIRTIO_NET_ERR;
1426     }
1427 
1428     n->curr_queue_pairs = queue_pairs;
1429     /* Stop the backend before changing the number of queue_pairs to
1430      * avoid handling a disabled queue. */
1431     virtio_net_set_status(vdev, vdev->status);
1432     virtio_net_set_queue_pairs(n);
1433 
1434     return VIRTIO_NET_OK;
1435 }
1436 
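/*
 * Every control request starts with a virtio_net_ctrl_hdr (class and
 * command) in the out sg, followed by a command-specific payload; the
 * single ack byte is written back through the in sg.
 */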
1437 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1438 {
1439     VirtIONet *n = VIRTIO_NET(vdev);
1440     struct virtio_net_ctrl_hdr ctrl;
1441     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1442     VirtQueueElement *elem;
1443     size_t s;
1444     struct iovec *iov, *iov2;
1445     unsigned int iov_cnt;
1446 
1447     for (;;) {
1448         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1449         if (!elem) {
1450             break;
1451         }
1452         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1453             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1454             virtio_error(vdev, "virtio-net ctrl missing headers");
1455             virtqueue_detach_element(vq, elem, 0);
1456             g_free(elem);
1457             break;
1458         }
1459 
1460         iov_cnt = elem->out_num;
1461         iov2 = iov = g_memdup2(elem->out_sg,
1462                                sizeof(struct iovec) * elem->out_num);
1463         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1464         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1465         if (s != sizeof(ctrl)) {
1466             status = VIRTIO_NET_ERR;
1467         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1468             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1469         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1470             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1471         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1472             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1473         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1474             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1475         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1476             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1477         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1478             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1479         }
1480 
1481         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1482         assert(s == sizeof(status));
1483 
1484         virtqueue_push(vq, elem, sizeof(status));
1485         virtio_notify(vdev, vq);
1486         g_free(iov2);
1487         g_free(elem);
1488     }
1489 }
1490 
1491 /* RX */
1492 
1493 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1494 {
1495     VirtIONet *n = VIRTIO_NET(vdev);
1496     int queue_index = vq2q(virtio_get_queue_index(vq));
1497 
1498     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1499 }
1500 
1501 static bool virtio_net_can_receive(NetClientState *nc)
1502 {
1503     VirtIONet *n = qemu_get_nic_opaque(nc);
1504     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1505     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1506 
1507     if (!vdev->vm_running) {
1508         return false;
1509     }
1510 
1511     if (nc->queue_index >= n->curr_queue_pairs) {
1512         return false;
1513     }
1514 
1515     if (!virtio_queue_ready(q->rx_vq) ||
1516         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1517         return false;
1518     }
1519 
1520     return true;
1521 }
1522 
1523 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1524 {
1525     VirtIONet *n = q->n;
1526     if (virtio_queue_empty(q->rx_vq) ||
1527         (n->mergeable_rx_bufs &&
1528          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1529         virtio_queue_set_notification(q->rx_vq, 1);
1530 
1531         /* To avoid a race condition where the guest has made some buffers
1532          * available after the above check but before notification was
1533          * enabled, check for available buffers again.
1534          */
1535         if (virtio_queue_empty(q->rx_vq) ||
1536             (n->mergeable_rx_bufs &&
1537              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1538             return 0;
1539         }
1540     }
1541 
1542     virtio_queue_set_notification(q->rx_vq, 0);
1543     return 1;
1544 }
1545 
1546 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1547 {
1548     virtio_tswap16s(vdev, &hdr->hdr_len);
1549     virtio_tswap16s(vdev, &hdr->gso_size);
1550     virtio_tswap16s(vdev, &hdr->csum_start);
1551     virtio_tswap16s(vdev, &hdr->csum_offset);
1552 }
1553 
1554 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1555  * it never finds out that the packets don't have valid checksums.  This
1556  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1557  * fix this with Xen but it hasn't appeared in an upstream release of
1558  * dhclient yet.
1559  *
1560  * To avoid breaking existing guests, we catch udp packets and add
1561  * checksums.  This is terrible but it's better than hacking the guest
1562  * kernels.
1563  *
1564  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1565  * we should provide a mechanism to disable it to avoid polluting the host
1566  * cache.
1567  */
1568 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1569                                         uint8_t *buf, size_t size)
1570 {
1571     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1572         (size > 27 && size < 1500) && /* normal sized MTU */
1573         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1574         (buf[23] == 17) && /* ip.protocol == UDP */
1575         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1576         net_checksum_calculate(buf, size, CSUM_UDP);
1577         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1578     }
1579 }
1580 
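/*
 * Place the virtio_net_hdr in front of the guest buffers: copied from the
 * backend (byte-swapped if needed) when a vnet header is present,
 * otherwise synthesized as a plain GSO_NONE header.
 */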
1581 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1582                            const void *buf, size_t size)
1583 {
1584     if (n->has_vnet_hdr) {
1585         /* FIXME this cast is evil */
1586         void *wbuf = (void *)buf;
1587         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1588                                     size - n->host_hdr_len);
1589 
1590         if (n->needs_vnet_hdr_swap) {
1591             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1592         }
1593         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1594     } else {
1595         struct virtio_net_hdr hdr = {
1596             .flags = 0,
1597             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1598         };
1599         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1600     }
1601 }
1602 
1603 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1604 {
1605     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1606     static const uint8_t vlan[] = {0x81, 0x00};
1607     uint8_t *ptr = (uint8_t *)buf;
1608     int i;
1609 
1610     if (n->promisc)
1611         return 1;
1612 
1613     ptr += n->host_hdr_len;
1614 
1615     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1616         int vid = lduw_be_p(ptr + 14) & 0xfff;
1617         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1618             return 0;
1619     }
1620 
1621     if (ptr[0] & 1) { /* multicast */
1622         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1623             return !n->nobcast;
1624         } else if (n->nomulti) {
1625             return 0;
1626         } else if (n->allmulti || n->mac_table.multi_overflow) {
1627             return 1;
1628         }
1629 
1630         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1631             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1632                 return 1;
1633             }
1634         }
1635     } else { /* unicast */
1636         if (n->nouni) {
1637             return 0;
1638         } else if (n->alluni || n->mac_table.uni_overflow) {
1639             return 1;
1640         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1641             return 1;
1642         }
1643 
1644         for (i = 0; i < n->mac_table.first_multi; i++) {
1645             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1646                 return 1;
1647             }
1648         }
1649     }
1650 
1651     return 0;
1652 }
1653 
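/*
 * Map the parsed packet type onto one of the NetPktRss* hash types the
 * guest enabled.  More specific types win: L4 hashing (TCP/UDP) is
 * preferred over plain L3, and the *_EX variants additionally cover
 * addresses carried in IPv6 extension headers.  0xff means no usable
 * hash type was enabled for this packet.
 */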
1654 static uint8_t virtio_net_get_hash_type(bool isip4,
1655                                         bool isip6,
1656                                         bool isudp,
1657                                         bool istcp,
1658                                         uint32_t types)
1659 {
1660     if (isip4) {
1661         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1662             return NetPktRssIpV4Tcp;
1663         }
1664         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1665             return NetPktRssIpV4Udp;
1666         }
1667         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1668             return NetPktRssIpV4;
1669         }
1670     } else if (isip6) {
1671         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1672                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1673 
1674         if (istcp && (types & mask)) {
1675             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1676                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1677         }
1678         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1679         if (isudp && (types & mask)) {
1680             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1681                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1682         }
1683         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1684         if (types & mask) {
1685             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1686                 NetPktRssIpV6Ex : NetPktRssIpV6;
1687         }
1688     }
1689     return 0xff;
1690 }
1691 
1692 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1693                                    uint32_t hash)
1694 {
1695     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1696     hdr->hash_value = hash;
1697     hdr->hash_report = report;
1698 }
1699 
1700 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1701                                   size_t size)
1702 {
1703     VirtIONet *n = qemu_get_nic_opaque(nc);
1704     unsigned int index = nc->queue_index, new_index = index;
1705     struct NetRxPkt *pkt = n->rx_pkt;
1706     uint8_t net_hash_type;
1707     uint32_t hash;
1708     bool isip4, isip6, isudp, istcp;
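    /*
     * reports[] is indexed by the NetPktRss* value returned by
     * virtio_net_get_hash_type() and translates it into the
     * VIRTIO_NET_HASH_REPORT_* constant written back to the guest.
     */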
1709     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1710         VIRTIO_NET_HASH_REPORT_IPv4,
1711         VIRTIO_NET_HASH_REPORT_TCPv4,
1712         VIRTIO_NET_HASH_REPORT_TCPv6,
1713         VIRTIO_NET_HASH_REPORT_IPv6,
1714         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1715         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1716         VIRTIO_NET_HASH_REPORT_UDPv4,
1717         VIRTIO_NET_HASH_REPORT_UDPv6,
1718         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1719     };
1720 
1721     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1722                              size - n->host_hdr_len);
1723     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1724     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1725         istcp = isudp = false;
1726     }
1727     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1728         istcp = isudp = false;
1729     }
1730     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1731                                              n->rss_data.hash_types);
1732     if (net_hash_type > NetPktRssIpV6UdpEx) {
1733         if (n->rss_data.populate_hash) {
1734             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1735         }
1736         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1737     }
1738 
1739     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1740 
1741     if (n->rss_data.populate_hash) {
1742         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1743     }
1744 
1745     if (n->rss_data.redirect) {
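        /*
         * indirections_len is validated elsewhere to be a power of two,
         * so the mask below is equivalent to hash % indirections_len.
         */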
1746         new_index = hash & (n->rss_data.indirections_len - 1);
1747         new_index = n->rss_data.indirections_table[new_index];
1748     }
1749 
1750     return (index == new_index) ? -1 : new_index;
1751 }
1752 
1753 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1754                                       size_t size, bool no_rss)
1755 {
1756     VirtIONet *n = qemu_get_nic_opaque(nc);
1757     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1758     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1759     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1760     size_t lens[VIRTQUEUE_MAX_SIZE];
1761     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1762     struct virtio_net_hdr_mrg_rxbuf mhdr;
1763     unsigned mhdr_cnt = 0;
1764     size_t offset, i, guest_offset, j;
1765     ssize_t err;
1766 
1767     if (!virtio_net_can_receive(nc)) {
1768         return -1;
1769     }
1770 
1771     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1772         int index = virtio_net_process_rss(nc, buf, size);
1773         if (index >= 0) {
1774             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1775             return virtio_net_receive_rcu(nc2, buf, size, true);
1776         }
1777     }
1778 
1779     /* hdr_len refers to the header we supply to the guest */
1780     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1781         return 0;
1782     }
1783 
1784     if (!receive_filter(n, buf, size))
1785         return size;
1786 
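    /*
     * Copy the packet into guest buffers.  With mergeable rx buffers a
     * single packet may span several descriptor chains; each chain popped
     * here becomes one entry in elems[]/lens[], and the num_buffers field
     * of the first header is patched once the count is known.
     */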
1787     offset = i = 0;
1788 
1789     while (offset < size) {
1790         VirtQueueElement *elem;
1791         int len, total;
1792         const struct iovec *sg;
1793 
1794         total = 0;
1795 
1796         if (i == VIRTQUEUE_MAX_SIZE) {
1797             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1798             err = size;
1799             goto err;
1800         }
1801 
1802         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1803         if (!elem) {
1804             if (i) {
1805                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1806                              "i %zd mergeable %d offset %zd, size %zd, "
1807                              "guest hdr len %zd, host hdr len %zd "
1808                              "guest features 0x%" PRIx64,
1809                              i, n->mergeable_rx_bufs, offset, size,
1810                              n->guest_hdr_len, n->host_hdr_len,
1811                              vdev->guest_features);
1812             }
1813             err = -1;
1814             goto err;
1815         }
1816 
1817         if (elem->in_num < 1) {
1818             virtio_error(vdev,
1819                          "virtio-net receive queue contains no in buffers");
1820             virtqueue_detach_element(q->rx_vq, elem, 0);
1821             g_free(elem);
1822             err = -1;
1823             goto err;
1824         }
1825 
1826         sg = elem->in_sg;
1827         if (i == 0) {
1828             assert(offset == 0);
1829             if (n->mergeable_rx_bufs) {
1830                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1831                                     sg, elem->in_num,
1832                                     offsetof(typeof(mhdr), num_buffers),
1833                                     sizeof(mhdr.num_buffers));
1834             }
1835 
1836             receive_header(n, sg, elem->in_num, buf, size);
1837             if (n->rss_data.populate_hash) {
1838                 offset = sizeof(mhdr);
1839                 iov_from_buf(sg, elem->in_num, offset,
1840                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1841             }
1842             offset = n->host_hdr_len;
1843             total += n->guest_hdr_len;
1844             guest_offset = n->guest_hdr_len;
1845         } else {
1846             guest_offset = 0;
1847         }
1848 
1849         /* copy in packet.  ugh */
1850         len = iov_from_buf(sg, elem->in_num, guest_offset,
1851                            buf + offset, size - offset);
1852         total += len;
1853         offset += len;
1854         /* If buffers can't be merged, at this point we
1855          * must have consumed the complete packet.
1856          * Otherwise, drop it. */
1857         if (!n->mergeable_rx_bufs && offset < size) {
1858             virtqueue_unpop(q->rx_vq, elem, total);
1859             g_free(elem);
1860             err = size;
1861             goto err;
1862         }
1863 
1864         elems[i] = elem;
1865         lens[i] = total;
1866         i++;
1867     }
1868 
1869     if (mhdr_cnt) {
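    /*
     * Now that the number of merged buffers is known, patch it into the
     * num_buffers field of the header written into the first buffer.
     */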
1870         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1871         iov_from_buf(mhdr_sg, mhdr_cnt,
1872                      0,
1873                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1874     }
1875 
1876     for (j = 0; j < i; j++) {
1877         /* signal other side */
1878         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1879         g_free(elems[j]);
1880     }
1881 
1882     virtqueue_flush(q->rx_vq, i);
1883     virtio_notify(vdev, q->rx_vq);
1884 
1885     return size;
1886 
1887 err:
1888     for (j = 0; j < i; j++) {
1889         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1890         g_free(elems[j]);
1891     }
1892 
1893     return err;
1894 }
1895 
1896 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1897                                   size_t size)
1898 {
1899     RCU_READ_LOCK_GUARD();
1900 
1901     return virtio_net_receive_rcu(nc, buf, size, false);
1902 }
1903 
1904 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1905                                          const uint8_t *buf,
1906                                          VirtioNetRscUnit *unit)
1907 {
1908     uint16_t ip_hdrlen;
1909     struct ip_header *ip;
1910 
1911     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1912                               + sizeof(struct eth_header));
1913     unit->ip = (void *)ip;
1914     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1915     unit->ip_plen = &ip->ip_len;
1916     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
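    /*
     * The TCP data offset lives in the top 4 bits of th_offset_flags and
     * counts 32-bit words: shifting right by 12 and then multiplying by 4
     * collapses into a single shift right by 10.
     */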
1917     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1918     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1919 }
1920 
1921 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1922                                          const uint8_t *buf,
1923                                          VirtioNetRscUnit *unit)
1924 {
1925     struct ip6_header *ip6;
1926 
1927     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1928                                  + sizeof(struct eth_header));
1929     unit->ip = ip6;
1930     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1931     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1932                                         + sizeof(struct ip6_header));
1933     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1934 
1935     /* There is a difference between payload length in ipv4 and v6:
1936        the ip header is excluded in ipv6 */
1937     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1938 }
1939 
1940 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1941                                        VirtioNetRscSeg *seg)
1942 {
1943     int ret;
1944     struct virtio_net_hdr_v1 *h;
1945 
1946     h = (struct virtio_net_hdr_v1 *)seg->buf;
1947     h->flags = 0;
1948     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1949 
1950     if (seg->is_coalesced) {
1951         h->rsc.segments = seg->packets;
1952         h->rsc.dup_acks = seg->dup_ack;
1953         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1954         if (chain->proto == ETH_P_IP) {
1955             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1956         } else {
1957             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1958         }
1959     }
1960 
1961     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1962     QTAILQ_REMOVE(&chain->buffers, seg, next);
1963     g_free(seg->buf);
1964     g_free(seg);
1965 
1966     return ret;
1967 }
1968 
1969 static void virtio_net_rsc_purge(void *opq)
1970 {
1971     VirtioNetRscSeg *seg, *rn;
1972     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1973 
1974     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1975         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1976             chain->stat.purge_failed++;
1977             continue;
1978         }
1979     }
1980 
1981     chain->stat.timer++;
1982     if (!QTAILQ_EMPTY(&chain->buffers)) {
1983         timer_mod(chain->drain_timer,
1984               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1985     }
1986 }
1987 
1988 static void virtio_net_rsc_cleanup(VirtIONet *n)
1989 {
1990     VirtioNetRscChain *chain, *rn_chain;
1991     VirtioNetRscSeg *seg, *rn_seg;
1992 
1993     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1994         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1995             QTAILQ_REMOVE(&chain->buffers, seg, next);
1996             g_free(seg->buf);
1997             g_free(seg);
1998         }
1999 
2000         timer_free(chain->drain_timer);
2001         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2002         g_free(chain);
2003     }
2004 }
2005 
2006 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2007                                      NetClientState *nc,
2008                                      const uint8_t *buf, size_t size)
2009 {
2010     uint16_t hdr_len;
2011     VirtioNetRscSeg *seg;
2012 
2013     hdr_len = chain->n->guest_hdr_len;
2014     seg = g_new(VirtioNetRscSeg, 1);
2015     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2016         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2017     memcpy(seg->buf, buf, size);
2018     seg->size = size;
2019     seg->packets = 1;
2020     seg->dup_ack = 0;
2021     seg->is_coalesced = 0;
2022     seg->nc = nc;
2023 
2024     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2025     chain->stat.cache++;
2026 
2027     switch (chain->proto) {
2028     case ETH_P_IP:
2029         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2030         break;
2031     case ETH_P_IPV6:
2032         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2033         break;
2034     default:
2035         g_assert_not_reached();
2036     }
2037 }
2038 
2039 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2040                                          VirtioNetRscSeg *seg,
2041                                          const uint8_t *buf,
2042                                          struct tcp_header *n_tcp,
2043                                          struct tcp_header *o_tcp)
2044 {
2045     uint32_t nack, oack;
2046     uint16_t nwin, owin;
2047 
2048     nack = htonl(n_tcp->th_ack);
2049     nwin = htons(n_tcp->th_win);
2050     oack = htonl(o_tcp->th_ack);
2051     owin = htons(o_tcp->th_win);
2052 
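    /*
     * The subtraction below relies on unsigned 32-bit wraparound, so acks
     * that went backwards also land far outside the window and finalize
     * the segment.
     */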
2053     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2054         chain->stat.ack_out_of_win++;
2055         return RSC_FINAL;
2056     } else if (nack == oack) {
2057         /* duplicated ack or window probe */
2058         if (nwin == owin) {
2059             /* duplicated ack, bump the dup ack count (the WHQL test allows up to 1) */
2060             chain->stat.dup_ack++;
2061             return RSC_FINAL;
2062         } else {
2063             /* Coalesce window update */
2064             o_tcp->th_win = n_tcp->th_win;
2065             chain->stat.win_update++;
2066             return RSC_COALESCE;
2067         }
2068     } else {
2069         /* pure ack, go to 'C', finalize */
2070         chain->stat.pure_ack++;
2071         return RSC_FINAL;
2072     }
2073 }
2074 
2075 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2076                                             VirtioNetRscSeg *seg,
2077                                             const uint8_t *buf,
2078                                             VirtioNetRscUnit *n_unit)
2079 {
2080     void *data;
2081     uint16_t o_ip_len;
2082     uint32_t nseq, oseq;
2083     VirtioNetRscUnit *o_unit;
2084 
2085     o_unit = &seg->unit;
2086     o_ip_len = htons(*o_unit->ip_plen);
2087     nseq = htonl(n_unit->tcp->th_seq);
2088     oseq = htonl(o_unit->tcp->th_seq);
2089 
2090     /* out of order or retransmitted. */
2091     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2092         chain->stat.data_out_of_win++;
2093         return RSC_FINAL;
2094     }
2095 
2096     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2097     if (nseq == oseq) {
2098         if ((o_unit->payload == 0) && n_unit->payload) {
2099             /* From no payload to payload: the normal case, not a dup ack etc. */
2100             chain->stat.data_after_pure_ack++;
2101             goto coalesce;
2102         } else {
2103             return virtio_net_rsc_handle_ack(chain, seg, buf,
2104                                              n_unit->tcp, o_unit->tcp);
2105         }
2106     } else if ((nseq - oseq) != o_unit->payload) {
2107         /* Not a consistent packet, out of order */
2108         chain->stat.data_out_of_order++;
2109         return RSC_FINAL;
2110     } else {
2111 coalesce:
2112         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2113             chain->stat.over_size++;
2114             return RSC_FINAL;
2115         }
2116 
2117         /* The data is in order.  The payload length field differs between
2118            v4 and v6, so use the field value to update it and record the new data len */
2119         o_unit->payload += n_unit->payload; /* update new data len */
2120 
2121         /* update field in ip header */
2122         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2123 
2124         /* Carry the 'PUSH' flag over: the whql test guide says 'PUSH' can be
2125            coalesced for a windows guest, while this may change the behavior for a
2126            linux guest (only if it uses the RSC feature). */
2127         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2128 
2129         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2130         o_unit->tcp->th_win = n_unit->tcp->th_win;
2131 
2132         memmove(seg->buf + seg->size, data, n_unit->payload);
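        /*
         * seg->buf was allocated in virtio_net_rsc_cache_buf() with room
         * for VIRTIO_NET_MAX_TCP_PAYLOAD, and the over_size check above
         * keeps the total payload within chain->max_payload, so the
         * append below stays within the allocation.
         */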
2133         seg->size += n_unit->payload;
2134         seg->packets++;
2135         chain->stat.coalesced++;
2136         return RSC_COALESCE;
2137     }
2138 }
2139 
2140 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2141                                         VirtioNetRscSeg *seg,
2142                                         const uint8_t *buf, size_t size,
2143                                         VirtioNetRscUnit *unit)
2144 {
2145     struct ip_header *ip1, *ip2;
2146 
2147     ip1 = (struct ip_header *)(unit->ip);
2148     ip2 = (struct ip_header *)(seg->unit.ip);
2149     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2150         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2151         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2152         chain->stat.no_match++;
2153         return RSC_NO_MATCH;
2154     }
2155 
2156     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2157 }
2158 
2159 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2160                                         VirtioNetRscSeg *seg,
2161                                         const uint8_t *buf, size_t size,
2162                                         VirtioNetRscUnit *unit)
2163 {
2164     struct ip6_header *ip1, *ip2;
2165 
2166     ip1 = (struct ip6_header *)(unit->ip);
2167     ip2 = (struct ip6_header *)(seg->unit.ip);
2168     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2169         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2170         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2171         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2172             chain->stat.no_match++;
2173             return RSC_NO_MATCH;
2174     }
2175 
2176     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2177 }
2178 
2179 /* Packets with 'SYN' should bypass; packets with any other control flag should
2180  * be sent only after a drain, to prevent out-of-order delivery */
2181 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2182                                          struct tcp_header *tcp)
2183 {
2184     uint16_t tcp_hdr;
2185     uint16_t tcp_flag;
2186 
2187     tcp_flag = htons(tcp->th_offset_flags);
2188     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2189     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2190     if (tcp_flag & TH_SYN) {
2191         chain->stat.tcp_syn++;
2192         return RSC_BYPASS;
2193     }
2194 
2195     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2196         chain->stat.tcp_ctrl_drain++;
2197         return RSC_FINAL;
2198     }
2199 
2200     if (tcp_hdr > sizeof(struct tcp_header)) {
2201         chain->stat.tcp_all_opt++;
2202         return RSC_FINAL;
2203     }
2204 
2205     return RSC_CANDIDATE;
2206 }
2207 
2208 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2209                                          NetClientState *nc,
2210                                          const uint8_t *buf, size_t size,
2211                                          VirtioNetRscUnit *unit)
2212 {
2213     int ret;
2214     VirtioNetRscSeg *seg, *nseg;
2215 
2216     if (QTAILQ_EMPTY(&chain->buffers)) {
2217         chain->stat.empty_cache++;
2218         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2219         timer_mod(chain->drain_timer,
2220               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2221         return size;
2222     }
2223 
2224     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2225         if (chain->proto == ETH_P_IP) {
2226             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2227         } else {
2228             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2229         }
2230 
2231         if (ret == RSC_FINAL) {
2232             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2233                 /* Send failed */
2234                 chain->stat.final_failed++;
2235                 return 0;
2236             }
2237 
2238             /* Send current packet */
2239             return virtio_net_do_receive(nc, buf, size);
2240         } else if (ret == RSC_NO_MATCH) {
2241             continue;
2242         } else {
2243             /* Coalesced; set the coalesced flag so the cksum is recalculated for ipv4 */
2244             seg->is_coalesced = 1;
2245             return size;
2246         }
2247     }
2248 
2249     chain->stat.no_match_cache++;
2250     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2251     return size;
2252 }
2253 
2254 /* Drain a connection's data; this is to avoid out-of-order segments */
2255 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2256                                         NetClientState *nc,
2257                                         const uint8_t *buf, size_t size,
2258                                         uint16_t ip_start, uint16_t ip_size,
2259                                         uint16_t tcp_port)
2260 {
2261     VirtioNetRscSeg *seg, *nseg;
2262     uint32_t ppair1, ppair2;
2263 
2264     ppair1 = *(uint32_t *)(buf + tcp_port);
2265     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2266         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2267         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2268             || (ppair1 != ppair2)) {
2269             continue;
2270         }
2271         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2272             chain->stat.drain_failed++;
2273         }
2274 
2275         break;
2276     }
2277 
2278     return virtio_net_do_receive(nc, buf, size);
2279 }
2280 
2281 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2282                                             struct ip_header *ip,
2283                                             const uint8_t *buf, size_t size)
2284 {
2285     uint16_t ip_len;
2286 
2287     /* Not an ipv4 packet */
2288     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2289         chain->stat.ip_option++;
2290         return RSC_BYPASS;
2291     }
2292 
2293     /* Don't handle packets with ip option */
2294     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2295         chain->stat.ip_option++;
2296         return RSC_BYPASS;
2297     }
2298 
2299     if (ip->ip_p != IPPROTO_TCP) {
2300         chain->stat.bypass_not_tcp++;
2301         return RSC_BYPASS;
2302     }
2303 
2304     /* Don't handle packets with ip fragment */
2305     if (!(htons(ip->ip_off) & IP_DF)) {
2306         chain->stat.ip_frag++;
2307         return RSC_BYPASS;
2308     }
2309 
2310     /* Don't handle packets with ecn flag */
2311     if (IPTOS_ECN(ip->ip_tos)) {
2312         chain->stat.ip_ecn++;
2313         return RSC_BYPASS;
2314     }
2315 
2316     ip_len = htons(ip->ip_len);
2317     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2318         || ip_len > (size - chain->n->guest_hdr_len -
2319                      sizeof(struct eth_header))) {
2320         chain->stat.ip_hacked++;
2321         return RSC_BYPASS;
2322     }
2323 
2324     return RSC_CANDIDATE;
2325 }
2326 
2327 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2328                                       NetClientState *nc,
2329                                       const uint8_t *buf, size_t size)
2330 {
2331     int32_t ret;
2332     uint16_t hdr_len;
2333     VirtioNetRscUnit unit;
2334 
2335     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2336 
2337     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2338         + sizeof(struct tcp_header))) {
2339         chain->stat.bypass_not_tcp++;
2340         return virtio_net_do_receive(nc, buf, size);
2341     }
2342 
2343     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2344     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2345         != RSC_CANDIDATE) {
2346         return virtio_net_do_receive(nc, buf, size);
2347     }
2348 
2349     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2350     if (ret == RSC_BYPASS) {
2351         return virtio_net_do_receive(nc, buf, size);
2352     } else if (ret == RSC_FINAL) {
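        /*
         * Offset 12 into the IPv4 header is ip_src; comparing
         * VIRTIO_NET_IP4_ADDR_SIZE (8) bytes covers ip_src + ip_dst, and
         * the port-pair offset points at the start of the TCP header.
         */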
2353         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2354                 ((hdr_len + sizeof(struct eth_header)) + 12),
2355                 VIRTIO_NET_IP4_ADDR_SIZE,
2356                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2357     }
2358 
2359     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2360 }
2361 
2362 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2363                                             struct ip6_header *ip6,
2364                                             const uint8_t *buf, size_t size)
2365 {
2366     uint16_t ip_len;
2367 
2368     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2369         != IP_HEADER_VERSION_6) {
2370         return RSC_BYPASS;
2371     }
2372 
2373     /* Both options and protocol are checked by this: an extension header would change ip6_un1_nxt */
2374     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2375         chain->stat.bypass_not_tcp++;
2376         return RSC_BYPASS;
2377     }
2378 
2379     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2380     if (ip_len < sizeof(struct tcp_header) ||
2381         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2382                   - sizeof(struct ip6_header))) {
2383         chain->stat.ip_hacked++;
2384         return RSC_BYPASS;
2385     }
2386 
2387     /* Don't handle packets with ecn flag */
2388     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2389         chain->stat.ip_ecn++;
2390         return RSC_BYPASS;
2391     }
2392 
2393     return RSC_CANDIDATE;
2394 }
2395 
2396 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2397                                       const uint8_t *buf, size_t size)
2398 {
2399     int32_t ret;
2400     uint16_t hdr_len;
2401     VirtioNetRscChain *chain;
2402     VirtioNetRscUnit unit;
2403 
2404     chain = (VirtioNetRscChain *)opq;
2405     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2406 
2407     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2408         + sizeof(struct tcp_header))) {
2409         return virtio_net_do_receive(nc, buf, size);
2410     }
2411 
2412     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2413     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2414                                                  unit.ip, buf, size)) {
2415         return virtio_net_do_receive(nc, buf, size);
2416     }
2417 
2418     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2419     if (ret == RSC_BYPASS) {
2420         return virtio_net_do_receive(nc, buf, size);
2421     } else if (ret == RSC_FINAL) {
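        /*
         * Offset 8 into the IPv6 header is ip6_src; the 32 bytes of
         * VIRTIO_NET_IP6_ADDR_SIZE cover ip6_src + ip6_dst.
         */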
2422         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2423                 ((hdr_len + sizeof(struct eth_header)) + 8),
2424                 VIRTIO_NET_IP6_ADDR_SIZE,
2425                 hdr_len + sizeof(struct eth_header)
2426                 + sizeof(struct ip6_header));
2427     }
2428 
2429     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2430 }
2431 
2432 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2433                                                       NetClientState *nc,
2434                                                       uint16_t proto)
2435 {
2436     VirtioNetRscChain *chain;
2437 
2438     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2439         return NULL;
2440     }
2441 
2442     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2443         if (chain->proto == proto) {
2444             return chain;
2445         }
2446     }
2447 
2448     chain = g_malloc(sizeof(*chain));
2449     chain->n = n;
2450     chain->proto = proto;
2451     if (proto == (uint16_t)ETH_P_IP) {
2452         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2453         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2454     } else {
2455         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2456         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2457     }
2458     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2459                                       virtio_net_rsc_purge, chain);
2460     memset(&chain->stat, 0, sizeof(chain->stat));
2461 
2462     QTAILQ_INIT(&chain->buffers);
2463     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2464 
2465     return chain;
2466 }
2467 
2468 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2469                                       const uint8_t *buf,
2470                                       size_t size)
2471 {
2472     uint16_t proto;
2473     VirtioNetRscChain *chain;
2474     struct eth_header *eth;
2475     VirtIONet *n;
2476 
2477     n = qemu_get_nic_opaque(nc);
2478     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2479         return virtio_net_do_receive(nc, buf, size);
2480     }
2481 
2482     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2483     proto = htons(eth->h_proto);
2484 
2485     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2486     if (chain) {
2487         chain->stat.received++;
2488         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2489             return virtio_net_rsc_receive4(chain, nc, buf, size);
2490         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2491             return virtio_net_rsc_receive6(chain, nc, buf, size);
2492         }
2493     }
2494     return virtio_net_do_receive(nc, buf, size);
2495 }
2496 
2497 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2498                                   size_t size)
2499 {
2500     VirtIONet *n = qemu_get_nic_opaque(nc);
2501     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2502         return virtio_net_rsc_receive(nc, buf, size);
2503     } else {
2504         return virtio_net_do_receive(nc, buf, size);
2505     }
2506 }
2507 
2508 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2509 
2510 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2511 {
2512     VirtIONet *n = qemu_get_nic_opaque(nc);
2513     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2514     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2515 
2516     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2517     virtio_notify(vdev, q->tx_vq);
2518 
2519     g_free(q->async_tx.elem);
2520     q->async_tx.elem = NULL;
2521 
2522     virtio_queue_set_notification(q->tx_vq, 1);
2523     virtio_net_flush_tx(q);
2524 }
2525 
2526 /* TX */
2527 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2528 {
2529     VirtIONet *n = q->n;
2530     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2531     VirtQueueElement *elem;
2532     int32_t num_packets = 0;
2533     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2534     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2535         return num_packets;
2536     }
2537 
2538     if (q->async_tx.elem) {
2539         virtio_queue_set_notification(q->tx_vq, 0);
2540         return num_packets;
2541     }
2542 
2543     for (;;) {
2544         ssize_t ret;
2545         unsigned int out_num;
2546         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2547         struct virtio_net_hdr_mrg_rxbuf mhdr;
2548 
2549         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2550         if (!elem) {
2551             break;
2552         }
2553 
2554         out_num = elem->out_num;
2555         out_sg = elem->out_sg;
2556         if (out_num < 1) {
2557             virtio_error(vdev, "virtio-net header not in first element");
2558             virtqueue_detach_element(q->tx_vq, elem, 0);
2559             g_free(elem);
2560             return -EINVAL;
2561         }
2562 
2563         if (n->has_vnet_hdr) {
2564             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2565                 n->guest_hdr_len) {
2566                 virtio_error(vdev, "virtio-net header incorrect");
2567                 virtqueue_detach_element(q->tx_vq, elem, 0);
2568                 g_free(elem);
2569                 return -EINVAL;
2570             }
2571             if (n->needs_vnet_hdr_swap) {
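            /*
             * The header in guest memory must not be byte-swapped in
             * place; swap a copy (mhdr) instead and splice it in front of
             * the rest of the chain via sg2.
             */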
2572                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2573                 sg2[0].iov_base = &mhdr;
2574                 sg2[0].iov_len = n->guest_hdr_len;
2575                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2576                                    out_sg, out_num,
2577                                    n->guest_hdr_len, -1);
2578                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2579                     goto drop;
2580                 }
2581                 out_num += 1;
2582                 out_sg = sg2;
2583             }
2584         }
2585         /*
2586          * If host wants to see the guest header as is, we can
2587          * pass it on unchanged. Otherwise, copy just the parts
2588          * that host is interested in.
2589          */
2590         assert(n->host_hdr_len <= n->guest_hdr_len);
2591         if (n->host_hdr_len != n->guest_hdr_len) {
2592             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2593                                        out_sg, out_num,
2594                                        0, n->host_hdr_len);
2595             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2596                              out_sg, out_num,
2597                              n->guest_hdr_len, -1);
2598             out_num = sg_num;
2599             out_sg = sg;
2600         }
2601 
2602         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2603                                       out_sg, out_num, virtio_net_tx_complete);
2604         if (ret == 0) {
2605             virtio_queue_set_notification(q->tx_vq, 0);
2606             q->async_tx.elem = elem;
2607             return -EBUSY;
2608         }
2609 
2610 drop:
2611         virtqueue_push(q->tx_vq, elem, 0);
2612         virtio_notify(vdev, q->tx_vq);
2613         g_free(elem);
2614 
2615         if (++num_packets >= n->tx_burst) {
2616             break;
2617         }
2618     }
2619     return num_packets;
2620 }
2621 
2622 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2623 {
2624     VirtIONet *n = VIRTIO_NET(vdev);
2625     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2626 
2627     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2628         virtio_net_drop_tx_queue_data(vdev, vq);
2629         return;
2630     }
2631 
2632     /* This happens when device was stopped but VCPU wasn't. */
2633     if (!vdev->vm_running) {
2634         q->tx_waiting = 1;
2635         return;
2636     }
2637 
2638     if (q->tx_waiting) {
2639         virtio_queue_set_notification(vq, 1);
2640         timer_del(q->tx_timer);
2641         q->tx_waiting = 0;
2642         if (virtio_net_flush_tx(q) == -EINVAL) {
2643             return;
2644         }
2645     } else {
2646         timer_mod(q->tx_timer,
2647                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2648         q->tx_waiting = 1;
2649         virtio_queue_set_notification(vq, 0);
2650     }
2651 }
2652 
2653 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2654 {
2655     VirtIONet *n = VIRTIO_NET(vdev);
2656     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2657 
2658     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2659         virtio_net_drop_tx_queue_data(vdev, vq);
2660         return;
2661     }
2662 
2663     if (unlikely(q->tx_waiting)) {
2664         return;
2665     }
2666     q->tx_waiting = 1;
2667     /* This happens when device was stopped but VCPU wasn't. */
2668     if (!vdev->vm_running) {
2669         return;
2670     }
2671     virtio_queue_set_notification(vq, 0);
2672     qemu_bh_schedule(q->tx_bh);
2673 }
2674 
2675 static void virtio_net_tx_timer(void *opaque)
2676 {
2677     VirtIONetQueue *q = opaque;
2678     VirtIONet *n = q->n;
2679     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2680     /* This happens when device was stopped but timer wasn't. */
2681     if (!vdev->vm_running) {
2682         /* Make sure tx waiting is set, so we'll run when restarted. */
2683         assert(q->tx_waiting);
2684         return;
2685     }
2686 
2687     q->tx_waiting = 0;
2688 
2689     /* Just in case the driver is not ready any more */
2690     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2691         return;
2692     }
2693 
2694     virtio_queue_set_notification(q->tx_vq, 1);
2695     virtio_net_flush_tx(q);
2696 }
2697 
2698 static void virtio_net_tx_bh(void *opaque)
2699 {
2700     VirtIONetQueue *q = opaque;
2701     VirtIONet *n = q->n;
2702     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2703     int32_t ret;
2704 
2705     /* This happens when device was stopped but BH wasn't. */
2706     if (!vdev->vm_running) {
2707         /* Make sure tx waiting is set, so we'll run when restarted. */
2708         assert(q->tx_waiting);
2709         return;
2710     }
2711 
2712     q->tx_waiting = 0;
2713 
2714     /* Just in case the driver is not ready any more */
2715     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2716         return;
2717     }
2718 
2719     ret = virtio_net_flush_tx(q);
2720     if (ret == -EBUSY || ret == -EINVAL) {
2721         return; /* Notification re-enable handled by tx_complete or device
2722                  * broken */
2723     }
2724 
2725     /* If we flush a full burst of packets, assume there are
2726      * more coming and immediately reschedule */
2727     if (ret >= n->tx_burst) {
2728         qemu_bh_schedule(q->tx_bh);
2729         q->tx_waiting = 1;
2730         return;
2731     }
2732 
2733     /* If less than a full burst, re-enable notification and flush
2734      * anything that may have come in while we weren't looking.  If
2735      * we find something, assume the guest is still active and reschedule */
2736     virtio_queue_set_notification(q->tx_vq, 1);
2737     ret = virtio_net_flush_tx(q);
2738     if (ret == -EINVAL) {
2739         return;
2740     } else if (ret > 0) {
2741         virtio_queue_set_notification(q->tx_vq, 0);
2742         qemu_bh_schedule(q->tx_bh);
2743         q->tx_waiting = 1;
2744     }
2745 }
2746 
2747 static void virtio_net_add_queue(VirtIONet *n, int index)
2748 {
2749     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2750 
2751     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2752                                            virtio_net_handle_rx);
2753 
2754     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2755         n->vqs[index].tx_vq =
2756             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2757                              virtio_net_handle_tx_timer);
2758         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2759                                               virtio_net_tx_timer,
2760                                               &n->vqs[index]);
2761     } else {
2762         n->vqs[index].tx_vq =
2763             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2764                              virtio_net_handle_tx_bh);
2765         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2766     }
2767 
2768     n->vqs[index].tx_waiting = 0;
2769     n->vqs[index].n = n;
2770 }
2771 
2772 static void virtio_net_del_queue(VirtIONet *n, int index)
2773 {
2774     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2775     VirtIONetQueue *q = &n->vqs[index];
2776     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2777 
2778     qemu_purge_queued_packets(nc);
2779 
2780     virtio_del_queue(vdev, index * 2);
2781     if (q->tx_timer) {
2782         timer_free(q->tx_timer);
2783         q->tx_timer = NULL;
2784     } else {
2785         qemu_bh_delete(q->tx_bh);
2786         q->tx_bh = NULL;
2787     }
2788     q->tx_waiting = 0;
2789     virtio_del_queue(vdev, index * 2 + 1);
2790 }
2791 
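/*
 * VirtQueue layout: queue 2*i is the rx queue of pair i, queue 2*i + 1 is
 * its tx queue, and the ctrl vq always sits last, at index
 * num_queue_pairs * 2.
 */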
2792 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2793 {
2794     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2795     int old_num_queues = virtio_get_num_queues(vdev);
2796     int new_num_queues = new_max_queue_pairs * 2 + 1;
2797     int i;
2798 
2799     assert(old_num_queues >= 3);
2800     assert(old_num_queues % 2 == 1);
2801 
2802     if (old_num_queues == new_num_queues) {
2803         return;
2804     }
2805 
2806     /*
2807      * We always need to remove and add ctrl vq if
2808      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2809      * and then we only enter one of the following two loops.
2810      */
2811     virtio_del_queue(vdev, old_num_queues - 1);
2812 
2813     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2814         /* new_num_queues < old_num_queues */
2815         virtio_net_del_queue(n, i / 2);
2816     }
2817 
2818     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2819         /* new_num_queues > old_num_queues */
2820         virtio_net_add_queue(n, i / 2);
2821     }
2822 
2823     /* add ctrl_vq last */
2824     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2825 }
2826 
2827 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2828 {
2829     int max = multiqueue ? n->max_queue_pairs : 1;
2830 
2831     n->multiqueue = multiqueue;
2832     virtio_net_change_num_queue_pairs(n, max);
2833 
2834     virtio_net_set_queue_pairs(n);
2835 }
2836 
2837 static int virtio_net_post_load_device(void *opaque, int version_id)
2838 {
2839     VirtIONet *n = opaque;
2840     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2841     int i, link_down;
2842 
2843     trace_virtio_net_post_load_device();
2844     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2845                                virtio_vdev_has_feature(vdev,
2846                                                        VIRTIO_F_VERSION_1),
2847                                virtio_vdev_has_feature(vdev,
2848                                                        VIRTIO_NET_F_HASH_REPORT));
2849 
2850     /* MAC_TABLE_ENTRIES may be different from the saved image */
2851     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2852         n->mac_table.in_use = 0;
2853     }
2854 
2855     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2856         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2857     }
2858 
2859     /*
2860      * curr_guest_offloads will be later overwritten by the
2861      * virtio_set_features_nocheck call done from the virtio_load.
2862      * Here we make sure it is preserved and restored accordingly
2863      * in the virtio_net_post_load_virtio callback.
2864      */
2865     n->saved_guest_offloads = n->curr_guest_offloads;
2866 
2867     virtio_net_set_queue_pairs(n);
2868 
2869     /* Find the first multicast entry in the saved MAC filter */
2870     for (i = 0; i < n->mac_table.in_use; i++) {
2871         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2872             break;
2873         }
2874     }
2875     n->mac_table.first_multi = i;
2876 
2877     /* nc.link_down can't be migrated, so infer link_down according
2878      * to link status bit in n->status */
2879     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2880     for (i = 0; i < n->max_queue_pairs; i++) {
2881         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2882     }
2883 
2884     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2885         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2886         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2887                                   QEMU_CLOCK_VIRTUAL,
2888                                   virtio_net_announce_timer, n);
2889         if (n->announce_timer.round) {
2890             timer_mod(n->announce_timer.tm,
2891                       qemu_clock_get_ms(n->announce_timer.type));
2892         } else {
2893             qemu_announce_timer_del(&n->announce_timer, false);
2894         }
2895     }
2896 
2897     if (n->rss_data.enabled) {
2898         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2899         if (!n->rss_data.populate_hash) {
2900             if (!virtio_net_attach_epbf_rss(n)) {
2901                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2902                     warn_report("Can't post-load eBPF RSS for vhost");
2903                 } else {
2904                     warn_report("Can't post-load eBPF RSS - "
2905                                 "fallback to software RSS");
2906                     n->rss_data.enabled_software_rss = true;
2907                 }
2908             }
2909         }
2910 
2911         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2912                                     n->rss_data.indirections_len,
2913                                     sizeof(n->rss_data.key));
2914     } else {
2915         trace_virtio_net_rss_disable();
2916     }
2917     return 0;
2918 }
2919 
2920 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2921 {
2922     VirtIONet *n = VIRTIO_NET(vdev);
2923     /*
2924      * The actual needed state is now in saved_guest_offloads,
2925      * see virtio_net_post_load_device for detail.
2926      * Restore it back and apply the desired offloads.
2927      */
2928     n->curr_guest_offloads = n->saved_guest_offloads;
2929     if (peer_has_vnet_hdr(n)) {
2930         virtio_net_apply_guest_offloads(n);
2931     }
2932 
2933     return 0;
2934 }
2935 
2936 /* tx_waiting field of a VirtIONetQueue */
2937 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2938     .name = "virtio-net-queue-tx_waiting",
2939     .fields = (VMStateField[]) {
2940         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2941         VMSTATE_END_OF_LIST()
2942     },
2943 };
2944 
2945 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2946 {
2947     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2948 }
2949 
2950 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2951 {
2952     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2953                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2954 }
2955 
2956 static bool mac_table_fits(void *opaque, int version_id)
2957 {
2958     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2959 }
2960 
2961 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2962 {
2963     return !mac_table_fits(opaque, version_id);
2964 }
2965 
2966 /* This temporary type is shared by all the WITH_TMP methods
2967  * although only some fields are used by each.
2968  */
2969 struct VirtIONetMigTmp {
2970     VirtIONet      *parent;
2971     VirtIONetQueue *vqs_1;
2972     uint16_t        curr_queue_pairs_1;
2973     uint8_t         has_ufo;
2974     uint32_t        has_vnet_hdr;
2975 };
2976 
2977 /* The 2nd and subsequent tx_waiting flags are loaded later than
2978  * the 1st entry in the queue_pairs and only if there's more than one
2979  * entry.  We use the tmp mechanism to calculate a temporary
2980  * pointer and count and also validate the count.
2981  */
2982 
2983 static int virtio_net_tx_waiting_pre_save(void *opaque)
2984 {
2985     struct VirtIONetMigTmp *tmp = opaque;
2986 
2987     tmp->vqs_1 = tmp->parent->vqs + 1;
2988     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
2989     if (tmp->parent->curr_queue_pairs == 0) {
2990         tmp->curr_queue_pairs_1 = 0;
2991     }
2992 
2993     return 0;
2994 }
2995 
2996 static int virtio_net_tx_waiting_pre_load(void *opaque)
2997 {
2998     struct VirtIONetMigTmp *tmp = opaque;
2999 
3000     /* Reuse the pointer setup from save */
3001     virtio_net_tx_waiting_pre_save(opaque);
3002 
3003     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3004         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3005             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3006 
3007         return -EINVAL;
3008     }
3009 
3010     return 0; /* all good */
3011 }
3012 
3013 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3014     .name      = "virtio-net-tx_waiting",
3015     .pre_load  = virtio_net_tx_waiting_pre_load,
3016     .pre_save  = virtio_net_tx_waiting_pre_save,
3017     .fields    = (VMStateField[]) {
3018         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3019                                      curr_queue_pairs_1,
3020                                      vmstate_virtio_net_queue_tx_waiting,
3021                                      struct VirtIONetQueue),
3022         VMSTATE_END_OF_LIST()
3023     },
3024 };
3025 
3026 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3027  * flag set we need to check that we have it
3028  */
3029 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3030 {
3031     struct VirtIONetMigTmp *tmp = opaque;
3032 
3033     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3034         error_report("virtio-net: saved image requires TUN_F_UFO support");
3035         return -EINVAL;
3036     }
3037 
3038     return 0;
3039 }
3040 
3041 static int virtio_net_ufo_pre_save(void *opaque)
3042 {
3043     struct VirtIONetMigTmp *tmp = opaque;
3044 
3045     tmp->has_ufo = tmp->parent->has_ufo;
3046 
3047     return 0;
3048 }
3049 
3050 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3051     .name      = "virtio-net-ufo",
3052     .post_load = virtio_net_ufo_post_load,
3053     .pre_save  = virtio_net_ufo_pre_save,
3054     .fields    = (VMStateField[]) {
3055         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3056         VMSTATE_END_OF_LIST()
3057     },
3058 };
3059 
3060 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3061  * flag set we need to check that we have it
3062  */
3063 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3064 {
3065     struct VirtIONetMigTmp *tmp = opaque;
3066 
3067     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3068         error_report("virtio-net: saved image requires vnet_hdr=on");
3069         return -EINVAL;
3070     }
3071 
3072     return 0;
3073 }
3074 
3075 static int virtio_net_vnet_pre_save(void *opaque)
3076 {
3077     struct VirtIONetMigTmp *tmp = opaque;
3078 
3079     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3080 
3081     return 0;
3082 }
3083 
3084 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3085     .name      = "virtio-net-vnet",
3086     .post_load = virtio_net_vnet_post_load,
3087     .pre_save  = virtio_net_vnet_pre_save,
3088     .fields    = (VMStateField[]) {
3089         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3090         VMSTATE_END_OF_LIST()
3091     },
3092 };
3093 
3094 static bool virtio_net_rss_needed(void *opaque)
3095 {
3096     return VIRTIO_NET(opaque)->rss_data.enabled;
3097 }
3098 
3099 static const VMStateDescription vmstate_virtio_net_rss = {
3100     .name      = "virtio-net-device/rss",
3101     .version_id = 1,
3102     .minimum_version_id = 1,
3103     .needed = virtio_net_rss_needed,
3104     .fields = (VMStateField[]) {
3105         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3106         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3107         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3108         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3109         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3110         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3111         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3112                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3113         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3114                                     rss_data.indirections_len, 0,
3115                                     vmstate_info_uint16, uint16_t),
3116         VMSTATE_END_OF_LIST()
3117     },
3118 };
3119 
3120 static const VMStateDescription vmstate_virtio_net_device = {
3121     .name = "virtio-net-device",
3122     .version_id = VIRTIO_NET_VM_VERSION,
3123     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3124     .post_load = virtio_net_post_load_device,
3125     .fields = (VMStateField[]) {
3126         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3127         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3128                                vmstate_virtio_net_queue_tx_waiting,
3129                                VirtIONetQueue),
3130         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3131         VMSTATE_UINT16(status, VirtIONet),
3132         VMSTATE_UINT8(promisc, VirtIONet),
3133         VMSTATE_UINT8(allmulti, VirtIONet),
3134         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3135 
3136         /* Guarded pair: if it fits we load it, else we throw it away.
3137          * This can happen if the source has a larger MAC table; post-load
3138          * sets flags in this case.
3139          */
3140         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3141                                  0, mac_table_fits, mac_table.in_use,
3142                                  ETH_ALEN),
3143         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3144                                      mac_table.in_use, ETH_ALEN),
3145 
3146         /* Note: this is an array of uint32_t values that has always been
3147          * saved as a raw buffer, so watch the endianness; it is actually
3148          * used as a bitmap built on those uint32_t words.
3149          */
3150         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3151         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3152                          vmstate_virtio_net_has_vnet),
3153         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3154         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3155         VMSTATE_UINT8(alluni, VirtIONet),
3156         VMSTATE_UINT8(nomulti, VirtIONet),
3157         VMSTATE_UINT8(nouni, VirtIONet),
3158         VMSTATE_UINT8(nobcast, VirtIONet),
3159         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3160                          vmstate_virtio_net_has_ufo),
3161         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3162                             vmstate_info_uint16_equal, uint16_t),
3163         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3164         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3165                          vmstate_virtio_net_tx_waiting),
3166         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3167                             has_ctrl_guest_offloads),
3168         VMSTATE_END_OF_LIST()
3169     },
3170     .subsections = (const VMStateDescription * []) {
3171         &vmstate_virtio_net_rss,
3172         NULL
3173     }
3174 };
3175 
3176 static NetClientInfo net_virtio_info = {
3177     .type = NET_CLIENT_DRIVER_NIC,
3178     .size = sizeof(NICState),
3179     .can_receive = virtio_net_can_receive,
3180     .receive = virtio_net_receive,
3181     .link_status_changed = virtio_net_set_link_status,
3182     .query_rx_filter = virtio_net_query_rxfilter,
3183     .announce = virtio_net_announce,
3184 };
3185 
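     /* With a vhost backend running, interrupt state is tracked by the
      * backend, so route the "notifier pending" query to the peer's vhost
      * virtqueue matching this index.
      */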
3186 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3187 {
3188     VirtIONet *n = VIRTIO_NET(vdev);
3189     NetClientState *nc;
3190     assert(n->vhost_started);
3191     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3192         /* We must guard against a malicious guest setting invalid
3193          * features or a bogus queue index, and against such values
3194          * slipping in through a buggy migration stream.
3195          */
3196         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3197             qemu_log_mask(LOG_GUEST_ERROR,
3198                           "%s: bogus vq index ignored\n", __func__);
3199             return false;
3200         }
3201         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3202     } else {
3203         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3204     }
3205     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3206 }
3207 
3208 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3209                                            bool mask)
3210 {
3211     VirtIONet *n = VIRTIO_NET(vdev);
3212     NetClientState *nc;
3213     assert(n->vhost_started);
3214     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3215         /* We must guard against a malicious guest setting invalid
3216          * features or a bogus queue index, and against such values
3217          * slipping in through a buggy migration stream.
3218          */
3219         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3220             qemu_log_mask(LOG_GUEST_ERROR,
3221                           "%s: bogus vq index ignored\n", __func__);
3222             return;
3223         }
3224         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3225     } else {
3226         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3227     }
3228     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3229                              vdev, idx, mask);
3230 }
3231 
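     /* Force VIRTIO_NET_F_MAC in the local feature copy so the computed
      * config size always covers at least the MAC field, regardless of the
      * user-visible feature bits.
      */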
3232 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3233 {
3234     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3235 
3236     n->config_size = virtio_feature_get_config_size(feature_sizes,
3237                                                     host_features);
3238 }
3239 
3240 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3241                                    const char *type)
3242 {
3243     /*
3244      * The name may be NULL; in that case the netclient name will be type.x.
3245      */
3246     assert(type != NULL);
3247 
3248     g_free(n->netclient_name);
3249     g_free(n->netclient_type);
3250     n->netclient_name = g_strdup(name);
3251     n->netclient_type = g_strdup(type);
3252 }
3253 
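     /* Ask the hotplug controller to unplug the primary device, marking it
      * as partially hotplugged so that it can be plugged back in later.
      */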
3254 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3255 {
3256     HotplugHandler *hotplug_ctrl;
3257     PCIDevice *pci_dev;
3258     Error *err = NULL;
3259 
3260     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3261     if (hotplug_ctrl) {
3262         pci_dev = PCI_DEVICE(dev);
3263         pci_dev->partially_hotplugged = true;
3264         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3265         if (err) {
3266             error_report_err(err);
3267             return false;
3268         }
3269     } else {
3270         return false;
3271     }
3272     return true;
3273 }
3274 
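     /* Undo a failover unplug: reattach a partially hotplugged primary
      * device to its bus and re-run the hotplug handlers.
      */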
3275 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3276                                     Error **errp)
3277 {
3278     Error *err = NULL;
3279     HotplugHandler *hotplug_ctrl;
3280     PCIDevice *pdev = PCI_DEVICE(dev);
3281     BusState *primary_bus;
3282 
3283     if (!pdev->partially_hotplugged) {
3284         return true;
3285     }
3286     primary_bus = dev->parent_bus;
3287     if (!primary_bus) {
3288         error_setg(errp, "virtio_net: couldn't find primary bus");
3289         return false;
3290     }
3291     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3292     qatomic_set(&n->failover_primary_hidden, false);
3293     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3294     if (hotplug_ctrl) {
3295         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3296         if (err) {
3297             goto out;
3298         }
3299         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3300     }
3301     pdev->partially_hotplugged = false;
3302 
3303 out:
3304     error_propagate(errp, err);
3305     return !err;
3306 }
3307 
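     /* Failover: unplug the primary device when migration starts and plug
      * it back in if the migration fails.
      */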
3308 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3309 {
3310     bool should_be_hidden;
3311     Error *err = NULL;
3312     DeviceState *dev = failover_find_primary_device(n);
3313 
3314     if (!dev) {
3315         return;
3316     }
3317 
3318     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3319 
3320     if (migration_in_setup(s) && !should_be_hidden) {
3321         if (failover_unplug_primary(n, dev)) {
3322             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3323             qapi_event_send_unplug_primary(dev->id);
3324             qatomic_set(&n->failover_primary_hidden, true);
3325         } else {
3326             warn_report("couldn't unplug primary device");
3327         }
3328     } else if (migration_has_failed(s)) {
3329         /* We already unplugged the device; let's plug it back. */
3330         if (!failover_replug_primary(n, dev, &err)) {
3331             if (err) {
3332                 error_report_err(err);
3333             }
3334         }
3335     }
3336 }
3337 
3338 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3339 {
3340     MigrationState *s = data;
3341     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3342     virtio_net_handle_migration_primary(n, s);
3343 }
3344 
3345 static bool failover_hide_primary_device(DeviceListener *listener,
3346                                          const QDict *device_opts,
3347                                          bool from_json,
3348                                          Error **errp)
3349 {
3350     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3351     const char *standby_id;
3352 
3353     if (!device_opts) {
3354         return false;
3355     }
3356 
3357     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3358         return false;
3359     }
3360 
3361     if (!qdict_haskey(device_opts, "id")) {
3362         error_setg(errp, "Device with failover_pair_id needs to have id");
3363         return false;
3364     }
3365 
3366     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3367     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3368         return false;
3369     }
3370 
3371     /*
3372      * The hide helper can be called several times for a given device.
3373      * Check that there is only one primary for a virtio-net device, but
3374      * don't clone the qdict again when called repeatedly for the same
3375      * device.
3376      */
3377     if (n->primary_opts) {
3378         const char *old, *new;
3379         /* devices with failover_pair_id always have an id */
3380         old = qdict_get_str(n->primary_opts, "id");
3381         new = qdict_get_str(device_opts, "id");
3382         if (strcmp(old, new) != 0) {
3383             error_setg(errp, "Cannot attach more than one primary device to "
3384                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3385             return false;
3386         }
3387     } else {
3388         n->primary_opts = qdict_clone_shallow(device_opts);
3389         n->primary_opts_from_json = from_json;
3390     }
3391 
3392     /* failover_primary_hidden is set during feature negotiation */
3393     return qatomic_read(&n->failover_primary_hidden);
3394 }
3395 
3396 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3397 {
3398     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3399     VirtIONet *n = VIRTIO_NET(dev);
3400     NetClientState *nc;
3401     int i;
3402 
3403     if (n->net_conf.mtu) {
3404         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3405     }
3406 
3407     if (n->net_conf.duplex_str) {
3408         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3409             n->net_conf.duplex = DUPLEX_HALF;
3410         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3411             n->net_conf.duplex = DUPLEX_FULL;
3412         } else {
3413             error_setg(errp, "'duplex' must be 'half' or 'full'");
3414             return;
3415         }
3416         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3417     } else {
3418         n->net_conf.duplex = DUPLEX_UNKNOWN;
3419     }
3420 
3421     if (n->net_conf.speed < SPEED_UNKNOWN) {
3422         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3423         return;
3424     }
3425     if (n->net_conf.speed >= 0) {
3426         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3427     }
3428 
3429     if (n->failover) {
3430         n->primary_listener.hide_device = failover_hide_primary_device;
3431         qatomic_set(&n->failover_primary_hidden, true);
3432         device_listener_register(&n->primary_listener);
3433         n->migration_state.notify = virtio_net_migration_state_notifier;
3434         add_migration_state_change_notifier(&n->migration_state);
3435         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3436     }
3437 
3438     virtio_net_set_config_size(n, n->host_features);
3439     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3440 
3441     /*
3442      * Keep the lower limit on the RX queue size at its historical
3443      * default. Guests that want a smaller ring can always resize it
3444      * without help from us (using virtio 1 and up).
3445      */
3446     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3447         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3448         !is_power_of_2(n->net_conf.rx_queue_size)) {
3449         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3450                    "must be a power of 2 between %d and %d.",
3451                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3452                    VIRTQUEUE_MAX_SIZE);
3453         virtio_cleanup(vdev);
3454         return;
3455     }
3456 
3457     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3458         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3459         !is_power_of_2(n->net_conf.tx_queue_size)) {
3460         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3461                    "must be a power of 2 between %d and %d",
3462                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3463                    VIRTQUEUE_MAX_SIZE);
3464         virtio_cleanup(vdev);
3465         return;
3466     }
3467 
3468     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3469 
3470     /*
3471      * Figure out the number of datapath queue pairs, since the backend
3472      * could provide a control queue via peers as well.
3473      */
3474     if (n->nic_conf.peers.queues) {
3475         for (i = 0; i < n->max_ncs; i++) {
3476             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3477                 ++n->max_queue_pairs;
3478             }
3479         }
3480     }
3481     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3482 
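         /* Each queue pair needs an RX and a TX virtqueue, plus one more
          * slot for the control virtqueue added below; everything must fit
          * in VIRTIO_QUEUE_MAX.
          */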
3483     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3484         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3485                    "must be a positive integer less than %d.",
3486                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3487         virtio_cleanup(vdev);
3488         return;
3489     }
3490     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3491     n->curr_queue_pairs = 1;
3492     n->tx_timeout = n->net_conf.txtimer;
3493 
3494     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3495                        && strcmp(n->net_conf.tx, "bh")) {
3496         warn_report("virtio-net: "
3497                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3498                     n->net_conf.tx);
3499         error_printf("Defaulting to \"bh\"\n");
3500     }
3501 
3502     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3503                                     n->net_conf.tx_queue_size);
3504 
3505     for (i = 0; i < n->max_queue_pairs; i++) {
3506         virtio_net_add_queue(n, i);
3507     }
3508 
3509     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3510     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3511     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3512     n->status = VIRTIO_NET_S_LINK_UP;
3513     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3514                               QEMU_CLOCK_VIRTUAL,
3515                               virtio_net_announce_timer, n);
3516     n->announce_timer.round = 0;
3517 
3518     if (n->netclient_type) {
3519         /*
3520          * This happens when virtio_net_set_netclient_name has been called.
3521          */
3522         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3523                               n->netclient_type, n->netclient_name, n);
3524     } else {
3525         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3526                               object_get_typename(OBJECT(dev)), dev->id, n);
3527     }
3528 
3529     for (i = 0; i < n->max_queue_pairs; i++) {
3530         n->nic->ncs[i].do_not_pad = true;
3531     }
3532 
3533     peer_test_vnet_hdr(n);
3534     if (peer_has_vnet_hdr(n)) {
3535         for (i = 0; i < n->max_queue_pairs; i++) {
3536             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3537         }
3538         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3539     } else {
3540         n->host_hdr_len = 0;
3541     }
3542 
3543     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3544 
3545     n->vqs[0].tx_waiting = 0;
3546     n->tx_burst = n->net_conf.txburst;
3547     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3548     n->promisc = 1; /* for compatibility */
3549 
3550     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3551 
3552     n->vlans = g_malloc0(MAX_VLAN >> 3);
3553 
3554     nc = qemu_get_queue(n->nic);
3555     nc->rxfilter_notify_enabled = 1;
3556 
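         /* For a vhost-vdpa backend, push the MAC into the backend's config
          * space so the device and the backend agree on the address.
          */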
3557     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3558         struct virtio_net_config netcfg = {};
3559         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3560         vhost_net_set_config(get_vhost_net(nc->peer),
3561             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3562     }
3563     QTAILQ_INIT(&n->rsc_chains);
3564     n->qdev = dev;
3565 
3566     net_rx_pkt_init(&n->rx_pkt, false);
3567 
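     /* If the RSS feature is offered, try to load the eBPF steering
      * program for the backend now; the return value is not checked here.
      */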
3568     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3569         virtio_net_load_ebpf(n);
3570     }
3571 }
3572 
3573 static void virtio_net_device_unrealize(DeviceState *dev)
3574 {
3575     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3576     VirtIONet *n = VIRTIO_NET(dev);
3577     int i, max_queue_pairs;
3578 
3579     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3580         virtio_net_unload_ebpf(n);
3581     }
3582 
3583     /* This will stop the vhost backend if appropriate. */
3584     virtio_net_set_status(vdev, 0);
3585 
3586     g_free(n->netclient_name);
3587     n->netclient_name = NULL;
3588     g_free(n->netclient_type);
3589     n->netclient_type = NULL;
3590 
3591     g_free(n->mac_table.macs);
3592     g_free(n->vlans);
3593 
3594     if (n->failover) {
3595         qobject_unref(n->primary_opts);
3596         device_listener_unregister(&n->primary_listener);
3597         remove_migration_state_change_notifier(&n->migration_state);
3598     } else {
3599         assert(n->primary_opts == NULL);
3600     }
3601 
3602     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3603     for (i = 0; i < max_queue_pairs; i++) {
3604         virtio_net_del_queue(n, i);
3605     }
3606     /* also delete the control vq */
3607     virtio_del_queue(vdev, max_queue_pairs * 2);
3608     qemu_announce_timer_del(&n->announce_timer, false);
3609     g_free(n->vqs);
3610     qemu_del_nic(n->nic);
3611     virtio_net_rsc_cleanup(n);
3612     g_free(n->rss_data.indirections_table);
3613     net_rx_pkt_uninit(n->rx_pkt);
3614     virtio_cleanup(vdev);
3615 }
3616 
3617 static void virtio_net_instance_init(Object *obj)
3618 {
3619     VirtIONet *n = VIRTIO_NET(obj);
3620 
3621     /*
3622      * The default config_size is sizeof(struct virtio_net_config).
3623      * It can be overridden with virtio_net_set_config_size.
3624      */
3625     n->config_size = sizeof(struct virtio_net_config);
3626     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3627                                   "bootindex", "/ethernet-phy@0",
3628                                   DEVICE(n));
3629 
3630     ebpf_rss_init(&n->ebpf_rss);
3631 }
3632 
3633 static int virtio_net_pre_save(void *opaque)
3634 {
3635     VirtIONet *n = opaque;
3636 
3637     /* At this point, the backend must be stopped, otherwise
3638      * it might keep writing to memory. */
3639     assert(!n->vhost_started);
3640 
3641     return 0;
3642 }
3643 
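     /* Report whether we are still waiting for the failover primary
      * device to finish unplugging, so migration can be held off until
      * then.
      */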
3644 static bool primary_unplug_pending(void *opaque)
3645 {
3646     DeviceState *dev = opaque;
3647     DeviceState *primary;
3648     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3649     VirtIONet *n = VIRTIO_NET(vdev);
3650 
3651     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3652         return false;
3653     }
3654     primary = failover_find_primary_device(n);
3655     return primary ? primary->pending_deleted_event : false;
3656 }
3657 
3658 static bool dev_unplug_pending(void *opaque)
3659 {
3660     DeviceState *dev = opaque;
3661     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3662 
3663     return vdc->primary_unplug_pending(dev);
3664 }
3665 
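     /* Note: this assumes the peer has a vhost backend, i.e. that
      * get_vhost_net() returns a non-NULL pointer here.
      */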
3666 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3667 {
3668     VirtIONet *n = VIRTIO_NET(vdev);
3669     NetClientState *nc = qemu_get_queue(n->nic);
3670     struct vhost_net *net = get_vhost_net(nc->peer);
3671     return &net->dev;
3672 }
3673 
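     /* Outer vmstate wrapper: the device state itself is saved and loaded
      * through VMSTATE_VIRTIO_DEVICE, which uses the class's vmsd
      * (vmstate_virtio_net_device above).
      */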
3674 static const VMStateDescription vmstate_virtio_net = {
3675     .name = "virtio-net",
3676     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3677     .version_id = VIRTIO_NET_VM_VERSION,
3678     .fields = (VMStateField[]) {
3679         VMSTATE_VIRTIO_DEVICE,
3680         VMSTATE_END_OF_LIST()
3681     },
3682     .pre_save = virtio_net_pre_save,
3683     .dev_unplug_pending = dev_unplug_pending,
3684 };
3685 
3686 static Property virtio_net_properties[] = {
3687     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3688                     VIRTIO_NET_F_CSUM, true),
3689     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3690                     VIRTIO_NET_F_GUEST_CSUM, true),
3691     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3692     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3693                     VIRTIO_NET_F_GUEST_TSO4, true),
3694     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3695                     VIRTIO_NET_F_GUEST_TSO6, true),
3696     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3697                     VIRTIO_NET_F_GUEST_ECN, true),
3698     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3699                     VIRTIO_NET_F_GUEST_UFO, true),
3700     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3701                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3702     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3703                     VIRTIO_NET_F_HOST_TSO4, true),
3704     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3705                     VIRTIO_NET_F_HOST_TSO6, true),
3706     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3707                     VIRTIO_NET_F_HOST_ECN, true),
3708     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3709                     VIRTIO_NET_F_HOST_UFO, true),
3710     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3711                     VIRTIO_NET_F_MRG_RXBUF, true),
3712     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3713                     VIRTIO_NET_F_STATUS, true),
3714     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3715                     VIRTIO_NET_F_CTRL_VQ, true),
3716     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3717                     VIRTIO_NET_F_CTRL_RX, true),
3718     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3719                     VIRTIO_NET_F_CTRL_VLAN, true),
3720     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3721                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3722     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3723                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3724     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3725                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3726     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3727     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3728                     VIRTIO_NET_F_RSS, false),
3729     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3730                     VIRTIO_NET_F_HASH_REPORT, false),
3731     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3732                     VIRTIO_NET_F_RSC_EXT, false),
3733     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3734                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3735     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3736     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3737                        TX_TIMER_INTERVAL),
3738     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3739     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3740     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3741                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3742     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3743                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3744     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3745     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3746                      true),
3747     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3748     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3749     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3750     DEFINE_PROP_END_OF_LIST(),
3751 };
3752 
3753 static void virtio_net_class_init(ObjectClass *klass, void *data)
3754 {
3755     DeviceClass *dc = DEVICE_CLASS(klass);
3756     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3757 
3758     device_class_set_props(dc, virtio_net_properties);
3759     dc->vmsd = &vmstate_virtio_net;
3760     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3761     vdc->realize = virtio_net_device_realize;
3762     vdc->unrealize = virtio_net_device_unrealize;
3763     vdc->get_config = virtio_net_get_config;
3764     vdc->set_config = virtio_net_set_config;
3765     vdc->get_features = virtio_net_get_features;
3766     vdc->set_features = virtio_net_set_features;
3767     vdc->bad_features = virtio_net_bad_features;
3768     vdc->reset = virtio_net_reset;
3769     vdc->set_status = virtio_net_set_status;
3770     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3771     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3772     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3773     vdc->post_load = virtio_net_post_load_virtio;
3774     vdc->vmsd = &vmstate_virtio_net_device;
3775     vdc->primary_unplug_pending = primary_unplug_pending;
3776     vdc->get_vhost = virtio_net_get_vhost;
3777 }
3778 
3779 static const TypeInfo virtio_net_info = {
3780     .name = TYPE_VIRTIO_NET,
3781     .parent = TYPE_VIRTIO_DEVICE,
3782     .instance_size = sizeof(VirtIONet),
3783     .instance_init = virtio_net_instance_init,
3784     .class_init = virtio_net_class_init,
3785 };
3786 
3787 static void virtio_register_types(void)
3788 {
3789     type_register_static(&virtio_net_info);
3790 }
3791 
3792 type_init(virtio_register_types)
3793