xref: /qemu/include/hw/virtio/virtio-net.h (revision 6b230b7d)
1 /*
2  * Virtio Network Device
3  *
4  * Copyright IBM, Corp. 2007
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #ifndef QEMU_VIRTIO_NET_H
15 #define QEMU_VIRTIO_NET_H
16 
17 #include "qemu/units.h"
18 #include "standard-headers/linux/virtio_net.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/announce.h"
21 #include "qemu/option_int.h"
22 #include "qom/object.h"
23 
24 #include "ebpf/ebpf_rss.h"
25 
/*
 * QOM type name of the virtio-net device, followed by the declaration of the
 * VirtIONet instance type that the rest of this header refers to.
 * NOTE(review): OBJECT_DECLARE_SIMPLE_TYPE presumably also provides the
 * VIRTIO_NET() cast helper - confirm against qom/object.h.
 */
26 #define TYPE_VIRTIO_NET "virtio-net-device"
27 OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
28 
/*
 * TX timer interval.
 * NOTE(review): the value is presumably expressed in nanoseconds
 * (150000 ns = 150 us) - confirm against the clock used by the TX timer.
 */
29 #define TX_TIMER_INTERVAL 150000 /* 150 us */
30 
31 /* Limit the number of packets that can be sent via a single flush
32  * of the TX queue.  This gives us a guaranteed exit condition and
33  * ensures fairness in the I/O path.  256 conveniently matches the
34  * length of the TX queue and shows a good balance of performance
35  * and latency. */
36 #define TX_BURST 256
37 
38 /* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
39 #define MAC_TABLE_ENTRIES    64
40 
41 /*
42  * The maximum number of VLANs in the VLAN filter table
43  * added by VIRTIO_NET_CTRL_VLAN_ADD.
44  */
45 #define MAX_VLAN    (1 << 12)   /* 4096 VLAN IDs, per the 802.1Q definition */
46 
/*
 * Configuration values of a virtio-net device.  An instance is embedded in
 * struct VirtIONet as its net_conf member.
 * NOTE(review): the per-field notes below are inferred from the field names
 * only - confirm them against the device's property definitions.
 */
47 typedef struct virtio_net_conf
48 {
49     uint32_t txtimer;        /* presumably the TX timer interval, cf. TX_TIMER_INTERVAL - TODO confirm */
50     int32_t txburst;         /* presumably the per-flush packet limit, cf. TX_BURST - TODO confirm */
51     char *tx;                /* string option; meaning not visible in this header */
52     uint16_t rx_queue_size;  /* presumably the RX virtqueue size - TODO confirm */
53     uint16_t tx_queue_size;  /* presumably the TX virtqueue size - TODO confirm */
54     uint16_t mtu;
55     int32_t speed;
56     char *duplex_str;        /* presumably the textual form of duplex - TODO confirm */
57     uint8_t duplex;
58     char *primary_id_str;    /* presumably identifies the failover primary device - TODO confirm */
59 } virtio_net_conf;
60 
61 /* Type and status of a packet with respect to coalescing */
62 typedef enum {
63     RSC_COALESCE,           /* Data has been coalesced */
64     RSC_FINAL,              /* Will terminate the current connection */
65     RSC_NO_MATCH,           /* No match in the buffer pool */
66     RSC_BYPASS,             /* Packet to be bypassed: not TCP, TCP control, etc. */
67     RSC_CANDIDATE                /* Data is a candidate for coalescing */
68 } CoalesceStatus;
69 
/*
 * Statistics kept for one RSC chain; stored as the stat member of
 * VirtioNetRscChain.  Every member is an unsigned 32-bit counter except
 * timer, which is a signed 64-bit value.
 * NOTE(review): what each counter tracks is only implied by its name here -
 * confirm against the code that updates them.
 */
70 typedef struct VirtioNetRscStat {
71     uint32_t received;
72     uint32_t coalesced;
73     uint32_t over_size;
74     uint32_t cache;
75     uint32_t empty_cache;
76     uint32_t no_match_cache;
77     uint32_t win_update;
78     uint32_t no_match;
79     uint32_t tcp_syn;
80     uint32_t tcp_ctrl_drain;
81     uint32_t dup_ack;
82     uint32_t dup_ack1;
83     uint32_t dup_ack2;
84     uint32_t pure_ack;
85     uint32_t ack_out_of_win;
86     uint32_t data_out_of_win;
87     uint32_t data_out_of_order;
88     uint32_t data_after_pure_ack;
89     uint32_t bypass_not_tcp;
90     uint32_t tcp_option;
91     uint32_t tcp_all_opt;
92     uint32_t ip_frag;
93     uint32_t ip_ecn;
94     uint32_t ip_hacked;
95     uint32_t ip_option;
96     uint32_t purge_failed;
97     uint32_t drain_failed;
98     uint32_t final_failed;
99     int64_t  timer;
100 } VirtioNetRscStat;
101 
102 /* RSC unit: general info used to check whether a packet can be coalesced */
103 typedef struct VirtioNetRscUnit {
104     void *ip;   /* ip header */
105     uint16_t *ip_plen;      /* pointer to the data length field in the ip header */
106     struct tcp_header *tcp; /* tcp header */
107     uint16_t tcp_hdrlen;    /* tcp header length */
108     uint16_t payload;       /* pure payload length, without virtio/eth/ip/tcp headers */
109 } VirtioNetRscUnit;
110 
111 /* Coalesced segment; linked into the buffers list of a VirtioNetRscChain */
112 typedef struct VirtioNetRscSeg {
113     QTAILQ_ENTRY(VirtioNetRscSeg) next;
114     void *buf;
115     size_t size;
116     uint16_t packets;
117     uint16_t dup_ack;
118     bool is_coalesced;      /* ipv4 header checksum needs to be recalculated; marked here */
119     VirtioNetRscUnit unit;
120     NetClientState *nc;
121 } VirtioNetRscSeg;
122 
123 
124 /* Chains are divided by protocol (IPv4/IPv6) and NetClientInfo */
125 typedef struct VirtioNetRscChain {
126     QTAILQ_ENTRY(VirtioNetRscChain) next;    /* link in VirtIONet.rsc_chains */
127     VirtIONet *n;                            /* VirtIONet */
128     uint16_t proto;
129     uint8_t  gso_type;
130     uint16_t max_payload;
131     QEMUTimer *drain_timer;
132     QTAILQ_HEAD(, VirtioNetRscSeg) buffers;  /* list of VirtioNetRscSeg entries */
133     VirtioNetRscStat stat;                   /* statistics for this chain */
134 } VirtioNetRscChain;
135 
136 /* Maximum packet size we can receive from a tap device: virtio_net_hdr + 64 KiB */
137 #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 * KiB))
138 
/*
 * RSS limits.  VIRTIO_NET_RSS_MAX_KEY_SIZE is the size of the key[] array in
 * VirtioNetRssData.
 * NOTE(review): VIRTIO_NET_RSS_MAX_TABLE_LEN is presumably the upper bound on
 * VirtioNetRssData.indirections_len - confirm against the RSS setup code.
 */
139 #define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
140 #define VIRTIO_NET_RSS_MAX_TABLE_LEN    128
141 
/*
 * RSS state of a virtio-net device; embedded in struct VirtIONet as rss_data.
 * NOTE(review): field semantics beyond their types are inferred from names -
 * confirm against the code that fills this structure in.
 */
142 typedef struct VirtioNetRssData {
143     bool    enabled;
144     bool    enabled_software_rss;
145     bool    redirect;
146     bool    populate_hash;
147     uint32_t hash_types;
148     uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE];  /* fixed-size key storage */
149     uint16_t indirections_len;      /* presumably the entry count of indirections_table - TODO confirm */
150     uint16_t *indirections_table;   /* pointer to uint16_t entries; allocation not visible here */
151     uint16_t default_queue;
152 } VirtioNetRssData;
153 
/*
 * Per-queue state: one RX and one TX virtqueue plus TX bookkeeping.
 * struct VirtIONet refers to these through its vqs pointer.
 * NOTE(review): whether tx_timer and tx_bh are alternatives or used together
 * is not visible in this header - confirm in the device implementation.
 */
154 typedef struct VirtIONetQueue {
155     VirtQueue *rx_vq;
156     VirtQueue *tx_vq;
157     QEMUTimer *tx_timer;
158     QEMUBH *tx_bh;
159     uint32_t tx_waiting;
160     struct {
161         VirtQueueElement *elem;   /* presumably the element of an in-flight async TX - TODO confirm */
162     } async_tx;
163     struct VirtIONet *n;          /* device this queue belongs to (presumed back-pointer - verify) */
164 } VirtIONetQueue;
165 
/*
 * Instance state of the virtio-net device (type name TYPE_VIRTIO_NET).
 * The VirtIODevice parent object is the first member.
 * NOTE(review): member notes marked "presumably" are inferred from names and
 * types only - confirm against the device implementation.
 */
166 struct VirtIONet {
167     VirtIODevice parent_obj;
168     uint8_t mac[ETH_ALEN];
169     uint16_t status;
170     VirtIONetQueue *vqs;
171     VirtQueue *ctrl_vq;
172     NICState *nic;
173     /* RSC chains - temporary storage of coalesced data;
174        all of this data is lost in case of migration */
175     QTAILQ_HEAD(, VirtioNetRscChain) rsc_chains;
176     uint32_t tx_timeout;
177     int32_t tx_burst;
178     uint32_t has_vnet_hdr;
179     size_t host_hdr_len;
180     size_t guest_hdr_len;
181     uint64_t host_features;
182     uint32_t rsc_timeout;
183     uint8_t rsc4_enabled;
184     uint8_t rsc6_enabled;
185     uint8_t has_ufo;
186     uint32_t mergeable_rx_bufs;
187     uint8_t promisc;
188     uint8_t allmulti;
189     uint8_t alluni;
190     uint8_t nomulti;
191     uint8_t nouni;
192     uint8_t nobcast;
193     uint8_t vhost_started;
194     struct {
195         uint32_t in_use;
196         uint32_t first_multi;
197         uint8_t multi_overflow;
198         uint8_t uni_overflow;
199         uint8_t *macs;          /* presumably ETH_ALEN bytes per entry - TODO confirm */
200     } mac_table;
201     uint32_t *vlans;            /* presumably a bitmap covering MAX_VLAN ids - TODO confirm */
202     virtio_net_conf net_conf;
203     NICConf nic_conf;
204     DeviceState *qdev;
205     int multiqueue;
206     uint16_t max_queue_pairs;
207     uint16_t curr_queue_pairs;
208     uint16_t max_ncs;
209     size_t config_size;
210     char *netclient_name;
211     char *netclient_type;
212     uint64_t curr_guest_offloads;
213     /* used during the saved-state restore phase to preserve curr_guest_offloads */
214     uint64_t saved_guest_offloads;
215     AnnounceTimer announce_timer;
216     bool needs_vnet_hdr_swap;
217     bool mtu_bypass_backend;
218     /* primary failover device is hidden */
219     bool failover_primary_hidden;
220     bool failover;
221     DeviceListener primary_listener;
222     QDict *primary_opts;
223     bool primary_opts_from_json;
224     NotifierWithReturn migration_state;
225     VirtioNetRssData rss_data;
226     struct NetRxPkt *rx_pkt;
227     struct EBPFRSSContext ebpf_rss;
228     uint32_t nr_ebpf_rss_fds;   /* presumably the entry count of ebpf_rss_fds - TODO confirm */
229     char **ebpf_rss_fds;
230 };
231 
/*
 * Handle a control request supplied as scatter/gather lists.
 *
 * vdev:           the virtio device the request is addressed to
 * in_sg/in_num:   iovec array and its element count
 * out_sg/out_num: iovec array and its element count
 *
 * NOTE(review): presumably out_sg carries the driver's request and in_sg
 * receives the device's reply, with the return value being the number of
 * bytes written to in_sg - the definition is not in this header; confirm.
 */
232 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
233                                   const struct iovec *in_sg, unsigned in_num,
234                                   const struct iovec *out_sg,
235                                   unsigned out_num);
/*
 * Set the net client name and type associated with device n.
 * NOTE(review): presumably updates n->netclient_name and n->netclient_type;
 * whether the strings are copied or borrowed is not visible here - confirm
 * before relying on the lifetime of name/type.
 */
236 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
237                                    const char *type);
/*
 * Return a 64-bit value describing the guest offloads supported by n; the
 * device is taken through a const pointer.
 * NOTE(review): the result is presumably a bitmask in the same encoding as
 * VirtIONet.curr_guest_offloads - confirm against the definition.
 */
238 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n);
239 
240 #endif
241