xref: /freebsd/sys/dev/virtio/network/virtio_net.h (revision 97a4045a)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * This header is BSD licensed so anyone can use the definitions to implement
 * compatible drivers/servers.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of IBM nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

3110b59a9bSPeter Grehan #ifndef _VIRTIO_NET_H
3210b59a9bSPeter Grehan #define _VIRTIO_NET_H
3310b59a9bSPeter Grehan 
/*
 * The feature bitmap for virtio net.
 *
 * Each macro is a single-bit mask.  All but VIRTIO_NET_F_SPEED_DUPLEX fit in
 * the low 24 bits; that one is bit 63 and is therefore a 64-bit constant.
 */
#define VIRTIO_NET_F_CSUM		0x000001 /* Host handles pkts w/ partial csum */
#define VIRTIO_NET_F_GUEST_CSUM		0x000002 /* Guest handles pkts w/ partial csum */
#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 0x000004 /* Dynamic offload configuration. */
#define VIRTIO_NET_F_MTU		0x000008 /* Initial MTU advice */
#define VIRTIO_NET_F_MAC		0x000020 /* Host has given MAC address. */
#define VIRTIO_NET_F_GSO		0x000040 /* Host handles pkts w/ any GSO type */
#define VIRTIO_NET_F_GUEST_TSO4		0x000080 /* Guest can handle TSOv4 in. */
#define VIRTIO_NET_F_GUEST_TSO6		0x000100 /* Guest can handle TSOv6 in. */
#define VIRTIO_NET_F_GUEST_ECN		0x000200 /* Guest can handle TSO[6] w/ ECN in. */
#define VIRTIO_NET_F_GUEST_UFO		0x000400 /* Guest can handle UFO in. */
#define VIRTIO_NET_F_HOST_TSO4		0x000800 /* Host can handle TSOv4 in. */
#define VIRTIO_NET_F_HOST_TSO6		0x001000 /* Host can handle TSOv6 in. */
#define VIRTIO_NET_F_HOST_ECN		0x002000 /* Host can handle TSO[6] w/ ECN in. */
#define VIRTIO_NET_F_HOST_UFO		0x004000 /* Host can handle UFO in. */
#define VIRTIO_NET_F_MRG_RXBUF		0x008000 /* Host can merge receive buffers. */
#define VIRTIO_NET_F_STATUS		0x010000 /* virtio_net_config.status available */
#define VIRTIO_NET_F_CTRL_VQ		0x020000 /* Control channel available */
#define VIRTIO_NET_F_CTRL_RX		0x040000 /* Control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN		0x080000 /* Control channel VLAN filtering */
#define VIRTIO_NET_F_CTRL_RX_EXTRA	0x100000 /* Extra RX mode control support */
#define VIRTIO_NET_F_GUEST_ANNOUNCE	0x200000 /* Announce device on network */
#define VIRTIO_NET_F_MQ			0x400000 /* Device supports multiqueue with automatic receive steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR	0x800000 /* Set MAC address */
#define VIRTIO_NET_F_SPEED_DUPLEX	(1ULL << 63) /* Device set linkspeed and duplex */

/* Bits of virtio_net_config.status (see VIRTIO_NET_F_STATUS). */
#define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
#define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */

6310b59a9bSPeter Grehan struct virtio_net_config {
6410b59a9bSPeter Grehan 	/* The config defining mac address (if VIRTIO_NET_F_MAC) */
6510b59a9bSPeter Grehan 	uint8_t		mac[ETHER_ADDR_LEN];
6610b59a9bSPeter Grehan 	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
6710b59a9bSPeter Grehan 	uint16_t	status;
68cfc28a5bSBryan Venteicher 	/* Maximum number of each of transmit and receive queues;
69cfc28a5bSBryan Venteicher 	 * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
70cfc28a5bSBryan Venteicher 	 * Legal values are between 1 and 0x8000.
71cfc28a5bSBryan Venteicher 	 */
72cfc28a5bSBryan Venteicher 	uint16_t	max_virtqueue_pairs;
735e220811SBryan Venteicher 	/* Default maximum transmit unit advice */
745e220811SBryan Venteicher 	uint16_t	mtu;
755e220811SBryan Venteicher 	/*
765e220811SBryan Venteicher 	 * speed, in units of 1Mb. All values 0 to INT_MAX are legal.
775e220811SBryan Venteicher 	 * Any other value stands for unknown.
785e220811SBryan Venteicher 	 */
795e220811SBryan Venteicher 	uint32_t	speed;
805e220811SBryan Venteicher 	/*
815e220811SBryan Venteicher 	 * 0x00 - half duplex
825e220811SBryan Venteicher 	 * 0x01 - full duplex
835e220811SBryan Venteicher 	 * Any other value stands for unknown.
845e220811SBryan Venteicher 	 */
855e220811SBryan Venteicher 	uint8_t		duplex;
8610b59a9bSPeter Grehan } __packed;
8710b59a9bSPeter Grehan 
/*
 * This header comes first in the scatter-gather list.  If you don't
 * specify GSO or CSUM features, you can simply ignore the header.
 *
 * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf,
 * only flattened.
 *
 * The VIRTIO_NET_HDR_F_* and VIRTIO_NET_HDR_GSO_* macros are defined next to
 * the field they apply to; they are ordinary file-scope macros.
 */
struct virtio_net_hdr_v1 {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
	uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */
	uint8_t gso_type;
	uint16_t hdr_len;	/* Ethernet + IP + tcp/udp hdrs */
	uint16_t gso_size;	/* Bytes to append to hdr_len per frame */
	uint16_t csum_start;	/* Position to start checksumming from */
	uint16_t csum_offset;	/* Offset after that to place checksum */
	uint16_t num_buffers;	/* Number of merged rx buffers */
};

/*
 * This header comes first in the scatter-gather list.
 * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must
 * be the first element of the scatter-gather list.  If you don't
 * specify GSO or CSUM features, you can simply ignore the header.
 */
struct virtio_net_hdr {
	/* See VIRTIO_NET_HDR_F_* */
	uint8_t flags;
	/* See VIRTIO_NET_HDR_GSO_* */
	uint8_t gso_type;
	uint16_t hdr_len;	/* Ethernet + IP + tcp/udp hdrs */
	uint16_t gso_size;	/* Bytes to append to hdr_len per frame */
	uint16_t csum_start;	/* Position to start checksumming from */
	uint16_t csum_offset;	/* Offset after that to place checksum */
};

12910b59a9bSPeter Grehan /*
13010b59a9bSPeter Grehan  * This is the version of the header to use when the MRG_RXBUF
13110b59a9bSPeter Grehan  * feature has been negotiated.
13210b59a9bSPeter Grehan  */
13310b59a9bSPeter Grehan struct virtio_net_hdr_mrg_rxbuf {
13410b59a9bSPeter Grehan 	struct virtio_net_hdr hdr;
13510b59a9bSPeter Grehan 	uint16_t num_buffers;	/* Number of merged rx buffers */
13610b59a9bSPeter Grehan };
13710b59a9bSPeter Grehan 
/*
 * Control virtqueue data structures
 *
 * The control virtqueue expects a header in the first sg entry
 * and an ack/status response in the last entry.  Data for the
 * command goes in between.
 *
 * class selects the command group (VIRTIO_NET_CTRL_RX, _MAC, _VLAN, ...)
 * and cmd the command within that group.
 */
struct virtio_net_ctrl_hdr {
	uint8_t class;
	uint8_t cmd;
} __packed;

/* Ack/status values for a control virtqueue command. */
#define VIRTIO_NET_OK	0
#define VIRTIO_NET_ERR	1

/*
 * Control the RX mode, i.e. promiscuous, allmulti, etc...
 * All commands require an "out" sg entry containing a 1 byte
 * state value, zero = disable, non-zero = enable.  Commands
 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
 */
#define VIRTIO_NET_CTRL_RX		0
#define VIRTIO_NET_CTRL_RX_PROMISC	0
#define VIRTIO_NET_CTRL_RX_ALLMULTI	1
#define VIRTIO_NET_CTRL_RX_ALLUNI	2
#define VIRTIO_NET_CTRL_RX_NOMULTI	3
#define VIRTIO_NET_CTRL_RX_NOUNI	4
#define VIRTIO_NET_CTRL_RX_NOBCAST	5

16810b59a9bSPeter Grehan /*
16910b59a9bSPeter Grehan  * Control the MAC filter table.
17010b59a9bSPeter Grehan  *
17110b59a9bSPeter Grehan  * The MAC filter table is managed by the hypervisor, the guest should
17210b59a9bSPeter Grehan  * assume the size is infinite.  Filtering should be considered
17310b59a9bSPeter Grehan  * non-perfect, ie. based on hypervisor resources, the guest may
17410b59a9bSPeter Grehan  * received packets from sources not specified in the filter list.
17510b59a9bSPeter Grehan  *
17610b59a9bSPeter Grehan  * In addition to the class/cmd header, the TABLE_SET command requires
17710b59a9bSPeter Grehan  * two out scatterlists.  Each contains a 4 byte count of entries followed
17810b59a9bSPeter Grehan  * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
17910b59a9bSPeter Grehan  * first sg list contains unicast addresses, the second is for multicast.
18010b59a9bSPeter Grehan  * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
18110b59a9bSPeter Grehan  * is available.
182cfc28a5bSBryan Venteicher  *
183cfc28a5bSBryan Venteicher  * The ADDR_SET command requests one out scatterlist, it contains a
184cfc28a5bSBryan Venteicher  * 6 bytes MAC address. This functionality is present if the
185cfc28a5bSBryan Venteicher  * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
18610b59a9bSPeter Grehan  */
18710b59a9bSPeter Grehan struct virtio_net_ctrl_mac {
18810b59a9bSPeter Grehan 	uint32_t	entries;
18910b59a9bSPeter Grehan 	uint8_t		macs[][ETHER_ADDR_LEN];
19010b59a9bSPeter Grehan } __packed;
19110b59a9bSPeter Grehan 
/* MAC filter control class and its commands. */
#define VIRTIO_NET_CTRL_MAC		1
#define VIRTIO_NET_CTRL_MAC_TABLE_SET	0
#define VIRTIO_NET_CTRL_MAC_ADDR_SET	1

/*
 * Control VLAN filtering
 *
 * The VLAN filter table is controlled via a simple ADD/DEL interface.
 * VLAN IDs not added may be filtered by the hypervisor.  Del is the
 * opposite of add.  Both commands expect an out entry containing a 2
 * byte VLAN ID.  VLAN filtering is available with the
 * VIRTIO_NET_F_CTRL_VLAN feature bit.
 */
#define VIRTIO_NET_CTRL_VLAN		2
#define VIRTIO_NET_CTRL_VLAN_ADD	0
#define VIRTIO_NET_CTRL_VLAN_DEL	1

/*
 * Control link announce acknowledgement
 *
 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that the
 * driver has received the notification; the device clears the
 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
 * this command.
 */
#define VIRTIO_NET_CTRL_ANNOUNCE	3
#define VIRTIO_NET_CTRL_ANNOUNCE_ACK	0

/*
 * Control Receive Flow Steering
 *
 * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET enables Receive Flow
 * Steering, specifying the number of the transmit and receive queues
 * that will be used. After the command is consumed and acked by the
 * device, the device will not steer new packets on receive virtqueues
 * other than specified nor read from transmit virtqueues other than
 * specified. Accordingly, the driver should not transmit new packets on
 * virtqueues other than specified.
 */
struct virtio_net_ctrl_mq {
	uint16_t	virtqueue_pairs;	/* Number of queue pairs to use */
} __packed;

/* Multiqueue control class, its command, and the legal pair-count range. */
#define VIRTIO_NET_CTRL_MQ			4
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET		0
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN		1
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX		0x8000

/*
 * Control network offloads
 *
 * Reconfigures the network offloads that Guest can handle.
 *
 * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit.
 *
 * Command data format matches the feature bit mask exactly.
 *
 * See VIRTIO_NET_F_GUEST_* for the list of offloads
 * that can be enabled/disabled.
 */
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS		5
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET	0

2555e220811SBryan Venteicher /*
256f8bc74e2SVincenzo Maffione  * Use the checksum offset in the VirtIO header to set the
257f8bc74e2SVincenzo Maffione  * correct CSUM_* flags.
258f8bc74e2SVincenzo Maffione  */
259f8bc74e2SVincenzo Maffione static inline int
virtio_net_rx_csum_by_offset(struct mbuf * m,uint16_t eth_type,int ip_start,struct virtio_net_hdr * hdr)260f8bc74e2SVincenzo Maffione virtio_net_rx_csum_by_offset(struct mbuf *m, uint16_t eth_type, int ip_start,
261f8bc74e2SVincenzo Maffione 			struct virtio_net_hdr *hdr)
262f8bc74e2SVincenzo Maffione {
263f8bc74e2SVincenzo Maffione #if defined(INET) || defined(INET6)
264f8bc74e2SVincenzo Maffione 	int offset = hdr->csum_start + hdr->csum_offset;
265f8bc74e2SVincenzo Maffione #endif
266f8bc74e2SVincenzo Maffione 
267f8bc74e2SVincenzo Maffione 	/* Only do a basic sanity check on the offset. */
268f8bc74e2SVincenzo Maffione 	switch (eth_type) {
269f8bc74e2SVincenzo Maffione #if defined(INET)
270f8bc74e2SVincenzo Maffione 	case ETHERTYPE_IP:
271f8bc74e2SVincenzo Maffione 		if (__predict_false(offset < ip_start + sizeof(struct ip)))
272f8bc74e2SVincenzo Maffione 			return (1);
273f8bc74e2SVincenzo Maffione 		break;
274f8bc74e2SVincenzo Maffione #endif
275f8bc74e2SVincenzo Maffione #if defined(INET6)
276f8bc74e2SVincenzo Maffione 	case ETHERTYPE_IPV6:
277f8bc74e2SVincenzo Maffione 		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
278f8bc74e2SVincenzo Maffione 			return (1);
279f8bc74e2SVincenzo Maffione 		break;
280f8bc74e2SVincenzo Maffione #endif
281f8bc74e2SVincenzo Maffione 	default:
282f8bc74e2SVincenzo Maffione 		/* Here we should increment the rx_csum_bad_ethtype counter. */
283f8bc74e2SVincenzo Maffione 		return (1);
284f8bc74e2SVincenzo Maffione 	}
285f8bc74e2SVincenzo Maffione 
286f8bc74e2SVincenzo Maffione 	/*
287f8bc74e2SVincenzo Maffione 	 * Use the offset to determine the appropriate CSUM_* flags. This is
288f8bc74e2SVincenzo Maffione 	 * a bit dirty, but we can get by with it since the checksum offsets
289f8bc74e2SVincenzo Maffione 	 * happen to be different. We assume the host host does not do IPv4
290f8bc74e2SVincenzo Maffione 	 * header checksum offloading.
291f8bc74e2SVincenzo Maffione 	 */
292f8bc74e2SVincenzo Maffione 	switch (hdr->csum_offset) {
293f8bc74e2SVincenzo Maffione 	case offsetof(struct udphdr, uh_sum):
294f8bc74e2SVincenzo Maffione 	case offsetof(struct tcphdr, th_sum):
295f8bc74e2SVincenzo Maffione 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
296f8bc74e2SVincenzo Maffione 		m->m_pkthdr.csum_data = 0xFFFF;
297f8bc74e2SVincenzo Maffione 		break;
298f8bc74e2SVincenzo Maffione 	default:
299f8bc74e2SVincenzo Maffione 		/* Here we should increment the rx_csum_bad_offset counter. */
300f8bc74e2SVincenzo Maffione 		return (1);
301f8bc74e2SVincenzo Maffione 	}
302f8bc74e2SVincenzo Maffione 
303f8bc74e2SVincenzo Maffione 	return (0);
304f8bc74e2SVincenzo Maffione }
305f8bc74e2SVincenzo Maffione 
306f8bc74e2SVincenzo Maffione static inline int
virtio_net_rx_csum_by_parse(struct mbuf * m,uint16_t eth_type,int ip_start,struct virtio_net_hdr * hdr)307f8bc74e2SVincenzo Maffione virtio_net_rx_csum_by_parse(struct mbuf *m, uint16_t eth_type, int ip_start,
308f8bc74e2SVincenzo Maffione 		       struct virtio_net_hdr *hdr)
309f8bc74e2SVincenzo Maffione {
310f8bc74e2SVincenzo Maffione 	int offset, proto;
311f8bc74e2SVincenzo Maffione 
312f8bc74e2SVincenzo Maffione 	switch (eth_type) {
313f8bc74e2SVincenzo Maffione #if defined(INET)
314f8bc74e2SVincenzo Maffione 	case ETHERTYPE_IP: {
315f8bc74e2SVincenzo Maffione 		struct ip *ip;
316f8bc74e2SVincenzo Maffione 		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
317f8bc74e2SVincenzo Maffione 			return (1);
318f8bc74e2SVincenzo Maffione 		ip = (struct ip *)(m->m_data + ip_start);
319f8bc74e2SVincenzo Maffione 		proto = ip->ip_p;
320f8bc74e2SVincenzo Maffione 		offset = ip_start + (ip->ip_hl << 2);
321f8bc74e2SVincenzo Maffione 		break;
322f8bc74e2SVincenzo Maffione 	}
323f8bc74e2SVincenzo Maffione #endif
324f8bc74e2SVincenzo Maffione #if defined(INET6)
325f8bc74e2SVincenzo Maffione 	case ETHERTYPE_IPV6:
326f8bc74e2SVincenzo Maffione 		if (__predict_false(m->m_len < ip_start +
327f8bc74e2SVincenzo Maffione 		    sizeof(struct ip6_hdr)))
328f8bc74e2SVincenzo Maffione 			return (1);
329f8bc74e2SVincenzo Maffione 		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
330f8bc74e2SVincenzo Maffione 		if (__predict_false(offset < 0))
331f8bc74e2SVincenzo Maffione 			return (1);
332f8bc74e2SVincenzo Maffione 		break;
333f8bc74e2SVincenzo Maffione #endif
334f8bc74e2SVincenzo Maffione 	default:
335f8bc74e2SVincenzo Maffione 		/* Here we should increment the rx_csum_bad_ethtype counter. */
336f8bc74e2SVincenzo Maffione 		return (1);
337f8bc74e2SVincenzo Maffione 	}
338f8bc74e2SVincenzo Maffione 
339f8bc74e2SVincenzo Maffione 	switch (proto) {
340f8bc74e2SVincenzo Maffione 	case IPPROTO_TCP:
341f8bc74e2SVincenzo Maffione 		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
342f8bc74e2SVincenzo Maffione 			return (1);
343f8bc74e2SVincenzo Maffione 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
344f8bc74e2SVincenzo Maffione 		m->m_pkthdr.csum_data = 0xFFFF;
345f8bc74e2SVincenzo Maffione 		break;
346f8bc74e2SVincenzo Maffione 	case IPPROTO_UDP:
347f8bc74e2SVincenzo Maffione 		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
348f8bc74e2SVincenzo Maffione 			return (1);
349f8bc74e2SVincenzo Maffione 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
350f8bc74e2SVincenzo Maffione 		m->m_pkthdr.csum_data = 0xFFFF;
351f8bc74e2SVincenzo Maffione 		break;
352f8bc74e2SVincenzo Maffione 	default:
353f8bc74e2SVincenzo Maffione 		/*
354f8bc74e2SVincenzo Maffione 		 * For the remaining protocols, FreeBSD does not support
355f8bc74e2SVincenzo Maffione 		 * checksum offloading, so the checksum will be recomputed.
356f8bc74e2SVincenzo Maffione 		 */
357f8bc74e2SVincenzo Maffione #if 0
358f8bc74e2SVincenzo Maffione 		if_printf(ifp, "cksum offload of unsupported "
359f8bc74e2SVincenzo Maffione 		    "protocol eth_type=%#x proto=%d csum_start=%d "
360f8bc74e2SVincenzo Maffione 		    "csum_offset=%d\n", __func__, eth_type, proto,
361f8bc74e2SVincenzo Maffione 		    hdr->csum_start, hdr->csum_offset);
362f8bc74e2SVincenzo Maffione #endif
363f8bc74e2SVincenzo Maffione 		break;
364f8bc74e2SVincenzo Maffione 	}
365f8bc74e2SVincenzo Maffione 
366f8bc74e2SVincenzo Maffione 	return (0);
367f8bc74e2SVincenzo Maffione }
368f8bc74e2SVincenzo Maffione 
369f8bc74e2SVincenzo Maffione /*
370f8bc74e2SVincenzo Maffione  * Set the appropriate CSUM_* flags. Unfortunately, the information
371f8bc74e2SVincenzo Maffione  * provided is not directly useful to us. The VirtIO header gives the
372f8bc74e2SVincenzo Maffione  * offset of the checksum, which is all Linux needs, but this is not
373f8bc74e2SVincenzo Maffione  * how FreeBSD does things. We are forced to peek inside the packet
374f8bc74e2SVincenzo Maffione  * a bit.
375f8bc74e2SVincenzo Maffione  *
376f8bc74e2SVincenzo Maffione  * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
377f8bc74e2SVincenzo Maffione  * could accept the offsets and let the stack figure it out.
378f8bc74e2SVincenzo Maffione  */
379f8bc74e2SVincenzo Maffione static inline int
virtio_net_rx_csum(struct mbuf * m,struct virtio_net_hdr * hdr)380f8bc74e2SVincenzo Maffione virtio_net_rx_csum(struct mbuf *m, struct virtio_net_hdr *hdr)
381f8bc74e2SVincenzo Maffione {
382f8bc74e2SVincenzo Maffione 	struct ether_header *eh;
383f8bc74e2SVincenzo Maffione 	struct ether_vlan_header *evh;
384f8bc74e2SVincenzo Maffione 	uint16_t eth_type;
385f8bc74e2SVincenzo Maffione 	int offset, error;
386f8bc74e2SVincenzo Maffione 
387f8bc74e2SVincenzo Maffione 	if ((hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
388f8bc74e2SVincenzo Maffione 	    VIRTIO_NET_HDR_F_DATA_VALID)) == 0) {
389f8bc74e2SVincenzo Maffione 		return (0);
390f8bc74e2SVincenzo Maffione 	}
391f8bc74e2SVincenzo Maffione 
392f8bc74e2SVincenzo Maffione 	eh = mtod(m, struct ether_header *);
393f8bc74e2SVincenzo Maffione 	eth_type = ntohs(eh->ether_type);
394f8bc74e2SVincenzo Maffione 	if (eth_type == ETHERTYPE_VLAN) {
395f8bc74e2SVincenzo Maffione 		/* BMV: We should handle nested VLAN tags too. */
396f8bc74e2SVincenzo Maffione 		evh = mtod(m, struct ether_vlan_header *);
397f8bc74e2SVincenzo Maffione 		eth_type = ntohs(evh->evl_proto);
398f8bc74e2SVincenzo Maffione 		offset = sizeof(struct ether_vlan_header);
399f8bc74e2SVincenzo Maffione 	} else
400f8bc74e2SVincenzo Maffione 		offset = sizeof(struct ether_header);
401f8bc74e2SVincenzo Maffione 
402f8bc74e2SVincenzo Maffione 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
403f8bc74e2SVincenzo Maffione 		error = virtio_net_rx_csum_by_offset(m, eth_type, offset, hdr);
404f8bc74e2SVincenzo Maffione 	else
405f8bc74e2SVincenzo Maffione 		error = virtio_net_rx_csum_by_parse(m, eth_type, offset, hdr);
406f8bc74e2SVincenzo Maffione 
407f8bc74e2SVincenzo Maffione 	return (error);
408f8bc74e2SVincenzo Maffione }
409f8bc74e2SVincenzo Maffione 
410f8bc74e2SVincenzo Maffione static inline int
virtio_net_tx_offload_ctx(struct mbuf * m,int * etype,int * proto,int * start)411f8bc74e2SVincenzo Maffione virtio_net_tx_offload_ctx(struct mbuf *m, int *etype, int *proto, int *start)
412f8bc74e2SVincenzo Maffione {
413f8bc74e2SVincenzo Maffione 	struct ether_vlan_header *evh;
41488646c4eSMateusz Guzik #if defined(INET) || defined(INET6)
415f8bc74e2SVincenzo Maffione 	int offset;
41688646c4eSMateusz Guzik #endif
417f8bc74e2SVincenzo Maffione 
418f8bc74e2SVincenzo Maffione 	evh = mtod(m, struct ether_vlan_header *);
419f8bc74e2SVincenzo Maffione 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
420f8bc74e2SVincenzo Maffione 		/* BMV: We should handle nested VLAN tags too. */
421f8bc74e2SVincenzo Maffione 		*etype = ntohs(evh->evl_proto);
42288646c4eSMateusz Guzik #if defined(INET) || defined(INET6)
423f8bc74e2SVincenzo Maffione 		offset = sizeof(struct ether_vlan_header);
42488646c4eSMateusz Guzik #endif
425f8bc74e2SVincenzo Maffione 	} else {
426f8bc74e2SVincenzo Maffione 		*etype = ntohs(evh->evl_encap_proto);
42788646c4eSMateusz Guzik #if defined(INET) || defined(INET6)
428f8bc74e2SVincenzo Maffione 		offset = sizeof(struct ether_header);
42988646c4eSMateusz Guzik #endif
430f8bc74e2SVincenzo Maffione 	}
431f8bc74e2SVincenzo Maffione 
432f8bc74e2SVincenzo Maffione 	switch (*etype) {
433f8bc74e2SVincenzo Maffione #if defined(INET)
434f8bc74e2SVincenzo Maffione 	case ETHERTYPE_IP: {
435f8bc74e2SVincenzo Maffione 		struct ip *ip, iphdr;
436f8bc74e2SVincenzo Maffione 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
437f8bc74e2SVincenzo Maffione 			m_copydata(m, offset, sizeof(struct ip),
438f8bc74e2SVincenzo Maffione 			    (caddr_t) &iphdr);
439f8bc74e2SVincenzo Maffione 			ip = &iphdr;
440f8bc74e2SVincenzo Maffione 		} else
441f8bc74e2SVincenzo Maffione 			ip = (struct ip *)(m->m_data + offset);
442f8bc74e2SVincenzo Maffione 		*proto = ip->ip_p;
443f8bc74e2SVincenzo Maffione 		*start = offset + (ip->ip_hl << 2);
444f8bc74e2SVincenzo Maffione 		break;
445f8bc74e2SVincenzo Maffione 	}
446f8bc74e2SVincenzo Maffione #endif
447f8bc74e2SVincenzo Maffione #if defined(INET6)
448f8bc74e2SVincenzo Maffione 	case ETHERTYPE_IPV6:
449f8bc74e2SVincenzo Maffione 		*proto = -1;
450f8bc74e2SVincenzo Maffione 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
451f8bc74e2SVincenzo Maffione 		/* Assert the network stack sent us a valid packet. */
452f8bc74e2SVincenzo Maffione 		KASSERT(*start > offset,
453f8bc74e2SVincenzo Maffione 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
454f8bc74e2SVincenzo Maffione 		    *start, offset, *proto));
455f8bc74e2SVincenzo Maffione 		break;
456f8bc74e2SVincenzo Maffione #endif
457f8bc74e2SVincenzo Maffione 	default:
458f8bc74e2SVincenzo Maffione 		/* Here we should increment the tx_csum_bad_ethtype counter. */
459f8bc74e2SVincenzo Maffione 		return (EINVAL);
460f8bc74e2SVincenzo Maffione 	}
461f8bc74e2SVincenzo Maffione 
462f8bc74e2SVincenzo Maffione 	return (0);
463f8bc74e2SVincenzo Maffione }
464f8bc74e2SVincenzo Maffione 
465f8bc74e2SVincenzo Maffione static inline int
virtio_net_tx_offload_tso(if_t ifp,struct mbuf * m,int eth_type,int offset,bool allow_ecn,struct virtio_net_hdr * hdr)466f8bc74e2SVincenzo Maffione virtio_net_tx_offload_tso(if_t ifp, struct mbuf *m, int eth_type,
467f8bc74e2SVincenzo Maffione 		     int offset, bool allow_ecn, struct virtio_net_hdr *hdr)
468f8bc74e2SVincenzo Maffione {
469f8bc74e2SVincenzo Maffione 	static struct timeval lastecn;
470f8bc74e2SVincenzo Maffione 	static int curecn;
471f8bc74e2SVincenzo Maffione 	struct tcphdr *tcp, tcphdr;
472f8bc74e2SVincenzo Maffione 
473f8bc74e2SVincenzo Maffione 	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
474f8bc74e2SVincenzo Maffione 		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
475f8bc74e2SVincenzo Maffione 		tcp = &tcphdr;
476f8bc74e2SVincenzo Maffione 	} else
477f8bc74e2SVincenzo Maffione 		tcp = (struct tcphdr *)(m->m_data + offset);
478f8bc74e2SVincenzo Maffione 
479f8bc74e2SVincenzo Maffione 	hdr->hdr_len = offset + (tcp->th_off << 2);
480f8bc74e2SVincenzo Maffione 	hdr->gso_size = m->m_pkthdr.tso_segsz;
481f8bc74e2SVincenzo Maffione 	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
482f8bc74e2SVincenzo Maffione 	    VIRTIO_NET_HDR_GSO_TCPV6;
483f8bc74e2SVincenzo Maffione 
484f8bc74e2SVincenzo Maffione 	if (tcp->th_flags & TH_CWR) {
485f8bc74e2SVincenzo Maffione 		/*
486f8bc74e2SVincenzo Maffione 		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
487f8bc74e2SVincenzo Maffione 		 * ECN support is not on a per-interface basis, but globally via
488f8bc74e2SVincenzo Maffione 		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
489f8bc74e2SVincenzo Maffione 		 */
490f8bc74e2SVincenzo Maffione 		if (!allow_ecn) {
491f8bc74e2SVincenzo Maffione 			if (ppsratecheck(&lastecn, &curecn, 1))
492f8bc74e2SVincenzo Maffione 				if_printf(ifp,
493f8bc74e2SVincenzo Maffione 				    "TSO with ECN not negotiated with host\n");
494f8bc74e2SVincenzo Maffione 			return (ENOTSUP);
495f8bc74e2SVincenzo Maffione 		}
496f8bc74e2SVincenzo Maffione 		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
497f8bc74e2SVincenzo Maffione 	}
498f8bc74e2SVincenzo Maffione 
499f8bc74e2SVincenzo Maffione 	/* Here we should increment tx_tso counter. */
500f8bc74e2SVincenzo Maffione 
501f8bc74e2SVincenzo Maffione 	return (0);
502f8bc74e2SVincenzo Maffione }
503f8bc74e2SVincenzo Maffione 
504f8bc74e2SVincenzo Maffione static inline struct mbuf *
virtio_net_tx_offload(if_t ifp,struct mbuf * m,bool allow_ecn,struct virtio_net_hdr * hdr)505f8bc74e2SVincenzo Maffione virtio_net_tx_offload(if_t ifp, struct mbuf *m, bool allow_ecn,
506f8bc74e2SVincenzo Maffione 		 struct virtio_net_hdr *hdr)
507f8bc74e2SVincenzo Maffione {
508f8bc74e2SVincenzo Maffione 	int flags, etype, csum_start, proto, error;
509f8bc74e2SVincenzo Maffione 
510f8bc74e2SVincenzo Maffione 	flags = m->m_pkthdr.csum_flags;
511f8bc74e2SVincenzo Maffione 
512f8bc74e2SVincenzo Maffione 	error = virtio_net_tx_offload_ctx(m, &etype, &proto, &csum_start);
513f8bc74e2SVincenzo Maffione 	if (error)
514f8bc74e2SVincenzo Maffione 		goto drop;
515f8bc74e2SVincenzo Maffione 
516f8bc74e2SVincenzo Maffione 	if ((etype == ETHERTYPE_IP && (flags & (CSUM_TCP | CSUM_UDP))) ||
517f8bc74e2SVincenzo Maffione 	    (etype == ETHERTYPE_IPV6 &&
518f8bc74e2SVincenzo Maffione 	        (flags & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)))) {
519f8bc74e2SVincenzo Maffione 		/*
520f8bc74e2SVincenzo Maffione 		 * We could compare the IP protocol vs the CSUM_ flag too,
521f8bc74e2SVincenzo Maffione 		 * but that really should not be necessary.
522f8bc74e2SVincenzo Maffione 		 */
523f8bc74e2SVincenzo Maffione 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
524f8bc74e2SVincenzo Maffione 		hdr->csum_start = csum_start;
525f8bc74e2SVincenzo Maffione 		hdr->csum_offset = m->m_pkthdr.csum_data;
526f8bc74e2SVincenzo Maffione 		/* Here we should increment the tx_csum counter. */
527f8bc74e2SVincenzo Maffione 	}
528f8bc74e2SVincenzo Maffione 
529f8bc74e2SVincenzo Maffione 	if (flags & CSUM_TSO) {
530f8bc74e2SVincenzo Maffione 		if (__predict_false(proto != IPPROTO_TCP)) {
531f8bc74e2SVincenzo Maffione 			/* Likely failed to correctly parse the mbuf.
532f8bc74e2SVincenzo Maffione 			 * Here we should increment the tx_tso_not_tcp
533f8bc74e2SVincenzo Maffione 			 * counter. */
534f8bc74e2SVincenzo Maffione 			goto drop;
535f8bc74e2SVincenzo Maffione 		}
536f8bc74e2SVincenzo Maffione 
537f8bc74e2SVincenzo Maffione 		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
538f8bc74e2SVincenzo Maffione 		    ("%s: mbuf %p TSO without checksum offload %#x",
539f8bc74e2SVincenzo Maffione 		    __func__, m, flags));
540f8bc74e2SVincenzo Maffione 
541f8bc74e2SVincenzo Maffione 		error = virtio_net_tx_offload_tso(ifp, m, etype, csum_start,
542f8bc74e2SVincenzo Maffione 					     allow_ecn, hdr);
543f8bc74e2SVincenzo Maffione 		if (error)
544f8bc74e2SVincenzo Maffione 			goto drop;
545f8bc74e2SVincenzo Maffione 	}
546f8bc74e2SVincenzo Maffione 
547f8bc74e2SVincenzo Maffione 	return (m);
548f8bc74e2SVincenzo Maffione 
549f8bc74e2SVincenzo Maffione drop:
550f8bc74e2SVincenzo Maffione 	m_freem(m);
551f8bc74e2SVincenzo Maffione 	return (NULL);
552f8bc74e2SVincenzo Maffione }
553f8bc74e2SVincenzo Maffione 
55410b59a9bSPeter Grehan #endif /* _VIRTIO_NET_H */
555