xref: /freebsd/sys/dev/netmap/netmap_offloadings.c (revision 4e8d558c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2014-2015 Vincenzo Maffione
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *   2. Redistributions in binary form must reproduce the above copyright
13  *      notice, this list of conditions and the following disclaimer in the
14  *      documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /* $FreeBSD$ */
30 
31 #if defined(__FreeBSD__)
32 #include <sys/cdefs.h> /* prerequisite */
33 
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>	/* defines used in kernel.h */
37 #include <sys/kernel.h>	/* types used in module initialization */
38 #include <sys/sockio.h>
39 #include <sys/malloc.h>
40 #include <sys/socketvar.h>	/* struct socket */
41 #include <sys/socket.h> /* sockaddrs */
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <machine/bus.h>	/* bus_dmamap_* */
45 #include <sys/endian.h>
46 
47 #elif defined(linux)
48 
49 #include "bsd_glue.h"
50 
51 #elif defined(__APPLE__)
52 
53 #warning OSX support is only partial
54 #include "osx_glue.h"
55 
56 #else
57 
58 #error	Unsupported platform
59 
60 #endif /* unsupported */
61 
62 #include <net/netmap.h>
63 #include <dev/netmap/netmap_kern.h>
64 
65 
66 
67 /* This routine is called by bdg_mismatch_datapath() when it finishes
68  * accumulating bytes for a segment, in order to fix some fields in the
69  * segment headers (which still contain the same content as the header
70  * of the original GSO packet). 'pkt' points to the beginning of the IP
71  * header of the segment, while 'len' is the length of the IP packet.
72  */
73 static void
74 gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
75 		u_int idx, u_int segmented_bytes, u_int last_segment)
76 {
77 	struct nm_iphdr *iph = (struct nm_iphdr *)(pkt);
78 	struct nm_ipv6hdr *ip6h = (struct nm_ipv6hdr *)(pkt);
79 	uint16_t *check = NULL;
80 	uint8_t *check_data = NULL;
81 
82 	if (ipv4) {
83 		/* Set the IPv4 "Total Length" field. */
84 		iph->tot_len = htobe16(len);
85 		nm_prdis("ip total length %u", be16toh(ip->tot_len));
86 
87 		/* Set the IPv4 "Identification" field. */
88 		iph->id = htobe16(be16toh(iph->id) + idx);
89 		nm_prdis("ip identification %u", be16toh(iph->id));
90 
91 		/* Compute and insert the IPv4 header checksum. */
92 		iph->check = 0;
93 		iph->check = nm_os_csum_ipv4(iph);
94 		nm_prdis("IP csum %x", be16toh(iph->check));
95 	} else {
96 		/* Set the IPv6 "Payload Len" field. */
97 		ip6h->payload_len = htobe16(len-iphlen);
98 	}
99 
100 	if (tcp) {
101 		struct nm_tcphdr *tcph = (struct nm_tcphdr *)(pkt + iphlen);
102 
103 		/* Set the TCP sequence number. */
104 		tcph->seq = htobe32(be32toh(tcph->seq) + segmented_bytes);
105 		nm_prdis("tcp seq %u", be32toh(tcph->seq));
106 
107 		/* Zero the PSH and FIN TCP flags if this is not the last
108 		   segment. */
109 		if (!last_segment)
110 			tcph->flags &= ~(0x8 | 0x1);
111 		nm_prdis("last_segment %u", last_segment);
112 
113 		check = &tcph->check;
114 		check_data = (uint8_t *)tcph;
115 	} else { /* UDP */
116 		struct nm_udphdr *udph = (struct nm_udphdr *)(pkt + iphlen);
117 
118 		/* Set the UDP 'Length' field. */
119 		udph->len = htobe16(len-iphlen);
120 
121 		check = &udph->check;
122 		check_data = (uint8_t *)udph;
123 	}
124 
125 	/* Compute and insert TCP/UDP checksum. */
126 	*check = 0;
127 	if (ipv4)
128 		nm_os_csum_tcpudp_ipv4(iph, check_data, len-iphlen, check);
129 	else
130 		nm_os_csum_tcpudp_ipv6(ip6h, check_data, len-iphlen, check);
131 
132 	nm_prdis("TCP/UDP csum %x", be16toh(*check));
133 }
134 
135 static inline int
136 vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
137 {
138 	uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
139 
140 	return (
141 		(gso_type != VIRTIO_NET_HDR_GSO_NONE &&
142 		 gso_type != VIRTIO_NET_HDR_GSO_TCPV4 &&
143 		 gso_type != VIRTIO_NET_HDR_GSO_UDP &&
144 		 gso_type != VIRTIO_NET_HDR_GSO_TCPV6)
145 		||
146 		 (vh->flags & ~(VIRTIO_NET_HDR_F_NEEDS_CSUM
147 			       | VIRTIO_NET_HDR_F_DATA_VALID))
148 	       );
149 }
150 
/* The VALE mismatch datapath implementation.
 *
 * Forwards the packet described by the input fragment chain starting at
 * 'ft_p' to the destination ring, when source port 'na' and destination
 * port 'dst_na' disagree on the virtio-net header length. If the source
 * uses virtio-net offloadings and the destination does not, GSO
 * segmentation and checksum computation are performed here in software.
 *
 * Parameters:
 *   na, dst_na - source and destination VALE port adapters
 *   ft_p       - first fragment of the input packet; ft_p->ft_frags
 *                gives the total number of fragments
 *   dst_ring   - destination netmap ring
 *   j          - in/out: index of the next free destination slot;
 *                updated past the slots consumed here
 *   lim        - wrap bound passed to nm_next() when advancing slot
 *                indices (presumably num_slots - 1 — see nm_next())
 *   howmany    - in/out: number of free destination slots; decremented
 *                by the number of slots actually used
 *
 * On any error (short/bad headers, not enough destination slots) the
 * packet is dropped: the function returns without updating *j/*howmany.
 */
void
bdg_mismatch_datapath(struct netmap_vp_adapter *na,
		      struct netmap_vp_adapter *dst_na,
		      const struct nm_bdg_fwd *ft_p,
		      struct netmap_ring *dst_ring,
		      u_int *j, u_int lim, u_int *howmany)
{
	struct netmap_slot *dst_slot = NULL;
	struct nm_vnet_hdr *vh = NULL;
	/* One past the last input fragment. */
	const struct nm_bdg_fwd *ft_end = ft_p + ft_p->ft_frags;

	/* Source and destination pointers. */
	uint8_t *dst, *src;
	size_t src_len, dst_len;

	/* Indices and counters for the destination ring. */
	u_int j_start = *j;
	u_int j_cur = j_start;
	u_int dst_slots = 0;

	if (unlikely(ft_p == ft_end)) {
		nm_prlim(1, "No source slots to process");
		return;
	}

	/* Init source and dest pointers. */
	src = ft_p->ft_buf;
	src_len = ft_p->ft_len;
	dst_slot = &dst_ring->slot[j_cur];
	dst = NMB(&dst_na->up, dst_slot);
	dst_len = src_len;

	/* If the source port uses the offloadings, while destination doesn't,
	 * we grab the source virtio-net header and do the offloadings here.
	 */
	if (na->up.virt_hdr_len && !dst_na->up.virt_hdr_len) {
		vh = (struct nm_vnet_hdr *)src;
		/* Initial sanity check on the source virtio-net header. If
		 * something seems wrong, just drop the packet. */
		if (src_len < na->up.virt_hdr_len) {
			nm_prlim(1, "Short src vnet header, dropping");
			return;
		}
		if (unlikely(vnet_hdr_is_bad(vh))) {
			nm_prlim(1, "Bad src vnet header, dropping");
			return;
		}
	}

	/* We are processing the first input slot and there is a mismatch
	 * between source and destination virt_hdr_len (SHL and DHL).
	 * When the a client is using virtio-net headers, the header length
	 * can be:
	 *    - 10: the header corresponds to the struct nm_vnet_hdr
	 *    - 12: the first 10 bytes correspond to the struct
	 *          virtio_net_hdr, and the last 2 bytes store the
	 *          "mergeable buffers" info, which is an optional
	 *	    hint that can be zeroed for compatibility
	 *
	 * The destination header is therefore built according to the
	 * following table:
	 *
	 * SHL | DHL | destination header
	 * -----------------------------
	 *   0 |  10 | zero
	 *   0 |  12 | zero
	 *  10 |   0 | doesn't exist
	 *  10 |  12 | first 10 bytes are copied from source header, last 2 are zero
	 *  12 |   0 | doesn't exist
	 *  12 |  10 | copied from the first 10 bytes of source header
	 */
	bzero(dst, dst_na->up.virt_hdr_len);
	if (na->up.virt_hdr_len && dst_na->up.virt_hdr_len)
		memcpy(dst, src, sizeof(struct nm_vnet_hdr));
	/* Skip the virtio-net headers. */
	src += na->up.virt_hdr_len;
	src_len -= na->up.virt_hdr_len;
	dst += dst_na->up.virt_hdr_len;
	dst_len = dst_na->up.virt_hdr_len + src_len;

	/* Here it could be dst_len == 0 (which implies src_len == 0),
	 * so we avoid passing a zero length fragment.
	 */
	if (dst_len == 0) {
		ft_p++;
		src = ft_p->ft_buf;
		src_len = ft_p->ft_len;
		dst_len = src_len;
	}

	if (vh && vh->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		/* Software GSO: split the input packet into MTU-sized
		 * segments, each with a copy of the (fixed-up) header. */
		u_int gso_bytes = 0;
		/* Length of the GSO packet header. */
		u_int gso_hdr_len = 0;
		/* Pointer to the GSO packet header. Assume it is in a single fragment. */
		uint8_t *gso_hdr = NULL;
		/* Index of the current segment. */
		u_int gso_idx = 0;
		/* Payload data bytes segmented so far (e.g. TCP data bytes). */
		u_int segmented_bytes = 0;
		/* Is this an IPv4 or IPv6 GSO packet? */
		u_int ipv4 = 0;
		/* Length of the IP header (20 if IPv4, 40 if IPv6). */
		u_int iphlen = 0;
		/* Length of the Ethernet header (18 if 802.1q, otherwise 14). */
		u_int ethhlen = 14;
		/* Is this a TCP or an UDP GSO packet? */
		u_int tcp = ((vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN)
				== VIRTIO_NET_HDR_GSO_UDP) ? 0 : 1;

		/* Segment the GSO packet contained into the input slots (frags). */
		for (;;) {
			size_t copy;

			if (dst_slots >= *howmany) {
				/* We still have work to do, but we've run out of
				 * dst slots, so we have to drop the packet. */
				nm_prdis(1, "Not enough slots, dropping GSO packet");
				return;
			}

			/* Grab the GSO header if we don't have it. */
			if (!gso_hdr) {
				uint16_t ethertype;

				gso_hdr = src;

				/* Look at the 'Ethertype' field to see if this packet
				 * is IPv4 or IPv6, taking into account VLAN
				 * encapsulation. */
				for (;;) {
					if (src_len < ethhlen) {
						nm_prlim(1, "Short GSO fragment [eth], dropping");
						return;
					}
					ethertype = be16toh(*((uint16_t *)
							    (gso_hdr + ethhlen - 2)));
					if (ethertype != 0x8100) /* not 802.1q */
						break;
					/* Stacked VLAN tag: 4 more header bytes. */
					ethhlen += 4;
				}
				switch (ethertype) {
					case 0x0800:  /* IPv4 */
					{
						struct nm_iphdr *iph = (struct nm_iphdr *)
									(gso_hdr + ethhlen);

						if (src_len < ethhlen + 20) {
							nm_prlim(1, "Short GSO fragment "
							      "[IPv4], dropping");
							return;
						}
						ipv4 = 1;
						/* IHL field is in 32-bit words. */
						iphlen = 4 * (iph->version_ihl & 0x0F);
						break;
					}
					case 0x86DD:  /* IPv6 */
						ipv4 = 0;
						iphlen = 40;
						break;
					default:
						nm_prlim(1, "Unsupported ethertype, "
						      "dropping GSO packet");
						return;
				}
				nm_prdis(3, "type=%04x", ethertype);

				if (src_len < ethhlen + iphlen) {
					nm_prlim(1, "Short GSO fragment [IP], dropping");
					return;
				}

				/* Compute gso_hdr_len. For TCP we need to read the
				 * content of the 'Data Offset' field.
				 */
				if (tcp) {
					struct nm_tcphdr *tcph = (struct nm_tcphdr *)
								(gso_hdr + ethhlen + iphlen);

					if (src_len < ethhlen + iphlen + 20) {
						nm_prlim(1, "Short GSO fragment "
								"[TCP], dropping");
						return;
					}
					/* Data Offset is in the high nibble,
					 * counted in 32-bit words. */
					gso_hdr_len = ethhlen + iphlen +
						      4 * (tcph->doff >> 4);
				} else {
					gso_hdr_len = ethhlen + iphlen + 8; /* UDP */
				}

				if (src_len < gso_hdr_len) {
					nm_prlim(1, "Short GSO fragment [TCP/UDP], dropping");
					return;
				}

				nm_prdis(3, "gso_hdr_len %u gso_mtu %d", gso_hdr_len,
								   dst_na->mfs);

				/* Advance source pointers. */
				src += gso_hdr_len;
				src_len -= gso_hdr_len;
				if (src_len == 0) {
					ft_p++;
					if (ft_p == ft_end)
						break;
					src = ft_p->ft_buf;
					src_len = ft_p->ft_len;
				}
			}

			/* Fill in the header of the current segment. */
			if (gso_bytes == 0) {
				memcpy(dst, gso_hdr, gso_hdr_len);
				gso_bytes = gso_hdr_len;
			}

			/* Fill in data and update source and dest pointers. */
			copy = src_len;
			if (gso_bytes + copy > dst_na->mfs)
				copy = dst_na->mfs - gso_bytes;
			memcpy(dst + gso_bytes, src, copy);
			gso_bytes += copy;
			src += copy;
			src_len -= copy;

			/* A segment is complete or we have processed all the
			   GSO payload bytes. */
			if (gso_bytes >= dst_na->mfs ||
				(src_len == 0 && ft_p + 1 == ft_end)) {
				/* After raw segmentation, we must fix some header
				 * fields and compute checksums, in a protocol dependent
				 * way. */
				gso_fix_segment(dst + ethhlen, gso_bytes - ethhlen,
						ipv4, iphlen, tcp,
						gso_idx, segmented_bytes,
						src_len == 0 && ft_p + 1 == ft_end);

				nm_prdis("frame %u completed with %d bytes", gso_idx, (int)gso_bytes);
				dst_slot->len = gso_bytes;
				dst_slot->flags = 0;
				dst_slots++;
				segmented_bytes += gso_bytes - gso_hdr_len;

				gso_bytes = 0;
				gso_idx++;

				/* Next destination slot. */
				j_cur = nm_next(j_cur, lim);
				dst_slot = &dst_ring->slot[j_cur];
				dst = NMB(&dst_na->up, dst_slot);
			}

			/* Next input slot. */
			if (src_len == 0) {
				ft_p++;
				if (ft_p == ft_end)
					break;
				src = ft_p->ft_buf;
				src_len = ft_p->ft_len;
			}
		}
		nm_prdis(3, "%d bytes segmented", segmented_bytes);

	} else {
		/* Address of a checksum field into a destination slot. */
		uint16_t *check = NULL;
		/* Accumulator for an unfolded checksum. */
		rawsum_t csum = 0;

		/* Process a non-GSO packet: copy the fragments through
		 * unchanged, optionally computing the L4 checksum the
		 * source port deferred to us. */

		/* Init 'check' if necessary. 'check' points into the
		 * destination buffer, where the folded checksum will be
		 * stored after all fragments have been accumulated. */
		if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
			if (unlikely(vh->csum_offset + vh->csum_start > src_len))
				nm_prerr("invalid checksum request");
			else
				check = (uint16_t *)(dst + vh->csum_start +
						vh->csum_offset);
		}

		while (ft_p != ft_end) {
			/* Init/update the packet checksum if needed. The first
			 * fragment starts accumulating at csum_start; later
			 * fragments are folded in from their beginning. */
			if (vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
				if (!dst_slots)
					csum = nm_os_csum_raw(src + vh->csum_start,
								src_len - vh->csum_start, 0);
				else
					csum = nm_os_csum_raw(src, src_len, csum);
			}

			/* Round to a multiple of 64 */
			/* NOTE(review): the rounded copy reads up to 63 bytes
			 * past ft_len in the source buffer — presumably safe
			 * because netmap buffers are padded; the slot length
			 * written below ('dst_len') stays unrounded. */
			src_len = (src_len + 63) & ~63;

			if (ft_p->ft_flags & NS_INDIRECT) {
				if (copyin(src, dst, src_len)) {
					/* Invalid user pointer, pretend len is 0. */
					dst_len = 0;
				}
			} else {
				memcpy(dst, src, (int)src_len);
			}
			dst_slot->len = dst_len;
			dst_slots++;

			/* Next destination slot. */
			j_cur = nm_next(j_cur, lim);
			dst_slot = &dst_ring->slot[j_cur];
			dst = NMB(&dst_na->up, dst_slot);

			/* Next source slot. */
			ft_p++;
			src = ft_p->ft_buf;
			dst_len = src_len = ft_p->ft_len;
		}

		/* Finalize (fold) the checksum if needed. */
		if (check && vh && (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
			*check = nm_os_csum_fold(csum);
		}
		nm_prdis(3, "using %u dst_slots", dst_slots);

		/* A second pass on the destination slots to set the slot flags,
		 * using the right number of destination slots (the fragment
		 * count is only known once the loop above has finished).
		 */
		while (j_start != j_cur) {
			dst_slot = &dst_ring->slot[j_start];
			dst_slot->flags = (dst_slots << 8)| NS_MOREFRAG;
			j_start = nm_next(j_start, lim);
		}
		/* Clear NS_MOREFRAG flag on last entry. */
		dst_slot->flags = (dst_slots << 8);
	}

	/* Update howmany and j. This is to commit the use of
	 * those slots in the destination ring. */
	if (unlikely(dst_slots > *howmany)) {
		nm_prerr("bug: slot allocation error");
	}
	*j = j_cur;
	*howmany -= dst_slots;
}
493