xref: /freebsd/sys/dev/hyperv/netvsc/hn_rndis.c (revision 0957b409)
1 /*-
2  * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
3  * Copyright (c) 2010-2012 Citrix Inc.
4  * Copyright (c) 2012 NetApp Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_inet6.h"
33 #include "opt_inet.h"
34 
35 #include <sys/param.h>
36 #include <sys/socket.h>
37 #include <sys/systm.h>
38 #include <sys/taskqueue.h>
39 
40 #include <machine/atomic.h>
41 
42 #include <net/ethernet.h>
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/if_media.h>
46 #include <net/rndis.h>
47 
48 #include <netinet/in.h>
49 #include <netinet/ip.h>
50 #include <netinet/tcp_lro.h>
51 
52 #include <dev/hyperv/include/hyperv.h>
53 #include <dev/hyperv/include/hyperv_busdma.h>
54 #include <dev/hyperv/include/vmbus.h>
55 #include <dev/hyperv/include/vmbus_xact.h>
56 
57 #include <dev/hyperv/netvsc/ndis.h>
58 #include <dev/hyperv/netvsc/if_hnreg.h>
59 #include <dev/hyperv/netvsc/if_hnvar.h>
60 #include <dev/hyperv/netvsc/hn_nvs.h>
61 #include <dev/hyperv/netvsc/hn_rndis.h>
62 
/*
 * RNDIS request ids used by this file must be greater than
 * HN_RNDIS_RID_COMPAT_MAX; this is asserted both when a request is
 * executed and when its completion is received.
 */
#define HN_RNDIS_RID_COMPAT_MASK	0xffff
#define HN_RNDIS_RID_COMPAT_MAX		HN_RNDIS_RID_COMPAT_MASK

/* Maximum transfer size advertised in the RNDIS initialize request. */
#define HN_RNDIS_XFER_SIZE		2048

/*
 * Capability bit groups; a feature is only enabled when every bit of
 * its group is reported by the host.
 */
#define HN_NDIS_TXCSUM_CAP_IP4					\
	(NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
#define HN_NDIS_TXCSUM_CAP_TCP4					\
	(NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
#define HN_NDIS_TXCSUM_CAP_TCP6					\
	(NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT |	\
	 NDIS_TXCSUM_CAP_IP6EXT)
#define HN_NDIS_TXCSUM_CAP_UDP6					\
	(NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
#define HN_NDIS_LSOV2_CAP_IP6					\
	(NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)
79 
/*
 * Prototypes for the file-local helpers defined below.
 */
static const void	*hn_rndis_xact_exec1(struct hn_softc *sc,
			    struct vmbus_xact *xact, size_t reqlen,
			    struct hn_nvs_sendctx *sndc, size_t *comp_len);
static const void	*hn_rndis_xact_execute(struct hn_softc *sc,
			    struct vmbus_xact *xact, uint32_t rid,
			    size_t reqlen, size_t *comp_len0,
			    uint32_t comp_type);
static int		hn_rndis_query(struct hn_softc *sc, uint32_t oid,
			    const void *idata, size_t idlen, void *odata,
			    size_t *odlen0);
static int		hn_rndis_query2(struct hn_softc *sc, uint32_t oid,
			    const void *idata, size_t idlen, void *odata,
			    size_t *odlen0, size_t min_odlen);
static int		hn_rndis_set(struct hn_softc *sc, uint32_t oid,
			    const void *data, size_t dlen);
static int		hn_rndis_init(struct hn_softc *sc);
static int		hn_rndis_halt(struct hn_softc *sc);
static int		hn_rndis_conf_offload(struct hn_softc *sc, int mtu);
static int		hn_rndis_query_hwcaps(struct hn_softc *sc,
			    struct ndis_offload *caps);
97 
98 static __inline uint32_t
99 hn_rndis_rid(struct hn_softc *sc)
100 {
101 	uint32_t rid;
102 
103 again:
104 	rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1);
105 	if (rid == 0)
106 		goto again;
107 
108 	/* Use upper 16 bits for non-compat RNDIS messages. */
109 	return ((rid & 0xffff) << 16);
110 }
111 
112 void
113 hn_rndis_rx_ctrl(struct hn_softc *sc, const void *data, int dlen)
114 {
115 	const struct rndis_comp_hdr *comp;
116 	const struct rndis_msghdr *hdr;
117 
118 	KASSERT(dlen >= sizeof(*hdr), ("invalid RNDIS msg\n"));
119 	hdr = data;
120 
121 	switch (hdr->rm_type) {
122 	case REMOTE_NDIS_INITIALIZE_CMPLT:
123 	case REMOTE_NDIS_QUERY_CMPLT:
124 	case REMOTE_NDIS_SET_CMPLT:
125 	case REMOTE_NDIS_KEEPALIVE_CMPLT:	/* unused */
126 		if (dlen < sizeof(*comp)) {
127 			if_printf(sc->hn_ifp, "invalid RNDIS cmplt\n");
128 			return;
129 		}
130 		comp = data;
131 
132 		KASSERT(comp->rm_rid > HN_RNDIS_RID_COMPAT_MAX,
133 		    ("invalid RNDIS rid 0x%08x\n", comp->rm_rid));
134 		vmbus_xact_ctx_wakeup(sc->hn_xact, comp, dlen);
135 		break;
136 
137 	case REMOTE_NDIS_RESET_CMPLT:
138 		/*
139 		 * Reset completed, no rid.
140 		 *
141 		 * NOTE:
142 		 * RESET is not issued by hn(4), so this message should
143 		 * _not_ be observed.
144 		 */
145 		if_printf(sc->hn_ifp, "RESET cmplt received\n");
146 		break;
147 
148 	default:
149 		if_printf(sc->hn_ifp, "unknown RNDIS msg 0x%x\n",
150 		    hdr->rm_type);
151 		break;
152 	}
153 }
154 
155 int
156 hn_rndis_get_eaddr(struct hn_softc *sc, uint8_t *eaddr)
157 {
158 	size_t eaddr_len;
159 	int error;
160 
161 	eaddr_len = ETHER_ADDR_LEN;
162 	error = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0,
163 	    eaddr, &eaddr_len);
164 	if (error)
165 		return (error);
166 	if (eaddr_len != ETHER_ADDR_LEN) {
167 		if_printf(sc->hn_ifp, "invalid eaddr len %zu\n", eaddr_len);
168 		return (EINVAL);
169 	}
170 	return (0);
171 }
172 
173 int
174 hn_rndis_get_linkstatus(struct hn_softc *sc, uint32_t *link_status)
175 {
176 	size_t size;
177 	int error;
178 
179 	size = sizeof(*link_status);
180 	error = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0,
181 	    link_status, &size);
182 	if (error)
183 		return (error);
184 	if (size != sizeof(uint32_t)) {
185 		if_printf(sc->hn_ifp, "invalid link status len %zu\n", size);
186 		return (EINVAL);
187 	}
188 	return (0);
189 }
190 
191 int
192 hn_rndis_get_mtu(struct hn_softc *sc, uint32_t *mtu)
193 {
194 	size_t size;
195 	int error;
196 
197 	size = sizeof(*mtu);
198 	error = hn_rndis_query(sc, OID_GEN_MAXIMUM_FRAME_SIZE, NULL, 0,
199 	    mtu, &size);
200 	if (error)
201 		return (error);
202 	if (size != sizeof(uint32_t)) {
203 		if_printf(sc->hn_ifp, "invalid mtu len %zu\n", size);
204 		return (EINVAL);
205 	}
206 	return (0);
207 }
208 
209 static const void *
210 hn_rndis_xact_exec1(struct hn_softc *sc, struct vmbus_xact *xact, size_t reqlen,
211     struct hn_nvs_sendctx *sndc, size_t *comp_len)
212 {
213 	struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT];
214 	int gpa_cnt, error;
215 	bus_addr_t paddr;
216 
217 	KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0,
218 	    ("invalid request length %zu", reqlen));
219 
220 	/*
221 	 * Setup the SG list.
222 	 */
223 	paddr = vmbus_xact_req_paddr(xact);
224 	KASSERT((paddr & PAGE_MASK) == 0,
225 	    ("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr));
226 	for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) {
227 		int len = PAGE_SIZE;
228 
229 		if (reqlen == 0)
230 			break;
231 		if (reqlen < len)
232 			len = reqlen;
233 
234 		gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt;
235 		gpa[gpa_cnt].gpa_len = len;
236 		gpa[gpa_cnt].gpa_ofs = 0;
237 
238 		reqlen -= len;
239 	}
240 	KASSERT(reqlen == 0, ("still have %zu request data left", reqlen));
241 
242 	/*
243 	 * Send this RNDIS control message and wait for its completion
244 	 * message.
245 	 */
246 	vmbus_xact_activate(xact);
247 	error = hn_nvs_send_rndis_ctrl(sc->hn_prichan, sndc, gpa, gpa_cnt);
248 	if (error) {
249 		vmbus_xact_deactivate(xact);
250 		if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error);
251 		return (NULL);
252 	}
253 	return (vmbus_chan_xact_wait(sc->hn_prichan, xact, comp_len,
254 	    HN_CAN_SLEEP(sc)));
255 }
256 
257 static const void *
258 hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid,
259     size_t reqlen, size_t *comp_len0, uint32_t comp_type)
260 {
261 	const struct rndis_comp_hdr *comp;
262 	size_t comp_len, min_complen = *comp_len0;
263 
264 	KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid));
265 	KASSERT(min_complen >= sizeof(*comp),
266 	    ("invalid minimum complete len %zu", min_complen));
267 
268 	/*
269 	 * Execute the xact setup by the caller.
270 	 */
271 	comp = hn_rndis_xact_exec1(sc, xact, reqlen, &hn_nvs_sendctx_none,
272 	    &comp_len);
273 	if (comp == NULL)
274 		return (NULL);
275 
276 	/*
277 	 * Check this RNDIS complete message.
278 	 */
279 	if (comp_len < min_complen) {
280 		if (comp_len >= sizeof(*comp)) {
281 			/* rm_status field is valid */
282 			if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu, "
283 			    "status 0x%08x\n", comp_len, comp->rm_status);
284 		} else {
285 			if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n",
286 			    comp_len);
287 		}
288 		return (NULL);
289 	}
290 	if (comp->rm_len < min_complen) {
291 		if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n",
292 		    comp->rm_len);
293 		return (NULL);
294 	}
295 	if (comp->rm_type != comp_type) {
296 		if_printf(sc->hn_ifp, "unexpected RNDIS comp 0x%08x, "
297 		    "expect 0x%08x\n", comp->rm_type, comp_type);
298 		return (NULL);
299 	}
300 	if (comp->rm_rid != rid) {
301 		if_printf(sc->hn_ifp, "RNDIS comp rid mismatch %u, "
302 		    "expect %u\n", comp->rm_rid, rid);
303 		return (NULL);
304 	}
305 	/* All pass! */
306 	*comp_len0 = comp_len;
307 	return (comp);
308 }
309 
/*
 * Convenience wrapper around hn_rndis_query2(): the size of the output
 * buffer passed in '*odlen0' is also used as the minimum output length.
 */
static int
hn_rndis_query(struct hn_softc *sc, uint32_t oid,
    const void *idata, size_t idlen, void *odata, size_t *odlen0)
{
	const size_t min_odlen = *odlen0;

	return (hn_rndis_query2(sc, oid, idata, idlen, odata, odlen0,
	    min_odlen));
}
317 
318 static int
319 hn_rndis_query2(struct hn_softc *sc, uint32_t oid,
320     const void *idata, size_t idlen, void *odata, size_t *odlen0,
321     size_t min_odlen)
322 {
323 	struct rndis_query_req *req;
324 	const struct rndis_query_comp *comp;
325 	struct vmbus_xact *xact;
326 	size_t reqlen, odlen = *odlen0, comp_len;
327 	int error, ofs;
328 	uint32_t rid;
329 
330 	reqlen = sizeof(*req) + idlen;
331 	xact = vmbus_xact_get(sc->hn_xact, reqlen);
332 	if (xact == NULL) {
333 		if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid);
334 		return (ENXIO);
335 	}
336 	rid = hn_rndis_rid(sc);
337 	req = vmbus_xact_req_data(xact);
338 	req->rm_type = REMOTE_NDIS_QUERY_MSG;
339 	req->rm_len = reqlen;
340 	req->rm_rid = rid;
341 	req->rm_oid = oid;
342 	/*
343 	 * XXX
344 	 * This is _not_ RNDIS Spec conforming:
345 	 * "This MUST be set to 0 when there is no input data
346 	 *  associated with the OID."
347 	 *
348 	 * If this field was set to 0 according to the RNDIS Spec,
349 	 * Hyper-V would set non-SUCCESS status in the query
350 	 * completion.
351 	 */
352 	req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET;
353 
354 	if (idlen > 0) {
355 		req->rm_infobuflen = idlen;
356 		/* Input data immediately follows RNDIS query. */
357 		memcpy(req + 1, idata, idlen);
358 	}
359 
360 	comp_len = sizeof(*comp) + min_odlen;
361 	comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len,
362 	    REMOTE_NDIS_QUERY_CMPLT);
363 	if (comp == NULL) {
364 		if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid);
365 		error = EIO;
366 		goto done;
367 	}
368 
369 	if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
370 		if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: "
371 		    "status 0x%08x\n", oid, comp->rm_status);
372 		error = EIO;
373 		goto done;
374 	}
375 	if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) {
376 		/* No output data! */
377 		if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid);
378 		*odlen0 = 0;
379 		error = 0;
380 		goto done;
381 	}
382 
383 	/*
384 	 * Check output data length and offset.
385 	 */
386 	/* ofs is the offset from the beginning of comp. */
387 	ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->rm_infobufoffset);
388 	if (ofs < sizeof(*comp) || ofs + comp->rm_infobuflen > comp_len) {
389 		if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, "
390 		    "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen);
391 		error = EINVAL;
392 		goto done;
393 	}
394 
395 	/*
396 	 * Save output data.
397 	 */
398 	if (comp->rm_infobuflen < odlen)
399 		odlen = comp->rm_infobuflen;
400 	memcpy(odata, ((const uint8_t *)comp) + ofs, odlen);
401 	*odlen0 = odlen;
402 
403 	error = 0;
404 done:
405 	vmbus_xact_put(xact);
406 	return (error);
407 }
408 
409 int
410 hn_rndis_query_rsscaps(struct hn_softc *sc, int *rxr_cnt0)
411 {
412 	struct ndis_rss_caps in, caps;
413 	size_t caps_len;
414 	int error, indsz, rxr_cnt, hash_fnidx;
415 	uint32_t hash_func = 0, hash_types = 0;
416 
417 	*rxr_cnt0 = 0;
418 
419 	if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_20)
420 		return (EOPNOTSUPP);
421 
422 	memset(&in, 0, sizeof(in));
423 	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
424 	in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
425 	in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;
426 
427 	caps_len = NDIS_RSS_CAPS_SIZE;
428 	error = hn_rndis_query2(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
429 	    &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len, NDIS_RSS_CAPS_SIZE_6_0);
430 	if (error)
431 		return (error);
432 
433 	/*
434 	 * Preliminary verification.
435 	 */
436 	if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
437 		if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n",
438 		    caps.ndis_hdr.ndis_type);
439 		return (EINVAL);
440 	}
441 	if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
442 		if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n",
443 		    caps.ndis_hdr.ndis_rev);
444 		return (EINVAL);
445 	}
446 	if (caps.ndis_hdr.ndis_size > caps_len) {
447 		if_printf(sc->hn_ifp, "invalid NDIS objsize %u, "
448 		    "data size %zu\n", caps.ndis_hdr.ndis_size, caps_len);
449 		return (EINVAL);
450 	} else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
451 		if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n",
452 		    caps.ndis_hdr.ndis_size);
453 		return (EINVAL);
454 	}
455 
456 	/*
457 	 * Save information for later RSS configuration.
458 	 */
459 	if (caps.ndis_nrxr == 0) {
460 		if_printf(sc->hn_ifp, "0 RX rings!?\n");
461 		return (EINVAL);
462 	}
463 	if (bootverbose)
464 		if_printf(sc->hn_ifp, "%u RX rings\n", caps.ndis_nrxr);
465 	rxr_cnt = caps.ndis_nrxr;
466 
467 	if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE &&
468 	    caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) {
469 		if (caps.ndis_nind > NDIS_HASH_INDCNT) {
470 			if_printf(sc->hn_ifp,
471 			    "too many RSS indirect table entries %u\n",
472 			    caps.ndis_nind);
473 			return (EOPNOTSUPP);
474 		}
475 		if (!powerof2(caps.ndis_nind)) {
476 			if_printf(sc->hn_ifp, "RSS indirect table size is not "
477 			    "power-of-2 %u\n", caps.ndis_nind);
478 		}
479 
480 		if (bootverbose) {
481 			if_printf(sc->hn_ifp, "RSS indirect table size %u\n",
482 			    caps.ndis_nind);
483 		}
484 		indsz = caps.ndis_nind;
485 	} else {
486 		indsz = NDIS_HASH_INDCNT;
487 	}
488 	if (indsz < rxr_cnt) {
489 		if_printf(sc->hn_ifp, "# of RX rings (%d) > "
490 		    "RSS indirect table size %d\n", rxr_cnt, indsz);
491 		rxr_cnt = indsz;
492 	}
493 
494 	/*
495 	 * NOTE:
496 	 * Toeplitz is at the lowest bit, and it is prefered; so ffs(),
497 	 * instead of fls(), is used here.
498 	 */
499 	hash_fnidx = ffs(caps.ndis_caps & NDIS_RSS_CAP_HASHFUNC_MASK);
500 	if (hash_fnidx == 0) {
501 		if_printf(sc->hn_ifp, "no hash functions, caps 0x%08x\n",
502 		    caps.ndis_caps);
503 		return (EOPNOTSUPP);
504 	}
505 	hash_func = 1 << (hash_fnidx - 1); /* ffs is 1-based */
506 
507 	if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
508 		hash_types |= NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4;
509 	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
510 		hash_types |= NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6;
511 	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
512 		hash_types |= NDIS_HASH_IPV6_EX | NDIS_HASH_TCP_IPV6_EX;
513 	if (hash_types == 0) {
514 		if_printf(sc->hn_ifp, "no hash types, caps 0x%08x\n",
515 		    caps.ndis_caps);
516 		return (EOPNOTSUPP);
517 	}
518 	if (bootverbose)
519 		if_printf(sc->hn_ifp, "RSS caps %#x\n", caps.ndis_caps);
520 
521 	/* Commit! */
522 	sc->hn_rss_ind_size = indsz;
523 	sc->hn_rss_hcap = hash_func | hash_types;
524 	if (sc->hn_caps & HN_CAP_UDPHASH) {
525 		/* UDP 4-tuple hash is unconditionally enabled. */
526 		sc->hn_rss_hcap |= NDIS_HASH_UDP_IPV4_X;
527 	}
528 	*rxr_cnt0 = rxr_cnt;
529 	return (0);
530 }
531 
532 static int
533 hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen)
534 {
535 	struct rndis_set_req *req;
536 	const struct rndis_set_comp *comp;
537 	struct vmbus_xact *xact;
538 	size_t reqlen, comp_len;
539 	uint32_t rid;
540 	int error;
541 
542 	KASSERT(dlen > 0, ("invalid dlen %zu", dlen));
543 
544 	reqlen = sizeof(*req) + dlen;
545 	xact = vmbus_xact_get(sc->hn_xact, reqlen);
546 	if (xact == NULL) {
547 		if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid);
548 		return (ENXIO);
549 	}
550 	rid = hn_rndis_rid(sc);
551 	req = vmbus_xact_req_data(xact);
552 	req->rm_type = REMOTE_NDIS_SET_MSG;
553 	req->rm_len = reqlen;
554 	req->rm_rid = rid;
555 	req->rm_oid = oid;
556 	req->rm_infobuflen = dlen;
557 	req->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET;
558 	/* Data immediately follows RNDIS set. */
559 	memcpy(req + 1, data, dlen);
560 
561 	comp_len = sizeof(*comp);
562 	comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len,
563 	    REMOTE_NDIS_SET_CMPLT);
564 	if (comp == NULL) {
565 		if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid);
566 		error = EIO;
567 		goto done;
568 	}
569 
570 	if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
571 		if_printf(sc->hn_ifp, "RNDIS set 0x%08x failed: "
572 		    "status 0x%08x\n", oid, comp->rm_status);
573 		error = EIO;
574 		goto done;
575 	}
576 	error = 0;
577 done:
578 	vmbus_xact_put(xact);
579 	return (error);
580 }
581 
582 static int
583 hn_rndis_conf_offload(struct hn_softc *sc, int mtu)
584 {
585 	struct ndis_offload hwcaps;
586 	struct ndis_offload_params params;
587 	uint32_t caps = 0;
588 	size_t paramsz;
589 	int error, tso_maxsz, tso_minsg;
590 
591 	error = hn_rndis_query_hwcaps(sc, &hwcaps);
592 	if (error) {
593 		if_printf(sc->hn_ifp, "hwcaps query failed: %d\n", error);
594 		return (error);
595 	}
596 
597 	/* NOTE: 0 means "no change" */
598 	memset(&params, 0, sizeof(params));
599 
600 	params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
601 	if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_30) {
602 		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
603 		paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
604 	} else {
605 		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
606 		paramsz = NDIS_OFFLOAD_PARAMS_SIZE;
607 	}
608 	params.ndis_hdr.ndis_size = paramsz;
609 
610 	/*
611 	 * TSO4/TSO6 setup.
612 	 */
613 	tso_maxsz = IP_MAXPACKET;
614 	tso_minsg = 2;
615 	if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
616 		caps |= HN_CAP_TSO4;
617 		params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;
618 
619 		if (hwcaps.ndis_lsov2.ndis_ip4_maxsz < tso_maxsz)
620 			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip4_maxsz;
621 		if (hwcaps.ndis_lsov2.ndis_ip4_minsg > tso_minsg)
622 			tso_minsg = hwcaps.ndis_lsov2.ndis_ip4_minsg;
623 	}
624 	if ((hwcaps.ndis_lsov2.ndis_ip6_encap & NDIS_OFFLOAD_ENCAP_8023) &&
625 	    (hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) ==
626 	    HN_NDIS_LSOV2_CAP_IP6) {
627 		caps |= HN_CAP_TSO6;
628 		params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;
629 
630 		if (hwcaps.ndis_lsov2.ndis_ip6_maxsz < tso_maxsz)
631 			tso_maxsz = hwcaps.ndis_lsov2.ndis_ip6_maxsz;
632 		if (hwcaps.ndis_lsov2.ndis_ip6_minsg > tso_minsg)
633 			tso_minsg = hwcaps.ndis_lsov2.ndis_ip6_minsg;
634 	}
635 	sc->hn_ndis_tso_szmax = 0;
636 	sc->hn_ndis_tso_sgmin = 0;
637 	if (caps & (HN_CAP_TSO4 | HN_CAP_TSO6)) {
638 		KASSERT(tso_maxsz <= IP_MAXPACKET,
639 		    ("invalid NDIS TSO maxsz %d", tso_maxsz));
640 		KASSERT(tso_minsg >= 2,
641 		    ("invalid NDIS TSO minsg %d", tso_minsg));
642 		if (tso_maxsz < tso_minsg * mtu) {
643 			if_printf(sc->hn_ifp, "invalid NDIS TSO config: "
644 			    "maxsz %d, minsg %d, mtu %d; "
645 			    "disable TSO4 and TSO6\n",
646 			    tso_maxsz, tso_minsg, mtu);
647 			caps &= ~(HN_CAP_TSO4 | HN_CAP_TSO6);
648 			params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_OFF;
649 			params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_OFF;
650 		} else {
651 			sc->hn_ndis_tso_szmax = tso_maxsz;
652 			sc->hn_ndis_tso_sgmin = tso_minsg;
653 			if (bootverbose) {
654 				if_printf(sc->hn_ifp, "NDIS TSO "
655 				    "szmax %d sgmin %d\n",
656 				    sc->hn_ndis_tso_szmax,
657 				    sc->hn_ndis_tso_sgmin);
658 			}
659 		}
660 	}
661 
662 	/* IPv4 checksum */
663 	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4) ==
664 	    HN_NDIS_TXCSUM_CAP_IP4) {
665 		caps |= HN_CAP_IPCS;
666 		params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
667 	}
668 	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) {
669 		if (params.ndis_ip4csum == NDIS_OFFLOAD_PARAM_TX)
670 			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX;
671 		else
672 			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_RX;
673 	}
674 
675 	/* TCP4 checksum */
676 	if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4) ==
677 	    HN_NDIS_TXCSUM_CAP_TCP4) {
678 		caps |= HN_CAP_TCP4CS;
679 		params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
680 	}
681 	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) {
682 		if (params.ndis_tcp4csum == NDIS_OFFLOAD_PARAM_TX)
683 			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX;
684 		else
685 			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_RX;
686 	}
687 
688 	/* UDP4 checksum */
689 	if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
690 		caps |= HN_CAP_UDP4CS;
691 		params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
692 	}
693 	if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) {
694 		if (params.ndis_udp4csum == NDIS_OFFLOAD_PARAM_TX)
695 			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX;
696 		else
697 			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_RX;
698 	}
699 
700 	/* TCP6 checksum */
701 	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6) ==
702 	    HN_NDIS_TXCSUM_CAP_TCP6) {
703 		caps |= HN_CAP_TCP6CS;
704 		params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
705 	}
706 	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) {
707 		if (params.ndis_tcp6csum == NDIS_OFFLOAD_PARAM_TX)
708 			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX;
709 		else
710 			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_RX;
711 	}
712 
713 	/* UDP6 checksum */
714 	if ((hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_UDP6) ==
715 	    HN_NDIS_TXCSUM_CAP_UDP6) {
716 		caps |= HN_CAP_UDP6CS;
717 		params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
718 	}
719 	if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) {
720 		if (params.ndis_udp6csum == NDIS_OFFLOAD_PARAM_TX)
721 			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX;
722 		else
723 			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX;
724 	}
725 
726 	if (bootverbose) {
727 		if_printf(sc->hn_ifp, "offload csum: "
728 		    "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n",
729 		    params.ndis_ip4csum,
730 		    params.ndis_tcp4csum,
731 		    params.ndis_udp4csum,
732 		    params.ndis_tcp6csum,
733 		    params.ndis_udp6csum);
734 		if_printf(sc->hn_ifp, "offload lsov2: ip4 %u, ip6 %u\n",
735 		    params.ndis_lsov2_ip4,
736 		    params.ndis_lsov2_ip6);
737 	}
738 
739 	error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, paramsz);
740 	if (error) {
741 		if_printf(sc->hn_ifp, "offload config failed: %d\n", error);
742 		return (error);
743 	}
744 
745 	if (bootverbose)
746 		if_printf(sc->hn_ifp, "offload config done\n");
747 	sc->hn_caps |= caps;
748 	return (0);
749 }
750 
751 int
752 hn_rndis_conf_rss(struct hn_softc *sc, uint16_t flags)
753 {
754 	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
755 	struct ndis_rss_params *prm = &rss->rss_params;
756 	int error, rss_size;
757 
758 	/*
759 	 * Only NDIS 6.20+ is supported:
760 	 * We only support 4bytes element in indirect table, which has been
761 	 * adopted since NDIS 6.20.
762 	 */
763 	KASSERT(sc->hn_ndis_ver >= HN_NDIS_VERSION_6_20,
764 	    ("NDIS 6.20+ is required, NDIS version 0x%08x", sc->hn_ndis_ver));
765 
766 	/* XXX only one can be specified through, popcnt? */
767 	KASSERT((sc->hn_rss_hash & NDIS_HASH_FUNCTION_MASK),
768 	    ("no hash func %08x", sc->hn_rss_hash));
769 	KASSERT((sc->hn_rss_hash & NDIS_HASH_STD),
770 	    ("no standard hash types %08x", sc->hn_rss_hash));
771 	KASSERT(sc->hn_rss_ind_size > 0, ("no indirect table size"));
772 
773 	if (bootverbose) {
774 		if_printf(sc->hn_ifp, "RSS indirect table size %d, "
775 		    "hash 0x%08x\n", sc->hn_rss_ind_size, sc->hn_rss_hash);
776 	}
777 
778 	/*
779 	 * NOTE:
780 	 * DO NOT whack rss_key and rss_ind, which are setup by the caller.
781 	 */
782 	memset(prm, 0, sizeof(*prm));
783 	rss_size = NDIS_RSSPRM_TOEPLITZ_SIZE(sc->hn_rss_ind_size);
784 
785 	prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
786 	prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
787 	prm->ndis_hdr.ndis_size = rss_size;
788 	prm->ndis_flags = flags;
789 	prm->ndis_hash = sc->hn_rss_hash &
790 	    (NDIS_HASH_FUNCTION_MASK | NDIS_HASH_STD);
791 	prm->ndis_indsize = sizeof(rss->rss_ind[0]) * sc->hn_rss_ind_size;
792 	prm->ndis_indoffset =
793 	    __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
794 	prm->ndis_keysize = sizeof(rss->rss_key);
795 	prm->ndis_keyoffset =
796 	    __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
797 
798 	error = hn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS,
799 	    rss, rss_size);
800 	if (error) {
801 		if_printf(sc->hn_ifp, "RSS config failed: %d\n", error);
802 	} else {
803 		if (bootverbose)
804 			if_printf(sc->hn_ifp, "RSS config done\n");
805 	}
806 	return (error);
807 }
808 
809 int
810 hn_rndis_set_rxfilter(struct hn_softc *sc, uint32_t filter)
811 {
812 	int error;
813 
814 	error = hn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER,
815 	    &filter, sizeof(filter));
816 	if (error) {
817 		if_printf(sc->hn_ifp, "set RX filter 0x%08x failed: %d\n",
818 		    filter, error);
819 	} else {
820 		if (bootverbose) {
821 			if_printf(sc->hn_ifp, "set RX filter 0x%08x done\n",
822 			    filter);
823 		}
824 	}
825 	return (error);
826 }
827 
828 static int
829 hn_rndis_init(struct hn_softc *sc)
830 {
831 	struct rndis_init_req *req;
832 	const struct rndis_init_comp *comp;
833 	struct vmbus_xact *xact;
834 	size_t comp_len;
835 	uint32_t rid;
836 	int error;
837 
838 	xact = vmbus_xact_get(sc->hn_xact, sizeof(*req));
839 	if (xact == NULL) {
840 		if_printf(sc->hn_ifp, "no xact for RNDIS init\n");
841 		return (ENXIO);
842 	}
843 	rid = hn_rndis_rid(sc);
844 	req = vmbus_xact_req_data(xact);
845 	req->rm_type = REMOTE_NDIS_INITIALIZE_MSG;
846 	req->rm_len = sizeof(*req);
847 	req->rm_rid = rid;
848 	req->rm_ver_major = RNDIS_VERSION_MAJOR;
849 	req->rm_ver_minor = RNDIS_VERSION_MINOR;
850 	req->rm_max_xfersz = HN_RNDIS_XFER_SIZE;
851 
852 	comp_len = RNDIS_INIT_COMP_SIZE_MIN;
853 	comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len,
854 	    REMOTE_NDIS_INITIALIZE_CMPLT);
855 	if (comp == NULL) {
856 		if_printf(sc->hn_ifp, "exec RNDIS init failed\n");
857 		error = EIO;
858 		goto done;
859 	}
860 
861 	if (comp->rm_status != RNDIS_STATUS_SUCCESS) {
862 		if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n",
863 		    comp->rm_status);
864 		error = EIO;
865 		goto done;
866 	}
867 	sc->hn_rndis_agg_size = comp->rm_pktmaxsz;
868 	sc->hn_rndis_agg_pkts = comp->rm_pktmaxcnt;
869 	sc->hn_rndis_agg_align = 1U << comp->rm_align;
870 
871 	if (sc->hn_rndis_agg_align < sizeof(uint32_t)) {
872 		/*
873 		 * The RNDIS packet messsage encap assumes that the RNDIS
874 		 * packet message is at least 4 bytes aligned.  Fix up the
875 		 * alignment here, if the remote side sets the alignment
876 		 * too low.
877 		 */
878 		if_printf(sc->hn_ifp, "fixup RNDIS aggpkt align: %u -> %zu\n",
879 		    sc->hn_rndis_agg_align, sizeof(uint32_t));
880 		sc->hn_rndis_agg_align = sizeof(uint32_t);
881 	}
882 
883 	if (bootverbose) {
884 		if_printf(sc->hn_ifp, "RNDIS ver %u.%u, "
885 		    "aggpkt size %u, aggpkt cnt %u, aggpkt align %u\n",
886 		    comp->rm_ver_major, comp->rm_ver_minor,
887 		    sc->hn_rndis_agg_size, sc->hn_rndis_agg_pkts,
888 		    sc->hn_rndis_agg_align);
889 	}
890 	error = 0;
891 done:
892 	vmbus_xact_put(xact);
893 	return (error);
894 }
895 
896 static int
897 hn_rndis_halt(struct hn_softc *sc)
898 {
899 	struct vmbus_xact *xact;
900 	struct rndis_halt_req *halt;
901 	struct hn_nvs_sendctx sndc;
902 	size_t comp_len;
903 
904 	xact = vmbus_xact_get(sc->hn_xact, sizeof(*halt));
905 	if (xact == NULL) {
906 		if_printf(sc->hn_ifp, "no xact for RNDIS halt\n");
907 		return (ENXIO);
908 	}
909 	halt = vmbus_xact_req_data(xact);
910 	halt->rm_type = REMOTE_NDIS_HALT_MSG;
911 	halt->rm_len = sizeof(*halt);
912 	halt->rm_rid = hn_rndis_rid(sc);
913 
914 	/* No RNDIS completion; rely on NVS message send completion */
915 	hn_nvs_sendctx_init(&sndc, hn_nvs_sent_xact, xact);
916 	hn_rndis_xact_exec1(sc, xact, sizeof(*halt), &sndc, &comp_len);
917 
918 	vmbus_xact_put(xact);
919 	if (bootverbose)
920 		if_printf(sc->hn_ifp, "RNDIS halt done\n");
921 	return (0);
922 }
923 
924 static int
925 hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps)
926 {
927 	struct ndis_offload in;
928 	size_t caps_len, size;
929 	int error;
930 
931 	memset(&in, 0, sizeof(in));
932 	in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
933 	if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_30) {
934 		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
935 		size = NDIS_OFFLOAD_SIZE;
936 	} else if (sc->hn_ndis_ver >= HN_NDIS_VERSION_6_1) {
937 		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
938 		size = NDIS_OFFLOAD_SIZE_6_1;
939 	} else {
940 		in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
941 		size = NDIS_OFFLOAD_SIZE_6_0;
942 	}
943 	in.ndis_hdr.ndis_size = size;
944 
945 	caps_len = NDIS_OFFLOAD_SIZE;
946 	error = hn_rndis_query2(sc, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
947 	    &in, size, caps, &caps_len, NDIS_OFFLOAD_SIZE_6_0);
948 	if (error)
949 		return (error);
950 
951 	/*
952 	 * Preliminary verification.
953 	 */
954 	if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
955 		if_printf(sc->hn_ifp, "invalid NDIS objtype 0x%02x\n",
956 		    caps->ndis_hdr.ndis_type);
957 		return (EINVAL);
958 	}
959 	if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
960 		if_printf(sc->hn_ifp, "invalid NDIS objrev 0x%02x\n",
961 		    caps->ndis_hdr.ndis_rev);
962 		return (EINVAL);
963 	}
964 	if (caps->ndis_hdr.ndis_size > caps_len) {
965 		if_printf(sc->hn_ifp, "invalid NDIS objsize %u, "
966 		    "data size %zu\n", caps->ndis_hdr.ndis_size, caps_len);
967 		return (EINVAL);
968 	} else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
969 		if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n",
970 		    caps->ndis_hdr.ndis_size);
971 		return (EINVAL);
972 	}
973 
974 	if (bootverbose) {
975 		/*
976 		 * NOTE:
977 		 * caps->ndis_hdr.ndis_size MUST be checked before accessing
978 		 * NDIS 6.1+ specific fields.
979 		 */
980 		if_printf(sc->hn_ifp, "hwcaps rev %u\n",
981 		    caps->ndis_hdr.ndis_rev);
982 
983 		if_printf(sc->hn_ifp, "hwcaps csum: "
984 		    "ip4 tx 0x%x/0x%x rx 0x%x/0x%x, "
985 		    "ip6 tx 0x%x/0x%x rx 0x%x/0x%x\n",
986 		    caps->ndis_csum.ndis_ip4_txcsum,
987 		    caps->ndis_csum.ndis_ip4_txenc,
988 		    caps->ndis_csum.ndis_ip4_rxcsum,
989 		    caps->ndis_csum.ndis_ip4_rxenc,
990 		    caps->ndis_csum.ndis_ip6_txcsum,
991 		    caps->ndis_csum.ndis_ip6_txenc,
992 		    caps->ndis_csum.ndis_ip6_rxcsum,
993 		    caps->ndis_csum.ndis_ip6_rxenc);
994 		if_printf(sc->hn_ifp, "hwcaps lsov2: "
995 		    "ip4 maxsz %u minsg %u encap 0x%x, "
996 		    "ip6 maxsz %u minsg %u encap 0x%x opts 0x%x\n",
997 		    caps->ndis_lsov2.ndis_ip4_maxsz,
998 		    caps->ndis_lsov2.ndis_ip4_minsg,
999 		    caps->ndis_lsov2.ndis_ip4_encap,
1000 		    caps->ndis_lsov2.ndis_ip6_maxsz,
1001 		    caps->ndis_lsov2.ndis_ip6_minsg,
1002 		    caps->ndis_lsov2.ndis_ip6_encap,
1003 		    caps->ndis_lsov2.ndis_ip6_opts);
1004 	}
1005 	return (0);
1006 }
1007 
/*
 * Bring up RNDIS: initialize it and then configure the NDIS offload
 * settings for the given 'mtu'.
 *
 * '*init_done' is set to 1 once RNDIS initialization succeeded and to 0
 * otherwise.  Returns the initialization error, or 0; a failure of the
 * offload configuration is not reported to the caller.
 */
int
hn_rndis_attach(struct hn_softc *sc, int mtu, int *init_done)
{
	int rc;

	*init_done = 0;

	/* Initialize RNDIS. */
	rc = hn_rndis_init(sc);
	if (rc != 0)
		return (rc);
	*init_done = 1;

	/* Configure NDIS offload settings; the result is ignored. */
	(void)hn_rndis_conf_offload(sc, mtu);
	return (0);
}
1029 
/*
 * Tear down RNDIS by sending the halt message; the result of the halt
 * is not reported.
 */
void
hn_rndis_detach(struct hn_softc *sc)
{

	/* Halt the RNDIS. */
	(void)hn_rndis_halt(sc);
}
1037