xref: /illumos-gate/usr/src/uts/common/io/dld/dld_proto.c (revision 57c40785)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Driver
30  */
31 
32 #include <sys/types.h>
33 #include <sys/debug.h>
34 #include <sys/sysmacros.h>
35 #include <sys/stream.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/strsun.h>
39 #include <sys/cpuvar.h>
40 #include <sys/dlpi.h>
41 #include <netinet/in.h>
42 #include <sys/sdt.h>
43 #include <sys/strsubr.h>
44 #include <sys/vlan.h>
45 #include <sys/mac.h>
46 #include <sys/dls.h>
47 #include <sys/dld.h>
48 #include <sys/dld_impl.h>
49 #include <sys/dls_soft_ring.h>
50 
/*
 * Common signature of the DLPI request handlers in this file: the stream
 * the request arrived on, the request viewed as a DL_primitives union, and
 * the message block that carries it.  The handlers return a boolean_t.
 */
typedef boolean_t proto_reqfunc_t(dld_str_t *, union DL_primitives *, mblk_t *);

/*
 * Forward declarations of the per-primitive handlers dispatched from
 * dld_wput_proto_nondata().
 */
static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
    proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
    proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
    proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
    proto_notify_req, proto_passive_req;

/* Polling capability helpers. */
static void proto_poll_disable(dld_str_t *);
static boolean_t proto_poll_enable(dld_str_t *, dl_capab_dls_t *);

/* Soft ring and capability negotiation helpers. */
static void proto_soft_ring_disable(dld_str_t *);
static boolean_t proto_soft_ring_enable(dld_str_t *, dl_capab_dls_t *);
static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *);
static void proto_change_soft_ring_fanout(dld_str_t *, int);
66 
/*
 * Evaluates to true when 'state' is one of the attach, detach, bind or
 * unbind pending DLPI states.  The argument may be evaluated up to four
 * times, so it must not have side effects.
 */
#define	DL_ACK_PENDING(state) \
	((state) == DL_ATTACH_PENDING || \
	(state) == DL_DETACH_PENDING || \
	(state) == DL_BIND_PENDING || \
	(state) == DL_UNBIND_PENDING)
72 
73 /*
74  * Process a DLPI protocol message.
75  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
76  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
77  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
78  * as 'passive' and forbids it from being subsequently made 'active'
79  * by the above primitives.
80  */
81 void
82 dld_wput_proto_nondata(dld_str_t *dsp, mblk_t *mp)
83 {
84 	union DL_primitives	*udlp;
85 	t_uscalar_t		prim;
86 
87 	ASSERT(MBLKL(mp) >= sizeof (t_uscalar_t));
88 
89 	udlp = (union DL_primitives *)mp->b_rptr;
90 	prim = udlp->dl_primitive;
91 
92 	switch (prim) {
93 	case DL_INFO_REQ:
94 		(void) proto_info_req(dsp, udlp, mp);
95 		break;
96 	case DL_BIND_REQ:
97 		(void) proto_bind_req(dsp, udlp, mp);
98 		break;
99 	case DL_UNBIND_REQ:
100 		(void) proto_unbind_req(dsp, udlp, mp);
101 		break;
102 	case DL_UDQOS_REQ:
103 		(void) proto_udqos_req(dsp, udlp, mp);
104 		break;
105 	case DL_ATTACH_REQ:
106 		(void) proto_attach_req(dsp, udlp, mp);
107 		break;
108 	case DL_DETACH_REQ:
109 		(void) proto_detach_req(dsp, udlp, mp);
110 		break;
111 	case DL_ENABMULTI_REQ:
112 		(void) proto_enabmulti_req(dsp, udlp, mp);
113 		break;
114 	case DL_DISABMULTI_REQ:
115 		(void) proto_disabmulti_req(dsp, udlp, mp);
116 		break;
117 	case DL_PROMISCON_REQ:
118 		(void) proto_promiscon_req(dsp, udlp, mp);
119 		break;
120 	case DL_PROMISCOFF_REQ:
121 		(void) proto_promiscoff_req(dsp, udlp, mp);
122 		break;
123 	case DL_PHYS_ADDR_REQ:
124 		(void) proto_physaddr_req(dsp, udlp, mp);
125 		break;
126 	case DL_SET_PHYS_ADDR_REQ:
127 		(void) proto_setphysaddr_req(dsp, udlp, mp);
128 		break;
129 	case DL_NOTIFY_REQ:
130 		(void) proto_notify_req(dsp, udlp, mp);
131 		break;
132 	case DL_CAPABILITY_REQ:
133 		(void) proto_capability_req(dsp, udlp, mp);
134 		break;
135 	case DL_PASSIVE_REQ:
136 		(void) proto_passive_req(dsp, udlp, mp);
137 		break;
138 	default:
139 		(void) proto_req(dsp, udlp, mp);
140 		break;
141 	}
142 }
143 
/*
 * Arithmetic negation.  The expansion is fully parenthesized so that it
 * always behaves as a single operand wherever the macro is used.
 */
#define	NEG(x)	(-(x))
145 
/*
 * Layout of the DL_INFO_ACK reply assembled by proto_info_req(): the fixed
 * acknowledgement followed by the variable areas that its offset/length
 * fields refer to.
 */
typedef struct dl_info_ack_wrapper {
	/* The ack itself; proto_info_req() asserts it sits at offset zero. */
	dl_info_ack_t		dl_info;
	/* DLSAP address: MAC address followed by a 16-bit SAP. */
	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	/* Media broadcast address. */
	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
	/* Supported QOS range. */
	dl_qos_cl_range1_t	dl_qos_range1;
	/* Currently selected QOS. */
	dl_qos_cl_sel1_t	dl_qos_sel1;
} dl_info_ack_wrapper_t;
153 
154 /*
155  * DL_INFO_REQ
156  */
157 /*ARGSUSED*/
158 static boolean_t
159 proto_info_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
160 {
161 	dl_info_ack_wrapper_t	*dlwp;
162 	dl_info_ack_t		*dlp;
163 	dl_qos_cl_sel1_t	*selp;
164 	dl_qos_cl_range1_t	*rangep;
165 	uint8_t			*addr;
166 	uint8_t			*brdcst_addr;
167 	uint_t			addr_length;
168 	uint_t			sap_length;
169 	mac_info_t		minfo;
170 	mac_info_t		*minfop;
171 	queue_t			*q = dsp->ds_wq;
172 
173 	/*
174 	 * Swap the request message for one large enough to contain the
175 	 * wrapper structure defined above.
176 	 */
177 	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
178 	    M_PCPROTO, 0)) == NULL)
179 		return (B_FALSE);
180 
181 	rw_enter(&dsp->ds_lock, RW_READER);
182 
183 	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
184 	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
185 
186 	dlp = &(dlwp->dl_info);
187 	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
188 
189 	dlp->dl_primitive = DL_INFO_ACK;
190 
191 	/*
192 	 * Set up the sub-structure pointers.
193 	 */
194 	addr = dlwp->dl_addr;
195 	brdcst_addr = dlwp->dl_brdcst_addr;
196 	rangep = &(dlwp->dl_qos_range1);
197 	selp = &(dlwp->dl_qos_sel1);
198 
199 	/*
200 	 * This driver supports only version 2 connectionless DLPI provider
201 	 * nodes.
202 	 */
203 	dlp->dl_service_mode = DL_CLDLS;
204 	dlp->dl_version = DL_VERSION_2;
205 
206 	/*
207 	 * Set the style of the provider
208 	 */
209 	dlp->dl_provider_style = dsp->ds_style;
210 	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
211 	    dlp->dl_provider_style == DL_STYLE2);
212 
213 	/*
214 	 * Set the current DLPI state.
215 	 */
216 	dlp->dl_current_state = dsp->ds_dlstate;
217 
218 	/*
219 	 * Gratuitously set the media type. This is to deal with modules
220 	 * that assume the media type is known prior to DL_ATTACH_REQ
221 	 * being completed.
222 	 */
223 	dlp->dl_mac_type = DL_ETHER;
224 
225 	/*
226 	 * If the stream is not at least attached we try to retrieve the
227 	 * mac_info using mac_info_get()
228 	 */
229 	if (dsp->ds_dlstate == DL_UNATTACHED ||
230 	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
231 	    dsp->ds_dlstate == DL_DETACH_PENDING) {
232 		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
233 			/*
234 			 * Cannot find mac_info. giving up.
235 			 */
236 			goto done;
237 		}
238 		minfop = &minfo;
239 	} else {
240 		minfop = (mac_info_t *)dsp->ds_mip;
241 		/* We can only get the sdu if we're attached. */
242 		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
243 	}
244 
245 	/*
246 	 * Set the media type (properly this time).
247 	 */
248 	if (dsp->ds_native)
249 		dlp->dl_mac_type = minfop->mi_nativemedia;
250 	else
251 		dlp->dl_mac_type = minfop->mi_media;
252 
253 	/*
254 	 * Set the DLSAP length. We only support 16 bit values and they
255 	 * appear after the MAC address portion of DLSAP addresses.
256 	 */
257 	sap_length = sizeof (uint16_t);
258 	dlp->dl_sap_length = NEG(sap_length);
259 
260 	addr_length = minfop->mi_addr_length;
261 
262 	/*
263 	 * Copy in the media broadcast address.
264 	 */
265 	if (minfop->mi_brdcst_addr != NULL) {
266 		dlp->dl_brdcst_addr_offset =
267 		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
268 		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
269 		dlp->dl_brdcst_addr_length = addr_length;
270 	}
271 
272 	dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
273 	dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
274 
275 	rangep->dl_qos_type = DL_QOS_CL_RANGE1;
276 	rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
277 	rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
278 	rangep->dl_protection.dl_min = DL_UNKNOWN;
279 	rangep->dl_protection.dl_max = DL_UNKNOWN;
280 	rangep->dl_residual_error = DL_UNKNOWN;
281 
282 	/*
283 	 * Specify the supported range of priorities.
284 	 */
285 	rangep->dl_priority.dl_min = 0;
286 	rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
287 
288 	dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
289 	dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
290 
291 	selp->dl_qos_type = DL_QOS_CL_SEL1;
292 	selp->dl_trans_delay = DL_UNKNOWN;
293 	selp->dl_protection = DL_UNKNOWN;
294 	selp->dl_residual_error = DL_UNKNOWN;
295 
296 	/*
297 	 * Specify the current priority (which can be changed by
298 	 * the DL_UDQOS_REQ primitive).
299 	 */
300 	selp->dl_priority = dsp->ds_pri;
301 
302 	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
303 	if (dsp->ds_dlstate == DL_IDLE) {
304 		/*
305 		 * The stream is bound. Therefore we can formulate a valid
306 		 * DLSAP address.
307 		 */
308 		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
309 		if (addr_length > 0)
310 			bcopy(dsp->ds_curr_addr, addr, addr_length);
311 		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
312 	}
313 
314 done:
315 	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
316 	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
317 	    dlp->dl_qos_range_length != 0));
318 	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
319 	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
320 	    dlp->dl_brdcst_addr_length != 0));
321 
322 	rw_exit(&dsp->ds_lock);
323 
324 	qreply(q, mp);
325 	return (B_TRUE);
326 }
327 
328 /*
329  * DL_ATTACH_REQ
330  */
331 static boolean_t
332 proto_attach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
333 {
334 	dl_attach_req_t	*dlp = (dl_attach_req_t *)udlp;
335 	int		err = 0;
336 	t_uscalar_t	dl_err;
337 	queue_t		*q = dsp->ds_wq;
338 
339 	rw_enter(&dsp->ds_lock, RW_WRITER);
340 
341 	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
342 	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
343 		dl_err = DL_BADPRIM;
344 		goto failed;
345 	}
346 
347 	if (dsp->ds_dlstate != DL_UNATTACHED) {
348 		dl_err = DL_OUTSTATE;
349 		goto failed;
350 	}
351 
352 	dsp->ds_dlstate = DL_ATTACH_PENDING;
353 
354 	err = dld_str_attach(dsp, dlp->dl_ppa);
355 	if (err != 0) {
356 		switch (err) {
357 		case ENOENT:
358 			dl_err = DL_BADPPA;
359 			err = 0;
360 			break;
361 		default:
362 			dl_err = DL_SYSERR;
363 			break;
364 		}
365 		dsp->ds_dlstate = DL_UNATTACHED;
366 		goto failed;
367 	}
368 	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
369 	rw_exit(&dsp->ds_lock);
370 
371 	dlokack(q, mp, DL_ATTACH_REQ);
372 	return (B_TRUE);
373 failed:
374 	rw_exit(&dsp->ds_lock);
375 	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
376 	return (B_FALSE);
377 }
378 
379 /*ARGSUSED*/
380 static boolean_t
381 proto_detach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
382 {
383 	queue_t		*q = dsp->ds_wq;
384 	t_uscalar_t	dl_err;
385 
386 	rw_enter(&dsp->ds_lock, RW_WRITER);
387 
388 	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
389 		dl_err = DL_BADPRIM;
390 		goto failed;
391 	}
392 
393 	if (dsp->ds_dlstate != DL_UNBOUND) {
394 		dl_err = DL_OUTSTATE;
395 		goto failed;
396 	}
397 
398 	if (dsp->ds_style == DL_STYLE1) {
399 		dl_err = DL_BADPRIM;
400 		goto failed;
401 	}
402 
403 	dsp->ds_dlstate = DL_DETACH_PENDING;
404 	dld_str_detach(dsp);
405 
406 	rw_exit(&dsp->ds_lock);
407 	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
408 	return (B_TRUE);
409 failed:
410 	rw_exit(&dsp->ds_lock);
411 	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
412 	return (B_FALSE);
413 }
414 
415 /*
416  * DL_BIND_REQ
417  */
418 static boolean_t
419 proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
420 {
421 	dl_bind_req_t	*dlp = (dl_bind_req_t *)udlp;
422 	int		err = 0;
423 	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
424 	uint_t		dlsap_addr_length;
425 	t_uscalar_t	dl_err;
426 	t_scalar_t	sap;
427 	queue_t		*q = dsp->ds_wq;
428 
429 	/*
430 	 * Because control message processing is serialized, we don't need
431 	 * to hold any locks to read any fields of dsp; we only need ds_lock
432 	 * to update the ds_dlstate, ds_sap and ds_passivestate fields.
433 	 */
434 	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
435 		dl_err = DL_BADPRIM;
436 		goto failed;
437 	}
438 
439 	if (dlp->dl_xidtest_flg != 0) {
440 		dl_err = DL_NOAUTO;
441 		goto failed;
442 	}
443 
444 	if (dlp->dl_service_mode != DL_CLDLS) {
445 		dl_err = DL_UNSUPPORTED;
446 		goto failed;
447 	}
448 
449 	if (dsp->ds_dlstate != DL_UNBOUND) {
450 		dl_err = DL_OUTSTATE;
451 		goto failed;
452 	}
453 
454 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
455 	    !dls_active_set(dsp->ds_dc)) {
456 		dl_err = DL_SYSERR;
457 		err = EBUSY;
458 		goto failed;
459 	}
460 
461 	/*
462 	 * Set the receive callback.
463 	 */
464 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_RAW) ?
465 	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
466 
467 	/*
468 	 * Bind the channel such that it can receive packets.
469 	 */
470 	sap = dlp->dl_sap;
471 	err = dls_bind(dsp->ds_dc, sap);
472 	if (err != 0) {
473 		switch (err) {
474 		case EINVAL:
475 			dl_err = DL_BADADDR;
476 			err = 0;
477 			break;
478 		default:
479 			dl_err = DL_SYSERR;
480 			break;
481 		}
482 
483 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
484 			dls_active_clear(dsp->ds_dc);
485 
486 		goto failed;
487 	}
488 
489 	/*
490 	 * Copy in MAC address.
491 	 */
492 	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
493 	bcopy(dsp->ds_curr_addr, dlsap_addr, dlsap_addr_length);
494 
495 	/*
496 	 * Copy in the SAP.
497 	 */
498 	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
499 	dlsap_addr_length += sizeof (uint16_t);
500 
501 	rw_enter(&dsp->ds_lock, RW_WRITER);
502 
503 	dsp->ds_dlstate = DL_IDLE;
504 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
505 		dsp->ds_passivestate = DLD_ACTIVE;
506 	dsp->ds_sap = sap;
507 
508 	if (dsp->ds_mode == DLD_FASTPATH)
509 		dsp->ds_tx = str_mdata_fastpath_put;
510 	else if (dsp->ds_mode == DLD_RAW)
511 		dsp->ds_tx = str_mdata_raw_put;
512 	dsp->ds_unitdata_tx = dld_wput_proto_data;
513 
514 	rw_exit(&dsp->ds_lock);
515 
516 	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
517 	return (B_TRUE);
518 failed:
519 	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
520 	return (B_FALSE);
521 }
522 
523 /*
524  * DL_UNBIND_REQ
525  */
526 /*ARGSUSED*/
527 static boolean_t
528 proto_unbind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
529 {
530 	queue_t		*q = dsp->ds_wq;
531 	t_uscalar_t	dl_err;
532 
533 	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
534 		dl_err = DL_BADPRIM;
535 		goto failed;
536 	}
537 
538 	if (dsp->ds_dlstate != DL_IDLE) {
539 		dl_err = DL_OUTSTATE;
540 		goto failed;
541 	}
542 
543 	/*
544 	 * Flush any remaining packets scheduled for transmission.
545 	 */
546 	dld_tx_flush(dsp);
547 
548 	/*
549 	 * Unbind the channel to stop packets being received.
550 	 */
551 	dls_unbind(dsp->ds_dc);
552 
553 	/*
554 	 * Clear the receive callback.
555 	 */
556 	dls_rx_set(dsp->ds_dc, NULL, NULL);
557 
558 	rw_enter(&dsp->ds_lock, RW_WRITER);
559 
560 	/*
561 	 * Disable polling mode, if it is enabled.
562 	 */
563 	proto_poll_disable(dsp);
564 
565 	/*
566 	 * If soft rings were enabled, the workers should be quiesced.
567 	 */
568 	dls_soft_ring_disable(dsp->ds_dc);
569 
570 	/*
571 	 * Clear LSO flags.
572 	 */
573 	dsp->ds_lso = B_FALSE;
574 	dsp->ds_lso_max = 0;
575 
576 	/*
577 	 * Set the mode back to the default (unitdata).
578 	 */
579 	dsp->ds_mode = DLD_UNITDATA;
580 	dsp->ds_dlstate = DL_UNBOUND;
581 	DLD_TX_QUIESCE(dsp);
582 	rw_exit(&dsp->ds_lock);
583 
584 	dlokack(q, mp, DL_UNBIND_REQ);
585 
586 	return (B_TRUE);
587 failed:
588 	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
589 	return (B_FALSE);
590 }
591 
592 /*
593  * DL_PROMISCON_REQ
594  */
595 static boolean_t
596 proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
597 {
598 	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)udlp;
599 	int		err = 0;
600 	t_uscalar_t	dl_err;
601 	uint32_t	promisc;
602 	queue_t		*q = dsp->ds_wq;
603 
604 	/*
605 	 * Because control message processing is serialized, we don't need
606 	 * to hold any locks to read any fields of dsp; we only need ds_lock
607 	 * to update the ds_promisc and ds_passivestate fields.
608 	 */
609 	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
610 		dl_err = DL_BADPRIM;
611 		goto failed;
612 	}
613 
614 	if (dsp->ds_dlstate == DL_UNATTACHED ||
615 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
616 		dl_err = DL_OUTSTATE;
617 		goto failed;
618 	}
619 
620 	switch (dlp->dl_level) {
621 	case DL_PROMISC_SAP:
622 		promisc = DLS_PROMISC_SAP;
623 		break;
624 	case DL_PROMISC_MULTI:
625 		promisc = DLS_PROMISC_MULTI;
626 		break;
627 	case DL_PROMISC_PHYS:
628 		promisc = DLS_PROMISC_PHYS;
629 		break;
630 	default:
631 		dl_err = DL_NOTSUPPORTED;
632 		goto failed;
633 	}
634 
635 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
636 	    !dls_active_set(dsp->ds_dc)) {
637 		dl_err = DL_SYSERR;
638 		err = EBUSY;
639 		goto failed;
640 	}
641 
642 	/*
643 	 * Adjust channel promiscuity.
644 	 */
645 	promisc = (dsp->ds_promisc | promisc);
646 	err = dls_promisc(dsp->ds_dc, promisc);
647 	if (err != 0) {
648 		dl_err = DL_SYSERR;
649 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
650 			dls_active_clear(dsp->ds_dc);
651 		goto failed;
652 	}
653 
654 	rw_enter(&dsp->ds_lock, RW_WRITER);
655 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
656 		dsp->ds_passivestate = DLD_ACTIVE;
657 	dsp->ds_promisc = promisc;
658 	rw_exit(&dsp->ds_lock);
659 
660 	dlokack(q, mp, DL_PROMISCON_REQ);
661 	return (B_TRUE);
662 failed:
663 	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
664 	return (B_FALSE);
665 }
666 
667 /*
668  * DL_PROMISCOFF_REQ
669  */
670 static boolean_t
671 proto_promiscoff_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
672 {
673 	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)udlp;
674 	int		err = 0;
675 	t_uscalar_t	dl_err;
676 	uint32_t	promisc;
677 	queue_t		*q = dsp->ds_wq;
678 
679 	/*
680 	 * Because control messages processing is serialized, we don't need
681 	 * to hold any lock to read any field of dsp; we hold ds_lock to
682 	 * update the ds_promisc field.
683 	 */
684 	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
685 		dl_err = DL_BADPRIM;
686 		goto failed;
687 	}
688 
689 	if (dsp->ds_dlstate == DL_UNATTACHED ||
690 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
691 		dl_err = DL_OUTSTATE;
692 		goto failed;
693 	}
694 
695 	switch (dlp->dl_level) {
696 	case DL_PROMISC_SAP:
697 		promisc = DLS_PROMISC_SAP;
698 		break;
699 	case DL_PROMISC_MULTI:
700 		promisc = DLS_PROMISC_MULTI;
701 		break;
702 	case DL_PROMISC_PHYS:
703 		promisc = DLS_PROMISC_PHYS;
704 		break;
705 	default:
706 		dl_err = DL_NOTSUPPORTED;
707 		goto failed;
708 	}
709 
710 	if (!(dsp->ds_promisc & promisc)) {
711 		dl_err = DL_NOTENAB;
712 		goto failed;
713 	}
714 
715 	promisc = (dsp->ds_promisc & ~promisc);
716 	err = dls_promisc(dsp->ds_dc, promisc);
717 	if (err != 0) {
718 		dl_err = DL_SYSERR;
719 		goto failed;
720 	}
721 
722 	rw_enter(&dsp->ds_lock, RW_WRITER);
723 	dsp->ds_promisc = promisc;
724 	rw_exit(&dsp->ds_lock);
725 
726 	dlokack(q, mp, DL_PROMISCOFF_REQ);
727 	return (B_TRUE);
728 failed:
729 	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
730 	return (B_FALSE);
731 }
732 
733 /*
734  * DL_ENABMULTI_REQ
735  */
736 static boolean_t
737 proto_enabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
738 {
739 	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)udlp;
740 	int		err = 0;
741 	t_uscalar_t	dl_err;
742 	queue_t		*q = dsp->ds_wq;
743 
744 	/*
745 	 * Because control messages processing is serialized, we don't need
746 	 * to hold any lock to read any field of dsp; we hold ds_lock to
747 	 * update the ds_passivestate field.
748 	 */
749 	if (dsp->ds_dlstate == DL_UNATTACHED ||
750 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
751 		dl_err = DL_OUTSTATE;
752 		goto failed;
753 	}
754 
755 	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
756 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
757 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
758 		dl_err = DL_BADPRIM;
759 		goto failed;
760 	}
761 
762 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
763 	    !dls_active_set(dsp->ds_dc)) {
764 		dl_err = DL_SYSERR;
765 		err = EBUSY;
766 		goto failed;
767 	}
768 
769 	err = dls_multicst_add(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
770 	if (err != 0) {
771 		switch (err) {
772 		case EINVAL:
773 			dl_err = DL_BADADDR;
774 			err = 0;
775 			break;
776 		case ENOSPC:
777 			dl_err = DL_TOOMANY;
778 			err = 0;
779 			break;
780 		default:
781 			dl_err = DL_SYSERR;
782 			break;
783 		}
784 
785 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
786 			dls_active_clear(dsp->ds_dc);
787 
788 		goto failed;
789 	}
790 
791 	rw_enter(&dsp->ds_lock, RW_WRITER);
792 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
793 		dsp->ds_passivestate = DLD_ACTIVE;
794 	rw_exit(&dsp->ds_lock);
795 
796 	dlokack(q, mp, DL_ENABMULTI_REQ);
797 	return (B_TRUE);
798 failed:
799 	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
800 	return (B_FALSE);
801 }
802 
803 /*
804  * DL_DISABMULTI_REQ
805  */
806 static boolean_t
807 proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
808 {
809 	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)udlp;
810 	int		err = 0;
811 	t_uscalar_t	dl_err;
812 	queue_t		*q = dsp->ds_wq;
813 
814 	/*
815 	 * Because control messages processing is serialized, we don't need
816 	 * to hold any lock to read any field of dsp.
817 	 */
818 	if (dsp->ds_dlstate == DL_UNATTACHED ||
819 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
820 		dl_err = DL_OUTSTATE;
821 		goto failed;
822 	}
823 
824 	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
825 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
826 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
827 		dl_err = DL_BADPRIM;
828 		goto failed;
829 	}
830 
831 	err = dls_multicst_remove(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
832 	if (err != 0) {
833 		switch (err) {
834 		case EINVAL:
835 			dl_err = DL_BADADDR;
836 			err = 0;
837 			break;
838 		case ENOENT:
839 			dl_err = DL_NOTENAB;
840 			err = 0;
841 			break;
842 		default:
843 			dl_err = DL_SYSERR;
844 			break;
845 		}
846 		goto failed;
847 	}
848 
849 	dlokack(q, mp, DL_DISABMULTI_REQ);
850 	return (B_TRUE);
851 failed:
852 	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
853 	return (B_FALSE);
854 }
855 
856 /*
857  * DL_PHYS_ADDR_REQ
858  */
859 static boolean_t
860 proto_physaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
861 {
862 	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)udlp;
863 	queue_t		*q = dsp->ds_wq;
864 	t_uscalar_t	dl_err;
865 	char		*addr;
866 	uint_t		addr_length;
867 
868 	rw_enter(&dsp->ds_lock, RW_READER);
869 
870 	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
871 		dl_err = DL_BADPRIM;
872 		goto failed;
873 	}
874 
875 	if (dsp->ds_dlstate == DL_UNATTACHED ||
876 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
877 		dl_err = DL_OUTSTATE;
878 		goto failed;
879 	}
880 
881 	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
882 	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
883 		dl_err = DL_UNSUPPORTED;
884 		goto failed;
885 	}
886 
887 	addr_length = dsp->ds_mip->mi_addr_length;
888 	if (addr_length > 0) {
889 		addr = kmem_alloc(addr_length, KM_NOSLEEP);
890 		if (addr == NULL) {
891 			rw_exit(&dsp->ds_lock);
892 			merror(q, mp, ENOSR);
893 			return (B_FALSE);
894 		}
895 
896 		/*
897 		 * Copy out the address before we drop the lock; we don't
898 		 * want to call dlphysaddrack() while holding ds_lock.
899 		 */
900 		bcopy((dlp->dl_addr_type == DL_CURR_PHYS_ADDR) ?
901 		    dsp->ds_curr_addr : dsp->ds_fact_addr, addr, addr_length);
902 
903 		rw_exit(&dsp->ds_lock);
904 		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
905 		kmem_free(addr, addr_length);
906 	} else {
907 		rw_exit(&dsp->ds_lock);
908 		dlphysaddrack(q, mp, NULL, 0);
909 	}
910 	return (B_TRUE);
911 failed:
912 	rw_exit(&dsp->ds_lock);
913 	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
914 	return (B_FALSE);
915 }
916 
917 /*
918  * DL_SET_PHYS_ADDR_REQ
919  */
920 static boolean_t
921 proto_setphysaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
922 {
923 	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)udlp;
924 	int		err = 0;
925 	t_uscalar_t	dl_err;
926 	queue_t		*q = dsp->ds_wq;
927 
928 	/*
929 	 * Because control message processing is serialized, we don't need
930 	 * to hold any locks to read any fields of dsp; we only need ds_lock
931 	 * to update the ds_passivestate field.
932 	 */
933 	if (dsp->ds_dlstate == DL_UNATTACHED ||
934 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
935 		dl_err = DL_OUTSTATE;
936 		goto failed;
937 	}
938 
939 	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
940 	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
941 	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
942 		dl_err = DL_BADPRIM;
943 		goto failed;
944 	}
945 
946 	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
947 	    !dls_active_set(dsp->ds_dc)) {
948 		dl_err = DL_SYSERR;
949 		err = EBUSY;
950 		goto failed;
951 	}
952 
953 	err = mac_unicst_set(dsp->ds_mh, mp->b_rptr + dlp->dl_addr_offset);
954 	if (err != 0) {
955 		switch (err) {
956 		case EINVAL:
957 			dl_err = DL_BADADDR;
958 			err = 0;
959 			break;
960 
961 		default:
962 			dl_err = DL_SYSERR;
963 			break;
964 		}
965 
966 		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
967 			dls_active_clear(dsp->ds_dc);
968 
969 		goto failed;
970 	}
971 
972 	rw_enter(&dsp->ds_lock, RW_WRITER);
973 	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
974 		dsp->ds_passivestate = DLD_ACTIVE;
975 	rw_exit(&dsp->ds_lock);
976 
977 	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
978 	return (B_TRUE);
979 failed:
980 	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
981 	return (B_FALSE);
982 }
983 
984 /*
985  * DL_UDQOS_REQ
986  */
987 static boolean_t
988 proto_udqos_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
989 {
990 	dl_udqos_req_t *dlp = (dl_udqos_req_t *)udlp;
991 	dl_qos_cl_sel1_t *selp;
992 	int		off, len;
993 	t_uscalar_t	dl_err;
994 	queue_t		*q = dsp->ds_wq;
995 
996 	off = dlp->dl_qos_offset;
997 	len = dlp->dl_qos_length;
998 
999 	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
1000 		dl_err = DL_BADPRIM;
1001 		goto failed;
1002 	}
1003 
1004 	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1005 	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1006 		dl_err = DL_BADQOSTYPE;
1007 		goto failed;
1008 	}
1009 
1010 	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1011 	    selp->dl_priority < 0) {
1012 		dl_err = DL_BADQOSPARAM;
1013 		goto failed;
1014 	}
1015 
1016 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1017 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1018 		dl_err = DL_OUTSTATE;
1019 		goto failed;
1020 	}
1021 
1022 	rw_enter(&dsp->ds_lock, RW_WRITER);
1023 	dsp->ds_pri = selp->dl_priority;
1024 	rw_exit(&dsp->ds_lock);
1025 
1026 	dlokack(q, mp, DL_UDQOS_REQ);
1027 	return (B_TRUE);
1028 failed:
1029 	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1030 	return (B_FALSE);
1031 }
1032 
1033 static boolean_t
1034 check_ip_above(queue_t *q)
1035 {
1036 	queue_t		*next_q;
1037 	boolean_t	ret = B_TRUE;
1038 
1039 	claimstr(q);
1040 	next_q = q->q_next;
1041 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
1042 		ret = B_FALSE;
1043 	releasestr(q);
1044 	return (ret);
1045 }
1046 
1047 /*
1048  * DL_CAPABILITY_REQ
1049  */
1050 /*ARGSUSED*/
1051 static boolean_t
1052 proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1053 {
1054 	dl_capability_req_t *dlp = (dl_capability_req_t *)udlp;
1055 	dl_capability_sub_t *sp;
1056 	size_t		size, len;
1057 	offset_t	off, end;
1058 	t_uscalar_t	dl_err;
1059 	queue_t		*q = dsp->ds_wq;
1060 
1061 	rw_enter(&dsp->ds_lock, RW_WRITER);
1062 
1063 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1064 		dl_err = DL_BADPRIM;
1065 		goto failed;
1066 	}
1067 
1068 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1069 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1070 		dl_err = DL_OUTSTATE;
1071 		goto failed;
1072 	}
1073 
1074 	/*
1075 	 * This request is overloaded. If there are no requested capabilities
1076 	 * then we just want to acknowledge with all the capabilities we
1077 	 * support. Otherwise we enable the set of capabilities requested.
1078 	 */
1079 	if (dlp->dl_sub_length == 0) {
1080 		/* callee drops lock */
1081 		return (proto_capability_advertise(dsp, mp));
1082 	}
1083 
1084 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1085 		dl_err = DL_BADPRIM;
1086 		goto failed;
1087 	}
1088 
1089 	dlp->dl_primitive = DL_CAPABILITY_ACK;
1090 
1091 	off = dlp->dl_sub_offset;
1092 	len = dlp->dl_sub_length;
1093 
1094 	/*
1095 	 * Walk the list of capabilities to be enabled.
1096 	 */
1097 	for (end = off + len; off < end; ) {
1098 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1099 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1100 
1101 		if (off + size > end ||
1102 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1103 			dl_err = DL_BADPRIM;
1104 			goto failed;
1105 		}
1106 
1107 		switch (sp->dl_cap) {
1108 		/*
1109 		 * TCP/IP checksum offload to hardware.
1110 		 */
1111 		case DL_CAPAB_HCKSUM: {
1112 			dl_capab_hcksum_t *hcksump;
1113 			dl_capab_hcksum_t hcksum;
1114 
1115 			hcksump = (dl_capab_hcksum_t *)&sp[1];
1116 			/*
1117 			 * Copy for alignment.
1118 			 */
1119 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1120 			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1121 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1122 			break;
1123 		}
1124 
1125 		/*
1126 		 * Large segment offload. (LSO)
1127 		 */
1128 		case DL_CAPAB_LSO: {
1129 			dl_capab_lso_t *lsop;
1130 			dl_capab_lso_t lso;
1131 
1132 			lsop = (dl_capab_lso_t *)&sp[1];
1133 			/*
1134 			 * Copy for alignment.
1135 			 */
1136 			bcopy(lsop, &lso, sizeof (dl_capab_lso_t));
1137 			dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1138 			bcopy(&lso, lsop, sizeof (dl_capab_lso_t));
1139 			break;
1140 		}
1141 
1142 		/*
1143 		 * IP polling interface.
1144 		 */
1145 		case DL_CAPAB_POLL: {
1146 			dl_capab_dls_t *pollp;
1147 			dl_capab_dls_t	poll;
1148 
1149 			pollp = (dl_capab_dls_t *)&sp[1];
1150 			/*
1151 			 * Copy for alignment.
1152 			 */
1153 			bcopy(pollp, &poll, sizeof (dl_capab_dls_t));
1154 
1155 			switch (poll.dls_flags) {
1156 			default:
1157 				/*FALLTHRU*/
1158 			case POLL_DISABLE:
1159 				proto_poll_disable(dsp);
1160 				break;
1161 
1162 			case POLL_ENABLE:
1163 				ASSERT(!(dld_opt & DLD_OPT_NO_POLL));
1164 
1165 				/*
1166 				 * Make sure polling is disabled.
1167 				 */
1168 				proto_poll_disable(dsp);
1169 
1170 				/*
1171 				 * Note that only IP should enable POLL.
1172 				 */
1173 				if (check_ip_above(dsp->ds_rq) &&
1174 				    proto_poll_enable(dsp, &poll)) {
1175 					bzero(&poll, sizeof (dl_capab_dls_t));
1176 					poll.dls_flags = POLL_ENABLE;
1177 				} else {
1178 					bzero(&poll, sizeof (dl_capab_dls_t));
1179 					poll.dls_flags = POLL_DISABLE;
1180 				}
1181 				break;
1182 			}
1183 
1184 			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1185 			bcopy(&poll, pollp, sizeof (dl_capab_dls_t));
1186 			break;
1187 		}
1188 		case DL_CAPAB_SOFT_RING: {
1189 			dl_capab_dls_t *soft_ringp;
1190 			dl_capab_dls_t soft_ring;
1191 
1192 			soft_ringp = (dl_capab_dls_t *)&sp[1];
1193 			/*
1194 			 * Copy for alignment.
1195 			 */
1196 			bcopy(soft_ringp, &soft_ring,
1197 			    sizeof (dl_capab_dls_t));
1198 
1199 			switch (soft_ring.dls_flags) {
1200 			default:
1201 				/*FALLTHRU*/
1202 			case SOFT_RING_DISABLE:
1203 				proto_soft_ring_disable(dsp);
1204 				break;
1205 
1206 			case SOFT_RING_ENABLE:
1207 				ASSERT(!(dld_opt & DLD_OPT_NO_SOFTRING));
1208 				/*
1209 				 * Make sure soft_ring is disabled.
1210 				 */
1211 				proto_soft_ring_disable(dsp);
1212 
1213 				/*
1214 				 * Note that only IP can enable soft ring.
1215 				 */
1216 				if (check_ip_above(dsp->ds_rq) &&
1217 				    proto_soft_ring_enable(dsp, &soft_ring)) {
1218 					bzero(&soft_ring,
1219 					    sizeof (dl_capab_dls_t));
1220 					soft_ring.dls_flags = SOFT_RING_ENABLE;
1221 				} else {
1222 					bzero(&soft_ring,
1223 					    sizeof (dl_capab_dls_t));
1224 					soft_ring.dls_flags = SOFT_RING_DISABLE;
1225 				}
1226 				break;
1227 			}
1228 
1229 			dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1230 			bcopy(&soft_ring, soft_ringp,
1231 			    sizeof (dl_capab_dls_t));
1232 			break;
1233 		}
1234 		default:
1235 			break;
1236 		}
1237 
1238 		off += size;
1239 	}
1240 	rw_exit(&dsp->ds_lock);
1241 	qreply(q, mp);
1242 	return (B_TRUE);
1243 failed:
1244 	rw_exit(&dsp->ds_lock);
1245 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1246 	return (B_FALSE);
1247 }
1248 
1249 /*
1250  * DL_NOTIFY_REQ
1251  */
1252 static boolean_t
1253 proto_notify_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1254 {
1255 	dl_notify_req_t	*dlp = (dl_notify_req_t *)udlp;
1256 	t_uscalar_t	dl_err;
1257 	queue_t		*q = dsp->ds_wq;
1258 	uint_t		note =
1259 	    DL_NOTE_PROMISC_ON_PHYS |
1260 	    DL_NOTE_PROMISC_OFF_PHYS |
1261 	    DL_NOTE_PHYS_ADDR |
1262 	    DL_NOTE_LINK_UP |
1263 	    DL_NOTE_LINK_DOWN |
1264 	    DL_NOTE_CAPAB_RENEG |
1265 	    DL_NOTE_SPEED;
1266 
1267 	rw_enter(&dsp->ds_lock, RW_WRITER);
1268 
1269 	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1270 		dl_err = DL_BADPRIM;
1271 		goto failed;
1272 	}
1273 
1274 	if (dsp->ds_dlstate == DL_UNATTACHED ||
1275 	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1276 		dl_err = DL_OUTSTATE;
1277 		goto failed;
1278 	}
1279 
1280 	note &= ~(mac_no_notification(dsp->ds_mh));
1281 
1282 	/*
1283 	 * Cache the notifications that are being enabled.
1284 	 */
1285 	dsp->ds_notifications = dlp->dl_notifications & note;
1286 	rw_exit(&dsp->ds_lock);
1287 	/*
1288 	 * The ACK carries all notifications regardless of which set is
1289 	 * being enabled.
1290 	 */
1291 	dlnotifyack(q, mp, note);
1292 
1293 	/*
1294 	 * Solicit DL_NOTIFY_IND messages for each enabled notification.
1295 	 */
1296 	rw_enter(&dsp->ds_lock, RW_READER);
1297 	if (dsp->ds_notifications != 0) {
1298 		rw_exit(&dsp->ds_lock);
1299 		dld_str_notify_ind(dsp);
1300 	} else {
1301 		rw_exit(&dsp->ds_lock);
1302 	}
1303 	return (B_TRUE);
1304 failed:
1305 	rw_exit(&dsp->ds_lock);
1306 	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1307 	return (B_FALSE);
1308 }
1309 
1310 /*
1311  * DL_UNITDATA_REQ
1312  */
1313 void
1314 dld_wput_proto_data(dld_str_t *dsp, mblk_t *mp)
1315 {
1316 	queue_t			*q = dsp->ds_wq;
1317 	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1318 	off_t			off;
1319 	size_t			len, size;
1320 	const uint8_t		*addr;
1321 	uint16_t		sap;
1322 	uint_t			addr_length;
1323 	mblk_t			*bp, *payload;
1324 	uint32_t		start, stuff, end, value, flags;
1325 	t_uscalar_t		dl_err;
1326 	uint_t			max_sdu;
1327 
1328 	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1329 		dl_err = DL_BADPRIM;
1330 		goto failed;
1331 	}
1332 
1333 	addr_length = dsp->ds_mip->mi_addr_length;
1334 
1335 	off = dlp->dl_dest_addr_offset;
1336 	len = dlp->dl_dest_addr_length;
1337 
1338 	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1339 		dl_err = DL_BADPRIM;
1340 		goto failed;
1341 	}
1342 
1343 	if (len != addr_length + sizeof (uint16_t)) {
1344 		dl_err = DL_BADADDR;
1345 		goto failed;
1346 	}
1347 
1348 	addr = mp->b_rptr + off;
1349 	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1350 
1351 	/*
1352 	 * Check the length of the packet and the block types.
1353 	 */
1354 	size = 0;
1355 	payload = mp->b_cont;
1356 	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1357 		if (DB_TYPE(bp) != M_DATA)
1358 			goto baddata;
1359 
1360 		size += MBLKL(bp);
1361 	}
1362 
1363 	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1364 	if (size > max_sdu)
1365 		goto baddata;
1366 
1367 	/*
1368 	 * Build a packet header.
1369 	 */
1370 	if ((bp = dls_header(dsp->ds_dc, addr, sap, dlp->dl_priority.dl_max,
1371 	    &payload)) == NULL) {
1372 		dl_err = DL_BADADDR;
1373 		goto failed;
1374 	}
1375 
1376 	/*
1377 	 * We no longer need the M_PROTO header, so free it.
1378 	 */
1379 	freeb(mp);
1380 
1381 	/*
1382 	 * Transfer the checksum offload information if it is present.
1383 	 */
1384 	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1385 	    &flags);
1386 	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1387 
1388 	/*
1389 	 * Link the payload onto the new header.
1390 	 */
1391 	ASSERT(bp->b_cont == NULL);
1392 	bp->b_cont = payload;
1393 	dld_tx_single(dsp, bp);
1394 	return;
1395 failed:
1396 	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1397 	return;
1398 
1399 baddata:
1400 	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1401 }
1402 
1403 /*
1404  * DL_PASSIVE_REQ
1405  */
1406 /* ARGSUSED */
1407 static boolean_t
1408 proto_passive_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1409 {
1410 	t_uscalar_t dl_err;
1411 
1412 	/*
1413 	 * READER lock is enough because ds_passivestate can only be changed
1414 	 * as the result of non-data message processing.
1415 	 */
1416 	rw_enter(&dsp->ds_lock, RW_READER);
1417 
1418 	/*
1419 	 * If we've already become active by issuing an active primitive,
1420 	 * then it's too late to try to become passive.
1421 	 */
1422 	if (dsp->ds_passivestate == DLD_ACTIVE) {
1423 		dl_err = DL_OUTSTATE;
1424 		goto failed;
1425 	}
1426 
1427 	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1428 		dl_err = DL_BADPRIM;
1429 		goto failed;
1430 	}
1431 
1432 	dsp->ds_passivestate = DLD_PASSIVE;
1433 	rw_exit(&dsp->ds_lock);
1434 	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1435 	return (B_TRUE);
1436 failed:
1437 	rw_exit(&dsp->ds_lock);
1438 	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1439 	return (B_FALSE);
1440 }
1441 
1442 /*
1443  * Catch-all handler.
1444  */
1445 static boolean_t
1446 proto_req(dld_str_t *dsp, union DL_primitives *dlp, mblk_t *mp)
1447 {
1448 	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1449 	return (B_FALSE);
1450 }
1451 
1452 static void
1453 proto_poll_disable(dld_str_t *dsp)
1454 {
1455 	mac_handle_t	mh;
1456 
1457 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1458 
1459 	if (!dsp->ds_polling)
1460 		return;
1461 
1462 	/*
1463 	 * It should be impossible to enable raw mode if polling is turned on.
1464 	 */
1465 	ASSERT(dsp->ds_mode != DLD_RAW);
1466 
1467 	/*
1468 	 * Reset the resource_add callback.
1469 	 */
1470 	mh = dls_mac(dsp->ds_dc);
1471 	mac_resource_set(mh, NULL, NULL);
1472 	mac_resources(mh);
1473 
1474 	/*
1475 	 * Set receive function back to default.
1476 	 */
1477 	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_FASTPATH) ?
1478 	    dld_str_rx_fastpath : dld_str_rx_unitdata, dsp);
1479 
1480 	/*
1481 	 * Note that polling is disabled.
1482 	 */
1483 	dsp->ds_polling = B_FALSE;
1484 }
1485 
1486 static boolean_t
1487 proto_poll_enable(dld_str_t *dsp, dl_capab_dls_t *pollp)
1488 {
1489 	mac_handle_t	mh;
1490 
1491 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1492 	ASSERT(!dsp->ds_polling);
1493 
1494 	/*
1495 	 * We cannot enable polling if raw mode
1496 	 * has been enabled.
1497 	 */
1498 	if (dsp->ds_mode == DLD_RAW)
1499 		return (B_FALSE);
1500 
1501 	mh = dls_mac(dsp->ds_dc);
1502 
1503 	/*
1504 	 * Register resources.
1505 	 */
1506 	mac_resource_set(mh, (mac_resource_add_t)pollp->dls_ring_add,
1507 	    (void *)pollp->dls_rx_handle);
1508 
1509 	mac_resources(mh);
1510 
1511 	/*
1512 	 * Set the upstream receive function.
1513 	 */
1514 	dls_rx_set(dsp->ds_dc, (dls_rx_t)pollp->dls_rx,
1515 	    (void *)pollp->dls_rx_handle);
1516 
1517 	/*
1518 	 * Note that polling is enabled. This prevents further DLIOCHDRINFO
1519 	 * ioctls from overwriting the receive function pointer.
1520 	 */
1521 	dsp->ds_polling = B_TRUE;
1522 	return (B_TRUE);
1523 }
1524 
1525 static void
1526 proto_soft_ring_disable(dld_str_t *dsp)
1527 {
1528 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1529 
1530 	if (!dsp->ds_soft_ring)
1531 		return;
1532 
1533 	/*
1534 	 * It should be impossible to enable raw mode if soft_ring is turned on.
1535 	 */
1536 	ASSERT(dsp->ds_mode != DLD_RAW);
1537 	proto_change_soft_ring_fanout(dsp, SOFT_RING_NONE);
1538 	/*
1539 	 * Note that fanout is disabled.
1540 	 */
1541 	dsp->ds_soft_ring = B_FALSE;
1542 }
1543 
1544 static boolean_t
1545 proto_soft_ring_enable(dld_str_t *dsp, dl_capab_dls_t *soft_ringp)
1546 {
1547 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1548 	ASSERT(!dsp->ds_soft_ring);
1549 
1550 	/*
1551 	 * We cannot enable soft_ring if raw mode
1552 	 * has been enabled.
1553 	 */
1554 	if (dsp->ds_mode == DLD_RAW)
1555 		return (B_FALSE);
1556 
1557 	if (dls_soft_ring_enable(dsp->ds_dc, soft_ringp) == B_FALSE)
1558 		return (B_FALSE);
1559 
1560 	dsp->ds_soft_ring = B_TRUE;
1561 	return (B_TRUE);
1562 }
1563 
1564 static void
1565 proto_change_soft_ring_fanout(dld_str_t *dsp, int type)
1566 {
1567 	dls_channel_t	dc = dsp->ds_dc;
1568 
1569 	if (type == SOFT_RING_NONE) {
1570 		dls_rx_set(dc, (dsp->ds_mode == DLD_FASTPATH) ?
1571 		    dld_str_rx_fastpath : dld_str_rx_unitdata, dsp);
1572 	} else if (type != SOFT_RING_NONE) {
1573 		dls_rx_set(dc, (dls_rx_t)dls_soft_ring_fanout, dc);
1574 	}
1575 }
1576 
1577 /*
1578  * DL_CAPABILITY_ACK/DL_ERROR_ACK
1579  */
1580 static boolean_t
1581 proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1582 {
1583 	dl_capability_ack_t	*dlap;
1584 	dl_capability_sub_t	*dlsp;
1585 	size_t			subsize;
1586 	dl_capab_dls_t		poll;
1587 	dl_capab_dls_t		soft_ring;
1588 	dl_capab_hcksum_t	hcksum;
1589 	dl_capab_lso_t		lso;
1590 	dl_capab_zerocopy_t	zcopy;
1591 	uint8_t			*ptr;
1592 	queue_t			*q = dsp->ds_wq;
1593 	mblk_t			*mp1;
1594 	boolean_t		is_vlan = (dsp->ds_vid != VLAN_ID_NONE);
1595 	boolean_t		poll_capable = B_FALSE;
1596 	boolean_t		soft_ring_capable = B_FALSE;
1597 	boolean_t		hcksum_capable = B_FALSE;
1598 	boolean_t		zcopy_capable = B_FALSE;
1599 	boolean_t		lso_capable = B_FALSE;
1600 	mac_capab_lso_t		mac_lso;
1601 
1602 	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1603 
1604 	/*
1605 	 * Initially assume no capabilities.
1606 	 */
1607 	subsize = 0;
1608 
1609 	/*
1610 	 * Check if soft ring can be enabled on this interface. Note that we
1611 	 * do not enable softring on any legacy drivers, because doing that
1612 	 * would hurt the performance if the legacy driver has its own taskq
1613 	 * implementation. Further, most high-performance legacy drivers do
1614 	 * have their own taskq implementation.
1615 	 *
1616 	 * If advertising DL_CAPAB_SOFT_RING has not been explicitly disabled,
1617 	 * reserve space for that capability.
1618 	 */
1619 	if (!mac_is_legacy(dsp->ds_mh) && !(dld_opt & DLD_OPT_NO_SOFTRING)) {
1620 		soft_ring_capable = B_TRUE;
1621 		subsize += sizeof (dl_capability_sub_t) +
1622 		    sizeof (dl_capab_dls_t);
1623 	}
1624 
1625 	/*
1626 	 * Check if polling can be enabled on this interface.
1627 	 * If advertising DL_CAPAB_POLL has not been explicitly disabled
1628 	 * then reserve space for that capability.
1629 	 */
1630 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_POLL, NULL) &&
1631 	    !(dld_opt & DLD_OPT_NO_POLL) && !is_vlan) {
1632 		poll_capable = B_TRUE;
1633 		subsize += sizeof (dl_capability_sub_t) +
1634 		    sizeof (dl_capab_dls_t);
1635 	}
1636 
1637 	/*
1638 	 * Check if checksum offload is supported on this MAC.  Don't
1639 	 * advertise DL_CAPAB_HCKSUM if the underlying MAC is VLAN incapable,
1640 	 * since it might not be able to do the hardware checksum offload
1641 	 * with the correct offset.
1642 	 */
1643 	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1644 	if ((!is_vlan || (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_NATIVEVLAN,
1645 	    NULL))) && mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1646 	    &hcksum.hcksum_txflags)) {
1647 		if (hcksum.hcksum_txflags != 0) {
1648 			hcksum_capable = B_TRUE;
1649 			subsize += sizeof (dl_capability_sub_t) +
1650 			    sizeof (dl_capab_hcksum_t);
1651 		}
1652 	}
1653 
1654 	/*
1655 	 * Check if LSO is supported on this MAC, then reserve space for
1656 	 * the DL_CAPAB_LSO capability.
1657 	 */
1658 	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1659 		lso_capable = B_TRUE;
1660 		subsize += sizeof (dl_capability_sub_t) +
1661 		    sizeof (dl_capab_lso_t);
1662 	}
1663 
1664 	/*
1665 	 * Check if zerocopy is supported on this interface.
1666 	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1667 	 * then reserve space for that capability.
1668 	 */
1669 	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1670 	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1671 		zcopy_capable = B_TRUE;
1672 		subsize += sizeof (dl_capability_sub_t) +
1673 		    sizeof (dl_capab_zerocopy_t);
1674 	}
1675 
1676 	/*
1677 	 * If there are no capabilities to advertise or if we
1678 	 * can't allocate a response, send a DL_ERROR_ACK.
1679 	 */
1680 	if ((mp1 = reallocb(mp,
1681 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1682 		rw_exit(&dsp->ds_lock);
1683 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1684 		return (B_FALSE);
1685 	}
1686 
1687 	mp = mp1;
1688 	DB_TYPE(mp) = M_PROTO;
1689 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1690 	bzero(mp->b_rptr, MBLKL(mp));
1691 	dlap = (dl_capability_ack_t *)mp->b_rptr;
1692 	dlap->dl_primitive = DL_CAPABILITY_ACK;
1693 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1694 	dlap->dl_sub_length = subsize;
1695 	ptr = (uint8_t *)&dlap[1];
1696 
1697 	/*
1698 	 * IP polling interface.
1699 	 */
1700 	if (poll_capable) {
1701 		/*
1702 		 * Attempt to disable just in case this is a re-negotiation;
1703 		 * READER lock is enough because ds_polling can only be
1704 		 * changed as the result of non-data message processing.
1705 		 */
1706 		proto_poll_disable(dsp);
1707 
1708 		dlsp = (dl_capability_sub_t *)ptr;
1709 
1710 		dlsp->dl_cap = DL_CAPAB_POLL;
1711 		dlsp->dl_length = sizeof (dl_capab_dls_t);
1712 		ptr += sizeof (dl_capability_sub_t);
1713 
1714 		bzero(&poll, sizeof (dl_capab_dls_t));
1715 		poll.dls_version = POLL_VERSION_1;
1716 		poll.dls_flags = POLL_CAPABLE;
1717 		poll.dls_tx_handle = (uintptr_t)dsp;
1718 		poll.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1719 		dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1720 		bcopy(&poll, ptr, sizeof (dl_capab_dls_t));
1721 		ptr += sizeof (dl_capab_dls_t);
1722 	}
1723 
1724 
1725 	if (soft_ring_capable) {
1726 		dlsp = (dl_capability_sub_t *)ptr;
1727 
1728 		dlsp->dl_cap = DL_CAPAB_SOFT_RING;
1729 		dlsp->dl_length = sizeof (dl_capab_dls_t);
1730 		ptr += sizeof (dl_capability_sub_t);
1731 
1732 		bzero(&soft_ring, sizeof (dl_capab_dls_t));
1733 		soft_ring.dls_version = SOFT_RING_VERSION_1;
1734 		soft_ring.dls_flags = SOFT_RING_CAPABLE;
1735 		soft_ring.dls_tx_handle = (uintptr_t)dsp;
1736 		soft_ring.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1737 		soft_ring.dls_ring_change_status =
1738 		    (uintptr_t)proto_change_soft_ring_fanout;
1739 		soft_ring.dls_ring_bind = (uintptr_t)soft_ring_bind;
1740 		soft_ring.dls_ring_unbind = (uintptr_t)soft_ring_unbind;
1741 
1742 		dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1743 		bcopy(&soft_ring, ptr, sizeof (dl_capab_dls_t));
1744 		ptr += sizeof (dl_capab_dls_t);
1745 	}
1746 
1747 	/*
1748 	 * TCP/IP checksum offload.
1749 	 */
1750 	if (hcksum_capable) {
1751 		dlsp = (dl_capability_sub_t *)ptr;
1752 
1753 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1754 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1755 		ptr += sizeof (dl_capability_sub_t);
1756 
1757 		hcksum.hcksum_version = HCKSUM_VERSION_1;
1758 		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1759 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1760 		ptr += sizeof (dl_capab_hcksum_t);
1761 	}
1762 
1763 	/*
1764 	 * Large segment offload. (LSO)
1765 	 */
1766 	if (lso_capable) {
1767 		dlsp = (dl_capability_sub_t *)ptr;
1768 
1769 		dlsp->dl_cap = DL_CAPAB_LSO;
1770 		dlsp->dl_length = sizeof (dl_capab_lso_t);
1771 		ptr += sizeof (dl_capability_sub_t);
1772 
1773 		lso.lso_version = LSO_VERSION_1;
1774 		lso.lso_flags = mac_lso.lso_flags;
1775 		lso.lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1776 
1777 		/* Simply enable LSO with DLD */
1778 		dsp->ds_lso = B_TRUE;
1779 		dsp->ds_lso_max = lso.lso_max;
1780 
1781 		dlcapabsetqid(&(lso.lso_mid), dsp->ds_rq);
1782 		bcopy(&lso, ptr, sizeof (dl_capab_lso_t));
1783 		ptr += sizeof (dl_capab_lso_t);
1784 	} else {
1785 		dsp->ds_lso = B_FALSE;
1786 		dsp->ds_lso_max = 0;
1787 	}
1788 
1789 	/*
1790 	 * Zero copy
1791 	 */
1792 	if (zcopy_capable) {
1793 		dlsp = (dl_capability_sub_t *)ptr;
1794 
1795 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1796 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1797 		ptr += sizeof (dl_capability_sub_t);
1798 
1799 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1800 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1801 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1802 
1803 		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1804 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1805 		ptr += sizeof (dl_capab_zerocopy_t);
1806 	}
1807 
1808 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1809 
1810 	rw_exit(&dsp->ds_lock);
1811 	qreply(q, mp);
1812 	return (B_TRUE);
1813 }
1814 
1815 /*
1816  * Disable any enabled capabilities.
1817  */
1818 void
1819 dld_capabilities_disable(dld_str_t *dsp)
1820 {
1821 	if (dsp->ds_polling)
1822 		proto_poll_disable(dsp);
1823 
1824 	if (dsp->ds_soft_ring)
1825 		proto_soft_ring_disable(dsp);
1826 }
1827