1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Softmac data-path switching:
28  *
29  * - Fast-path model
30  *
31  * When the softmac fast-path is used, a dedicated lower-stream
32  * will be opened over the legacy device for each IP/ARP (upper-)stream
33  * over the softMAC, and all DLPI messages (including control messages
34  * and data messages) will be exchanged between the upper-stream and
35  * the corresponding lower-stream directly. Therefore, the data
36  * demultiplexing, filtering and classification processing will be done
37  * by the lower-stream, and the GLDv3 DLS/MAC layer processing will be
38  * no longer needed.
39  *
40  * - Slow-path model
41  *
 * Some GLDv3 features require the GLDv3 DLS/MAC layer processing to
43  * not be bypassed to assure its function correctness. For example,
44  * softmac fast-path must be disabled to support GLDv3 VNIC functionality.
45  * In this case, a shared lower-stream will be opened over the legacy
46  * device, which is responsible for implementing the GLDv3 callbacks
47  * and passing RAW data messages between the legacy devices and the GLDv3
48  * framework.
49  *
50  * By default, the softmac fast-path mode will be used to assure the
51  * performance; MAC clients will be able to request to disable the softmac
52  * fast-path mode to support certain features, and if that succeeds,
53  * the system will fallback to the slow-path softmac data-path model.
54  *
55  *
 * The details of the softmac data fast-path model are stated below:
57  *
 * 1. When a stream is opened on a softMAC, the softmac module will take
59  *    over the DLPI processing on this stream;
60  *
61  * 2. For IP/ARP streams over a softMAC, softmac data fast-path will be
62  *    used by default, unless fast-path is disabled by any MAC client
63  *    explicitly. The softmac module first identifies an IP/ARP stream
64  *    by seeing whether there is a SIOCSLIFNAME ioctl sent from upstream,
65  *    if there is one, this stream is either an IP or an ARP stream
66  *    and will use fast-path potentially;
67  *
 * 3. When the softmac fast-path is used, a dedicated lower-stream will
69  *    be setup for each IP/ARP stream (1-1 mapping). From that point on,
70  *    all control and data messages will be exchanged between the IP/ARP
71  *    upper-stream and the legacy device through this dedicated
72  *    lower-stream. As a result, the DLS/MAC layer processing in GLDv3
73  *    will be skipped, and this greatly improves the performance;
74  *
75  * 4. When the softmac data fast-path is disabled by a MAC client (e.g.,
76  *    by a VNIC), all the IP/ARP upper streams will try to switch from
77  *    the fast-path to the slow-path. The dedicated lower-stream will be
78  *    destroyed, and all the control and data-messages will go through the
79  *    existing GLDv3 code path and (in the end) the shared lower-stream;
80  *
81  * 5. On the other hand, when the last MAC client cancels its fast-path
82  *    disable request, all the IP/ARP streams will try to switch back to
83  *    the fast-path mode;
84  *
 * Steps 4 and 5 both rely on the data-path mode switching process
86  * described below:
87  *
88  * 1) To switch the softmac data-path mode (between fast-path and slow-path),
89  *    softmac will first send a DL_NOTE_REPLUMB DL_NOTIFY_IND message
 *    upstream over each IP/ARP stream that needs data-path mode switching;
91  *
92  * 2) When IP receives this DL_NOTE_REPLUMB message, it will bring down
93  *    all the IP interfaces on the corresponding ill (IP Lower level
94  *    structure), and bring up those interfaces over again; this will in
95  *    turn cause the ARP to "replumb" the interface.
96  *
97  *    During the replumb process, both IP and ARP will send downstream the
98  *    necessary DL_DISABMULTI_REQ and DL_UNBIND_REQ messages and cleanup
99  *    the old state of the underlying softMAC, following with the necessary
100  *    DL_BIND_REQ and DL_ENABMULTI_REQ messages to setup the new state.
101  *    Between the cleanup and re-setup process, IP/ARP will also send down
 *    a DL_NOTE_REPLUMB_DONE DL_NOTIFY_CONF message to the softMAC to
103  *    indicate the *switching point*;
104  *
105  * 3) When softmac receives the DL_NOTE_REPLUMB_DONE message, it either
106  *    creates or destroys the dedicated lower-stream (depending on which
 *    data-path mode the softMAC switches to), and changes the softmac
108  *    data-path mode. From then on, softmac will process all the succeeding
109  *    control messages (including the DL_BIND_REQ and DL_ENABMULTI_REQ
110  *    messages) and data messages based on new data-path mode.
111  */
112 
113 #include <sys/types.h>
114 #include <sys/disp.h>
115 #include <sys/callb.h>
116 #include <sys/sysmacros.h>
117 #include <sys/file.h>
118 #include <sys/vlan.h>
119 #include <sys/dld.h>
120 #include <sys/sockio.h>
121 #include <sys/softmac_impl.h>
122 #include <net/if.h>
123 
124 static kmutex_t		softmac_taskq_lock;
125 static kcondvar_t	softmac_taskq_cv;
126 static list_t		softmac_taskq_list;	/* List of softmac_upper_t */
127 boolean_t		softmac_taskq_quit;
128 boolean_t		softmac_taskq_done;
129 
130 static void		softmac_taskq_dispatch();
131 static int		softmac_fastpath_setup(softmac_upper_t *);
132 static mac_tx_cookie_t	softmac_fastpath_wput_data(softmac_upper_t *, mblk_t *,
133 			    uintptr_t, uint16_t);
134 static void		softmac_datapath_switch_done(softmac_upper_t *);
135 
136 void
137 softmac_fp_init()
138 {
139 	mutex_init(&softmac_taskq_lock, NULL, MUTEX_DRIVER, NULL);
140 	cv_init(&softmac_taskq_cv, NULL, CV_DRIVER, NULL);
141 
142 	softmac_taskq_quit = B_FALSE;
143 	softmac_taskq_done = B_FALSE;
144 	list_create(&softmac_taskq_list, sizeof (softmac_upper_t),
145 	    offsetof(softmac_upper_t, su_taskq_list_node));
146 	(void) thread_create(NULL, 0, softmac_taskq_dispatch, NULL, 0,
147 	    &p0, TS_RUN, minclsyspri);
148 }
149 
150 void
151 softmac_fp_fini()
152 {
153 	/*
154 	 * Request the softmac_taskq thread to quit and wait for it to be done.
155 	 */
156 	mutex_enter(&softmac_taskq_lock);
157 	softmac_taskq_quit = B_TRUE;
158 	cv_signal(&softmac_taskq_cv);
159 	while (!softmac_taskq_done)
160 		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
161 	mutex_exit(&softmac_taskq_lock);
162 	list_destroy(&softmac_taskq_list);
163 
164 	mutex_destroy(&softmac_taskq_lock);
165 	cv_destroy(&softmac_taskq_cv);
166 }
167 
168 static boolean_t
169 check_ip_above(queue_t *q)
170 {
171 	queue_t		*next_q;
172 	boolean_t	ret = B_TRUE;
173 
174 	claimstr(q);
175 	next_q = q->q_next;
176 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
177 		ret = B_FALSE;
178 	releasestr(q);
179 	return (ret);
180 }
181 
182 /* ARGSUSED */
183 static int
184 softmac_capab_perim(softmac_upper_t *sup, void *data, uint_t flags)
185 {
186 	switch (flags) {
187 	case DLD_ENABLE:
188 		mutex_enter(&sup->su_mutex);
189 		break;
190 	case DLD_DISABLE:
191 		mutex_exit(&sup->su_mutex);
192 		break;
193 	case DLD_QUERY:
194 		return (MUTEX_HELD(&sup->su_mutex));
195 	}
196 	return (0);
197 }
198 
199 /* ARGSUSED */
200 static mac_tx_notify_handle_t
201 softmac_client_tx_notify(void *txcb, mac_tx_notify_t func, void *arg)
202 {
203 	return (NULL);
204 }
205 
206 static int
207 softmac_capab_direct(softmac_upper_t *sup, void *data, uint_t flags)
208 {
209 	dld_capab_direct_t	*direct = data;
210 	softmac_lower_t		*slp = sup->su_slp;
211 
212 	ASSERT(MUTEX_HELD(&sup->su_mutex));
213 
214 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
215 
216 	switch (flags) {
217 	case DLD_ENABLE:
218 		if (sup->su_direct)
219 			return (0);
220 
221 		sup->su_direct_rxinfo.slr_rx = (softmac_rx_t)direct->di_rx_cf;
222 		sup->su_direct_rxinfo.slr_arg = direct->di_rx_ch;
223 		slp->sl_rxinfo = &sup->su_direct_rxinfo;
224 		direct->di_tx_df = (uintptr_t)softmac_fastpath_wput_data;
225 		direct->di_tx_dh = sup;
226 
227 		/*
228 		 * We relying on the STREAM flow-control to backenable
229 		 * the IP stream. Therefore, no notify callback needs to
230 		 * be registered. But IP requires this to be a valid function
231 		 * pointer.
232 		 */
233 		direct->di_tx_cb_df = (uintptr_t)softmac_client_tx_notify;
234 		direct->di_tx_cb_dh = NULL;
235 		sup->su_direct = B_TRUE;
236 		return (0);
237 
238 	case DLD_DISABLE:
239 		if (!sup->su_direct)
240 			return (0);
241 
242 		slp->sl_rxinfo = &sup->su_rxinfo;
243 		sup->su_direct = B_FALSE;
244 		return (0);
245 	}
246 	return (ENOTSUP);
247 }
248 
249 static int
250 softmac_dld_capab(softmac_upper_t *sup, uint_t type, void *data, uint_t flags)
251 {
252 	int	err;
253 
254 	/*
255 	 * Don't enable direct callback capabilities unless the caller is
256 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
257 	 * the stack initiates capability disable, but due to races, the
258 	 * module insertion may complete before the capability disable
259 	 * completes. So we limit the check to DLD_ENABLE case.
260 	 */
261 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
262 	    !check_ip_above(sup->su_rq)) {
263 		return (ENOTSUP);
264 	}
265 
266 	switch (type) {
267 	case DLD_CAPAB_DIRECT:
268 		err = softmac_capab_direct(sup, data, flags);
269 		break;
270 
271 	case DLD_CAPAB_PERIM:
272 		err = softmac_capab_perim(sup, data, flags);
273 		break;
274 
275 	default:
276 		err = ENOTSUP;
277 		break;
278 	}
279 	return (err);
280 }
281 
282 static void
283 softmac_capability_advertise(softmac_upper_t *sup, mblk_t *mp)
284 {
285 	dl_capability_ack_t	*dlap;
286 	dl_capability_sub_t	*dlsp;
287 	t_uscalar_t		subsize;
288 	uint8_t			*ptr;
289 	queue_t			*q = sup->su_wq;
290 	mblk_t			*mp1;
291 	softmac_t		*softmac = sup->su_softmac;
292 	boolean_t		dld_capable = B_FALSE;
293 	boolean_t		hcksum_capable = B_FALSE;
294 	boolean_t		zcopy_capable = B_FALSE;
295 	boolean_t		mdt_capable = B_FALSE;
296 
297 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
298 
299 	/*
300 	 * Initially assume no capabilities.
301 	 */
302 	subsize = 0;
303 
304 	/*
305 	 * Direct capability negotiation interface between IP and softmac
306 	 */
307 	if (check_ip_above(sup->su_rq)) {
308 		dld_capable = B_TRUE;
309 		subsize += sizeof (dl_capability_sub_t) +
310 		    sizeof (dl_capab_dld_t);
311 	}
312 
313 	/*
314 	 * Check if checksum offload is supported on this MAC.
315 	 */
316 	if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) {
317 		hcksum_capable = B_TRUE;
318 		subsize += sizeof (dl_capability_sub_t) +
319 		    sizeof (dl_capab_hcksum_t);
320 	}
321 
322 	/*
323 	 * Check if zerocopy is supported on this interface.
324 	 */
325 	if (!(softmac->smac_capab_flags & MAC_CAPAB_NO_ZCOPY)) {
326 		zcopy_capable = B_TRUE;
327 		subsize += sizeof (dl_capability_sub_t) +
328 		    sizeof (dl_capab_zerocopy_t);
329 	}
330 
331 	if (softmac->smac_mdt) {
332 		mdt_capable = B_TRUE;
333 		subsize += sizeof (dl_capability_sub_t) +
334 		    sizeof (dl_capab_mdt_t);
335 	}
336 
337 	/*
338 	 * If there are no capabilities to advertise or if we
339 	 * can't allocate a response, send a DL_ERROR_ACK.
340 	 */
341 	if ((subsize == 0) || (mp1 = reallocb(mp,
342 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
343 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
344 		return;
345 	}
346 
347 	mp = mp1;
348 	DB_TYPE(mp) = M_PROTO;
349 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
350 	bzero(mp->b_rptr, MBLKL(mp));
351 	dlap = (dl_capability_ack_t *)mp->b_rptr;
352 	dlap->dl_primitive = DL_CAPABILITY_ACK;
353 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
354 	dlap->dl_sub_length = subsize;
355 	ptr = (uint8_t *)&dlap[1];
356 
357 	/*
358 	 * IP polling interface.
359 	 */
360 	if (dld_capable) {
361 		dl_capab_dld_t		dld;
362 
363 		dlsp = (dl_capability_sub_t *)ptr;
364 		dlsp->dl_cap = DL_CAPAB_DLD;
365 		dlsp->dl_length = sizeof (dl_capab_dld_t);
366 		ptr += sizeof (dl_capability_sub_t);
367 
368 		bzero(&dld, sizeof (dl_capab_dld_t));
369 		dld.dld_version = DLD_CURRENT_VERSION;
370 		dld.dld_capab = (uintptr_t)softmac_dld_capab;
371 		dld.dld_capab_handle = (uintptr_t)sup;
372 
373 		dlcapabsetqid(&(dld.dld_mid), sup->su_rq);
374 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
375 		ptr += sizeof (dl_capab_dld_t);
376 	}
377 
378 	/*
379 	 * TCP/IP checksum offload.
380 	 */
381 	if (hcksum_capable) {
382 		dl_capab_hcksum_t	hcksum;
383 
384 		dlsp = (dl_capability_sub_t *)ptr;
385 
386 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
387 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
388 		ptr += sizeof (dl_capability_sub_t);
389 
390 		bzero(&hcksum, sizeof (dl_capab_hcksum_t));
391 		hcksum.hcksum_version = HCKSUM_VERSION_1;
392 		hcksum.hcksum_txflags = softmac->smac_hcksum_txflags;
393 		dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
394 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
395 		ptr += sizeof (dl_capab_hcksum_t);
396 	}
397 
398 	/*
399 	 * Zero copy
400 	 */
401 	if (zcopy_capable) {
402 		dl_capab_zerocopy_t	zcopy;
403 
404 		dlsp = (dl_capability_sub_t *)ptr;
405 
406 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
407 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
408 		ptr += sizeof (dl_capability_sub_t);
409 
410 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
411 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
412 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
413 		dlcapabsetqid(&(zcopy.zerocopy_mid), sup->su_rq);
414 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
415 		ptr += sizeof (dl_capab_zerocopy_t);
416 	}
417 
418 	/*
419 	 * MDT
420 	 */
421 	if (mdt_capable) {
422 		dl_capab_mdt_t mdt;
423 
424 		dlsp = (dl_capability_sub_t *)ptr;
425 
426 		dlsp->dl_cap = DL_CAPAB_MDT;
427 		dlsp->dl_length = sizeof (dl_capab_mdt_t);
428 		ptr += sizeof (dl_capability_sub_t);
429 
430 		bzero(&mdt, sizeof (dl_capab_mdt_t));
431 		mdt.mdt_version = MDT_VERSION_2;
432 		mdt.mdt_flags = DL_CAPAB_MDT_ENABLE;
433 		mdt.mdt_hdr_head = softmac->smac_mdt_capab.mdt_hdr_head;
434 		mdt.mdt_hdr_tail = softmac->smac_mdt_capab.mdt_hdr_tail;
435 		mdt.mdt_max_pld = softmac->smac_mdt_capab.mdt_max_pld;
436 		mdt.mdt_span_limit = softmac->smac_mdt_capab.mdt_span_limit;
437 		dlcapabsetqid(&(mdt.mdt_mid), sup->su_rq);
438 		bcopy(&mdt, ptr, sizeof (dl_capab_mdt_t));
439 		ptr += sizeof (dl_capab_mdt_t);
440 	}
441 
442 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
443 	qreply(q, mp);
444 }
445 
446 static void
447 softmac_capability_req(softmac_upper_t *sup, mblk_t *mp)
448 {
449 	dl_capability_req_t	*dlp = (dl_capability_req_t *)mp->b_rptr;
450 	dl_capability_sub_t	*sp;
451 	size_t			size, len;
452 	offset_t		off, end;
453 	t_uscalar_t		dl_err;
454 	queue_t			*q = sup->su_wq;
455 
456 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
457 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
458 		dl_err = DL_BADPRIM;
459 		goto failed;
460 	}
461 
462 	if (!sup->su_bound) {
463 		dl_err = DL_OUTSTATE;
464 		goto failed;
465 	}
466 
467 	/*
468 	 * This request is overloaded. If there are no requested capabilities
469 	 * then we just want to acknowledge with all the capabilities we
470 	 * support. Otherwise we enable the set of capabilities requested.
471 	 */
472 	if (dlp->dl_sub_length == 0) {
473 		softmac_capability_advertise(sup, mp);
474 		return;
475 	}
476 
477 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
478 		dl_err = DL_BADPRIM;
479 		goto failed;
480 	}
481 
482 	dlp->dl_primitive = DL_CAPABILITY_ACK;
483 
484 	off = dlp->dl_sub_offset;
485 	len = dlp->dl_sub_length;
486 
487 	/*
488 	 * Walk the list of capabilities to be enabled.
489 	 */
490 	for (end = off + len; off < end; ) {
491 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
492 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
493 
494 		if (off + size > end ||
495 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
496 			dl_err = DL_BADPRIM;
497 			goto failed;
498 		}
499 
500 		switch (sp->dl_cap) {
501 		/*
502 		 * TCP/IP checksum offload to hardware.
503 		 */
504 		case DL_CAPAB_HCKSUM: {
505 			dl_capab_hcksum_t *hcksump;
506 			dl_capab_hcksum_t hcksum;
507 
508 			hcksump = (dl_capab_hcksum_t *)&sp[1];
509 			/*
510 			 * Copy for alignment.
511 			 */
512 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
513 			dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
514 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
515 			break;
516 		}
517 
518 		default:
519 			break;
520 		}
521 
522 		off += size;
523 	}
524 	qreply(q, mp);
525 	return;
526 failed:
527 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
528 }
529 
530 static void
531 softmac_bind_req(softmac_upper_t *sup, mblk_t *mp)
532 {
533 	softmac_lower_t	*slp = sup->su_slp;
534 	softmac_t	*softmac = sup->su_softmac;
535 	mblk_t		*ackmp, *mp1;
536 	int		err;
537 
538 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
539 		freemsg(mp);
540 		return;
541 	}
542 
543 	/*
544 	 * Allocate ackmp incase the underlying driver does not ack timely.
545 	 */
546 	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
547 		dlerrorack(sup->su_wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
548 		return;
549 	}
550 
551 	err = softmac_output(slp, mp, DL_BIND_REQ, DL_BIND_ACK, &ackmp);
552 	if (ackmp != NULL) {
553 		freemsg(mp1);
554 	} else {
555 		/*
556 		 * The driver does not ack timely.
557 		 */
558 		ASSERT(err == ENOMSG);
559 		ackmp = mp1;
560 	}
561 	if (err != 0)
562 		goto failed;
563 
564 	/*
565 	 * Enable capabilities the underlying driver claims to support.
566 	 */
567 	if ((err = softmac_capab_enable(slp)) != 0)
568 		goto failed;
569 
570 	/*
571 	 * Check whether this softmac is already marked as exclusively used,
572 	 * e.g., an aggregation is created over it. Fail the BIND_REQ if so.
573 	 */
574 	mutex_enter(&softmac->smac_active_mutex);
575 	if (softmac->smac_active) {
576 		mutex_exit(&softmac->smac_active_mutex);
577 		err = EBUSY;
578 		goto failed;
579 	}
580 	softmac->smac_nactive++;
581 	sup->su_active = B_TRUE;
582 	mutex_exit(&softmac->smac_active_mutex);
583 	sup->su_bound = B_TRUE;
584 
585 	qreply(sup->su_wq, ackmp);
586 	return;
587 failed:
588 	if (err != 0) {
589 		dlerrorack(sup->su_wq, ackmp, DL_BIND_REQ, DL_SYSERR, err);
590 		return;
591 	}
592 }
593 
594 static void
595 softmac_unbind_req(softmac_upper_t *sup, mblk_t *mp)
596 {
597 	softmac_lower_t	*slp = sup->su_slp;
598 	softmac_t	*softmac = sup->su_softmac;
599 	mblk_t		*ackmp, *mp1;
600 	int		err;
601 
602 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
603 		freemsg(mp);
604 		return;
605 	}
606 
607 	if (!sup->su_bound) {
608 		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
609 		return;
610 	}
611 
612 	/*
613 	 * Allocate ackmp incase the underlying driver does not ack timely.
614 	 */
615 	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
616 		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
617 		return;
618 	}
619 
620 	err = softmac_output(slp, mp, DL_UNBIND_REQ, DL_OK_ACK, &ackmp);
621 	if (ackmp != NULL) {
622 		freemsg(mp1);
623 	} else {
624 		/*
625 		 * The driver does not ack timely.
626 		 */
627 		ASSERT(err == ENOMSG);
628 		ackmp = mp1;
629 	}
630 	if (err != 0) {
631 		dlerrorack(sup->su_wq, ackmp, DL_UNBIND_REQ, DL_SYSERR, err);
632 		return;
633 	}
634 
635 	sup->su_bound = B_FALSE;
636 
637 	mutex_enter(&softmac->smac_active_mutex);
638 	if (sup->su_active) {
639 		ASSERT(!softmac->smac_active);
640 		softmac->smac_nactive--;
641 		sup->su_active = B_FALSE;
642 	}
643 	mutex_exit(&softmac->smac_active_mutex);
644 
645 done:
646 	qreply(sup->su_wq, ackmp);
647 }
648 
649 /*
650  * Process the non-data mblk.
651  */
652 static void
653 softmac_wput_single_nondata(softmac_upper_t *sup, mblk_t *mp)
654 {
655 	softmac_t *softmac = sup->su_softmac;
656 	softmac_lower_t	*slp = sup->su_slp;
657 	unsigned char	dbtype;
658 	t_uscalar_t	prim;
659 
660 	dbtype = DB_TYPE(mp);
661 	switch (dbtype) {
662 	case M_IOCTL:
663 	case M_CTL: {
664 		uint32_t	expected_mode;
665 
666 		if (((struct iocblk *)(mp->b_rptr))->ioc_cmd != SIOCSLIFNAME)
667 			break;
668 
669 		/*
670 		 * Nak the M_IOCTL based on the STREAMS specification.
671 		 */
672 		if (dbtype == M_IOCTL)
673 			miocnak(sup->su_wq, mp, 0, EINVAL);
674 
675 		/*
676 		 * This stream is either IP or ARP. See whether
677 		 * we need to setup a dedicated-lower-stream for it.
678 		 */
679 		mutex_enter(&softmac->smac_fp_mutex);
680 
681 		expected_mode = DATAPATH_MODE(softmac);
682 		if (expected_mode == SOFTMAC_SLOWPATH)
683 			sup->su_mode = SOFTMAC_SLOWPATH;
684 		list_insert_head(&softmac->smac_sup_list, sup);
685 		mutex_exit(&softmac->smac_fp_mutex);
686 
687 		/*
688 		 * Setup the fast-path dedicated lower stream if fast-path
689 		 * is expected. Note that no lock is held here, and if
690 		 * smac_expected_mode is changed from SOFTMAC_FASTPATH to
691 		 * SOFTMAC_SLOWPATH, the DL_NOTE_REPLUMB message used for
692 		 * data-path switching would already be queued and will
693 		 * be processed by softmac_wput_single_nondata() later.
694 		 */
695 		if (expected_mode == SOFTMAC_FASTPATH)
696 			(void) softmac_fastpath_setup(sup);
697 		return;
698 	}
699 	case M_PROTO:
700 	case M_PCPROTO:
701 		if (MBLKL(mp) < sizeof (t_uscalar_t)) {
702 			freemsg(mp);
703 			return;
704 		}
705 		prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
706 		switch (prim) {
707 		case DL_NOTIFY_IND:
708 			if (MBLKL(mp) < sizeof (dl_notify_ind_t) ||
709 			    ((dl_notify_ind_t *)mp->b_rptr)->dl_notification !=
710 			    DL_NOTE_REPLUMB) {
711 				freemsg(mp);
712 				return;
713 			}
714 			/*
715 			 * This DL_NOTE_REPLUMB message is initiated
716 			 * and queued by the softmac itself, when the
717 			 * sup is trying to switching its datapath mode
718 			 * between SOFTMAC_SLOWPATH and SOFTMAC_FASTPATH.
719 			 * Send this message upstream.
720 			 */
721 			qreply(sup->su_wq, mp);
722 			return;
723 		case DL_NOTIFY_CONF:
724 			if (MBLKL(mp) < sizeof (dl_notify_conf_t) ||
725 			    ((dl_notify_conf_t *)mp->b_rptr)->dl_notification !=
726 			    DL_NOTE_REPLUMB_DONE) {
727 				freemsg(mp);
728 				return;
729 			}
730 			/*
731 			 * This is an indication from IP/ARP that the
732 			 * fastpath->slowpath switch is done.
733 			 */
734 			freemsg(mp);
735 			softmac_datapath_switch_done(sup);
736 			return;
737 		}
738 		break;
739 	}
740 
741 	/*
742 	 * No need to hold lock to check su_mode, since su_mode updating only
743 	 * operation is is serialized by softmac_wput_nondata_task().
744 	 */
745 	if (sup->su_mode != SOFTMAC_FASTPATH) {
746 		dld_wput(sup->su_wq, mp);
747 		return;
748 	}
749 
750 	/*
751 	 * Fastpath non-data message processing. Most of non-data messages
752 	 * can be directly passed down to the dedicated-lower-stream, aside
753 	 * from the following M_PROTO/M_PCPROTO messages.
754 	 */
755 	switch (dbtype) {
756 	case M_PROTO:
757 	case M_PCPROTO:
758 		switch (prim) {
759 		case DL_BIND_REQ:
760 			softmac_bind_req(sup, mp);
761 			break;
762 		case DL_UNBIND_REQ:
763 			softmac_unbind_req(sup, mp);
764 			break;
765 		case DL_CAPABILITY_REQ:
766 			softmac_capability_req(sup, mp);
767 			break;
768 		default:
769 			putnext(slp->sl_wq, mp);
770 			break;
771 		}
772 		break;
773 	default:
774 		putnext(slp->sl_wq, mp);
775 		break;
776 	}
777 }
778 
779 /*
780  * The worker thread which processes non-data messages. Note we only process
781  * one message at one time in order to be able to "flush" the queued message
782  * and serialize the processing.
783  */
784 static void
785 softmac_wput_nondata_task(void *arg)
786 {
787 	softmac_upper_t	*sup = arg;
788 	mblk_t		*mp;
789 
790 	mutex_enter(&sup->su_disp_mutex);
791 
792 	while (sup->su_pending_head != NULL) {
793 		if (sup->su_closing)
794 			break;
795 
796 		SOFTMAC_DQ_PENDING(sup, &mp);
797 		mutex_exit(&sup->su_disp_mutex);
798 		softmac_wput_single_nondata(sup, mp);
799 		mutex_enter(&sup->su_disp_mutex);
800 	}
801 
802 	/*
803 	 * If the stream is closing, flush all queued messages and inform
804 	 * the stream to be closed.
805 	 */
806 	freemsgchain(sup->su_pending_head);
807 	sup->su_pending_head = sup->su_pending_tail = NULL;
808 	sup->su_dlpi_pending = B_FALSE;
809 	cv_signal(&sup->su_disp_cv);
810 	mutex_exit(&sup->su_disp_mutex);
811 }
812 
813 /*
814  * Kernel thread to handle taskq dispatch failures in softmac_wput_nondata().
815  * This thread is started when the softmac module is first loaded.
816  */
817 static void
818 softmac_taskq_dispatch(void)
819 {
820 	callb_cpr_t	cprinfo;
821 	softmac_upper_t	*sup;
822 
823 	CALLB_CPR_INIT(&cprinfo, &softmac_taskq_lock, callb_generic_cpr,
824 	    "softmac_taskq_dispatch");
825 	mutex_enter(&softmac_taskq_lock);
826 
827 	while (!softmac_taskq_quit) {
828 		sup = list_head(&softmac_taskq_list);
829 		while (sup != NULL) {
830 			list_remove(&softmac_taskq_list, sup);
831 			sup->su_taskq_scheduled = B_FALSE;
832 			mutex_exit(&softmac_taskq_lock);
833 			VERIFY(taskq_dispatch(system_taskq,
834 			    softmac_wput_nondata_task, sup, TQ_SLEEP) != NULL);
835 			mutex_enter(&softmac_taskq_lock);
836 			sup = list_head(&softmac_taskq_list);
837 		}
838 
839 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
840 		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
841 		CALLB_CPR_SAFE_END(&cprinfo, &softmac_taskq_lock);
842 	}
843 
844 	softmac_taskq_done = B_TRUE;
845 	cv_signal(&softmac_taskq_cv);
846 	CALLB_CPR_EXIT(&cprinfo);
847 	thread_exit();
848 }
849 
850 void
851 softmac_wput_nondata(softmac_upper_t *sup, mblk_t *mp)
852 {
853 	/*
854 	 * The processing of the message might block. Enqueue the
855 	 * message for later processing.
856 	 */
857 	mutex_enter(&sup->su_disp_mutex);
858 
859 	if (sup->su_closing) {
860 		mutex_exit(&sup->su_disp_mutex);
861 		freemsg(mp);
862 		return;
863 	}
864 
865 	SOFTMAC_EQ_PENDING(sup, mp);
866 
867 	if (sup->su_dlpi_pending) {
868 		mutex_exit(&sup->su_disp_mutex);
869 		return;
870 	}
871 	sup->su_dlpi_pending = B_TRUE;
872 	mutex_exit(&sup->su_disp_mutex);
873 
874 	if (taskq_dispatch(system_taskq, softmac_wput_nondata_task,
875 	    sup, TQ_NOSLEEP) != NULL) {
876 		return;
877 	}
878 
879 	mutex_enter(&softmac_taskq_lock);
880 	if (!sup->su_taskq_scheduled) {
881 		list_insert_tail(&softmac_taskq_list, sup);
882 		cv_signal(&softmac_taskq_cv);
883 	}
884 	sup->su_taskq_scheduled = B_TRUE;
885 	mutex_exit(&softmac_taskq_lock);
886 }
887 
888 /*
889  * Setup the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
890  */
891 static int
892 softmac_fastpath_setup(softmac_upper_t *sup)
893 {
894 	softmac_t	*softmac = sup->su_softmac;
895 	softmac_lower_t	*slp;
896 	int		err;
897 
898 	err = softmac_lower_setup(softmac, sup, &slp);
899 
900 	mutex_enter(&sup->su_mutex);
901 	/*
902 	 * Wait for all data messages to be processed so that we can change
903 	 * the su_mode.
904 	 */
905 	while (sup->su_tx_inprocess != 0)
906 		cv_wait(&sup->su_cv, &sup->su_mutex);
907 
908 	ASSERT(sup->su_mode != SOFTMAC_FASTPATH);
909 	ASSERT(sup->su_slp == NULL);
910 	if (err != 0) {
911 		sup->su_mode = SOFTMAC_SLOWPATH;
912 	} else {
913 		sup->su_slp = slp;
914 		sup->su_mode = SOFTMAC_FASTPATH;
915 	}
916 	mutex_exit(&sup->su_mutex);
917 	return (err);
918 }
919 
920 /*
921  * Tear down the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
922  */
923 static void
924 softmac_fastpath_tear(softmac_upper_t *sup)
925 {
926 	mutex_enter(&sup->su_mutex);
927 	/*
928 	 * Wait for all data messages in the dedicated-lower-stream
929 	 * to be processed.
930 	 */
931 	while (sup->su_tx_inprocess != 0)
932 		cv_wait(&sup->su_cv, &sup->su_mutex);
933 
934 	if (sup->su_tx_busy) {
935 		ASSERT(sup->su_tx_flow_mp == NULL);
936 		sup->su_tx_flow_mp = getq(sup->su_wq);
937 		sup->su_tx_busy = B_FALSE;
938 	}
939 
940 	sup->su_mode = SOFTMAC_SLOWPATH;
941 
942 	/*
943 	 * Destroy the dedicated-lower-stream. Note that slp is destroyed
944 	 * when lh is closed.
945 	 */
946 	(void) ldi_close(sup->su_slp->sl_lh, FREAD|FWRITE, kcred);
947 	sup->su_slp = NULL;
948 	mutex_exit(&sup->su_mutex);
949 }
950 
951 void
952 softmac_wput_data(softmac_upper_t *sup, mblk_t *mp)
953 {
954 	/*
955 	 * No lock is required to access the su_mode field since the data
956 	 * traffic is quiesce by IP when the data-path mode is in the
957 	 * process of switching.
958 	 */
959 	if (sup->su_mode != SOFTMAC_FASTPATH)
960 		dld_wput(sup->su_wq, mp);
961 	else
962 		(void) softmac_fastpath_wput_data(sup, mp, NULL, 0);
963 }
964 
965 /*ARGSUSED*/
966 static mac_tx_cookie_t
967 softmac_fastpath_wput_data(softmac_upper_t *sup, mblk_t *mp, uintptr_t f_hint,
968     uint16_t flag)
969 {
970 	queue_t		*wq = sup->su_slp->sl_wq;
971 
972 	/*
973 	 * This function is called from IP, only the MAC_DROP_ON_NO_DESC
974 	 * flag can be specified.
975 	 */
976 	ASSERT((flag & ~MAC_DROP_ON_NO_DESC) == 0);
977 	ASSERT(mp->b_next == NULL);
978 
979 	/*
980 	 * Check wether the dedicated-lower-stream is able to handle more
981 	 * messages, and enable the flow-control if it is not.
982 	 *
983 	 * Note that in order not to introduce any packet reordering, we
984 	 * always send the message down to the dedicated-lower-stream:
985 	 *
986 	 * If the flow-control is already enabled, but we still get
987 	 * the messages from the upper-stream, it means that the upper
988 	 * stream does not respect STREAMS flow-control (e.g., TCP). Simply
989 	 * pass the message down to the lower-stream in that case.
990 	 */
991 	if (SOFTMAC_CANPUTNEXT(wq)) {
992 		putnext(wq, mp);
993 		return (NULL);
994 	}
995 
996 	if ((flag & MAC_DROP_ON_NO_DESC) != 0) {
997 		freemsg(mp);
998 		return ((mac_tx_cookie_t)wq);
999 	}
1000 
1001 	if (sup->su_tx_busy) {
1002 		putnext(wq, mp);
1003 		return ((mac_tx_cookie_t)wq);
1004 	}
1005 
1006 	mutex_enter(&sup->su_mutex);
1007 	if (!sup->su_tx_busy) {
1008 		ASSERT(sup->su_tx_flow_mp != NULL);
1009 		(void) putq(sup->su_wq, sup->su_tx_flow_mp);
1010 		sup->su_tx_flow_mp = NULL;
1011 		sup->su_tx_busy = B_TRUE;
1012 		qenable(wq);
1013 	}
1014 	mutex_exit(&sup->su_mutex);
1015 	putnext(wq, mp);
1016 	return ((mac_tx_cookie_t)wq);
1017 }
1018 
1019 boolean_t
1020 softmac_active_set(void *arg)
1021 {
1022 	softmac_t	*softmac = arg;
1023 
1024 	mutex_enter(&softmac->smac_active_mutex);
1025 	if (softmac->smac_nactive != 0) {
1026 		mutex_exit(&softmac->smac_active_mutex);
1027 		return (B_FALSE);
1028 	}
1029 	softmac->smac_active = B_TRUE;
1030 	mutex_exit(&softmac->smac_active_mutex);
1031 	return (B_TRUE);
1032 }
1033 
1034 void
1035 softmac_active_clear(void *arg)
1036 {
1037 	softmac_t	*softmac = arg;
1038 
1039 	mutex_enter(&softmac->smac_active_mutex);
1040 	ASSERT(softmac->smac_active && (softmac->smac_nactive == 0));
1041 	softmac->smac_active = B_FALSE;
1042 	mutex_exit(&softmac->smac_active_mutex);
1043 }
1044 
1045 /*
1046  * Disable/reenable fastpath on given softmac. This request could come from a
1047  * MAC client or directly from administrators.
1048  */
1049 int
1050 softmac_datapath_switch(softmac_t *softmac, boolean_t disable, boolean_t admin)
1051 {
1052 	softmac_upper_t		*sup;
1053 	mblk_t			*head = NULL, *tail = NULL, *mp;
1054 	list_t			reqlist;
1055 	softmac_switch_req_t	*req;
1056 	uint32_t		current_mode, expected_mode;
1057 	int			err = 0;
1058 
1059 	mutex_enter(&softmac->smac_fp_mutex);
1060 
1061 	current_mode = DATAPATH_MODE(softmac);
1062 	if (admin) {
1063 		if (softmac->smac_fastpath_admin_disabled == disable) {
1064 			mutex_exit(&softmac->smac_fp_mutex);
1065 			return (0);
1066 		}
1067 		softmac->smac_fastpath_admin_disabled = disable;
1068 	} else if (disable) {
1069 		softmac->smac_fp_disable_clients++;
1070 	} else {
1071 		ASSERT(softmac->smac_fp_disable_clients != 0);
1072 		softmac->smac_fp_disable_clients--;
1073 	}
1074 
1075 	expected_mode = DATAPATH_MODE(softmac);
1076 	if (current_mode == expected_mode) {
1077 		mutex_exit(&softmac->smac_fp_mutex);
1078 		return (0);
1079 	}
1080 
1081 	/*
1082 	 * The expected mode is different from whatever datapath mode
1083 	 * this softmac is expected from last request, enqueue the data-path
1084 	 * switch request.
1085 	 */
1086 	list_create(&reqlist, sizeof (softmac_switch_req_t),
1087 	    offsetof(softmac_switch_req_t, ssq_req_list_node));
1088 
1089 	/*
1090 	 * Allocate all DL_NOTIFY_IND messages and request structures that
1091 	 * are required to switch each IP/ARP stream to the expected mode.
1092 	 */
1093 	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
1094 	    sup = list_next(&softmac->smac_sup_list, sup)) {
1095 		dl_notify_ind_t	*dlip;
1096 
1097 		req = kmem_alloc(sizeof (softmac_switch_req_t), KM_NOSLEEP);
1098 		if (req == NULL)
1099 			break;
1100 
1101 		req->ssq_expected_mode = expected_mode;
1102 
1103 		/*
1104 		 * Allocate the DL_NOTE_REPLUMB message.
1105 		 */
1106 		if ((mp = allocb(sizeof (dl_notify_ind_t), BPRI_LO)) == NULL) {
1107 			kmem_free(req, sizeof (softmac_switch_req_t));
1108 			break;
1109 		}
1110 
1111 		list_insert_tail(&reqlist, req);
1112 
1113 		mp->b_wptr = mp->b_rptr + sizeof (dl_notify_ind_t);
1114 		mp->b_datap->db_type = M_PROTO;
1115 		bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1116 		dlip = (dl_notify_ind_t *)mp->b_rptr;
1117 		dlip->dl_primitive = DL_NOTIFY_IND;
1118 		dlip->dl_notification = DL_NOTE_REPLUMB;
1119 		if (head == NULL) {
1120 			head = tail = mp;
1121 		} else {
1122 			tail->b_next = mp;
1123 			tail = mp;
1124 		}
1125 	}
1126 
1127 	/*
1128 	 * Note that it is fine if the expected data-path mode is fast-path
1129 	 * and some of streams fails to switch. Only return failure if we
1130 	 * are expected to switch to the slow-path.
1131 	 */
1132 	if (sup != NULL && expected_mode == SOFTMAC_SLOWPATH) {
1133 		err = ENOMEM;
1134 		goto fail;
1135 	}
1136 
1137 	/*
1138 	 * Start switching for each IP/ARP stream. The switching operation
1139 	 * will eventually succeed and there is no need to wait for it
1140 	 * to finish.
1141 	 */
1142 	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
1143 	    sup = list_next(&softmac->smac_sup_list, sup)) {
1144 		mp = head->b_next;
1145 		head->b_next = NULL;
1146 
1147 		/*
1148 		 * Add the swtich request to the requests list of the stream.
1149 		 */
1150 		req = list_head(&reqlist);
1151 		ASSERT(req != NULL);
1152 		list_remove(&reqlist, req);
1153 		list_insert_tail(&sup->su_req_list, req);
1154 		softmac_wput_nondata(sup, head);
1155 		head = mp;
1156 	}
1157 
1158 	mutex_exit(&softmac->smac_fp_mutex);
1159 	ASSERT(list_is_empty(&reqlist));
1160 	list_destroy(&reqlist);
1161 	return (0);
1162 fail:
1163 	if (admin) {
1164 		softmac->smac_fastpath_admin_disabled = !disable;
1165 	} else if (disable) {
1166 		softmac->smac_fp_disable_clients--;
1167 	} else {
1168 		softmac->smac_fp_disable_clients++;
1169 	}
1170 
1171 	mutex_exit(&softmac->smac_fp_mutex);
1172 	while ((req = list_head(&reqlist)) != NULL) {
1173 		list_remove(&reqlist, req);
1174 		kmem_free(req, sizeof (softmac_switch_req_t));
1175 	}
1176 	freemsgchain(head);
1177 	list_destroy(&reqlist);
1178 	return (err);
1179 }
1180 
1181 int
1182 softmac_fastpath_disable(void *arg)
1183 {
1184 	return (softmac_datapath_switch((softmac_t *)arg, B_TRUE, B_FALSE));
1185 }
1186 
1187 void
1188 softmac_fastpath_enable(void *arg)
1189 {
1190 	VERIFY(softmac_datapath_switch((softmac_t *)arg, B_FALSE,
1191 	    B_FALSE) == 0);
1192 }
1193 
1194 void
1195 softmac_upperstream_close(softmac_upper_t *sup)
1196 {
1197 	softmac_t		*softmac = sup->su_softmac;
1198 	softmac_switch_req_t	*req;
1199 
1200 	mutex_enter(&softmac->smac_fp_mutex);
1201 
1202 	if (sup->su_mode == SOFTMAC_FASTPATH)
1203 		softmac_fastpath_tear(sup);
1204 
1205 	if (sup->su_mode != SOFTMAC_UNKNOWN) {
1206 		list_remove(&softmac->smac_sup_list, sup);
1207 		sup->su_mode = SOFTMAC_UNKNOWN;
1208 	}
1209 
1210 	/*
1211 	 * Cleanup all the switch requests queueed on this stream.
1212 	 */
1213 	while ((req = list_head(&sup->su_req_list)) != NULL) {
1214 		list_remove(&sup->su_req_list, req);
1215 		kmem_free(req, sizeof (softmac_switch_req_t));
1216 	}
1217 	mutex_exit(&softmac->smac_fp_mutex);
1218 }
1219 
1220 /*
1221  * Handle the DL_NOTE_REPLUMB_DONE indication from IP/ARP. Change the upper
1222  * stream from the fastpath mode to the slowpath mode.
1223  */
1224 static void
1225 softmac_datapath_switch_done(softmac_upper_t *sup)
1226 {
1227 	softmac_t		*softmac = sup->su_softmac;
1228 	softmac_switch_req_t	*req;
1229 	uint32_t		expected_mode;
1230 
1231 	mutex_enter(&softmac->smac_fp_mutex);
1232 	req = list_head(&sup->su_req_list);
1233 	list_remove(&sup->su_req_list, req);
1234 	expected_mode = req->ssq_expected_mode;
1235 	kmem_free(req, sizeof (softmac_switch_req_t));
1236 
1237 	if (expected_mode == sup->su_mode) {
1238 		mutex_exit(&softmac->smac_fp_mutex);
1239 		return;
1240 	}
1241 
1242 	ASSERT(!sup->su_bound);
1243 	mutex_exit(&softmac->smac_fp_mutex);
1244 
1245 	/*
1246 	 * It is fine if the expected mode is fast-path and we fail
1247 	 * to enable fastpath on this stream.
1248 	 */
1249 	if (expected_mode == SOFTMAC_SLOWPATH)
1250 		softmac_fastpath_tear(sup);
1251 	else
1252 		(void) softmac_fastpath_setup(sup);
1253 }
1254