1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Softmac data-path switching:
28  *
29  * - Fast-path model
30  *
31  * When the softmac fast-path is used, a dedicated lower-stream
32  * will be opened over the legacy device for each IP/ARP (upper-)stream
33  * over the softMAC, and all DLPI messages (including control messages
34  * and data messages) will be exchanged between the upper-stream and
35  * the corresponding lower-stream directly. Therefore, the data
36  * demultiplexing, filtering and classification processing will be done
37  * by the lower-stream, and the GLDv3 DLS/MAC layer processing will be
38  * no longer needed.
39  *
40  * - Slow-path model
41  *
 * Some GLDv3 features require that the GLDv3 DLS/MAC layer processing
 * not be bypassed in order to function correctly. For example,
44  * softmac fast-path must be disabled to support GLDv3 VNIC functionality.
45  * In this case, a shared lower-stream will be opened over the legacy
46  * device, which is responsible for implementing the GLDv3 callbacks
47  * and passing RAW data messages between the legacy devices and the GLDv3
48  * framework.
49  *
50  * By default, the softmac fast-path mode will be used to assure the
51  * performance; MAC clients will be able to request to disable the softmac
52  * fast-path mode to support certain features, and if that succeeds,
53  * the system will fallback to the slow-path softmac data-path model.
54  *
55  *
 * The details of the softmac data fast-path model are described below:
57  *
 * 1. When a stream is opened on a softMAC, the softmac module will take
 *    over the DLPI processing on this stream;
60  *
61  * 2. For IP/ARP streams over a softMAC, softmac data fast-path will be
62  *    used by default, unless fast-path is disabled by any MAC client
63  *    explicitly. The softmac module first identifies an IP/ARP stream
64  *    by seeing whether there is a SIOCSLIFNAME ioctl sent from upstream,
65  *    if there is one, this stream is either an IP or an ARP stream
66  *    and will use fast-path potentially;
67  *
 * 3. When the softmac fast-path is used, a dedicated lower-stream will
 *    be set up for each IP/ARP stream (1-1 mapping). From that point on,
70  *    all control and data messages will be exchanged between the IP/ARP
71  *    upper-stream and the legacy device through this dedicated
72  *    lower-stream. As a result, the DLS/MAC layer processing in GLDv3
73  *    will be skipped, and this greatly improves the performance;
74  *
75  * 4. When the softmac data fast-path is disabled by a MAC client (e.g.,
76  *    by a VNIC), all the IP/ARP upper streams will try to switch from
77  *    the fast-path to the slow-path. The dedicated lower-stream will be
78  *    destroyed, and all the control and data-messages will go through the
79  *    existing GLDv3 code path and (in the end) the shared lower-stream;
80  *
81  * 5. On the other hand, when the last MAC client cancels its fast-path
82  *    disable request, all the IP/ARP streams will try to switch back to
83  *    the fast-path mode;
84  *
 * Steps 4 and 5 both rely on the data-path mode switching process
 * described below:
87  *
88  * 1) To switch the softmac data-path mode (between fast-path and slow-path),
89  *    softmac will first send a DL_NOTE_REPLUMB DL_NOTIFY_IND message
 *    upstream over each IP/ARP stream that needs data-path mode switching;
91  *
92  * 2) When IP receives this DL_NOTE_REPLUMB message, it will bring down
93  *    all the IP interfaces on the corresponding ill (IP Lower level
94  *    structure), and bring up those interfaces over again; this will in
95  *    turn cause the ARP to "replumb" the interface.
96  *
 *    During the replumb process, both IP and ARP will send downstream the
 *    necessary DL_DISABMULTI_REQ and DL_UNBIND_REQ messages to clean up
 *    the old state of the underlying softMAC, followed by the necessary
 *    DL_BIND_REQ and DL_ENABMULTI_REQ messages to set up the new state.
 *    Between the cleanup and the re-setup, IP/ARP will also send down
 *    a DL_NOTE_REPLUMB_DONE DL_NOTIFY_CONF message to the softMAC to
 *    indicate the *switching point*;
104  *
105  * 3) When softmac receives the DL_NOTE_REPLUMB_DONE message, it either
106  *    creates or destroys the dedicated lower-stream (depending on which
 *    data-path mode the softMAC switches to), and changes the softmac
108  *    data-path mode. From then on, softmac will process all the succeeding
109  *    control messages (including the DL_BIND_REQ and DL_ENABMULTI_REQ
110  *    messages) and data messages based on new data-path mode.
111  */
112 
113 #include <sys/types.h>
114 #include <sys/disp.h>
115 #include <sys/callb.h>
116 #include <sys/sysmacros.h>
117 #include <sys/file.h>
118 #include <sys/vlan.h>
119 #include <sys/dld.h>
120 #include <sys/sockio.h>
121 #include <sys/softmac_impl.h>
122 #include <net/if.h>
123 
/*
 * softmac_taskq_lock and softmac_taskq_cv coordinate softmac_fp_fini(),
 * softmac_wput_nondata() and the softmac_taskq_dispatch() thread.
 */
static kmutex_t		softmac_taskq_lock;
static kcondvar_t	softmac_taskq_cv;
static list_t		softmac_taskq_list;	/* List of softmac_upper_t */
/* Set by softmac_fp_fini() to ask the dispatch thread to exit. */
boolean_t		softmac_taskq_quit;
/* Set by the dispatch thread just before it exits. */
boolean_t		softmac_taskq_done;

/* Forward declarations of functions defined later in this file. */
static void		softmac_taskq_dispatch();
static int		softmac_fastpath_setup(softmac_upper_t *);
static mac_tx_cookie_t	softmac_fastpath_wput_data(softmac_upper_t *, mblk_t *,
			    uintptr_t, uint16_t);
static void		softmac_datapath_switch_done(softmac_upper_t *);
135 
136 void
137 softmac_fp_init()
138 {
139 	mutex_init(&softmac_taskq_lock, NULL, MUTEX_DRIVER, NULL);
140 	cv_init(&softmac_taskq_cv, NULL, CV_DRIVER, NULL);
141 
142 	softmac_taskq_quit = B_FALSE;
143 	softmac_taskq_done = B_FALSE;
144 	list_create(&softmac_taskq_list, sizeof (softmac_upper_t),
145 	    offsetof(softmac_upper_t, su_taskq_list_node));
146 	(void) thread_create(NULL, 0, softmac_taskq_dispatch, NULL, 0,
147 	    &p0, TS_RUN, minclsyspri);
148 }
149 
150 void
151 softmac_fp_fini()
152 {
153 	/*
154 	 * Request the softmac_taskq thread to quit and wait for it to be done.
155 	 */
156 	mutex_enter(&softmac_taskq_lock);
157 	softmac_taskq_quit = B_TRUE;
158 	cv_signal(&softmac_taskq_cv);
159 	while (!softmac_taskq_done)
160 		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
161 	mutex_exit(&softmac_taskq_lock);
162 	list_destroy(&softmac_taskq_list);
163 
164 	mutex_destroy(&softmac_taskq_lock);
165 	cv_destroy(&softmac_taskq_cv);
166 }
167 
168 static boolean_t
169 check_ip_above(queue_t *q)
170 {
171 	queue_t		*next_q;
172 	boolean_t	ret = B_TRUE;
173 
174 	claimstr(q);
175 	next_q = q->q_next;
176 	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
177 		ret = B_FALSE;
178 	releasestr(q);
179 	return (ret);
180 }
181 
182 /* ARGSUSED */
183 static int
184 softmac_capab_perim(softmac_upper_t *sup, void *data, uint_t flags)
185 {
186 	switch (flags) {
187 	case DLD_ENABLE:
188 		mutex_enter(&sup->su_mutex);
189 		break;
190 	case DLD_DISABLE:
191 		mutex_exit(&sup->su_mutex);
192 		break;
193 	case DLD_QUERY:
194 		return (MUTEX_HELD(&sup->su_mutex));
195 	}
196 	return (0);
197 }
198 
199 /* ARGSUSED */
200 static mac_tx_notify_handle_t
201 softmac_client_tx_notify(void *txcb, mac_tx_notify_t func, void *arg)
202 {
203 	return (NULL);
204 }
205 
206 static int
207 softmac_capab_direct(softmac_upper_t *sup, void *data, uint_t flags)
208 {
209 	dld_capab_direct_t	*direct = data;
210 	softmac_lower_t		*slp = sup->su_slp;
211 
212 	ASSERT(MUTEX_HELD(&sup->su_mutex));
213 
214 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
215 
216 	switch (flags) {
217 	case DLD_ENABLE:
218 		if (sup->su_direct)
219 			return (0);
220 
221 		sup->su_direct_rxinfo.slr_rx = (softmac_rx_t)direct->di_rx_cf;
222 		sup->su_direct_rxinfo.slr_arg = direct->di_rx_ch;
223 		slp->sl_rxinfo = &sup->su_direct_rxinfo;
224 		direct->di_tx_df = (uintptr_t)softmac_fastpath_wput_data;
225 		direct->di_tx_dh = sup;
226 
227 		/*
228 		 * We relying on the STREAM flow-control to backenable
229 		 * the IP stream. Therefore, no notify callback needs to
230 		 * be registered. But IP requires this to be a valid function
231 		 * pointer.
232 		 */
233 		direct->di_tx_cb_df = (uintptr_t)softmac_client_tx_notify;
234 		direct->di_tx_cb_dh = NULL;
235 		sup->su_direct = B_TRUE;
236 		return (0);
237 
238 	case DLD_DISABLE:
239 		if (!sup->su_direct)
240 			return (0);
241 
242 		slp->sl_rxinfo = &sup->su_rxinfo;
243 		sup->su_direct = B_FALSE;
244 		return (0);
245 	}
246 	return (ENOTSUP);
247 }
248 
249 static int
250 softmac_dld_capab(softmac_upper_t *sup, uint_t type, void *data, uint_t flags)
251 {
252 	int	err;
253 
254 	/*
255 	 * Don't enable direct callback capabilities unless the caller is
256 	 * the IP client. When a module is inserted in a stream (_I_INSERT)
257 	 * the stack initiates capability disable, but due to races, the
258 	 * module insertion may complete before the capability disable
259 	 * completes. So we limit the check to DLD_ENABLE case.
260 	 */
261 	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
262 	    !check_ip_above(sup->su_rq)) {
263 		return (ENOTSUP);
264 	}
265 
266 	switch (type) {
267 	case DLD_CAPAB_DIRECT:
268 		err = softmac_capab_direct(sup, data, flags);
269 		break;
270 
271 	case DLD_CAPAB_PERIM:
272 		err = softmac_capab_perim(sup, data, flags);
273 		break;
274 
275 	default:
276 		err = ENOTSUP;
277 		break;
278 	}
279 	return (err);
280 }
281 
282 static void
283 softmac_capability_advertise(softmac_upper_t *sup, mblk_t *mp)
284 {
285 	dl_capability_ack_t	*dlap;
286 	dl_capability_sub_t	*dlsp;
287 	t_uscalar_t		subsize;
288 	uint8_t			*ptr;
289 	queue_t			*q = sup->su_wq;
290 	mblk_t			*mp1;
291 	softmac_t		*softmac = sup->su_softmac;
292 	boolean_t		dld_capable = B_FALSE;
293 	boolean_t		hcksum_capable = B_FALSE;
294 	boolean_t		zcopy_capable = B_FALSE;
295 	boolean_t		mdt_capable = B_FALSE;
296 
297 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
298 
299 	/*
300 	 * Initially assume no capabilities.
301 	 */
302 	subsize = 0;
303 
304 	/*
305 	 * Direct capability negotiation interface between IP and softmac
306 	 */
307 	if (check_ip_above(sup->su_rq)) {
308 		dld_capable = B_TRUE;
309 		subsize += sizeof (dl_capability_sub_t) +
310 		    sizeof (dl_capab_dld_t);
311 	}
312 
313 	/*
314 	 * Check if checksum offload is supported on this MAC.
315 	 */
316 	if (softmac->smac_capab_flags & MAC_CAPAB_HCKSUM) {
317 		hcksum_capable = B_TRUE;
318 		subsize += sizeof (dl_capability_sub_t) +
319 		    sizeof (dl_capab_hcksum_t);
320 	}
321 
322 	/*
323 	 * Check if zerocopy is supported on this interface.
324 	 */
325 	if (!(softmac->smac_capab_flags & MAC_CAPAB_NO_ZCOPY)) {
326 		zcopy_capable = B_TRUE;
327 		subsize += sizeof (dl_capability_sub_t) +
328 		    sizeof (dl_capab_zerocopy_t);
329 	}
330 
331 	if (softmac->smac_mdt) {
332 		mdt_capable = B_TRUE;
333 		subsize += sizeof (dl_capability_sub_t) +
334 		    sizeof (dl_capab_mdt_t);
335 	}
336 
337 	/*
338 	 * If there are no capabilities to advertise or if we
339 	 * can't allocate a response, send a DL_ERROR_ACK.
340 	 */
341 	if ((subsize == 0) || (mp1 = reallocb(mp,
342 	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
343 		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
344 		return;
345 	}
346 
347 	mp = mp1;
348 	DB_TYPE(mp) = M_PROTO;
349 	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
350 	bzero(mp->b_rptr, MBLKL(mp));
351 	dlap = (dl_capability_ack_t *)mp->b_rptr;
352 	dlap->dl_primitive = DL_CAPABILITY_ACK;
353 	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
354 	dlap->dl_sub_length = subsize;
355 	ptr = (uint8_t *)&dlap[1];
356 
357 	/*
358 	 * IP polling interface.
359 	 */
360 	if (dld_capable) {
361 		dl_capab_dld_t		dld;
362 
363 		dlsp = (dl_capability_sub_t *)ptr;
364 		dlsp->dl_cap = DL_CAPAB_DLD;
365 		dlsp->dl_length = sizeof (dl_capab_dld_t);
366 		ptr += sizeof (dl_capability_sub_t);
367 
368 		bzero(&dld, sizeof (dl_capab_dld_t));
369 		dld.dld_version = DLD_CURRENT_VERSION;
370 		dld.dld_capab = (uintptr_t)softmac_dld_capab;
371 		dld.dld_capab_handle = (uintptr_t)sup;
372 
373 		dlcapabsetqid(&(dld.dld_mid), sup->su_rq);
374 		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
375 		ptr += sizeof (dl_capab_dld_t);
376 	}
377 
378 	/*
379 	 * TCP/IP checksum offload.
380 	 */
381 	if (hcksum_capable) {
382 		dl_capab_hcksum_t	hcksum;
383 
384 		dlsp = (dl_capability_sub_t *)ptr;
385 
386 		dlsp->dl_cap = DL_CAPAB_HCKSUM;
387 		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
388 		ptr += sizeof (dl_capability_sub_t);
389 
390 		bzero(&hcksum, sizeof (dl_capab_hcksum_t));
391 		hcksum.hcksum_version = HCKSUM_VERSION_1;
392 		hcksum.hcksum_txflags = softmac->smac_hcksum_txflags;
393 		dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
394 		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
395 		ptr += sizeof (dl_capab_hcksum_t);
396 	}
397 
398 	/*
399 	 * Zero copy
400 	 */
401 	if (zcopy_capable) {
402 		dl_capab_zerocopy_t	zcopy;
403 
404 		dlsp = (dl_capability_sub_t *)ptr;
405 
406 		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
407 		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
408 		ptr += sizeof (dl_capability_sub_t);
409 
410 		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
411 		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
412 		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
413 		dlcapabsetqid(&(zcopy.zerocopy_mid), sup->su_rq);
414 		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
415 		ptr += sizeof (dl_capab_zerocopy_t);
416 	}
417 
418 	/*
419 	 * MDT
420 	 */
421 	if (mdt_capable) {
422 		dl_capab_mdt_t mdt;
423 
424 		dlsp = (dl_capability_sub_t *)ptr;
425 
426 		dlsp->dl_cap = DL_CAPAB_MDT;
427 		dlsp->dl_length = sizeof (dl_capab_mdt_t);
428 		ptr += sizeof (dl_capability_sub_t);
429 
430 		bzero(&mdt, sizeof (dl_capab_mdt_t));
431 		mdt.mdt_version = MDT_VERSION_2;
432 		mdt.mdt_flags = DL_CAPAB_MDT_ENABLE;
433 		mdt.mdt_hdr_head = softmac->smac_mdt_capab.mdt_hdr_head;
434 		mdt.mdt_hdr_tail = softmac->smac_mdt_capab.mdt_hdr_tail;
435 		mdt.mdt_max_pld = softmac->smac_mdt_capab.mdt_max_pld;
436 		mdt.mdt_span_limit = softmac->smac_mdt_capab.mdt_span_limit;
437 		dlcapabsetqid(&(mdt.mdt_mid), sup->su_rq);
438 		bcopy(&mdt, ptr, sizeof (dl_capab_mdt_t));
439 		ptr += sizeof (dl_capab_mdt_t);
440 	}
441 
442 	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
443 	qreply(q, mp);
444 }
445 
446 static void
447 softmac_capability_req(softmac_upper_t *sup, mblk_t *mp)
448 {
449 	dl_capability_req_t	*dlp = (dl_capability_req_t *)mp->b_rptr;
450 	dl_capability_sub_t	*sp;
451 	size_t			size, len;
452 	offset_t		off, end;
453 	t_uscalar_t		dl_err;
454 	queue_t			*q = sup->su_wq;
455 
456 	ASSERT(sup->su_mode == SOFTMAC_FASTPATH);
457 	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
458 		dl_err = DL_BADPRIM;
459 		goto failed;
460 	}
461 
462 	if (!sup->su_bound) {
463 		dl_err = DL_OUTSTATE;
464 		goto failed;
465 	}
466 
467 	/*
468 	 * This request is overloaded. If there are no requested capabilities
469 	 * then we just want to acknowledge with all the capabilities we
470 	 * support. Otherwise we enable the set of capabilities requested.
471 	 */
472 	if (dlp->dl_sub_length == 0) {
473 		softmac_capability_advertise(sup, mp);
474 		return;
475 	}
476 
477 	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
478 		dl_err = DL_BADPRIM;
479 		goto failed;
480 	}
481 
482 	dlp->dl_primitive = DL_CAPABILITY_ACK;
483 
484 	off = dlp->dl_sub_offset;
485 	len = dlp->dl_sub_length;
486 
487 	/*
488 	 * Walk the list of capabilities to be enabled.
489 	 */
490 	for (end = off + len; off < end; ) {
491 		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
492 		size = sizeof (dl_capability_sub_t) + sp->dl_length;
493 
494 		if (off + size > end ||
495 		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
496 			dl_err = DL_BADPRIM;
497 			goto failed;
498 		}
499 
500 		switch (sp->dl_cap) {
501 		/*
502 		 * TCP/IP checksum offload to hardware.
503 		 */
504 		case DL_CAPAB_HCKSUM: {
505 			dl_capab_hcksum_t *hcksump;
506 			dl_capab_hcksum_t hcksum;
507 
508 			hcksump = (dl_capab_hcksum_t *)&sp[1];
509 			/*
510 			 * Copy for alignment.
511 			 */
512 			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
513 			dlcapabsetqid(&(hcksum.hcksum_mid), sup->su_rq);
514 			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
515 			break;
516 		}
517 
518 		default:
519 			break;
520 		}
521 
522 		off += size;
523 	}
524 	qreply(q, mp);
525 	return;
526 failed:
527 	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
528 }
529 
530 static void
531 softmac_bind_req(softmac_upper_t *sup, mblk_t *mp)
532 {
533 	softmac_lower_t	*slp = sup->su_slp;
534 	softmac_t	*softmac = sup->su_softmac;
535 	mblk_t		*ackmp, *mp1;
536 	int		err;
537 
538 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
539 		freemsg(mp);
540 		return;
541 	}
542 
543 	/*
544 	 * Allocate ackmp incase the underlying driver does not ack timely.
545 	 */
546 	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
547 		dlerrorack(sup->su_wq, mp, DL_BIND_REQ, DL_SYSERR, ENOMEM);
548 		return;
549 	}
550 
551 	err = softmac_output(slp, mp, DL_BIND_REQ, DL_BIND_ACK, &ackmp);
552 	if (ackmp != NULL) {
553 		freemsg(mp1);
554 	} else {
555 		/*
556 		 * The driver does not ack timely.
557 		 */
558 		ASSERT(err == ENOMSG);
559 		ackmp = mp1;
560 	}
561 	if (err != 0)
562 		goto failed;
563 
564 	/*
565 	 * Enable capabilities the underlying driver claims to support.
566 	 */
567 	if ((err = softmac_capab_enable(slp)) != 0)
568 		goto failed;
569 
570 	/*
571 	 * Check whether this softmac is already marked as exclusively used,
572 	 * e.g., an aggregation is created over it. Fail the BIND_REQ if so.
573 	 */
574 	mutex_enter(&softmac->smac_active_mutex);
575 	if (softmac->smac_active) {
576 		mutex_exit(&softmac->smac_active_mutex);
577 		err = EBUSY;
578 		goto failed;
579 	}
580 	softmac->smac_nactive++;
581 	sup->su_active = B_TRUE;
582 	mutex_exit(&softmac->smac_active_mutex);
583 	sup->su_bound = B_TRUE;
584 
585 	qreply(sup->su_wq, ackmp);
586 	return;
587 failed:
588 	if (err != 0) {
589 		dlerrorack(sup->su_wq, ackmp, DL_BIND_REQ, DL_SYSERR, err);
590 		return;
591 	}
592 }
593 
594 static void
595 softmac_unbind_req(softmac_upper_t *sup, mblk_t *mp)
596 {
597 	softmac_lower_t	*slp = sup->su_slp;
598 	softmac_t	*softmac = sup->su_softmac;
599 	mblk_t		*ackmp, *mp1;
600 	int		err;
601 
602 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
603 		freemsg(mp);
604 		return;
605 	}
606 
607 	if (!sup->su_bound) {
608 		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
609 		return;
610 	}
611 
612 	/*
613 	 * Allocate ackmp incase the underlying driver does not ack timely.
614 	 */
615 	if ((mp1 = allocb(sizeof (dl_error_ack_t), BPRI_HI)) == NULL) {
616 		dlerrorack(sup->su_wq, mp, DL_UNBIND_REQ, DL_SYSERR, ENOMEM);
617 		return;
618 	}
619 
620 	err = softmac_output(slp, mp, DL_UNBIND_REQ, DL_OK_ACK, &ackmp);
621 	if (ackmp != NULL) {
622 		freemsg(mp1);
623 	} else {
624 		/*
625 		 * The driver does not ack timely.
626 		 */
627 		ASSERT(err == ENOMSG);
628 		ackmp = mp1;
629 	}
630 	if (err != 0) {
631 		dlerrorack(sup->su_wq, ackmp, DL_UNBIND_REQ, DL_SYSERR, err);
632 		return;
633 	}
634 
635 	sup->su_bound = B_FALSE;
636 
637 	mutex_enter(&softmac->smac_active_mutex);
638 	if (sup->su_active) {
639 		ASSERT(!softmac->smac_active);
640 		softmac->smac_nactive--;
641 		sup->su_active = B_FALSE;
642 	}
643 	mutex_exit(&softmac->smac_active_mutex);
644 
645 done:
646 	qreply(sup->su_wq, ackmp);
647 }
648 
649 /*
650  * Process the non-data mblk.
651  */
652 static void
653 softmac_wput_single_nondata(softmac_upper_t *sup, mblk_t *mp)
654 {
655 	softmac_t *softmac = sup->su_softmac;
656 	softmac_lower_t	*slp = sup->su_slp;
657 	unsigned char	dbtype;
658 	t_uscalar_t	prim;
659 
660 	dbtype = DB_TYPE(mp);
661 	switch (dbtype) {
662 	case M_IOCTL:
663 	case M_CTL: {
664 		uint32_t	expected_mode;
665 
666 		if (((struct iocblk *)(mp->b_rptr))->ioc_cmd != SIOCSLIFNAME)
667 			break;
668 
669 		/*
670 		 * Nak the M_IOCTL based on the STREAMS specification.
671 		 */
672 		if (dbtype == M_IOCTL)
673 			miocnak(sup->su_wq, mp, 0, EINVAL);
674 		else
675 			freemsg(mp);
676 
677 		/*
678 		 * This stream is either IP or ARP. See whether
679 		 * we need to setup a dedicated-lower-stream for it.
680 		 */
681 		mutex_enter(&softmac->smac_fp_mutex);
682 
683 		expected_mode = DATAPATH_MODE(softmac);
684 		if (expected_mode == SOFTMAC_SLOWPATH)
685 			sup->su_mode = SOFTMAC_SLOWPATH;
686 		list_insert_head(&softmac->smac_sup_list, sup);
687 		mutex_exit(&softmac->smac_fp_mutex);
688 
689 		/*
690 		 * Setup the fast-path dedicated lower stream if fast-path
691 		 * is expected. Note that no lock is held here, and if
692 		 * smac_expected_mode is changed from SOFTMAC_FASTPATH to
693 		 * SOFTMAC_SLOWPATH, the DL_NOTE_REPLUMB message used for
694 		 * data-path switching would already be queued and will
695 		 * be processed by softmac_wput_single_nondata() later.
696 		 */
697 		if (expected_mode == SOFTMAC_FASTPATH)
698 			(void) softmac_fastpath_setup(sup);
699 		return;
700 	}
701 	case M_PROTO:
702 	case M_PCPROTO:
703 		if (MBLKL(mp) < sizeof (t_uscalar_t)) {
704 			freemsg(mp);
705 			return;
706 		}
707 		prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
708 		switch (prim) {
709 		case DL_NOTIFY_IND:
710 			if (MBLKL(mp) < sizeof (dl_notify_ind_t) ||
711 			    ((dl_notify_ind_t *)mp->b_rptr)->dl_notification !=
712 			    DL_NOTE_REPLUMB) {
713 				freemsg(mp);
714 				return;
715 			}
716 			/*
717 			 * This DL_NOTE_REPLUMB message is initiated
718 			 * and queued by the softmac itself, when the
719 			 * sup is trying to switching its datapath mode
720 			 * between SOFTMAC_SLOWPATH and SOFTMAC_FASTPATH.
721 			 * Send this message upstream.
722 			 */
723 			qreply(sup->su_wq, mp);
724 			return;
725 		case DL_NOTIFY_CONF:
726 			if (MBLKL(mp) < sizeof (dl_notify_conf_t) ||
727 			    ((dl_notify_conf_t *)mp->b_rptr)->dl_notification !=
728 			    DL_NOTE_REPLUMB_DONE) {
729 				freemsg(mp);
730 				return;
731 			}
732 			/*
733 			 * This is an indication from IP/ARP that the
734 			 * fastpath->slowpath switch is done.
735 			 */
736 			freemsg(mp);
737 			softmac_datapath_switch_done(sup);
738 			return;
739 		}
740 		break;
741 	}
742 
743 	/*
744 	 * No need to hold lock to check su_mode, since su_mode updating only
745 	 * operation is is serialized by softmac_wput_nondata_task().
746 	 */
747 	if (sup->su_mode != SOFTMAC_FASTPATH) {
748 		dld_wput(sup->su_wq, mp);
749 		return;
750 	}
751 
752 	/*
753 	 * Fastpath non-data message processing. Most of non-data messages
754 	 * can be directly passed down to the dedicated-lower-stream, aside
755 	 * from the following M_PROTO/M_PCPROTO messages.
756 	 */
757 	switch (dbtype) {
758 	case M_PROTO:
759 	case M_PCPROTO:
760 		switch (prim) {
761 		case DL_BIND_REQ:
762 			softmac_bind_req(sup, mp);
763 			break;
764 		case DL_UNBIND_REQ:
765 			softmac_unbind_req(sup, mp);
766 			break;
767 		case DL_CAPABILITY_REQ:
768 			softmac_capability_req(sup, mp);
769 			break;
770 		default:
771 			putnext(slp->sl_wq, mp);
772 			break;
773 		}
774 		break;
775 	default:
776 		putnext(slp->sl_wq, mp);
777 		break;
778 	}
779 }
780 
781 /*
782  * The worker thread which processes non-data messages. Note we only process
783  * one message at one time in order to be able to "flush" the queued message
784  * and serialize the processing.
785  */
786 static void
787 softmac_wput_nondata_task(void *arg)
788 {
789 	softmac_upper_t	*sup = arg;
790 	mblk_t		*mp;
791 
792 	mutex_enter(&sup->su_disp_mutex);
793 
794 	while (sup->su_pending_head != NULL) {
795 		if (sup->su_closing)
796 			break;
797 
798 		SOFTMAC_DQ_PENDING(sup, &mp);
799 		mutex_exit(&sup->su_disp_mutex);
800 		softmac_wput_single_nondata(sup, mp);
801 		mutex_enter(&sup->su_disp_mutex);
802 	}
803 
804 	/*
805 	 * If the stream is closing, flush all queued messages and inform
806 	 * the stream to be closed.
807 	 */
808 	freemsgchain(sup->su_pending_head);
809 	sup->su_pending_head = sup->su_pending_tail = NULL;
810 	sup->su_dlpi_pending = B_FALSE;
811 	cv_signal(&sup->su_disp_cv);
812 	mutex_exit(&sup->su_disp_mutex);
813 }
814 
815 /*
816  * Kernel thread to handle taskq dispatch failures in softmac_wput_nondata().
817  * This thread is started when the softmac module is first loaded.
818  */
819 static void
820 softmac_taskq_dispatch(void)
821 {
822 	callb_cpr_t	cprinfo;
823 	softmac_upper_t	*sup;
824 
825 	CALLB_CPR_INIT(&cprinfo, &softmac_taskq_lock, callb_generic_cpr,
826 	    "softmac_taskq_dispatch");
827 	mutex_enter(&softmac_taskq_lock);
828 
829 	while (!softmac_taskq_quit) {
830 		sup = list_head(&softmac_taskq_list);
831 		while (sup != NULL) {
832 			list_remove(&softmac_taskq_list, sup);
833 			sup->su_taskq_scheduled = B_FALSE;
834 			mutex_exit(&softmac_taskq_lock);
835 			VERIFY(taskq_dispatch(system_taskq,
836 			    softmac_wput_nondata_task, sup, TQ_SLEEP) != NULL);
837 			mutex_enter(&softmac_taskq_lock);
838 			sup = list_head(&softmac_taskq_list);
839 		}
840 
841 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
842 		cv_wait(&softmac_taskq_cv, &softmac_taskq_lock);
843 		CALLB_CPR_SAFE_END(&cprinfo, &softmac_taskq_lock);
844 	}
845 
846 	softmac_taskq_done = B_TRUE;
847 	cv_signal(&softmac_taskq_cv);
848 	CALLB_CPR_EXIT(&cprinfo);
849 	thread_exit();
850 }
851 
852 void
853 softmac_wput_nondata(softmac_upper_t *sup, mblk_t *mp)
854 {
855 	/*
856 	 * The processing of the message might block. Enqueue the
857 	 * message for later processing.
858 	 */
859 	mutex_enter(&sup->su_disp_mutex);
860 
861 	if (sup->su_closing) {
862 		mutex_exit(&sup->su_disp_mutex);
863 		freemsg(mp);
864 		return;
865 	}
866 
867 	SOFTMAC_EQ_PENDING(sup, mp);
868 
869 	if (sup->su_dlpi_pending) {
870 		mutex_exit(&sup->su_disp_mutex);
871 		return;
872 	}
873 	sup->su_dlpi_pending = B_TRUE;
874 	mutex_exit(&sup->su_disp_mutex);
875 
876 	if (taskq_dispatch(system_taskq, softmac_wput_nondata_task,
877 	    sup, TQ_NOSLEEP) != NULL) {
878 		return;
879 	}
880 
881 	mutex_enter(&softmac_taskq_lock);
882 	if (!sup->su_taskq_scheduled) {
883 		list_insert_tail(&softmac_taskq_list, sup);
884 		cv_signal(&softmac_taskq_cv);
885 	}
886 	sup->su_taskq_scheduled = B_TRUE;
887 	mutex_exit(&softmac_taskq_lock);
888 }
889 
890 /*
891  * Setup the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
892  */
893 static int
894 softmac_fastpath_setup(softmac_upper_t *sup)
895 {
896 	softmac_t	*softmac = sup->su_softmac;
897 	softmac_lower_t	*slp;
898 	int		err;
899 
900 	err = softmac_lower_setup(softmac, sup, &slp);
901 
902 	mutex_enter(&sup->su_mutex);
903 	/*
904 	 * Wait for all data messages to be processed so that we can change
905 	 * the su_mode.
906 	 */
907 	while (sup->su_tx_inprocess != 0)
908 		cv_wait(&sup->su_cv, &sup->su_mutex);
909 
910 	ASSERT(sup->su_mode != SOFTMAC_FASTPATH);
911 	ASSERT(sup->su_slp == NULL);
912 	if (err != 0) {
913 		sup->su_mode = SOFTMAC_SLOWPATH;
914 	} else {
915 		sup->su_slp = slp;
916 		sup->su_mode = SOFTMAC_FASTPATH;
917 	}
918 	mutex_exit(&sup->su_mutex);
919 	return (err);
920 }
921 
922 /*
923  * Tear down the dedicated-lower-stream (fast-path) for the IP/ARP upperstream.
924  */
925 static void
926 softmac_fastpath_tear(softmac_upper_t *sup)
927 {
928 	mutex_enter(&sup->su_mutex);
929 	/*
930 	 * Wait for all data messages in the dedicated-lower-stream
931 	 * to be processed.
932 	 */
933 	while (sup->su_tx_inprocess != 0)
934 		cv_wait(&sup->su_cv, &sup->su_mutex);
935 
936 	if (sup->su_tx_busy) {
937 		ASSERT(sup->su_tx_flow_mp == NULL);
938 		sup->su_tx_flow_mp = getq(sup->su_wq);
939 		sup->su_tx_busy = B_FALSE;
940 	}
941 
942 	sup->su_mode = SOFTMAC_SLOWPATH;
943 
944 	/*
945 	 * Destroy the dedicated-lower-stream. Note that slp is destroyed
946 	 * when lh is closed.
947 	 */
948 	(void) ldi_close(sup->su_slp->sl_lh, FREAD|FWRITE, kcred);
949 	sup->su_slp = NULL;
950 	mutex_exit(&sup->su_mutex);
951 }
952 
953 void
954 softmac_wput_data(softmac_upper_t *sup, mblk_t *mp)
955 {
956 	/*
957 	 * No lock is required to access the su_mode field since the data
958 	 * traffic is quiesce by IP when the data-path mode is in the
959 	 * process of switching.
960 	 */
961 	if (sup->su_mode != SOFTMAC_FASTPATH)
962 		dld_wput(sup->su_wq, mp);
963 	else
964 		(void) softmac_fastpath_wput_data(sup, mp, NULL, 0);
965 }
966 
967 /*ARGSUSED*/
968 static mac_tx_cookie_t
969 softmac_fastpath_wput_data(softmac_upper_t *sup, mblk_t *mp, uintptr_t f_hint,
970     uint16_t flag)
971 {
972 	queue_t		*wq = sup->su_slp->sl_wq;
973 
974 	/*
975 	 * This function is called from IP, only the MAC_DROP_ON_NO_DESC
976 	 * flag can be specified.
977 	 */
978 	ASSERT((flag & ~MAC_DROP_ON_NO_DESC) == 0);
979 	ASSERT(mp->b_next == NULL);
980 
981 	/*
982 	 * Check wether the dedicated-lower-stream is able to handle more
983 	 * messages, and enable the flow-control if it is not.
984 	 *
985 	 * Note that in order not to introduce any packet reordering, we
986 	 * always send the message down to the dedicated-lower-stream:
987 	 *
988 	 * If the flow-control is already enabled, but we still get
989 	 * the messages from the upper-stream, it means that the upper
990 	 * stream does not respect STREAMS flow-control (e.g., TCP). Simply
991 	 * pass the message down to the lower-stream in that case.
992 	 */
993 	if (SOFTMAC_CANPUTNEXT(wq)) {
994 		putnext(wq, mp);
995 		return (NULL);
996 	}
997 
998 	if ((flag & MAC_DROP_ON_NO_DESC) != 0) {
999 		freemsg(mp);
1000 		return ((mac_tx_cookie_t)wq);
1001 	}
1002 
1003 	if (sup->su_tx_busy) {
1004 		putnext(wq, mp);
1005 		return ((mac_tx_cookie_t)wq);
1006 	}
1007 
1008 	mutex_enter(&sup->su_mutex);
1009 	if (!sup->su_tx_busy) {
1010 		ASSERT(sup->su_tx_flow_mp != NULL);
1011 		(void) putq(sup->su_wq, sup->su_tx_flow_mp);
1012 		sup->su_tx_flow_mp = NULL;
1013 		sup->su_tx_busy = B_TRUE;
1014 		qenable(wq);
1015 	}
1016 	mutex_exit(&sup->su_mutex);
1017 	putnext(wq, mp);
1018 	return ((mac_tx_cookie_t)wq);
1019 }
1020 
1021 boolean_t
1022 softmac_active_set(void *arg)
1023 {
1024 	softmac_t	*softmac = arg;
1025 
1026 	mutex_enter(&softmac->smac_active_mutex);
1027 	if (softmac->smac_nactive != 0) {
1028 		mutex_exit(&softmac->smac_active_mutex);
1029 		return (B_FALSE);
1030 	}
1031 	softmac->smac_active = B_TRUE;
1032 	mutex_exit(&softmac->smac_active_mutex);
1033 	return (B_TRUE);
1034 }
1035 
1036 void
1037 softmac_active_clear(void *arg)
1038 {
1039 	softmac_t	*softmac = arg;
1040 
1041 	mutex_enter(&softmac->smac_active_mutex);
1042 	ASSERT(softmac->smac_active && (softmac->smac_nactive == 0));
1043 	softmac->smac_active = B_FALSE;
1044 	mutex_exit(&softmac->smac_active_mutex);
1045 }
1046 
1047 /*
1048  * Disable/reenable fastpath on given softmac. This request could come from a
1049  * MAC client or directly from administrators.
1050  */
1051 int
1052 softmac_datapath_switch(softmac_t *softmac, boolean_t disable, boolean_t admin)
1053 {
1054 	softmac_upper_t		*sup;
1055 	mblk_t			*head = NULL, *tail = NULL, *mp;
1056 	list_t			reqlist;
1057 	softmac_switch_req_t	*req;
1058 	uint32_t		current_mode, expected_mode;
1059 	int			err = 0;
1060 
1061 	mutex_enter(&softmac->smac_fp_mutex);
1062 
1063 	current_mode = DATAPATH_MODE(softmac);
1064 	if (admin) {
1065 		if (softmac->smac_fastpath_admin_disabled == disable) {
1066 			mutex_exit(&softmac->smac_fp_mutex);
1067 			return (0);
1068 		}
1069 		softmac->smac_fastpath_admin_disabled = disable;
1070 	} else if (disable) {
1071 		softmac->smac_fp_disable_clients++;
1072 	} else {
1073 		ASSERT(softmac->smac_fp_disable_clients != 0);
1074 		softmac->smac_fp_disable_clients--;
1075 	}
1076 
1077 	expected_mode = DATAPATH_MODE(softmac);
1078 	if (current_mode == expected_mode) {
1079 		mutex_exit(&softmac->smac_fp_mutex);
1080 		return (0);
1081 	}
1082 
1083 	/*
1084 	 * The expected mode is different from whatever datapath mode
1085 	 * this softmac is expected from last request, enqueue the data-path
1086 	 * switch request.
1087 	 */
1088 	list_create(&reqlist, sizeof (softmac_switch_req_t),
1089 	    offsetof(softmac_switch_req_t, ssq_req_list_node));
1090 
1091 	/*
1092 	 * Allocate all DL_NOTIFY_IND messages and request structures that
1093 	 * are required to switch each IP/ARP stream to the expected mode.
1094 	 */
1095 	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
1096 	    sup = list_next(&softmac->smac_sup_list, sup)) {
1097 		dl_notify_ind_t	*dlip;
1098 
1099 		req = kmem_alloc(sizeof (softmac_switch_req_t), KM_NOSLEEP);
1100 		if (req == NULL)
1101 			break;
1102 
1103 		req->ssq_expected_mode = expected_mode;
1104 
1105 		/*
1106 		 * Allocate the DL_NOTE_REPLUMB message.
1107 		 */
1108 		if ((mp = allocb(sizeof (dl_notify_ind_t), BPRI_LO)) == NULL) {
1109 			kmem_free(req, sizeof (softmac_switch_req_t));
1110 			break;
1111 		}
1112 
1113 		list_insert_tail(&reqlist, req);
1114 
1115 		mp->b_wptr = mp->b_rptr + sizeof (dl_notify_ind_t);
1116 		mp->b_datap->db_type = M_PROTO;
1117 		bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
1118 		dlip = (dl_notify_ind_t *)mp->b_rptr;
1119 		dlip->dl_primitive = DL_NOTIFY_IND;
1120 		dlip->dl_notification = DL_NOTE_REPLUMB;
1121 		if (head == NULL) {
1122 			head = tail = mp;
1123 		} else {
1124 			tail->b_next = mp;
1125 			tail = mp;
1126 		}
1127 	}
1128 
1129 	/*
1130 	 * Note that it is fine if the expected data-path mode is fast-path
1131 	 * and some of streams fails to switch. Only return failure if we
1132 	 * are expected to switch to the slow-path.
1133 	 */
1134 	if (sup != NULL && expected_mode == SOFTMAC_SLOWPATH) {
1135 		err = ENOMEM;
1136 		goto fail;
1137 	}
1138 
1139 	/*
1140 	 * Start switching for each IP/ARP stream. The switching operation
1141 	 * will eventually succeed and there is no need to wait for it
1142 	 * to finish.
1143 	 */
1144 	for (sup = list_head(&softmac->smac_sup_list); sup != NULL;
1145 	    sup = list_next(&softmac->smac_sup_list, sup)) {
1146 		mp = head->b_next;
1147 		head->b_next = NULL;
1148 
1149 		/*
1150 		 * Add the swtich request to the requests list of the stream.
1151 		 */
1152 		req = list_head(&reqlist);
1153 		ASSERT(req != NULL);
1154 		list_remove(&reqlist, req);
1155 		list_insert_tail(&sup->su_req_list, req);
1156 		softmac_wput_nondata(sup, head);
1157 		head = mp;
1158 	}
1159 
1160 	mutex_exit(&softmac->smac_fp_mutex);
1161 	ASSERT(list_is_empty(&reqlist));
1162 	list_destroy(&reqlist);
1163 	return (0);
1164 fail:
1165 	if (admin) {
1166 		softmac->smac_fastpath_admin_disabled = !disable;
1167 	} else if (disable) {
1168 		softmac->smac_fp_disable_clients--;
1169 	} else {
1170 		softmac->smac_fp_disable_clients++;
1171 	}
1172 
1173 	mutex_exit(&softmac->smac_fp_mutex);
1174 	while ((req = list_head(&reqlist)) != NULL) {
1175 		list_remove(&reqlist, req);
1176 		kmem_free(req, sizeof (softmac_switch_req_t));
1177 	}
1178 	freemsgchain(head);
1179 	list_destroy(&reqlist);
1180 	return (err);
1181 }
1182 
1183 int
1184 softmac_fastpath_disable(void *arg)
1185 {
1186 	return (softmac_datapath_switch((softmac_t *)arg, B_TRUE, B_FALSE));
1187 }
1188 
1189 void
1190 softmac_fastpath_enable(void *arg)
1191 {
1192 	VERIFY(softmac_datapath_switch((softmac_t *)arg, B_FALSE,
1193 	    B_FALSE) == 0);
1194 }
1195 
1196 void
1197 softmac_upperstream_close(softmac_upper_t *sup)
1198 {
1199 	softmac_t		*softmac = sup->su_softmac;
1200 	softmac_switch_req_t	*req;
1201 
1202 	mutex_enter(&softmac->smac_fp_mutex);
1203 
1204 	if (sup->su_mode == SOFTMAC_FASTPATH)
1205 		softmac_fastpath_tear(sup);
1206 
1207 	if (sup->su_mode != SOFTMAC_UNKNOWN) {
1208 		list_remove(&softmac->smac_sup_list, sup);
1209 		sup->su_mode = SOFTMAC_UNKNOWN;
1210 	}
1211 
1212 	/*
1213 	 * Cleanup all the switch requests queueed on this stream.
1214 	 */
1215 	while ((req = list_head(&sup->su_req_list)) != NULL) {
1216 		list_remove(&sup->su_req_list, req);
1217 		kmem_free(req, sizeof (softmac_switch_req_t));
1218 	}
1219 	mutex_exit(&softmac->smac_fp_mutex);
1220 }
1221 
1222 /*
1223  * Handle the DL_NOTE_REPLUMB_DONE indication from IP/ARP. Change the upper
1224  * stream from the fastpath mode to the slowpath mode.
1225  */
1226 static void
1227 softmac_datapath_switch_done(softmac_upper_t *sup)
1228 {
1229 	softmac_t		*softmac = sup->su_softmac;
1230 	softmac_switch_req_t	*req;
1231 	uint32_t		expected_mode;
1232 
1233 	mutex_enter(&softmac->smac_fp_mutex);
1234 	req = list_head(&sup->su_req_list);
1235 	list_remove(&sup->su_req_list, req);
1236 	expected_mode = req->ssq_expected_mode;
1237 	kmem_free(req, sizeof (softmac_switch_req_t));
1238 
1239 	if (expected_mode == sup->su_mode) {
1240 		mutex_exit(&softmac->smac_fp_mutex);
1241 		return;
1242 	}
1243 
1244 	ASSERT(!sup->su_bound);
1245 	mutex_exit(&softmac->smac_fp_mutex);
1246 
1247 	/*
1248 	 * It is fine if the expected mode is fast-path and we fail
1249 	 * to enable fastpath on this stream.
1250 	 */
1251 	if (expected_mode == SOFTMAC_SLOWPATH)
1252 		softmac_fastpath_tear(sup);
1253 	else
1254 		(void) softmac_fastpath_setup(sup);
1255 }
1256