xref: /illumos-gate/usr/src/uts/common/io/dls/dls_link.c (revision 02e56f3f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Data-Link Services Module
31  */
32 
33 #include	<sys/types.h>
34 #include	<sys/stream.h>
35 #include	<sys/strsun.h>
36 #include	<sys/strsubr.h>
37 #include	<sys/sysmacros.h>
38 #include	<sys/atomic.h>
39 #include	<sys/modhash.h>
40 #include	<sys/dlpi.h>
41 #include	<sys/ethernet.h>
42 #include	<sys/byteorder.h>
43 #include	<sys/vlan.h>
44 #include	<sys/mac.h>
45 #include	<sys/sdt.h>
46 
47 #include	<sys/dls.h>
48 #include	<sys/dld_impl.h>
49 #include	<sys/dls_impl.h>
50 
51 static kmem_cache_t	*i_dls_link_cachep;
52 static mod_hash_t	*i_dls_link_hash;
53 static uint_t		i_dls_link_count;
54 static krwlock_t	i_dls_link_lock;
55 
56 #define		LINK_HASHSZ	67	/* prime */
57 #define		IMPL_HASHSZ	67	/* prime */
58 
/*
 * Construct a hash key encompassing both DLSAP value and VLAN identifier.
 * The VLAN identifier (masked with VLAN_ID_MASK) occupies the low
 * VLAN_ID_SIZE bits; the DLSAP value is shifted up above it.
 */
#define	MAKE_KEY(_sap, _vid)						\
	((mod_hash_key_t)(uintptr_t)					\
	(((_sap) << VLAN_ID_SIZE) | ((_vid) & VLAN_ID_MASK)))
65 
/*
 * Recover the DLSAP value from a hash key by discarding the low
 * VLAN_ID_SIZE bits (the VLAN identifier).
 */
#define	KEY_SAP(_key)							\
	((uint32_t)(uintptr_t)(_key) >> VLAN_ID_SIZE)
71 
72 /*
73  * Private functions.
74  */
75 
76 /*ARGSUSED*/
77 static int
78 i_dls_link_constructor(void *buf, void *arg, int kmflag)
79 {
80 	dls_link_t	*dlp = buf;
81 	char		name[MAXNAMELEN];
82 
83 	bzero(buf, sizeof (dls_link_t));
84 
85 	(void) sprintf(name, "dls_link_t_%p_hash", buf);
86 	dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
87 	    mod_hash_null_valdtor);
88 
89 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
90 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
91 	rw_init(&dlp->dl_impl_lock, NULL, RW_DEFAULT, NULL);
92 	return (0);
93 }
94 
95 /*ARGSUSED*/
96 static void
97 i_dls_link_destructor(void *buf, void *arg)
98 {
99 	dls_link_t	*dlp = buf;
100 
101 	ASSERT(dlp->dl_ref == 0);
102 	ASSERT(dlp->dl_mh == NULL);
103 	ASSERT(dlp->dl_unknowns == 0);
104 
105 	mod_hash_destroy_idhash(dlp->dl_impl_hash);
106 	dlp->dl_impl_hash = NULL;
107 
108 	mutex_destroy(&dlp->dl_lock);
109 	mutex_destroy(&dlp->dl_promisc_lock);
110 	rw_destroy(&dlp->dl_impl_lock);
111 }
112 
/*
 * Compare the 16-bit word at index _i of two packet headers.
 */
#define	ETHER_WORD_EQ(_pkt_a, _pkt_b, _i)				\
	(((uint16_t *)(_pkt_a))[(_i)] == ((uint16_t *)(_pkt_b))[(_i)])

/*
 * Two untagged headers match when 16-bit words 0-2 and word 6 are equal.
 */
#define	ETHER_MATCH(_pkt_a, _pkt_b)					\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6))

/*
 * Two VLAN-tagged headers match when, in addition to the words checked
 * by ETHER_MATCH(), 16-bit words 7 and 8 are equal.
 */
#define	ETHER_VLAN_MATCH(_pkt_a, _pkt_b)				\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 7) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 8))
126 
127 static mblk_t *
128 i_dls_link_ether_subchain(mblk_t *mp, uint_t *header_lengthp,
129     uint8_t **daddrp, uint16_t *type_lengthp, uint16_t *vidp,
130     uint_t *countp)
131 {
132 	struct ether_header		*ehp;
133 	struct ether_vlan_header	*evhp;
134 	mblk_t				**pp;
135 	mblk_t				*p;
136 	uint_t				npacket;
137 
138 	/*
139 	 * Packets should always be at least 16 bit aligned.
140 	 */
141 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
142 
143 	/*
144 	 * Determine whether this is a VLAN or non-VLAN packet.
145 	 */
146 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
147 	ehp = (struct ether_header *)mp->b_rptr;
148 	if ((*type_lengthp = ntohs(ehp->ether_type)) == VLAN_TPID)
149 		goto vlan;
150 
151 	/*
152 	 * It is a non-VLAN header.
153 	 */
154 	*header_lengthp = sizeof (struct ether_header);
155 
156 	/*
157 	 * Parse the rest of the header information that we need.
158 	 */
159 	*daddrp = (uint8_t *)&(ehp->ether_dhost);
160 	*vidp = VLAN_ID_NONE;
161 
162 	/*
163 	 * Compare with subsequent headers until we find one that has
164 	 * differing header information. After checking each packet skip over
165 	 * the header.
166 	 */
167 	npacket = 1;
168 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
169 		if (!ETHER_MATCH(p->b_rptr, mp->b_rptr) != 0)
170 			break;
171 		p->b_rptr += sizeof (struct ether_header);
172 		npacket++;
173 	}
174 
175 	/*
176 	 * Skip over the initial packet's header.
177 	 */
178 	mp->b_rptr += sizeof (struct ether_header);
179 	goto done;
180 
181 vlan:
182 	/*
183 	 * It is a VLAN header.
184 	 */
185 	evhp = (struct ether_vlan_header *)mp->b_rptr;
186 	*header_lengthp = sizeof (struct ether_vlan_header);
187 
188 	/*
189 	 * Parse the header information.
190 	 */
191 	*daddrp = (uint8_t *)&(evhp->ether_dhost);
192 	*vidp = VLAN_ID(ntohs(evhp->ether_tci));
193 	*type_lengthp = ntohs(evhp->ether_type);
194 
195 	/*
196 	 * Compare with subsequent headers until we find one that has
197 	 * differing header information. After checking each packet skip over
198 	 * the header.
199 	 */
200 	npacket = 1;
201 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
202 		if (!ETHER_VLAN_MATCH(p->b_rptr, mp->b_rptr) != 0)
203 			break;
204 		p->b_rptr += sizeof (struct ether_vlan_header);
205 		npacket++;
206 	}
207 
208 	/*
209 	 * Skip over the initial packet's header.
210 	 */
211 	mp->b_rptr += sizeof (struct ether_vlan_header);
212 
213 done:
214 	/*
215 	 * Break the chain at this point and return a pointer to the next
216 	 * sub-chain.
217 	 */
218 	*pp = NULL;
219 	*countp = npacket;
220 	return (p);
221 }
222 
223 static void
224 i_dls_head_hold(dls_head_t *dhp)
225 {
226 	atomic_inc_32(&dhp->dh_ref);
227 }
228 
229 static void
230 i_dls_head_rele(dls_head_t *dhp)
231 {
232 	atomic_dec_32(&dhp->dh_ref);
233 }
234 
235 static dls_head_t *
236 i_dls_head_alloc(mod_hash_key_t key)
237 {
238 	dls_head_t	*dhp;
239 
240 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
241 	dhp->dh_key = key;
242 	return (dhp);
243 }
244 
245 static void
246 i_dls_head_free(dls_head_t *dhp)
247 {
248 	ASSERT(dhp->dh_ref == 0);
249 	kmem_free(dhp, sizeof (dls_head_t));
250 }
251 
252 static void
253 i_dls_link_ether_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
254 {
255 	dls_link_t			*dlp = arg;
256 	mod_hash_t			*hash = dlp->dl_impl_hash;
257 	mblk_t				*nextp;
258 	uint_t				header_length;
259 	uint8_t				*daddr;
260 	uint16_t			type_length;
261 	uint16_t			vid;
262 	uint16_t			sap;
263 	dls_head_t			*dhp;
264 	dls_impl_t			*dip;
265 	dls_impl_t			*ndip;
266 	mblk_t				*nmp;
267 	mod_hash_key_t			key;
268 	uint_t				npacket;
269 	boolean_t			accepted;
270 	dls_rx_t			di_rx, ndi_rx;
271 	void				*di_rx_arg, *ndi_rx_arg;
272 
273 	/*
274 	 * Walk the packet chain.
275 	 */
276 	while (mp != NULL) {
277 		/*
278 		 * Wipe the accepted state.
279 		 */
280 		accepted = B_FALSE;
281 
282 		/*
283 		 * Grab the longest sub-chain we can process as a single
284 		 * unit.
285 		 */
286 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
287 		    &type_length, &vid, &npacket);
288 
289 		/*
290 		 * Calculate the DLSAP: LLC (0) if the type/length field is
291 		 * interpreted as a length, otherwise it is the value of the
292 		 * type/length field.
293 		 */
294 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
295 
296 		/*
297 		 * Construct a hash key from the VLAN identifier and the
298 		 * DLSAP.
299 		 */
300 		key = MAKE_KEY(sap, vid);
301 
302 		/*
303 		 * Search the has table for dls_impl_t eligible to receive
304 		 * a packet chain for this DLSAP/VLAN combination.
305 		 */
306 		rw_enter(&dlp->dl_impl_lock, RW_READER);
307 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
308 			rw_exit(&dlp->dl_impl_lock);
309 			freemsgchain(mp);
310 			goto loop;
311 		}
312 		i_dls_head_hold(dhp);
313 		rw_exit(&dlp->dl_impl_lock);
314 
315 		/*
316 		 * Find the first dls_impl_t that will accept the sub-chain.
317 		 */
318 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
319 			if (dls_accept(dip, daddr, &di_rx, &di_rx_arg))
320 				break;
321 
322 		/*
323 		 * If we did not find any dls_impl_t willing to accept the
324 		 * sub-chain then throw it away.
325 		 */
326 		if (dip == NULL) {
327 			i_dls_head_rele(dhp);
328 			freemsgchain(mp);
329 			goto loop;
330 		}
331 
332 		/*
333 		 * We have at least one acceptor.
334 		 */
335 		accepted = B_TRUE;
336 		for (;;) {
337 			/*
338 			 * Find the next dls_impl_t that will accept the
339 			 * sub-chain.
340 			 */
341 			for (ndip = dip->di_nextp; ndip != NULL;
342 			    ndip = ndip->di_nextp)
343 				if (dls_accept(ndip, daddr, &ndi_rx,
344 				    &ndi_rx_arg))
345 					break;
346 
347 			/*
348 			 * If there are no more dls_impl_t that are willing
349 			 * to accept the sub-chain then we don't need to dup
350 			 * it before handing it to the current one.
351 			 */
352 			if (ndip == NULL) {
353 				di_rx(di_rx_arg, mrh, mp, header_length);
354 
355 				/*
356 				 * Since there are no more dls_impl_t, we're
357 				 * done.
358 				 */
359 				break;
360 			}
361 
362 			/*
363 			 * There are more dls_impl_t so dup the sub-chain.
364 			 */
365 			if ((nmp = copymsgchain(mp)) != NULL)
366 				di_rx(di_rx_arg, mrh, nmp, header_length);
367 
368 			dip = ndip;
369 			di_rx = ndi_rx;
370 			di_rx_arg = ndi_rx_arg;
371 		}
372 
373 		/*
374 		 * Release the hold on the dls_impl_t chain now that we have
375 		 * finished walking it.
376 		 */
377 		i_dls_head_rele(dhp);
378 
379 loop:
380 		/*
381 		 * If there were no acceptors then add the packet count to the
382 		 * 'unknown' count.
383 		 */
384 		if (!accepted)
385 			atomic_add_32(&(dlp->dl_unknowns), npacket);
386 
387 		/*
388 		 * Move onto the next sub-chain.
389 		 */
390 		mp = nextp;
391 	}
392 }
393 
394 static void
395 i_dls_link_ether_rx_promisc(void *arg, mac_resource_handle_t mrh,
396     mblk_t *mp)
397 {
398 	dls_link_t			*dlp = arg;
399 	mod_hash_t			*hash = dlp->dl_impl_hash;
400 	mblk_t				*nextp;
401 	uint_t				header_length;
402 	uint8_t				*daddr;
403 	uint16_t			type_length;
404 	uint16_t			vid;
405 	uint16_t			sap;
406 	dls_head_t			*dhp;
407 	dls_impl_t			*dip;
408 	dls_impl_t			*ndip;
409 	mblk_t				*nmp;
410 	mod_hash_key_t			key;
411 	uint_t				npacket;
412 	boolean_t			accepted;
413 	dls_rx_t			di_rx, ndi_rx;
414 	void				*di_rx_arg, *ndi_rx_arg;
415 
416 	/*
417 	 * Walk the packet chain.
418 	 */
419 	while (mp != NULL) {
420 		/*
421 		 * Wipe the accepted state.
422 		 */
423 		accepted = B_FALSE;
424 
425 		/*
426 		 * Grab the longest sub-chain we can process as a single
427 		 * unit.
428 		 */
429 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
430 		    &type_length, &vid, &npacket);
431 
432 		/*
433 		 * Construct a hash key from the VLAN identifier and the
434 		 * DLSAP that represents dls_impl_t in promiscuous mode.
435 		 */
436 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
437 
438 		/*
439 		 * Search the has table for dls_impl_t eligible to receive
440 		 * a packet chain for this DLSAP/VLAN combination.
441 		 */
442 		rw_enter(&dlp->dl_impl_lock, RW_READER);
443 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
444 			rw_exit(&dlp->dl_impl_lock);
445 			goto non_promisc;
446 		}
447 		i_dls_head_hold(dhp);
448 		rw_exit(&dlp->dl_impl_lock);
449 
450 		/*
451 		 * Find dls_impl_t that will accept the sub-chain.
452 		 */
453 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
454 			if (!dls_accept(dip, daddr, &di_rx, &di_rx_arg))
455 				continue;
456 
457 			/*
458 			 * We have at least one acceptor.
459 			 */
460 			accepted = B_TRUE;
461 
462 			/*
463 			 * There will normally be at least more dls_impl_t
464 			 * (since we've yet to check for non-promiscuous
465 			 * dls_impl_t) so dup the sub-chain.
466 			 */
467 			if ((nmp = copymsgchain(mp)) != NULL)
468 				di_rx(di_rx_arg, mrh, nmp, header_length);
469 		}
470 
471 		/*
472 		 * Release the hold on the dls_impl_t chain now that we have
473 		 * finished walking it.
474 		 */
475 		i_dls_head_rele(dhp);
476 
477 non_promisc:
478 		/*
479 		 * Calculate the DLSAP: LLC (0) if the type/length field is
480 		 * interpreted as a length, otherwise it is the value of the
481 		 * type/length field.
482 		 */
483 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
484 
485 		/*
486 		 * Construct a hash key from the VLAN identifier and the
487 		 * DLSAP.
488 		 */
489 		key = MAKE_KEY(sap, vid);
490 
491 		/*
492 		 * Search the has table for dls_impl_t eligible to receive
493 		 * a packet chain for this DLSAP/VLAN combination.
494 		 */
495 		rw_enter(&dlp->dl_impl_lock, RW_READER);
496 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
497 			rw_exit(&dlp->dl_impl_lock);
498 			freemsgchain(mp);
499 			goto loop;
500 		}
501 		i_dls_head_hold(dhp);
502 		rw_exit(&dlp->dl_impl_lock);
503 
504 		/*
505 		 * Find the first dls_impl_t that will accept the sub-chain.
506 		 */
507 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
508 			if (dls_accept(dip, daddr, &di_rx, &di_rx_arg))
509 				break;
510 
511 		/*
512 		 * If we did not find any dls_impl_t willing to accept the
513 		 * sub-chain then throw it away.
514 		 */
515 		if (dip == NULL) {
516 			i_dls_head_rele(dhp);
517 			freemsgchain(mp);
518 			goto loop;
519 		}
520 
521 		/*
522 		 * We have at least one acceptor.
523 		 */
524 		accepted = B_TRUE;
525 		for (;;) {
526 			/*
527 			 * Find the next dls_impl_t that will accept the
528 			 * sub-chain.
529 			 */
530 			for (ndip = dip->di_nextp; ndip != NULL;
531 			    ndip = ndip->di_nextp)
532 				if (dls_accept(ndip, daddr, &ndi_rx,
533 				    &ndi_rx_arg))
534 					break;
535 
536 			/*
537 			 * If there are no more dls_impl_t that are willing
538 			 * to accept the sub-chain then we don't need to dup
539 			 * it before handing it to the current one.
540 			 */
541 			if (ndip == NULL) {
542 				di_rx(di_rx_arg, mrh, mp, header_length);
543 
544 				/*
545 				 * Since there are no more dls_impl_t, we're
546 				 * done.
547 				 */
548 				break;
549 			}
550 
551 			/*
552 			 * There are more dls_impl_t so dup the sub-chain.
553 			 */
554 			if ((nmp = copymsgchain(mp)) != NULL)
555 				di_rx(di_rx_arg, mrh, nmp, header_length);
556 
557 			dip = ndip;
558 			di_rx = ndi_rx;
559 			di_rx_arg = ndi_rx_arg;
560 		}
561 
562 		/*
563 		 * Release the hold on the dls_impl_t chain now that we have
564 		 * finished walking it.
565 		 */
566 		i_dls_head_rele(dhp);
567 
568 loop:
569 		/*
570 		 * If there were no acceptors then add the packet count to the
571 		 * 'unknown' count.
572 		 */
573 		if (!accepted)
574 			atomic_add_32(&(dlp->dl_unknowns), npacket);
575 
576 		/*
577 		 * Move onto the next sub-chain.
578 		 */
579 		mp = nextp;
580 	}
581 }
582 
583 static void
584 i_dls_link_ether_loopback(void *arg, mblk_t *mp)
585 {
586 	dls_link_t			*dlp = arg;
587 	mod_hash_t			*hash = dlp->dl_impl_hash;
588 	mblk_t				*nextp;
589 	uint_t				header_length;
590 	uint8_t				*daddr;
591 	uint16_t			type_length;
592 	uint16_t			vid;
593 	uint16_t			sap;
594 	dls_head_t			*dhp;
595 	dls_impl_t			*dip;
596 	dls_impl_t			*ndip;
597 	mblk_t				*nmp;
598 	mod_hash_key_t			key;
599 	uint_t				npacket;
600 	dls_rx_t			di_rx, ndi_rx;
601 	void				*di_rx_arg, *ndi_rx_arg;
602 
603 	/*
604 	 * Walk the packet chain.
605 	 */
606 	while (mp != NULL) {
607 		/*
608 		 * Grab the longest sub-chain we can process as a single
609 		 * unit.
610 		 */
611 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
612 		    &type_length, &vid, &npacket);
613 
614 		/*
615 		 * Calculate the DLSAP: LLC (0) if the type/length field is
616 		 * interpreted as a length, otherwise it is the value of the
617 		 * type/length field.
618 		 */
619 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
620 
621 		/*
622 		 * Construct a hash key from the VLAN identifier and the
623 		 * DLSAP.
624 		 */
625 		key = MAKE_KEY(sap, vid);
626 
627 		/*
628 		 * Search the has table for dls_impl_t eligible to receive
629 		 * a packet chain for this DLSAP/VLAN combination.
630 		 */
631 		rw_enter(&dlp->dl_impl_lock, RW_READER);
632 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
633 			rw_exit(&dlp->dl_impl_lock);
634 			goto promisc;
635 		}
636 		i_dls_head_hold(dhp);
637 		rw_exit(&dlp->dl_impl_lock);
638 
639 		/*
640 		 * Find dls_impl_t that will accept the sub-chain.
641 		 */
642 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
643 			if (!dls_accept_loopback(dip, daddr, &di_rx,
644 			    &di_rx_arg))
645 				continue;
646 
647 			/*
648 			 * There should be at least more dls_impl_t (since
649 			 * we've yet to check for dls_impl_t in promiscuous
650 			 * mode) so dup the sub-chain.
651 			 */
652 			if ((nmp = copymsgchain(mp)) != NULL)
653 				di_rx(di_rx_arg, NULL, nmp, header_length);
654 		}
655 
656 		/*
657 		 * Release the hold on the dls_impl_t chain now that we have
658 		 * finished walking it.
659 		 */
660 		i_dls_head_rele(dhp);
661 
662 promisc:
663 		/*
664 		 * Construct a hash key from the VLAN identifier and the
665 		 * DLSAP that represents dls_impl_t in promiscuous mode.
666 		 */
667 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
668 
669 		/*
670 		 * Search the has table for dls_impl_t eligible to receive
671 		 * a packet chain for this DLSAP/VLAN combination.
672 		 */
673 		rw_enter(&dlp->dl_impl_lock, RW_READER);
674 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
675 			rw_exit(&dlp->dl_impl_lock);
676 			freemsgchain(mp);
677 			goto loop;
678 		}
679 		i_dls_head_hold(dhp);
680 		rw_exit(&dlp->dl_impl_lock);
681 
682 		/*
683 		 * Find the first dls_impl_t that will accept the sub-chain.
684 		 */
685 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
686 			if (dls_accept_loopback(dip, daddr, &di_rx, &di_rx_arg))
687 				break;
688 
689 		/*
690 		 * If we did not find any dls_impl_t willing to accept the
691 		 * sub-chain then throw it away.
692 		 */
693 		if (dip == NULL) {
694 			i_dls_head_rele(dhp);
695 			freemsgchain(mp);
696 			goto loop;
697 		}
698 
699 		for (;;) {
700 			/*
701 			 * Find the next dls_impl_t that will accept the
702 			 * sub-chain.
703 			 */
704 			for (ndip = dip->di_nextp; ndip != NULL;
705 			    ndip = ndip->di_nextp)
706 				if (dls_accept_loopback(ndip, daddr,
707 				    &ndi_rx, &ndi_rx_arg))
708 					break;
709 
710 			/*
711 			 * If there are no more dls_impl_t that are willing
712 			 * to accept the sub-chain then we don't need to dup
713 			 * it before handing it to the current one.
714 			 */
715 			if (ndip == NULL) {
716 				di_rx(di_rx_arg, NULL, mp, header_length);
717 
718 				/*
719 				 * Since there are no more dls_impl_t, we're
720 				 * done.
721 				 */
722 				break;
723 			}
724 
725 			/*
726 			 * There are more dls_impl_t so dup the sub-chain.
727 			 */
728 			if ((nmp = copymsgchain(mp)) != NULL)
729 				di_rx(di_rx_arg, NULL, nmp, header_length);
730 
731 			dip = ndip;
732 			di_rx = ndi_rx;
733 			di_rx_arg = ndi_rx_arg;
734 		}
735 
736 		/*
737 		 * Release the hold on the dls_impl_t chain now that we have
738 		 * finished walking it.
739 		 */
740 		i_dls_head_rele(dhp);
741 
742 loop:
743 		/*
744 		 * Move onto the next sub-chain.
745 		 */
746 		mp = nextp;
747 	}
748 }
749 
750 /*ARGSUSED*/
751 static uint_t
752 i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
753 {
754 	boolean_t	*promiscp = arg;
755 	uint32_t	sap = KEY_SAP(key);
756 
757 	if (sap == DLS_SAP_PROMISC) {
758 		*promiscp = B_TRUE;
759 		return (MH_WALK_TERMINATE);
760 	}
761 
762 	return (MH_WALK_CONTINUE);
763 }
764 
765 static int
766 i_dls_link_create(const char *dev, uint_t port, dls_link_t **dlpp)
767 {
768 	dls_link_t		*dlp;
769 
770 	/*
771 	 * Allocate a new dls_link_t structure.
772 	 */
773 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
774 
775 	/*
776 	 * Name the dls_link_t after the MAC interface it represents.
777 	 */
778 	MAC_NAME(dlp->dl_name, dev, port);
779 	(void) strlcpy(dlp->dl_dev, dev, MAXNAMELEN);
780 	dlp->dl_port = port;
781 
782 	/*
783 	 * Set the packet loopback function for use when the MAC is in
784 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
785 	 */
786 	dlp->dl_loopback = i_dls_link_ether_loopback;
787 	dlp->dl_npromisc = 0;
788 	dlp->dl_mth = NULL;
789 
790 	*dlpp = dlp;
791 	return (0);
792 }
793 
794 static void
795 i_dls_link_destroy(dls_link_t *dlp)
796 {
797 	ASSERT(dlp->dl_npromisc == 0);
798 	ASSERT(dlp->dl_nactive == 0);
799 	ASSERT(dlp->dl_mth == NULL);
800 	ASSERT(dlp->dl_macref == 0);
801 	ASSERT(dlp->dl_mh == NULL);
802 	ASSERT(dlp->dl_mip == NULL);
803 	ASSERT(dlp->dl_impl_count == 0);
804 	ASSERT(dlp->dl_mrh == NULL);
805 
806 	/*
807 	 * Free the structure back to the cache.
808 	 */
809 	dlp->dl_unknowns = 0;
810 	kmem_cache_free(i_dls_link_cachep, dlp);
811 }
812 
813 /*
814  * Module initialization functions.
815  */
816 
817 void
818 dls_link_init(void)
819 {
820 	/*
821 	 * Create a kmem_cache of dls_link_t structures.
822 	 */
823 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
824 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
825 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
826 	ASSERT(i_dls_link_cachep != NULL);
827 
828 	/*
829 	 * Create a dls_link_t hash table and associated lock.
830 	 */
831 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
832 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
833 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
834 	rw_init(&i_dls_link_lock, NULL, RW_DEFAULT, NULL);
835 	i_dls_link_count = 0;
836 }
837 
838 int
839 dls_link_fini(void)
840 {
841 	if (i_dls_link_count > 0)
842 		return (EBUSY);
843 
844 	/*
845 	 * Destroy the kmem_cache.
846 	 */
847 	kmem_cache_destroy(i_dls_link_cachep);
848 
849 	/*
850 	 * Destroy the hash table and associated lock.
851 	 */
852 	mod_hash_destroy_hash(i_dls_link_hash);
853 	rw_destroy(&i_dls_link_lock);
854 	return (0);
855 }
856 
857 /*
858  * Exported functions.
859  */
860 
861 int
862 dls_link_hold(const char *dev, uint_t port, dls_link_t **dlpp)
863 {
864 	char			name[MAXNAMELEN];
865 	dls_link_t		*dlp;
866 	int			err;
867 
868 	/*
869 	 * Construct a copy of the name used to identify any existing
870 	 * dls_link_t.
871 	 */
872 	MAC_NAME(name, dev, port);
873 
874 	/*
875 	 * Look up a dls_link_t corresponding to the given mac_handle_t
876 	 * in the global hash table. We need to hold i_dls_link_lock in
877 	 * order to atomically find and insert a dls_link_t into the
878 	 * hash table.
879 	 */
880 	rw_enter(&i_dls_link_lock, RW_WRITER);
881 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
882 	    (mod_hash_val_t *)&dlp)) == 0)
883 		goto done;
884 
885 	ASSERT(err == MH_ERR_NOTFOUND);
886 
887 	/*
888 	 * We didn't find anything so we need to create one.
889 	 */
890 	if ((err = i_dls_link_create(dev, port, &dlp)) != 0) {
891 		rw_exit(&i_dls_link_lock);
892 		return (err);
893 	}
894 
895 	/*
896 	 * Insert the dls_link_t.
897 	 */
898 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
899 	    (mod_hash_val_t)dlp);
900 	ASSERT(err == 0);
901 
902 	i_dls_link_count++;
903 	ASSERT(i_dls_link_count != 0);
904 
905 done:
906 	/*
907 	 * Bump the reference count and hand back the reference.
908 	 */
909 	dlp->dl_ref++;
910 	*dlpp = dlp;
911 	rw_exit(&i_dls_link_lock);
912 	return (0);
913 }
914 
915 void
916 dls_link_rele(dls_link_t *dlp)
917 {
918 	mod_hash_val_t	val;
919 
920 	rw_enter(&i_dls_link_lock, RW_WRITER);
921 
922 	/*
923 	 * Check if there are any more references.
924 	 */
925 	if (--dlp->dl_ref != 0) {
926 		/*
927 		 * There are more references so there's nothing more to do.
928 		 */
929 		goto done;
930 	}
931 
932 	(void) mod_hash_remove(i_dls_link_hash,
933 	    (mod_hash_key_t)dlp->dl_name, &val);
934 	ASSERT(dlp == (dls_link_t *)val);
935 
936 	/*
937 	 * Destroy the dls_link_t.
938 	 */
939 	i_dls_link_destroy(dlp);
940 	ASSERT(i_dls_link_count > 0);
941 	i_dls_link_count--;
942 done:
943 	rw_exit(&i_dls_link_lock);
944 }
945 
946 int
947 dls_mac_hold(dls_link_t *dlp)
948 {
949 	int err = 0;
950 
951 	mutex_enter(&dlp->dl_lock);
952 
953 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
954 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
955 
956 	if (dlp->dl_macref == 0) {
957 		/*
958 		 * First reference; hold open the MAC interface.
959 		 */
960 		err = mac_open(dlp->dl_dev, dlp->dl_port, &dlp->dl_mh);
961 		if (err != 0)
962 			goto done;
963 
964 		dlp->dl_mip = mac_info(dlp->dl_mh);
965 	}
966 
967 	dlp->dl_macref++;
968 done:
969 	mutex_exit(&dlp->dl_lock);
970 	return (err);
971 }
972 
973 void
974 dls_mac_rele(dls_link_t *dlp)
975 {
976 	mutex_enter(&dlp->dl_lock);
977 	ASSERT(dlp->dl_mh != NULL);
978 
979 	if (--dlp->dl_macref == 0) {
980 		mac_close(dlp->dl_mh);
981 		dlp->dl_mh = NULL;
982 		dlp->dl_mip = NULL;
983 	}
984 	mutex_exit(&dlp->dl_lock);
985 }
986 
987 void
988 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
989 {
990 	dls_vlan_t	*dvp = dip->di_dvp;
991 	mod_hash_t	*hash = dlp->dl_impl_hash;
992 	mod_hash_key_t	key;
993 	dls_head_t	*dhp;
994 	dls_impl_t	*p;
995 	mac_rx_t	rx;
996 	int		err;
997 	boolean_t	promisc = B_FALSE;
998 
999 	/*
1000 	 * For ethernet media, sap values less than or equal to
1001 	 * ETHERMTU (1500) represent LLC channels. (See PSARC 2003/150).
1002 	 * We strictly use 0 to represent LLC channels.
1003 	 */
1004 	sap = (sap <= ETHERMTU) ? 0 : sap;
1005 
1006 	/*
1007 	 * Make the appropriate key value depending on whether the
1008 	 * dls_impl_t is in promiscuous mode or not.
1009 	 */
1010 	key = MAKE_KEY(sap, dvp->dv_id);
1011 
1012 	/*
1013 	 * We need dl_lock here because we want to be able to walk
1014 	 * the hash table *and* set the mac rx func atomically. if
1015 	 * these two operations are separate, someone else could
1016 	 * insert/remove dls_impl_t from the hash table after we
1017 	 * drop the hash lock and this could cause our chosen rx
1018 	 * func to be incorrect. note that we cannot call mac_rx_add
1019 	 * when holding the hash lock because this can cause deadlock.
1020 	 */
1021 	mutex_enter(&dlp->dl_lock);
1022 
1023 	/*
1024 	 * Search the table for a list head with this key.
1025 	 */
1026 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1027 
1028 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
1029 		ASSERT(err == MH_ERR_NOTFOUND);
1030 
1031 		dhp = i_dls_head_alloc(key);
1032 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
1033 		ASSERT(err == 0);
1034 	}
1035 
1036 	/*
1037 	 * Add the dls_impl_t to the head of the list.
1038 	 */
1039 	ASSERT(dip->di_nextp == NULL);
1040 	p = dhp->dh_list;
1041 	dip->di_nextp = p;
1042 	dhp->dh_list = dip;
1043 
1044 	/*
1045 	 * Save a pointer to the list head.
1046 	 */
1047 	dip->di_headp = dhp;
1048 	dlp->dl_impl_count++;
1049 
1050 	/*
1051 	 * Walk the bound dls_impl_t to see if there are any
1052 	 * in promiscuous 'all sap' mode.
1053 	 */
1054 	mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1055 	rw_exit(&dlp->dl_impl_lock);
1056 
1057 	/*
1058 	 * If there are then we need to use a receive routine
1059 	 * which will route packets to those dls_impl_t as well
1060 	 * as ones bound to the  DLSAP of the packet.
1061 	 */
1062 	if (promisc)
1063 		rx = i_dls_link_ether_rx_promisc;
1064 	else
1065 		rx = i_dls_link_ether_rx;
1066 
1067 	/* Replace the existing receive function if there is one. */
1068 	if (dlp->dl_mrh != NULL)
1069 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1070 	dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1071 	mutex_exit(&dlp->dl_lock);
1072 }
1073 
1074 void
1075 dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1076 {
1077 	mod_hash_t	*hash = dlp->dl_impl_hash;
1078 	dls_impl_t	**pp;
1079 	dls_impl_t	*p;
1080 	dls_head_t	*dhp;
1081 	mac_rx_t	rx;
1082 
1083 	/*
1084 	 * We need dl_lock here because we want to be able to walk
1085 	 * the hash table *and* set the mac rx func atomically. if
1086 	 * these two operations are separate, someone else could
1087 	 * insert/remove dls_impl_t from the hash table after we
1088 	 * drop the hash lock and this could cause our chosen rx
1089 	 * func to be incorrect. note that we cannot call mac_rx_add
1090 	 * when holding the hash lock because this can cause deadlock.
1091 	 */
1092 	mutex_enter(&dlp->dl_lock);
1093 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1094 
1095 	/*
1096 	 * Poll the hash table entry until all references have been dropped.
1097 	 * We need to drop all locks before sleeping because we don't want
1098 	 * the interrupt handler to block. We set di_removing here to
1099 	 * tell the receive callbacks not to pass up packets anymore.
1100 	 * This is only a hint to quicken the decrease of the refcnt so
1101 	 * the assignment need not be protected by any lock.
1102 	 */
1103 	dhp = dip->di_headp;
1104 	dip->di_removing = B_TRUE;
1105 	while (dhp->dh_ref != 0) {
1106 		rw_exit(&dlp->dl_impl_lock);
1107 		mutex_exit(&dlp->dl_lock);
1108 		delay(drv_usectohz(1000));	/* 1ms delay */
1109 		mutex_enter(&dlp->dl_lock);
1110 		rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1111 	}
1112 
1113 	/*
1114 	 * Walk the list and remove the dls_impl_t.
1115 	 */
1116 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->di_nextp)) {
1117 		if (p == dip)
1118 			break;
1119 	}
1120 	ASSERT(p != NULL);
1121 	*pp = p->di_nextp;
1122 	p->di_nextp = NULL;
1123 
1124 	ASSERT(dlp->dl_impl_count > 0);
1125 	dlp->dl_impl_count--;
1126 
1127 	if (dhp->dh_list == NULL) {
1128 		mod_hash_val_t	val = NULL;
1129 
1130 		/*
1131 		 * The list is empty so remove the hash table entry.
1132 		 */
1133 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1134 		ASSERT(dhp == (dls_head_t *)val);
1135 		i_dls_head_free(dhp);
1136 	}
1137 	dip->di_removing = B_FALSE;
1138 
1139 	/*
1140 	 * If there are no dls_impl_t then there's no need to register a
1141 	 * receive function with the mac.
1142 	 */
1143 	if (dlp->dl_impl_count == 0) {
1144 		rw_exit(&dlp->dl_impl_lock);
1145 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1146 		dlp->dl_mrh = NULL;
1147 	} else {
1148 		boolean_t promisc = B_FALSE;
1149 
1150 		/*
1151 		 * Walk the bound dls_impl_t to see if there are any
1152 		 * in promiscuous 'all sap' mode.
1153 		 */
1154 		mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1155 		rw_exit(&dlp->dl_impl_lock);
1156 
1157 		/*
1158 		 * If there are then we need to use a receive routine
1159 		 * which will route packets to those dls_impl_t as well
1160 		 * as ones bound to the  DLSAP of the packet.
1161 		 */
1162 		if (promisc)
1163 			rx = i_dls_link_ether_rx_promisc;
1164 		else
1165 			rx = i_dls_link_ether_rx;
1166 
1167 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1168 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1169 	}
1170 	mutex_exit(&dlp->dl_lock);
1171 }
1172