xref: /illumos-gate/usr/src/uts/common/io/dls/dls_link.c (revision 80ab886d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Data-Link Services Module
30  */
31 
32 #include	<sys/types.h>
33 #include	<sys/stream.h>
34 #include	<sys/strsun.h>
35 #include	<sys/strsubr.h>
36 #include	<sys/sysmacros.h>
37 #include	<sys/atomic.h>
38 #include	<sys/modhash.h>
39 #include	<sys/dlpi.h>
40 #include	<sys/ethernet.h>
41 #include	<sys/byteorder.h>
42 #include	<sys/vlan.h>
43 #include	<sys/mac.h>
44 #include	<sys/sdt.h>
45 
46 #include	<sys/dls.h>
47 #include	<sys/dld_impl.h>
48 #include	<sys/dls_impl.h>
49 
50 static kmem_cache_t	*i_dls_link_cachep;
51 static mod_hash_t	*i_dls_link_hash;
52 static uint_t		i_dls_link_count;
53 static krwlock_t	i_dls_link_lock;
54 
55 #define		LINK_HASHSZ	67	/* prime */
56 #define		IMPL_HASHSZ	67	/* prime */
57 
/*
 * Construct a hash key encompassing both DLSAP value and VLAN identifier.
 * The VLAN identifier is masked with VLAN_ID_MASK and OR-ed with the DLSAP
 * value shifted left by VLAN_ID_SIZE bits.  The mask is parenthesized
 * explicitly so the expression does not depend on '&' binding tighter
 * than '|'.
 */
#define	MAKE_KEY(_sap, _vid)						\
	((mod_hash_key_t)(uintptr_t)					\
	(((_sap) << VLAN_ID_SIZE) | ((_vid) & VLAN_ID_MASK)))

/*
 * Extract the DLSAP value from a hash key built by MAKE_KEY(): the key is
 * narrowed to 32 bits and the VLAN identifier bits are shifted out.
 */
#define	KEY_SAP(_key)							\
	(((uint32_t)(uintptr_t)(_key)) >> VLAN_ID_SIZE)
70 
71 /*
72  * Private functions.
73  */
74 
75 /*ARGSUSED*/
76 static int
77 i_dls_link_constructor(void *buf, void *arg, int kmflag)
78 {
79 	dls_link_t	*dlp = buf;
80 	char		name[MAXNAMELEN];
81 
82 	bzero(buf, sizeof (dls_link_t));
83 
84 	(void) sprintf(name, "dls_link_t_%p_hash", buf);
85 	dlp->dl_impl_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
86 	    mod_hash_null_valdtor);
87 
88 	mutex_init(&dlp->dl_lock, NULL, MUTEX_DEFAULT, NULL);
89 	mutex_init(&dlp->dl_promisc_lock, NULL, MUTEX_DEFAULT, NULL);
90 	rw_init(&dlp->dl_impl_lock, NULL, RW_DEFAULT, NULL);
91 	return (0);
92 }
93 
94 /*ARGSUSED*/
95 static void
96 i_dls_link_destructor(void *buf, void *arg)
97 {
98 	dls_link_t	*dlp = buf;
99 
100 	ASSERT(dlp->dl_ref == 0);
101 	ASSERT(dlp->dl_mh == NULL);
102 	ASSERT(dlp->dl_unknowns == 0);
103 
104 	mod_hash_destroy_idhash(dlp->dl_impl_hash);
105 	dlp->dl_impl_hash = NULL;
106 
107 	mutex_destroy(&dlp->dl_lock);
108 	mutex_destroy(&dlp->dl_promisc_lock);
109 	rw_destroy(&dlp->dl_impl_lock);
110 }
111 
/*
 * Compare the 16-bit word at index '_idx' of two packet headers.  Both
 * pointers are treated as arrays of uint16_t.
 */
#define	ETHER_WORD_EQ(_pkt_a, _pkt_b, _idx)				\
	(((uint16_t *)(_pkt_a))[(_idx)] == ((uint16_t *)(_pkt_b))[(_idx)])

/*
 * True if two untagged headers agree in words 0-2 (the first six bytes,
 * i.e. the destination address of an ethernet header) and in word 6
 * (bytes 12-13, the type/length field).  Words 3-5 are not compared.
 */
#define	ETHER_MATCH(_pkt_a, _pkt_b)					\
	(ETHER_WORD_EQ(_pkt_a, _pkt_b, 0) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 1) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 2) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 6))

/*
 * As ETHER_MATCH(), but words 7 and 8 (bytes 14-17) must agree as well.
 */
#define	ETHER_VLAN_MATCH(_pkt_a, _pkt_b)				\
	(ETHER_MATCH(_pkt_a, _pkt_b) &&					\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 7) &&				\
	ETHER_WORD_EQ(_pkt_a, _pkt_b, 8))
125 
/*
 * If the type/length field holds a length (it is no larger than
 * ETHERMTU), trim whatever the message carries beyond header plus
 * payload.  'delta' is negative exactly when the message is longer than
 * hdrlen + typelen; adjmsg() is then called with that negative length
 * (which, per adjmsg(9F), removes bytes from the tail of the message).
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, and the arguments are parenthesized so that arbitrary
 * expressions may be passed.
 */
#define	ETHER_STRIP_PADDING(typelen, hdrlen, p) do {			\
	if ((typelen) <= ETHERMTU) {					\
		ssize_t delta = (typelen) + (hdrlen) - msgdsize(p);	\
									\
		if (delta < 0)						\
			(void) adjmsg((p), delta);			\
	}								\
} while (0)
134 
135 static mblk_t *
136 i_dls_link_ether_subchain(mblk_t *mp, uint_t *header_lengthp,
137     uint8_t **daddrp, uint16_t *type_lengthp, uint16_t *vidp,
138     uint_t *countp)
139 {
140 	struct ether_header		*ehp;
141 	struct ether_vlan_header	*evhp;
142 	mblk_t				**pp;
143 	mblk_t				*p;
144 	uint_t				npacket;
145 
146 	/*
147 	 * Packets should always be at least 16 bit aligned.
148 	 */
149 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
150 
151 	/*
152 	 * Determine whether this is a VLAN or non-VLAN packet.
153 	 */
154 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
155 	ehp = (struct ether_header *)mp->b_rptr;
156 	if ((*type_lengthp = ntohs(ehp->ether_type)) == VLAN_TPID)
157 		goto vlan;
158 
159 	/*
160 	 * It is a non-VLAN header.
161 	 */
162 	*header_lengthp = sizeof (struct ether_header);
163 
164 	/*
165 	 * Parse the rest of the header information that we need.
166 	 */
167 	*daddrp = (uint8_t *)&(ehp->ether_dhost);
168 	*vidp = VLAN_ID_NONE;
169 
170 	/*
171 	 * Compare with subsequent headers until we find one that has
172 	 * differing header information. After checking each packet
173 	 * strip padding and skip over the header.
174 	 */
175 	npacket = 1;
176 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
177 		if (!ETHER_MATCH(p->b_rptr, mp->b_rptr) != 0)
178 			break;
179 		ETHER_STRIP_PADDING(*type_lengthp, *header_lengthp, p);
180 		p->b_rptr += sizeof (struct ether_header);
181 		npacket++;
182 	}
183 
184 	/*
185 	 * Strip padding and skip over the initial packet's header.
186 	 */
187 	ETHER_STRIP_PADDING(*type_lengthp, *header_lengthp, mp);
188 	mp->b_rptr += sizeof (struct ether_header);
189 	goto done;
190 
191 vlan:
192 	/*
193 	 * It is a VLAN header.
194 	 */
195 	evhp = (struct ether_vlan_header *)mp->b_rptr;
196 	*header_lengthp = sizeof (struct ether_vlan_header);
197 
198 	/*
199 	 * Parse the header information.
200 	 */
201 	*daddrp = (uint8_t *)&(evhp->ether_dhost);
202 	*vidp = VLAN_ID(ntohs(evhp->ether_tci));
203 	*type_lengthp = ntohs(evhp->ether_type);
204 
205 	/*
206 	 * Compare with subsequent headers until we find one that has
207 	 * differing header information. After checking each packet
208 	 * strip padding and skip over the header.
209 	 */
210 	npacket = 1;
211 	for (pp = &(mp->b_next); (p = *pp) != NULL; pp = &(p->b_next)) {
212 		if (!ETHER_VLAN_MATCH(p->b_rptr, mp->b_rptr) != 0)
213 			break;
214 		ETHER_STRIP_PADDING(*type_lengthp, *header_lengthp, p);
215 		p->b_rptr += sizeof (struct ether_vlan_header);
216 		npacket++;
217 	}
218 
219 	/*
220 	 * Strip padding and skip over the initial packet's header.
221 	 */
222 	ETHER_STRIP_PADDING(*type_lengthp, *header_lengthp, mp);
223 	mp->b_rptr += sizeof (struct ether_vlan_header);
224 
225 done:
226 	/*
227 	 * Break the chain at this point and return a pointer to the next
228 	 * sub-chain.
229 	 */
230 	*pp = NULL;
231 	*countp = npacket;
232 	return (p);
233 }
234 
235 static void
236 i_dls_head_hold(dls_head_t *dhp)
237 {
238 	atomic_inc_32(&dhp->dh_ref);
239 }
240 
241 static void
242 i_dls_head_rele(dls_head_t *dhp)
243 {
244 	atomic_dec_32(&dhp->dh_ref);
245 }
246 
247 static dls_head_t *
248 i_dls_head_alloc(mod_hash_key_t key)
249 {
250 	dls_head_t	*dhp;
251 
252 	dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
253 	dhp->dh_key = key;
254 	return (dhp);
255 }
256 
257 static void
258 i_dls_head_free(dls_head_t *dhp)
259 {
260 	ASSERT(dhp->dh_ref == 0);
261 	kmem_free(dhp, sizeof (dls_head_t));
262 }
263 
264 static void
265 i_dls_link_ether_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
266 {
267 	dls_link_t			*dlp = arg;
268 	mod_hash_t			*hash = dlp->dl_impl_hash;
269 	mblk_t				*nextp;
270 	uint_t				header_length;
271 	uint8_t				*daddr;
272 	uint16_t			type_length;
273 	uint16_t			vid;
274 	uint16_t			sap;
275 	dls_head_t			*dhp;
276 	dls_impl_t			*dip;
277 	dls_impl_t			*ndip;
278 	mblk_t				*nmp;
279 	mod_hash_key_t			key;
280 	uint_t				npacket;
281 	boolean_t			accepted;
282 	dls_rx_t			di_rx, ndi_rx;
283 	void				*di_rx_arg, *ndi_rx_arg;
284 
285 	/*
286 	 * Walk the packet chain.
287 	 */
288 	while (mp != NULL) {
289 		/*
290 		 * Wipe the accepted state.
291 		 */
292 		accepted = B_FALSE;
293 
294 		/*
295 		 * Grab the longest sub-chain we can process as a single
296 		 * unit.
297 		 */
298 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
299 		    &type_length, &vid, &npacket);
300 
301 		/*
302 		 * Calculate the DLSAP: LLC (0) if the type/length field is
303 		 * interpreted as a length, otherwise it is the value of the
304 		 * type/length field.
305 		 */
306 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
307 
308 		/*
309 		 * Construct a hash key from the VLAN identifier and the
310 		 * DLSAP.
311 		 */
312 		key = MAKE_KEY(sap, vid);
313 
314 		/*
315 		 * Search the has table for dls_impl_t eligible to receive
316 		 * a packet chain for this DLSAP/VLAN combination.
317 		 */
318 		rw_enter(&dlp->dl_impl_lock, RW_READER);
319 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
320 			rw_exit(&dlp->dl_impl_lock);
321 			freemsgchain(mp);
322 			goto loop;
323 		}
324 		i_dls_head_hold(dhp);
325 		rw_exit(&dlp->dl_impl_lock);
326 
327 		/*
328 		 * Find the first dls_impl_t that will accept the sub-chain.
329 		 */
330 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
331 			if (dls_accept(dip, daddr, &di_rx, &di_rx_arg))
332 				break;
333 
334 		/*
335 		 * If we did not find any dls_impl_t willing to accept the
336 		 * sub-chain then throw it away.
337 		 */
338 		if (dip == NULL) {
339 			i_dls_head_rele(dhp);
340 			freemsgchain(mp);
341 			goto loop;
342 		}
343 
344 		/*
345 		 * We have at least one acceptor.
346 		 */
347 		accepted = B_TRUE;
348 		for (;;) {
349 			/*
350 			 * Find the next dls_impl_t that will accept the
351 			 * sub-chain.
352 			 */
353 			for (ndip = dip->di_nextp; ndip != NULL;
354 			    ndip = ndip->di_nextp)
355 				if (dls_accept(ndip, daddr, &ndi_rx,
356 				    &ndi_rx_arg))
357 					break;
358 
359 			/*
360 			 * If there are no more dls_impl_t that are willing
361 			 * to accept the sub-chain then we don't need to dup
362 			 * it before handing it to the current one.
363 			 */
364 			if (ndip == NULL) {
365 				di_rx(di_rx_arg, mrh, mp, header_length);
366 
367 				/*
368 				 * Since there are no more dls_impl_t, we're
369 				 * done.
370 				 */
371 				break;
372 			}
373 
374 			/*
375 			 * There are more dls_impl_t so dup the sub-chain.
376 			 */
377 			if ((nmp = copymsgchain(mp)) != NULL)
378 				di_rx(di_rx_arg, mrh, nmp, header_length);
379 
380 			dip = ndip;
381 			di_rx = ndi_rx;
382 			di_rx_arg = ndi_rx_arg;
383 		}
384 
385 		/*
386 		 * Release the hold on the dls_impl_t chain now that we have
387 		 * finished walking it.
388 		 */
389 		i_dls_head_rele(dhp);
390 
391 loop:
392 		/*
393 		 * If there were no acceptors then add the packet count to the
394 		 * 'unknown' count.
395 		 */
396 		if (!accepted)
397 			atomic_add_32(&(dlp->dl_unknowns), npacket);
398 
399 		/*
400 		 * Move onto the next sub-chain.
401 		 */
402 		mp = nextp;
403 	}
404 }
405 
406 static void
407 i_dls_link_ether_rx_promisc(void *arg, mac_resource_handle_t mrh,
408     mblk_t *mp)
409 {
410 	dls_link_t			*dlp = arg;
411 	mod_hash_t			*hash = dlp->dl_impl_hash;
412 	mblk_t				*nextp;
413 	uint_t				header_length;
414 	uint8_t				*daddr;
415 	uint16_t			type_length;
416 	uint16_t			vid;
417 	uint16_t			sap;
418 	dls_head_t			*dhp;
419 	dls_impl_t			*dip;
420 	dls_impl_t			*ndip;
421 	mblk_t				*nmp;
422 	mod_hash_key_t			key;
423 	uint_t				npacket;
424 	boolean_t			accepted;
425 	dls_rx_t			di_rx, ndi_rx;
426 	void				*di_rx_arg, *ndi_rx_arg;
427 
428 	/*
429 	 * Walk the packet chain.
430 	 */
431 	while (mp != NULL) {
432 		/*
433 		 * Wipe the accepted state.
434 		 */
435 		accepted = B_FALSE;
436 
437 		/*
438 		 * Grab the longest sub-chain we can process as a single
439 		 * unit.
440 		 */
441 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
442 		    &type_length, &vid, &npacket);
443 
444 		/*
445 		 * Construct a hash key from the VLAN identifier and the
446 		 * DLSAP that represents dls_impl_t in promiscuous mode.
447 		 */
448 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
449 
450 		/*
451 		 * Search the has table for dls_impl_t eligible to receive
452 		 * a packet chain for this DLSAP/VLAN combination.
453 		 */
454 		rw_enter(&dlp->dl_impl_lock, RW_READER);
455 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
456 			rw_exit(&dlp->dl_impl_lock);
457 			goto non_promisc;
458 		}
459 		i_dls_head_hold(dhp);
460 		rw_exit(&dlp->dl_impl_lock);
461 
462 		/*
463 		 * Find dls_impl_t that will accept the sub-chain.
464 		 */
465 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
466 			if (!dls_accept(dip, daddr, &di_rx, &di_rx_arg))
467 				continue;
468 
469 			/*
470 			 * We have at least one acceptor.
471 			 */
472 			accepted = B_TRUE;
473 
474 			/*
475 			 * There will normally be at least more dls_impl_t
476 			 * (since we've yet to check for non-promiscuous
477 			 * dls_impl_t) so dup the sub-chain.
478 			 */
479 			if ((nmp = copymsgchain(mp)) != NULL)
480 				di_rx(di_rx_arg, mrh, nmp, header_length);
481 		}
482 
483 		/*
484 		 * Release the hold on the dls_impl_t chain now that we have
485 		 * finished walking it.
486 		 */
487 		i_dls_head_rele(dhp);
488 
489 non_promisc:
490 		/*
491 		 * Calculate the DLSAP: LLC (0) if the type/length field is
492 		 * interpreted as a length, otherwise it is the value of the
493 		 * type/length field.
494 		 */
495 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
496 
497 		/*
498 		 * Construct a hash key from the VLAN identifier and the
499 		 * DLSAP.
500 		 */
501 		key = MAKE_KEY(sap, vid);
502 
503 		/*
504 		 * Search the has table for dls_impl_t eligible to receive
505 		 * a packet chain for this DLSAP/VLAN combination.
506 		 */
507 		rw_enter(&dlp->dl_impl_lock, RW_READER);
508 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
509 			rw_exit(&dlp->dl_impl_lock);
510 			freemsgchain(mp);
511 			goto loop;
512 		}
513 		i_dls_head_hold(dhp);
514 		rw_exit(&dlp->dl_impl_lock);
515 
516 		/*
517 		 * Find the first dls_impl_t that will accept the sub-chain.
518 		 */
519 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
520 			if (dls_accept(dip, daddr, &di_rx, &di_rx_arg))
521 				break;
522 
523 		/*
524 		 * If we did not find any dls_impl_t willing to accept the
525 		 * sub-chain then throw it away.
526 		 */
527 		if (dip == NULL) {
528 			i_dls_head_rele(dhp);
529 			freemsgchain(mp);
530 			goto loop;
531 		}
532 
533 		/*
534 		 * We have at least one acceptor.
535 		 */
536 		accepted = B_TRUE;
537 		for (;;) {
538 			/*
539 			 * Find the next dls_impl_t that will accept the
540 			 * sub-chain.
541 			 */
542 			for (ndip = dip->di_nextp; ndip != NULL;
543 			    ndip = ndip->di_nextp)
544 				if (dls_accept(ndip, daddr, &ndi_rx,
545 				    &ndi_rx_arg))
546 					break;
547 
548 			/*
549 			 * If there are no more dls_impl_t that are willing
550 			 * to accept the sub-chain then we don't need to dup
551 			 * it before handing it to the current one.
552 			 */
553 			if (ndip == NULL) {
554 				di_rx(di_rx_arg, mrh, mp, header_length);
555 
556 				/*
557 				 * Since there are no more dls_impl_t, we're
558 				 * done.
559 				 */
560 				break;
561 			}
562 
563 			/*
564 			 * There are more dls_impl_t so dup the sub-chain.
565 			 */
566 			if ((nmp = copymsgchain(mp)) != NULL)
567 				di_rx(di_rx_arg, mrh, nmp, header_length);
568 
569 			dip = ndip;
570 			di_rx = ndi_rx;
571 			di_rx_arg = ndi_rx_arg;
572 		}
573 
574 		/*
575 		 * Release the hold on the dls_impl_t chain now that we have
576 		 * finished walking it.
577 		 */
578 		i_dls_head_rele(dhp);
579 
580 loop:
581 		/*
582 		 * If there were no acceptors then add the packet count to the
583 		 * 'unknown' count.
584 		 */
585 		if (!accepted)
586 			atomic_add_32(&(dlp->dl_unknowns), npacket);
587 
588 		/*
589 		 * Move onto the next sub-chain.
590 		 */
591 		mp = nextp;
592 	}
593 }
594 
595 static void
596 i_dls_link_ether_loopback(void *arg, mblk_t *mp)
597 {
598 	dls_link_t			*dlp = arg;
599 	mod_hash_t			*hash = dlp->dl_impl_hash;
600 	mblk_t				*nextp;
601 	uint_t				header_length;
602 	uint8_t				*daddr;
603 	uint16_t			type_length;
604 	uint16_t			vid;
605 	uint16_t			sap;
606 	dls_head_t			*dhp;
607 	dls_impl_t			*dip;
608 	dls_impl_t			*ndip;
609 	mblk_t				*nmp;
610 	mod_hash_key_t			key;
611 	uint_t				npacket;
612 	dls_rx_t			di_rx, ndi_rx;
613 	void				*di_rx_arg, *ndi_rx_arg;
614 
615 	/*
616 	 * Walk the packet chain.
617 	 */
618 	while (mp != NULL) {
619 		/*
620 		 * Grab the longest sub-chain we can process as a single
621 		 * unit.
622 		 */
623 		nextp = i_dls_link_ether_subchain(mp, &header_length, &daddr,
624 		    &type_length, &vid, &npacket);
625 
626 		/*
627 		 * Calculate the DLSAP: LLC (0) if the type/length field is
628 		 * interpreted as a length, otherwise it is the value of the
629 		 * type/length field.
630 		 */
631 		sap = (type_length <= ETHERMTU) ? DLS_SAP_LLC : type_length;
632 
633 		/*
634 		 * Construct a hash key from the VLAN identifier and the
635 		 * DLSAP.
636 		 */
637 		key = MAKE_KEY(sap, vid);
638 
639 		/*
640 		 * Search the has table for dls_impl_t eligible to receive
641 		 * a packet chain for this DLSAP/VLAN combination.
642 		 */
643 		rw_enter(&dlp->dl_impl_lock, RW_READER);
644 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
645 			rw_exit(&dlp->dl_impl_lock);
646 			goto promisc;
647 		}
648 		i_dls_head_hold(dhp);
649 		rw_exit(&dlp->dl_impl_lock);
650 
651 		/*
652 		 * Find dls_impl_t that will accept the sub-chain.
653 		 */
654 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp) {
655 			if (!dls_accept_loopback(dip, daddr, &di_rx,
656 			    &di_rx_arg))
657 				continue;
658 
659 			/*
660 			 * There should be at least more dls_impl_t (since
661 			 * we've yet to check for dls_impl_t in promiscuous
662 			 * mode) so dup the sub-chain.
663 			 */
664 			if ((nmp = copymsgchain(mp)) != NULL)
665 				di_rx(di_rx_arg, NULL, nmp, header_length);
666 		}
667 
668 		/*
669 		 * Release the hold on the dls_impl_t chain now that we have
670 		 * finished walking it.
671 		 */
672 		i_dls_head_rele(dhp);
673 
674 promisc:
675 		/*
676 		 * Construct a hash key from the VLAN identifier and the
677 		 * DLSAP that represents dls_impl_t in promiscuous mode.
678 		 */
679 		key = MAKE_KEY(DLS_SAP_PROMISC, vid);
680 
681 		/*
682 		 * Search the has table for dls_impl_t eligible to receive
683 		 * a packet chain for this DLSAP/VLAN combination.
684 		 */
685 		rw_enter(&dlp->dl_impl_lock, RW_READER);
686 		if (mod_hash_find(hash, key, (mod_hash_val_t *)&dhp) != 0) {
687 			rw_exit(&dlp->dl_impl_lock);
688 			freemsgchain(mp);
689 			goto loop;
690 		}
691 		i_dls_head_hold(dhp);
692 		rw_exit(&dlp->dl_impl_lock);
693 
694 		/*
695 		 * Find the first dls_impl_t that will accept the sub-chain.
696 		 */
697 		for (dip = dhp->dh_list; dip != NULL; dip = dip->di_nextp)
698 			if (dls_accept_loopback(dip, daddr, &di_rx, &di_rx_arg))
699 				break;
700 
701 		/*
702 		 * If we did not find any dls_impl_t willing to accept the
703 		 * sub-chain then throw it away.
704 		 */
705 		if (dip == NULL) {
706 			i_dls_head_rele(dhp);
707 			freemsgchain(mp);
708 			goto loop;
709 		}
710 
711 		for (;;) {
712 			/*
713 			 * Find the next dls_impl_t that will accept the
714 			 * sub-chain.
715 			 */
716 			for (ndip = dip->di_nextp; ndip != NULL;
717 			    ndip = ndip->di_nextp)
718 				if (dls_accept_loopback(ndip, daddr,
719 				    &ndi_rx, &ndi_rx_arg))
720 					break;
721 
722 			/*
723 			 * If there are no more dls_impl_t that are willing
724 			 * to accept the sub-chain then we don't need to dup
725 			 * it before handing it to the current one.
726 			 */
727 			if (ndip == NULL) {
728 				di_rx(di_rx_arg, NULL, mp, header_length);
729 
730 				/*
731 				 * Since there are no more dls_impl_t, we're
732 				 * done.
733 				 */
734 				break;
735 			}
736 
737 			/*
738 			 * There are more dls_impl_t so dup the sub-chain.
739 			 */
740 			if ((nmp = copymsgchain(mp)) != NULL)
741 				di_rx(di_rx_arg, NULL, nmp, header_length);
742 
743 			dip = ndip;
744 			di_rx = ndi_rx;
745 			di_rx_arg = ndi_rx_arg;
746 		}
747 
748 		/*
749 		 * Release the hold on the dls_impl_t chain now that we have
750 		 * finished walking it.
751 		 */
752 		i_dls_head_rele(dhp);
753 
754 loop:
755 		/*
756 		 * Move onto the next sub-chain.
757 		 */
758 		mp = nextp;
759 	}
760 }
761 
762 /*ARGSUSED*/
763 static uint_t
764 i_dls_link_walk(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
765 {
766 	boolean_t	*promiscp = arg;
767 	uint32_t	sap = KEY_SAP(key);
768 
769 	if (sap == DLS_SAP_PROMISC) {
770 		*promiscp = B_TRUE;
771 		return (MH_WALK_TERMINATE);
772 	}
773 
774 	return (MH_WALK_CONTINUE);
775 }
776 
777 static int
778 i_dls_link_create(const char *dev, uint_t port, dls_link_t **dlpp)
779 {
780 	dls_link_t		*dlp;
781 
782 	/*
783 	 * Allocate a new dls_link_t structure.
784 	 */
785 	dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
786 
787 	/*
788 	 * Name the dls_link_t after the MAC interface it represents.
789 	 */
790 	MAC_NAME(dlp->dl_name, dev, port);
791 	(void) strlcpy(dlp->dl_dev, dev, MAXNAMELEN);
792 	dlp->dl_port = port;
793 
794 	/*
795 	 * Set the packet loopback function for use when the MAC is in
796 	 * promiscuous mode, and initialize promiscuous bookeeping fields.
797 	 */
798 	dlp->dl_loopback = i_dls_link_ether_loopback;
799 	dlp->dl_npromisc = 0;
800 	dlp->dl_mth = NULL;
801 
802 	*dlpp = dlp;
803 	return (0);
804 }
805 
806 static void
807 i_dls_link_destroy(dls_link_t *dlp)
808 {
809 	ASSERT(dlp->dl_npromisc == 0);
810 	ASSERT(dlp->dl_nactive == 0);
811 	ASSERT(dlp->dl_mth == NULL);
812 	ASSERT(dlp->dl_macref == 0);
813 	ASSERT(dlp->dl_mh == NULL);
814 	ASSERT(dlp->dl_mip == NULL);
815 	ASSERT(dlp->dl_impl_count == 0);
816 	ASSERT(dlp->dl_mrh == NULL);
817 
818 	/*
819 	 * Free the structure back to the cache.
820 	 */
821 	dlp->dl_unknowns = 0;
822 	kmem_cache_free(i_dls_link_cachep, dlp);
823 }
824 
825 /*
826  * Module initialization functions.
827  */
828 
829 void
830 dls_link_init(void)
831 {
832 	/*
833 	 * Create a kmem_cache of dls_link_t structures.
834 	 */
835 	i_dls_link_cachep = kmem_cache_create("dls_link_cache",
836 	    sizeof (dls_link_t), 0, i_dls_link_constructor,
837 	    i_dls_link_destructor, NULL, NULL, NULL, 0);
838 	ASSERT(i_dls_link_cachep != NULL);
839 
840 	/*
841 	 * Create a dls_link_t hash table and associated lock.
842 	 */
843 	i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
844 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
845 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
846 	rw_init(&i_dls_link_lock, NULL, RW_DEFAULT, NULL);
847 	i_dls_link_count = 0;
848 }
849 
850 int
851 dls_link_fini(void)
852 {
853 	if (i_dls_link_count > 0)
854 		return (EBUSY);
855 
856 	/*
857 	 * Destroy the kmem_cache.
858 	 */
859 	kmem_cache_destroy(i_dls_link_cachep);
860 
861 	/*
862 	 * Destroy the hash table and associated lock.
863 	 */
864 	mod_hash_destroy_hash(i_dls_link_hash);
865 	rw_destroy(&i_dls_link_lock);
866 	return (0);
867 }
868 
869 /*
870  * Exported functions.
871  */
872 
873 int
874 dls_link_hold(const char *dev, uint_t port, dls_link_t **dlpp)
875 {
876 	char			name[MAXNAMELEN];
877 	dls_link_t		*dlp;
878 	int			err;
879 
880 	/*
881 	 * Construct a copy of the name used to identify any existing
882 	 * dls_link_t.
883 	 */
884 	MAC_NAME(name, dev, port);
885 
886 	/*
887 	 * Look up a dls_link_t corresponding to the given mac_handle_t
888 	 * in the global hash table. We need to hold i_dls_link_lock in
889 	 * order to atomically find and insert a dls_link_t into the
890 	 * hash table.
891 	 */
892 	rw_enter(&i_dls_link_lock, RW_WRITER);
893 	if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
894 	    (mod_hash_val_t *)&dlp)) == 0)
895 		goto done;
896 
897 	ASSERT(err == MH_ERR_NOTFOUND);
898 
899 	/*
900 	 * We didn't find anything so we need to create one.
901 	 */
902 	if ((err = i_dls_link_create(dev, port, &dlp)) != 0) {
903 		rw_exit(&i_dls_link_lock);
904 		return (err);
905 	}
906 
907 	/*
908 	 * Insert the dls_link_t.
909 	 */
910 	err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
911 	    (mod_hash_val_t)dlp);
912 	ASSERT(err == 0);
913 
914 	i_dls_link_count++;
915 	ASSERT(i_dls_link_count != 0);
916 
917 done:
918 	/*
919 	 * Bump the reference count and hand back the reference.
920 	 */
921 	dlp->dl_ref++;
922 	*dlpp = dlp;
923 	rw_exit(&i_dls_link_lock);
924 	return (0);
925 }
926 
927 void
928 dls_link_rele(dls_link_t *dlp)
929 {
930 	mod_hash_val_t	val;
931 
932 	rw_enter(&i_dls_link_lock, RW_WRITER);
933 
934 	/*
935 	 * Check if there are any more references.
936 	 */
937 	if (--dlp->dl_ref != 0) {
938 		/*
939 		 * There are more references so there's nothing more to do.
940 		 */
941 		goto done;
942 	}
943 
944 	(void) mod_hash_remove(i_dls_link_hash,
945 	    (mod_hash_key_t)dlp->dl_name, &val);
946 	ASSERT(dlp == (dls_link_t *)val);
947 
948 	/*
949 	 * Destroy the dls_link_t.
950 	 */
951 	i_dls_link_destroy(dlp);
952 	ASSERT(i_dls_link_count > 0);
953 	i_dls_link_count--;
954 done:
955 	rw_exit(&i_dls_link_lock);
956 }
957 
958 int
959 dls_mac_hold(dls_link_t *dlp)
960 {
961 	int err = 0;
962 
963 	mutex_enter(&dlp->dl_lock);
964 
965 	ASSERT(IMPLY(dlp->dl_macref != 0, dlp->dl_mh != NULL));
966 	ASSERT(IMPLY(dlp->dl_macref == 0, dlp->dl_mh == NULL));
967 
968 	if (dlp->dl_macref == 0) {
969 		/*
970 		 * First reference; hold open the MAC interface.
971 		 */
972 		err = mac_open(dlp->dl_dev, dlp->dl_port, &dlp->dl_mh);
973 		if (err != 0)
974 			goto done;
975 
976 		dlp->dl_mip = mac_info(dlp->dl_mh);
977 	}
978 
979 	dlp->dl_macref++;
980 done:
981 	mutex_exit(&dlp->dl_lock);
982 	return (err);
983 }
984 
985 void
986 dls_mac_rele(dls_link_t *dlp)
987 {
988 	mutex_enter(&dlp->dl_lock);
989 	ASSERT(dlp->dl_mh != NULL);
990 
991 	if (--dlp->dl_macref == 0) {
992 		mac_close(dlp->dl_mh);
993 		dlp->dl_mh = NULL;
994 		dlp->dl_mip = NULL;
995 	}
996 	mutex_exit(&dlp->dl_lock);
997 }
998 
999 void
1000 dls_link_add(dls_link_t *dlp, uint32_t sap, dls_impl_t *dip)
1001 {
1002 	dls_vlan_t	*dvp = dip->di_dvp;
1003 	mod_hash_t	*hash = dlp->dl_impl_hash;
1004 	mod_hash_key_t	key;
1005 	dls_head_t	*dhp;
1006 	dls_impl_t	*p;
1007 	mac_rx_t	rx;
1008 	int		err;
1009 	boolean_t	promisc = B_FALSE;
1010 
1011 	/*
1012 	 * For ethernet media, sap values less than or equal to
1013 	 * ETHERMTU (1500) represent LLC channels. (See PSARC 2003/150).
1014 	 * We strictly use 0 to represent LLC channels.
1015 	 */
1016 	sap = (sap <= ETHERMTU) ? 0 : sap;
1017 
1018 	/*
1019 	 * Make the appropriate key value depending on whether the
1020 	 * dls_impl_t is in promiscuous mode or not.
1021 	 */
1022 	key = MAKE_KEY(sap, dvp->dv_id);
1023 
1024 	/*
1025 	 * We need dl_lock here because we want to be able to walk
1026 	 * the hash table *and* set the mac rx func atomically. if
1027 	 * these two operations are separate, someone else could
1028 	 * insert/remove dls_impl_t from the hash table after we
1029 	 * drop the hash lock and this could cause our chosen rx
1030 	 * func to be incorrect. note that we cannot call mac_rx_add
1031 	 * when holding the hash lock because this can cause deadlock.
1032 	 */
1033 	mutex_enter(&dlp->dl_lock);
1034 
1035 	/*
1036 	 * Search the table for a list head with this key.
1037 	 */
1038 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1039 
1040 	if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
1041 		ASSERT(err == MH_ERR_NOTFOUND);
1042 
1043 		dhp = i_dls_head_alloc(key);
1044 		err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
1045 		ASSERT(err == 0);
1046 	}
1047 
1048 	/*
1049 	 * Add the dls_impl_t to the head of the list.
1050 	 */
1051 	ASSERT(dip->di_nextp == NULL);
1052 	p = dhp->dh_list;
1053 	dip->di_nextp = p;
1054 	dhp->dh_list = dip;
1055 
1056 	/*
1057 	 * Save a pointer to the list head.
1058 	 */
1059 	dip->di_headp = dhp;
1060 	dlp->dl_impl_count++;
1061 
1062 	/*
1063 	 * Walk the bound dls_impl_t to see if there are any
1064 	 * in promiscuous 'all sap' mode.
1065 	 */
1066 	mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1067 	rw_exit(&dlp->dl_impl_lock);
1068 
1069 	/*
1070 	 * If there are then we need to use a receive routine
1071 	 * which will route packets to those dls_impl_t as well
1072 	 * as ones bound to the  DLSAP of the packet.
1073 	 */
1074 	if (promisc)
1075 		rx = i_dls_link_ether_rx_promisc;
1076 	else
1077 		rx = i_dls_link_ether_rx;
1078 
1079 	/* Replace the existing receive function if there is one. */
1080 	if (dlp->dl_mrh != NULL)
1081 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1082 	dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1083 	mutex_exit(&dlp->dl_lock);
1084 }
1085 
1086 void
1087 dls_link_remove(dls_link_t *dlp, dls_impl_t *dip)
1088 {
1089 	mod_hash_t	*hash = dlp->dl_impl_hash;
1090 	dls_impl_t	**pp;
1091 	dls_impl_t	*p;
1092 	dls_head_t	*dhp;
1093 	mac_rx_t	rx;
1094 
1095 	/*
1096 	 * We need dl_lock here because we want to be able to walk
1097 	 * the hash table *and* set the mac rx func atomically. if
1098 	 * these two operations are separate, someone else could
1099 	 * insert/remove dls_impl_t from the hash table after we
1100 	 * drop the hash lock and this could cause our chosen rx
1101 	 * func to be incorrect. note that we cannot call mac_rx_add
1102 	 * when holding the hash lock because this can cause deadlock.
1103 	 */
1104 	mutex_enter(&dlp->dl_lock);
1105 	rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1106 
1107 	/*
1108 	 * Poll the hash table entry until all references have been dropped.
1109 	 * We need to drop all locks before sleeping because we don't want
1110 	 * the interrupt handler to block. We set di_removing here to
1111 	 * tell the receive callbacks not to pass up packets anymore.
1112 	 * This is only a hint to quicken the decrease of the refcnt so
1113 	 * the assignment need not be protected by any lock.
1114 	 */
1115 	dhp = dip->di_headp;
1116 	dip->di_removing = B_TRUE;
1117 	while (dhp->dh_ref != 0) {
1118 		rw_exit(&dlp->dl_impl_lock);
1119 		mutex_exit(&dlp->dl_lock);
1120 		delay(drv_usectohz(1000));	/* 1ms delay */
1121 		mutex_enter(&dlp->dl_lock);
1122 		rw_enter(&dlp->dl_impl_lock, RW_WRITER);
1123 	}
1124 
1125 	/*
1126 	 * Walk the list and remove the dls_impl_t.
1127 	 */
1128 	for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->di_nextp)) {
1129 		if (p == dip)
1130 			break;
1131 	}
1132 	ASSERT(p != NULL);
1133 	*pp = p->di_nextp;
1134 	p->di_nextp = NULL;
1135 
1136 	ASSERT(dlp->dl_impl_count > 0);
1137 	dlp->dl_impl_count--;
1138 
1139 	if (dhp->dh_list == NULL) {
1140 		mod_hash_val_t	val = NULL;
1141 
1142 		/*
1143 		 * The list is empty so remove the hash table entry.
1144 		 */
1145 		(void) mod_hash_remove(hash, dhp->dh_key, &val);
1146 		ASSERT(dhp == (dls_head_t *)val);
1147 		i_dls_head_free(dhp);
1148 	}
1149 	dip->di_removing = B_FALSE;
1150 
1151 	/*
1152 	 * If there are no dls_impl_t then there's no need to register a
1153 	 * receive function with the mac.
1154 	 */
1155 	if (dlp->dl_impl_count == 0) {
1156 		rw_exit(&dlp->dl_impl_lock);
1157 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1158 		dlp->dl_mrh = NULL;
1159 	} else {
1160 		boolean_t promisc = B_FALSE;
1161 
1162 		/*
1163 		 * Walk the bound dls_impl_t to see if there are any
1164 		 * in promiscuous 'all sap' mode.
1165 		 */
1166 		mod_hash_walk(hash, i_dls_link_walk, (void *)&promisc);
1167 		rw_exit(&dlp->dl_impl_lock);
1168 
1169 		/*
1170 		 * If there are then we need to use a receive routine
1171 		 * which will route packets to those dls_impl_t as well
1172 		 * as ones bound to the  DLSAP of the packet.
1173 		 */
1174 		if (promisc)
1175 			rx = i_dls_link_ether_rx_promisc;
1176 		else
1177 			rx = i_dls_link_ether_rx;
1178 
1179 		mac_rx_remove(dlp->dl_mh, dlp->dl_mrh);
1180 		dlp->dl_mrh = mac_rx_add(dlp->dl_mh, rx, (void *)dlp);
1181 	}
1182 	mutex_exit(&dlp->dl_lock);
1183 }
1184