xref: /illumos-gate/usr/src/uts/common/io/mac/mac.c (revision b6c3f786)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * MAC Services Module
31  */
32 
33 #include <sys/types.h>
34 #include <sys/conf.h>
35 #include <sys/id_space.h>
36 #include <sys/esunddi.h>
37 #include <sys/stat.h>
38 #include <sys/mkdev.h>
39 #include <sys/stream.h>
40 #include <sys/strsun.h>
41 #include <sys/strsubr.h>
42 #include <sys/dlpi.h>
43 #include <sys/dls.h>
44 #include <sys/modhash.h>
45 #include <sys/vlan.h>
46 #include <sys/mac.h>
47 #include <sys/mac_impl.h>
48 #include <sys/dld.h>
49 #include <sys/modctl.h>
50 #include <sys/fs/dv_node.h>
51 #include <sys/thread.h>
52 #include <sys/proc.h>
53 #include <sys/callb.h>
54 #include <sys/cpuvar.h>
55 #include <sys/atomic.h>
56 #include <sys/sdt.h>
57 #include <inet/nd.h>
58 
#define	IMPL_HASHSZ	67	/* prime */

/* Cache of mac_impl_t allocations; ctor/dtor set up the embedded locks. */
static kmem_cache_t	*i_mac_impl_cachep;
/* Hash of registered MACs, keyed by mac name string. */
static mod_hash_t	*i_mac_impl_hash;
/* Protects i_mac_impl_hash membership and per-mip reference counts. */
krwlock_t		i_mac_impl_lock;
uint_t			i_mac_impl_count;
static kmem_cache_t	*mac_vnic_tx_cache;
/* id space handing out minor numbers above MAC_MAX_MINOR; see mac_init(). */
static id_space_t	*minor_ids;
static uint32_t		minor_count;

/* Directory from which MAC-type plugin modules are modload()ed. */
#define	MACTYPE_KMODDIR	"mac"
#define	MACTYPE_HASHSZ	67
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

static void i_mac_notify_thread(void *);
static mblk_t *mac_vnic_tx(void *, mblk_t *);
static mblk_t *mac_vnic_txloop(void *, mblk_t *);
81 
82 /*
83  * Private functions.
84  */
85 
/*
 * kmem cache constructor for mac_impl_t.  Zeroes the object and
 * initializes every embedded lock and condition variable; invoked by
 * the kmem allocator, never called directly.  Always succeeds.
 */
/*ARGSUSED*/
static int
i_mac_constructor(void *buf, void *arg, int kmflag)
{
	mac_impl_t	*mip = buf;

	bzero(buf, sizeof (mac_impl_t));

	/* Link state is unknown until the driver reports otherwise. */
	mip->mi_linkstate = LINK_STATE_UNKNOWN;

	rw_init(&mip->mi_state_lock, NULL, RW_DRIVER, NULL);
	rw_init(&mip->mi_gen_lock, NULL, RW_DRIVER, NULL);
	rw_init(&mip->mi_data_lock, NULL, RW_DRIVER, NULL);
	rw_init(&mip->mi_notify_lock, NULL, RW_DRIVER, NULL);
	rw_init(&mip->mi_rx_lock, NULL, RW_DRIVER, NULL);
	rw_init(&mip->mi_tx_lock, NULL, RW_DRIVER, NULL);
	rw_init(&mip->mi_resource_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&mip->mi_activelink_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&mip->mi_notify_bits_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&mip->mi_notify_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&mip->mi_rx_cv, NULL, CV_DRIVER, NULL);
	return (0);
}
110 
/*
 * kmem cache destructor for mac_impl_t.  Asserts that the object has
 * been fully quiesced (no references, callbacks, or kstats remain)
 * before tearing down the synchronization primitives created by
 * i_mac_constructor().
 */
/*ARGSUSED*/
static void
i_mac_destructor(void *buf, void *arg)
{
	mac_impl_t	*mip = buf;

	ASSERT(mip->mi_ref == 0);
	ASSERT(!mip->mi_exclusive);
	ASSERT(mip->mi_active == 0);
	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
	ASSERT(mip->mi_devpromisc == 0);
	ASSERT(mip->mi_promisc == 0);
	ASSERT(mip->mi_mmap == NULL);
	ASSERT(mip->mi_mmrp == NULL);
	ASSERT(mip->mi_mnfp == NULL);
	ASSERT(mip->mi_resource_add == NULL);
	ASSERT(mip->mi_ksp == NULL);
	ASSERT(mip->mi_kstat_count == 0);
	ASSERT(mip->mi_notify_bits == 0);
	ASSERT(mip->mi_notify_thread == NULL);

	rw_destroy(&mip->mi_gen_lock);
	rw_destroy(&mip->mi_state_lock);
	rw_destroy(&mip->mi_data_lock);
	rw_destroy(&mip->mi_notify_lock);
	rw_destroy(&mip->mi_rx_lock);
	rw_destroy(&mip->mi_tx_lock);
	rw_destroy(&mip->mi_resource_lock);
	mutex_destroy(&mip->mi_activelink_lock);
	mutex_destroy(&mip->mi_notify_bits_lock);
	cv_destroy(&mip->mi_notify_cv);
	mutex_destroy(&mip->mi_lock);
	cv_destroy(&mip->mi_rx_cv);
}
145 
146 /*
147  * mac_vnic_tx_t kmem cache support functions.
148  */
149 
150 /* ARGSUSED */
151 static int
152 i_mac_vnic_tx_ctor(void *buf, void *arg, int mkflag)
153 {
154 	mac_vnic_tx_t *vnic_tx = buf;
155 
156 	bzero(buf, sizeof (mac_vnic_tx_t));
157 	mutex_init(&vnic_tx->mv_lock, NULL, MUTEX_DRIVER, NULL);
158 	cv_init(&vnic_tx->mv_cv, NULL, CV_DRIVER, NULL);
159 	return (0);
160 }
161 
162 /* ARGSUSED */
163 static void
164 i_mac_vnic_tx_dtor(void *buf, void *arg)
165 {
166 	mac_vnic_tx_t *vnic_tx = buf;
167 
168 	ASSERT(vnic_tx->mv_refs == 0);
169 	mutex_destroy(&vnic_tx->mv_lock);
170 	cv_destroy(&vnic_tx->mv_cv);
171 }
172 
173 static void
174 i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
175 {
176 	rw_enter(&i_mac_impl_lock, RW_READER);
177 	if (mip->mi_disabled)
178 		goto exit;
179 
180 	/*
181 	 * Guard against incorrect notifications.  (Running a newer
182 	 * mac client against an older implementation?)
183 	 */
184 	if (type >= MAC_NNOTE)
185 		goto exit;
186 
187 	mutex_enter(&mip->mi_notify_bits_lock);
188 	mip->mi_notify_bits |= (1 << type);
189 	cv_broadcast(&mip->mi_notify_cv);
190 	mutex_exit(&mip->mi_notify_bits_lock);
191 
192 exit:
193 	rw_exit(&i_mac_impl_lock);
194 }
195 
/*
 * Log link-state transitions to the console.  Called from the notify
 * thread when a MAC_NOTE_LINK notification is processed.  Only
 * transitions are logged (no change means no message), and DOWN is
 * only reported when the previous state was not UNKNOWN.
 */
static void
i_mac_log_link_state(mac_impl_t *mip)
{
	/*
	 * If no change, then it is not interesting.
	 */
	if (mip->mi_lastlinkstate == mip->mi_linkstate)
		return;

	switch (mip->mi_linkstate) {
	case LINK_STATE_UP:
		/*
		 * If the plugin can describe link details (e.g. speed/
		 * duplex), include them in the message.
		 */
		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
			char det[200];

			mip->mi_type->mt_ops.mtops_link_details(det,
			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);

			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
		} else {
			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
		}
		break;

	case LINK_STATE_DOWN:
		/*
		 * Only transitions from UP to DOWN are interesting
		 */
		if (mip->mi_lastlinkstate != LINK_STATE_UNKNOWN)
			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
		break;

	case LINK_STATE_UNKNOWN:
		/*
		 * This case is normally not interesting.
		 */
		break;
	}
	/* Remember the state we just reported so we only log transitions. */
	mip->mi_lastlinkstate = mip->mi_linkstate;
}
235 
/*
 * Per-mac notification delivery thread.  Sleeps on mi_notify_cv until
 * i_mac_notify() posts one or more notification bits, then drops the
 * bits lock and invokes every registered callback for each posted
 * type.  The (1 << MAC_NNOTE) bit is an out-of-band request to exit
 * (presumably posted during teardown while mi_disabled is set — the
 * ASSERT below relies on that).  The thread is CPR-safe: it registers
 * with the checkpoint/resume subsystem and marks itself safe while
 * blocked in cv_wait().
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;

	CALLB_CPR_INIT(&cprinfo, &mip->mi_notify_bits_lock, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(&mip->mi_notify_bits_lock);
	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		/* Atomically snapshot and clear the pending bits. */
		bits = mip->mi_notify_bits;
		if (bits == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &mip->mi_notify_bits_lock);
			continue;
		}
		mip->mi_notify_bits = 0;

		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_disabled);
			break;
		}

		/*
		 * Drop the bits lock while running callbacks so posters
		 * are not blocked behind potentially slow consumers.
		 */
		mutex_exit(&mip->mi_notify_bits_lock);

		/*
		 * Log link changes.
		 */
		if ((bits & (1 << MAC_NOTE_LINK)) != 0)
			i_mac_log_link_state(mip);

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			mac_notify_fn_t	*mnfp;

			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/*
			 * Walk the list of notifications.
			 */
			rw_enter(&mip->mi_notify_lock, RW_READER);
			for (mnfp = mip->mi_mnfp; mnfp != NULL;
			    mnfp = mnfp->mnf_nextp) {

				mnfp->mnf_fn(mnfp->mnf_arg, type);
			}
			rw_exit(&mip->mi_notify_lock);
		}

		mutex_enter(&mip->mi_notify_bits_lock);
	}

	/* Signal any thread waiting for us to exit (see teardown path). */
	mip->mi_notify_thread = NULL;
	cv_broadcast(&mip->mi_notify_cv);

	/* CALLB_CPR_EXIT drops mi_notify_bits_lock. */
	CALLB_CPR_EXIT(&cprinfo);

	thread_exit();
}
305 
/*
 * Look up (and take a reference on) the MAC-type plugin named pname.
 * If the plugin has not registered yet, attempt to modload() it from
 * the MACTYPE_KMODDIR directory once and retry the lookup.  Returns
 * the referenced mactype_t, or NULL if the plugin cannot be found or
 * loaded.  The caller is responsible for releasing the reference.
 */
static mactype_t *
i_mactype_getplugin(const char *pname)
{
	mactype_t	*mtype = NULL;
	boolean_t	tried_modload = B_FALSE;

	mutex_enter(&i_mactype_lock);

find_registered_mactype:
	if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname,
	    (mod_hash_val_t *)&mtype) != 0) {
		if (!tried_modload) {
			/*
			 * If the plugin has not yet been loaded, then
			 * attempt to load it now.  If modload() succeeds,
			 * the plugin should have registered using
			 * mactype_register(), in which case we can go back
			 * and attempt to find it again.
			 */
			if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) {
				tried_modload = B_TRUE;
				goto find_registered_mactype;
			}
		}
	} else {
		/*
		 * Note that there's no danger that the plugin we've loaded
		 * could be unloaded between the modload() step and the
		 * reference count bump here, as we're holding
		 * i_mactype_lock, which mactype_unregister() also holds.
		 */
		atomic_inc_32(&mtype->mt_ref);
	}

	mutex_exit(&i_mactype_lock);
	/* NULL if neither lookup nor modload-and-retry succeeded. */
	return (mtype);
}
343 
344 /*
345  * Module initialization functions.
346  */
347 
/*
 * One-time module initialization: create the mac_impl_t and
 * mac_vnic_tx_t kmem caches, the global name->mip and mactype hash
 * tables, and the minor-number id space.  Called at module load; has
 * no failure path (allocations use KM_SLEEP and are ASSERTed).
 */
void
mac_init(void)
{
	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
	    NULL, NULL, NULL, 0);
	ASSERT(i_mac_impl_cachep != NULL);

	mac_vnic_tx_cache = kmem_cache_create("mac_vnic_tx_cache",
	    sizeof (mac_vnic_tx_t), 0, i_mac_vnic_tx_ctor, i_mac_vnic_tx_dtor,
	    NULL, NULL, NULL, 0);
	ASSERT(mac_vnic_tx_cache != NULL);

	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);
	i_mac_impl_count = 0;

	i_mactype_hash = mod_hash_create_extended("mactype_hash",
	    MACTYPE_HASHSZ,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * Allocate an id space to manage minor numbers. The range of the
	 * space will be from MAC_MAX_MINOR+1 to MAXMIN32 (maximum legal
	 * minor number is MAXMIN, but id_t is type of integer and does not
	 * allow MAXMIN).
	 */
	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1, MAXMIN32);
	ASSERT(minor_ids != NULL);
	minor_count = 0;
}
382 
/*
 * Module teardown: the inverse of mac_init().  Fails with EBUSY while
 * any MAC is still registered or any minor number is outstanding;
 * otherwise destroys the id space, hashes, lock, and caches created
 * by mac_init() and returns 0.
 */
int
mac_fini(void)
{
	/* Refuse to unload while any MAC or minor is still in use. */
	if (i_mac_impl_count > 0 || minor_count > 0)
		return (EBUSY);

	id_space_destroy(minor_ids);

	mod_hash_destroy_hash(i_mac_impl_hash);
	rw_destroy(&i_mac_impl_lock);

	kmem_cache_destroy(i_mac_impl_cachep);
	kmem_cache_destroy(mac_vnic_tx_cache);

	mod_hash_destroy_hash(i_mactype_hash);
	return (0);
}
400 
401 /*
402  * Client functions.
403  */
404 
405 static int
406 mac_hold(const char *macname, mac_impl_t **pmip)
407 {
408 	mac_impl_t	*mip;
409 	int		err;
410 
411 	/*
412 	 * Check the device name length to make sure it won't overflow our
413 	 * buffer.
414 	 */
415 	if (strlen(macname) >= MAXNAMELEN)
416 		return (EINVAL);
417 
418 	/*
419 	 * Look up its entry in the global hash table.
420 	 */
421 	rw_enter(&i_mac_impl_lock, RW_WRITER);
422 	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
423 	    (mod_hash_val_t *)&mip);
424 
425 	if (err != 0) {
426 		rw_exit(&i_mac_impl_lock);
427 		return (ENOENT);
428 	}
429 
430 	if (mip->mi_disabled) {
431 		rw_exit(&i_mac_impl_lock);
432 		return (ENOENT);
433 	}
434 
435 	if (mip->mi_exclusive) {
436 		rw_exit(&i_mac_impl_lock);
437 		return (EBUSY);
438 	}
439 
440 	mip->mi_ref++;
441 	rw_exit(&i_mac_impl_lock);
442 
443 	*pmip = mip;
444 	return (0);
445 }
446 
447 static void
448 mac_rele(mac_impl_t *mip)
449 {
450 	rw_enter(&i_mac_impl_lock, RW_WRITER);
451 	ASSERT(mip->mi_ref != 0);
452 	if (--mip->mi_ref == 0)
453 		ASSERT(!mip->mi_activelink);
454 	rw_exit(&i_mac_impl_lock);
455 }
456 
457 int
458 mac_hold_exclusive(mac_handle_t mh)
459 {
460 	mac_impl_t	*mip = (mac_impl_t *)mh;
461 
462 	/*
463 	 * Look up its entry in the global hash table.
464 	 */
465 	rw_enter(&i_mac_impl_lock, RW_WRITER);
466 	if (mip->mi_disabled) {
467 		rw_exit(&i_mac_impl_lock);
468 		return (ENOENT);
469 	}
470 
471 	if (mip->mi_ref != 0) {
472 		rw_exit(&i_mac_impl_lock);
473 		return (EBUSY);
474 	}
475 
476 	ASSERT(!mip->mi_exclusive);
477 
478 	mip->mi_ref++;
479 	mip->mi_exclusive = B_TRUE;
480 	rw_exit(&i_mac_impl_lock);
481 	return (0);
482 }
483 
484 void
485 mac_rele_exclusive(mac_handle_t mh)
486 {
487 	mac_impl_t	*mip = (mac_impl_t *)mh;
488 
489 	/*
490 	 * Look up its entry in the global hash table.
491 	 */
492 	rw_enter(&i_mac_impl_lock, RW_WRITER);
493 	ASSERT(mip->mi_ref == 1 && mip->mi_exclusive);
494 	mip->mi_ref--;
495 	mip->mi_exclusive = B_FALSE;
496 	rw_exit(&i_mac_impl_lock);
497 }
498 
/*
 * Open the MAC named macname: take a reference (mac_hold()), hold its
 * dip, and on first open invoke the driver's mc_open() callback.
 * Returns 0 with *mhp set, or an error from mac_hold()/mc_open().
 * Balanced by mac_close().
 */
int
mac_open(const char *macname, mac_handle_t *mhp)
{
	mac_impl_t	*mip;
	int		err;

	/*
	 * Look up its entry in the global hash table.
	 */
	if ((err = mac_hold(macname, &mip)) != 0)
		return (err);

	/*
	 * Hold the dip associated to the MAC to prevent it from being
	 * detached. For a softmac, its underlying dip is held by the
	 * mi_open() callback.
	 *
	 * This is done to be more tolerant with some defective drivers,
	 * which incorrectly handle mac_unregister() failure in their
	 * xxx_detach() routine. For example, some drivers ignore the
	 * failure of mac_unregister() and free all resources that
	 * that are needed for data transmition.
	 */
	e_ddi_hold_devi(mip->mi_dip);

	rw_enter(&mip->mi_gen_lock, RW_WRITER);

	/* Only the first opener of a driver with MC_OPEN calls mc_open(). */
	if ((mip->mi_oref != 0) ||
	    !(mip->mi_callbacks->mc_callbacks & MC_OPEN)) {
		goto done;
	}

	/*
	 * Note that we do not hold i_mac_impl_lock when calling the
	 * mc_open() callback function to avoid deadlock with the
	 * i_mac_notify() function.
	 */
	if ((err = mip->mi_open(mip->mi_driver)) != 0) {
		/* Unwind the gen lock, dip hold, and reference on failure. */
		rw_exit(&mip->mi_gen_lock);
		ddi_release_devi(mip->mi_dip);
		mac_rele(mip);
		return (err);
	}

done:
	mip->mi_oref++;
	rw_exit(&mip->mi_gen_lock);
	*mhp = (mac_handle_t)mip;
	return (0);
}
549 
550 int
551 mac_open_by_linkid(datalink_id_t linkid, mac_handle_t *mhp)
552 {
553 	dls_dl_handle_t	dlh;
554 	int		err;
555 
556 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
557 		return (err);
558 
559 	if (dls_devnet_vid(dlh) != VLAN_ID_NONE) {
560 		err = EINVAL;
561 		goto done;
562 	}
563 
564 	err = mac_open(dls_devnet_mac(dlh), mhp);
565 
566 done:
567 	dls_devnet_rele_tmp(dlh);
568 	return (err);
569 }
570 
571 int
572 mac_open_by_linkname(const char *link, mac_handle_t *mhp)
573 {
574 	datalink_id_t	linkid;
575 	int		err;
576 
577 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0)
578 		return (err);
579 	return (mac_open_by_linkid(linkid, mhp));
580 }
581 
/*
 * Close a handle obtained from mac_open().  On the last close, invoke
 * the driver's mc_close() callback (if provided), then release the
 * dip hold and the mac_hold() reference taken by mac_open().
 */
void
mac_close(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	rw_enter(&mip->mi_gen_lock, RW_WRITER);

	ASSERT(mip->mi_oref != 0);
	if (--mip->mi_oref == 0) {
		if ((mip->mi_callbacks->mc_callbacks & MC_CLOSE))
			mip->mi_close(mip->mi_driver);
	}
	rw_exit(&mip->mi_gen_lock);

	/* Undo mac_open()'s dip hold and reference, in that order. */
	ddi_release_devi(mip->mi_dip);
	mac_rele(mip);
}
599 
600 const mac_info_t *
601 mac_info(mac_handle_t mh)
602 {
603 	return (&((mac_impl_t *)mh)->mi_info);
604 }
605 
606 dev_info_t *
607 mac_devinfo_get(mac_handle_t mh)
608 {
609 	return (((mac_impl_t *)mh)->mi_dip);
610 }
611 
612 const char *
613 mac_name(mac_handle_t mh)
614 {
615 	return (((mac_impl_t *)mh)->mi_name);
616 }
617 
618 minor_t
619 mac_minor(mac_handle_t mh)
620 {
621 	return (((mac_impl_t *)mh)->mi_minor);
622 }
623 
/*
 * Return the value of the named statistic.  Stats below MAC_STAT_MIN
 * are maintained by the mac module itself; everything else is fetched
 * from the driver via mc_getstat(), falling back to mac_stat_default()
 * for stats the driver does not support.
 */
uint64_t
mac_stat_get(mac_handle_t mh, uint_t stat)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	uint64_t	val;
	int		ret;

	/*
	 * The range of stat determines where it is maintained.  Stat
	 * values from 0 up to (but not including) MAC_STAT_MIN are
	 * mainteined by the mac module itself.  Everything else is
	 * maintained by the driver.
	 */
	if (stat < MAC_STAT_MIN) {
		/* These stats are maintained by the mac module itself. */
		switch (stat) {
		case MAC_STAT_LINK_STATE:
			return (mip->mi_linkstate);
		case MAC_STAT_LINK_UP:
			return (mip->mi_linkstate == LINK_STATE_UP);
		case MAC_STAT_PROMISC:
			return (mip->mi_devpromisc != 0);
		default:
			/*
			 * NOTE(review): on non-DEBUG builds this ASSERT is
			 * a no-op and control falls through to the driver
			 * getstat call below with an unrecognized stat.
			 */
			ASSERT(B_FALSE);
		}
	}

	/*
	 * Call the driver to get the given statistic.
	 */
	ret = mip->mi_getstat(mip->mi_driver, stat, &val);
	if (ret != 0) {
		/*
		 * The driver doesn't support this statistic.  Get the
		 * statistic's default value.
		 */
		val = mac_stat_default(mip, stat);
	}
	return (val);
}
664 
/*
 * Start the device, reference-counted: only the transition from zero
 * active users actually calls the driver's mc_start(); subsequent
 * calls just bump mi_active.  On driver failure the count is rolled
 * back and the error returned.
 */
int
mac_start(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err;

	ASSERT(mip->mi_start != NULL);

	rw_enter(&(mip->mi_state_lock), RW_WRITER);

	/*
	 * Check whether the device is already started.
	 */
	if (mip->mi_active++ != 0) {
		/*
		 * It's already started so there's nothing more to do.
		 */
		err = 0;
		goto done;
	}

	/*
	 * Start the device.
	 */
	if ((err = mip->mi_start(mip->mi_driver)) != 0)
		--mip->mi_active;	/* undo the optimistic increment */

done:
	rw_exit(&(mip->mi_state_lock));
	return (err);
}
696 
/*
 * Stop the device, reference-counted: the driver's mc_stop() is only
 * invoked when the last active user (see mac_start()) goes away.
 */
void
mac_stop(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	ASSERT(mip->mi_stop != NULL);

	rw_enter(&(mip->mi_state_lock), RW_WRITER);

	/*
	 * Check whether the device is still needed.
	 */
	ASSERT(mip->mi_active != 0);
	if (--mip->mi_active != 0) {
		/*
		 * It's still needed so there's nothing more to do.
		 */
		goto done;
	}

	/*
	 * Stop the device.
	 */
	mip->mi_stop(mip->mi_driver);

done:
	rw_exit(&(mip->mi_state_lock));
}
725 
/*
 * Enable reception of the given multicast address, reference-counted.
 * If the address is already on the mip's list only its refcount is
 * bumped; otherwise a new entry is allocated, the driver's
 * mc_multicst() is asked to enable the address, and the entry is
 * linked in.  Returns 0, a verification error from the plugin, ENOMEM,
 * or the driver's error.
 */
int
mac_multicst_add(mac_handle_t mh, const uint8_t *addr)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_multicst_addr_t	**pp;
	mac_multicst_addr_t	*p;
	int			err;

	ASSERT(mip->mi_multicst != NULL);

	/*
	 * Verify the address.
	 */
	if ((err = mip->mi_type->mt_ops.mtops_multicst_verify(addr,
	    mip->mi_pdata)) != 0) {
		return (err);
	}

	/*
	 * Check whether the given address is already enabled.
	 */
	rw_enter(&(mip->mi_data_lock), RW_WRITER);
	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
		    0) {
			/*
			 * The address is already enabled so just bump the
			 * reference count.
			 */
			p->mma_ref++;
			err = 0;
			goto done;
		}
	}

	/*
	 * Allocate a new list entry.  KM_NOSLEEP because we hold
	 * mi_data_lock as writer.
	 */
	if ((p = kmem_zalloc(sizeof (mac_multicst_addr_t),
	    KM_NOSLEEP)) == NULL) {
		err = ENOMEM;
		goto done;
	}

	/*
	 * Enable a new multicast address.
	 */
	if ((err = mip->mi_multicst(mip->mi_driver, B_TRUE, addr)) != 0) {
		kmem_free(p, sizeof (mac_multicst_addr_t));
		goto done;
	}

	/*
	 * Add the address to the list of enabled addresses.
	 * (pp points at the tail's next pointer after the search loop.)
	 */
	bcopy(addr, p->mma_addr, mip->mi_type->mt_addr_length);
	p->mma_ref++;
	*pp = p;

done:
	rw_exit(&(mip->mi_data_lock));
	return (err);
}
789 
/*
 * Drop a reference on a multicast address added via mac_multicst_add().
 * The driver's mc_multicst() is only asked to disable the address when
 * the last reference is released; on driver failure the reference is
 * restored.  Returns 0, ENOENT if the address was never enabled, or
 * the driver's error.
 */
int
mac_multicst_remove(mac_handle_t mh, const uint8_t *addr)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_multicst_addr_t	**pp;
	mac_multicst_addr_t	*p;
	int			err;

	ASSERT(mip->mi_multicst != NULL);

	/*
	 * Find the entry in the list for the given address.
	 */
	rw_enter(&(mip->mi_data_lock), RW_WRITER);
	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
		    0) {
			/* Break out (p non-NULL) only on the last reference. */
			if (--p->mma_ref == 0)
				break;

			/*
			 * There is still a reference to this address so
			 * there's nothing more to do.
			 */
			err = 0;
			goto done;
		}
	}

	/*
	 * We did not find an entry for the given address so it is not
	 * currently enabled.
	 */
	if (p == NULL) {
		err = ENOENT;
		goto done;
	}
	ASSERT(p->mma_ref == 0);

	/*
	 * Disable the multicast address.
	 */
	if ((err = mip->mi_multicst(mip->mi_driver, B_FALSE, addr)) != 0) {
		/* Driver refused; put the reference back. */
		p->mma_ref++;
		goto done;
	}

	/*
	 * Remove it from the list.
	 */
	*pp = p->mma_nextp;
	kmem_free(p, sizeof (mac_multicst_addr_t));

done:
	rw_exit(&(mip->mi_data_lock));
	return (err);
}
847 
848 /*
849  * mac_unicst_verify: Verifies the passed address. It fails
850  * if the passed address is a group address or has incorrect length.
851  */
852 boolean_t
853 mac_unicst_verify(mac_handle_t mh, const uint8_t *addr, uint_t len)
854 {
855 	mac_impl_t	*mip = (mac_impl_t *)mh;
856 
857 	/*
858 	 * Verify the address.
859 	 */
860 	if ((len != mip->mi_type->mt_addr_length) ||
861 	    (mip->mi_type->mt_ops.mtops_unicst_verify(addr,
862 	    mip->mi_pdata)) != 0) {
863 		return (B_FALSE);
864 	} else {
865 		return (B_TRUE);
866 	}
867 }
868 
/*
 * Program a new primary unicast address.  The plugin verifies the
 * address, the driver's mc_unicst() programs it, and on success a
 * MAC_NOTE_UNICST notification is posted (after dropping the data
 * lock, to avoid deadlock with i_mac_notify()'s lock ordering).
 * Setting the current address is a silent no-op.
 */
int
mac_unicst_set(mac_handle_t mh, const uint8_t *addr)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err;
	boolean_t	notify = B_FALSE;

	ASSERT(mip->mi_unicst != NULL);

	/*
	 * Verify the address.
	 */
	if ((err = mip->mi_type->mt_ops.mtops_unicst_verify(addr,
	    mip->mi_pdata)) != 0) {
		return (err);
	}

	/*
	 * Program the new unicast address.
	 */
	rw_enter(&(mip->mi_data_lock), RW_WRITER);

	/*
	 * If address doesn't change, do nothing.
	 * This check is necessary otherwise it may call into mac_unicst_set
	 * recursively.
	 */
	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0)
		goto done;

	if ((err = mip->mi_unicst(mip->mi_driver, addr)) != 0)
		goto done;

	/*
	 * Save the address and flag that we need to send a notification.
	 */
	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
	notify = B_TRUE;

done:
	rw_exit(&(mip->mi_data_lock));

	/* Notify only after dropping mi_data_lock. */
	if (notify)
		i_mac_notify(mip, MAC_NOTE_UNICST);

	return (err);
}
916 
917 void
918 mac_unicst_get(mac_handle_t mh, uint8_t *addr)
919 {
920 	mac_impl_t	*mip = (mac_impl_t *)mh;
921 
922 	/*
923 	 * Copy out the current unicast source address.
924 	 */
925 	rw_enter(&(mip->mi_data_lock), RW_READER);
926 	bcopy(mip->mi_addr, addr, mip->mi_type->mt_addr_length);
927 	rw_exit(&(mip->mi_data_lock));
928 }
929 
930 void
931 mac_dest_get(mac_handle_t mh, uint8_t *addr)
932 {
933 	mac_impl_t	*mip = (mac_impl_t *)mh;
934 
935 	/*
936 	 * Copy out the current destination address.
937 	 */
938 	rw_enter(&(mip->mi_data_lock), RW_READER);
939 	bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length);
940 	rw_exit(&(mip->mi_data_lock));
941 }
942 
/*
 * Enable or disable promiscuous mode, reference-counted separately
 * for "device promiscuous" (mi_devpromisc) and "MAC promiscuous"
 * (mi_promisc) — see PSARC/2005/289 for the distinction.  The
 * driver's mc_setpromisc() is called only on the 0->1 and 1->0
 * transitions of the device counter, with the counter rolled back if
 * the driver fails.  Notifications are posted on each transition.
 * Returns 0, EPROTO on an unbalanced disable, or the driver's error.
 */
int
mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err = 0;

	ASSERT(mip->mi_setpromisc != NULL);
	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);

	/*
	 * Determine whether we should enable or disable promiscuous mode.
	 * For details on the distinction between "device promiscuous mode"
	 * and "MAC promiscuous mode", see PSARC/2005/289.
	 */
	rw_enter(&(mip->mi_data_lock), RW_WRITER);
	if (on) {
		/*
		 * Enable promiscuous mode on the device if not yet enabled.
		 */
		if (mip->mi_devpromisc++ == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
			if (err != 0) {
				/* Driver refused; undo the increment. */
				mip->mi_devpromisc--;
				goto done;
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}

		/*
		 * Enable promiscuous mode on the MAC if not yet enabled.
		 */
		if (ptype == MAC_PROMISC && mip->mi_promisc++ == 0)
			i_mac_notify(mip, MAC_NOTE_PROMISC);
	} else {
		/* Disabling more times than enabled is a protocol error. */
		if (mip->mi_devpromisc == 0) {
			err = EPROTO;
			goto done;
		}
		/*
		 * Disable promiscuous mode on the device if this is the last
		 * enabling.
		 */
		if (--mip->mi_devpromisc == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
			if (err != 0) {
				/* Driver refused; undo the decrement. */
				mip->mi_devpromisc++;
				goto done;
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}

		/*
		 * Disable promiscuous mode on the MAC if this is the last
		 * enabling.
		 */
		if (ptype == MAC_PROMISC && --mip->mi_promisc == 0)
			i_mac_notify(mip, MAC_NOTE_PROMISC);
	}

done:
	rw_exit(&(mip->mi_data_lock));
	return (err);
}
1006 
1007 boolean_t
1008 mac_promisc_get(mac_handle_t mh, mac_promisc_type_t ptype)
1009 {
1010 	mac_impl_t		*mip = (mac_impl_t *)mh;
1011 
1012 	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);
1013 
1014 	/*
1015 	 * Return the current promiscuity.
1016 	 */
1017 	if (ptype == MAC_DEVPROMISC)
1018 		return (mip->mi_devpromisc != 0);
1019 	else
1020 		return (mip->mi_promisc != 0);
1021 }
1022 
1023 void
1024 mac_sdu_get(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu)
1025 {
1026 	mac_impl_t	*mip = (mac_impl_t *)mh;
1027 
1028 	if (min_sdu != NULL)
1029 		*min_sdu = mip->mi_sdu_min;
1030 	if (max_sdu != NULL)
1031 		*max_sdu = mip->mi_sdu_max;
1032 }
1033 
1034 void
1035 mac_resources(mac_handle_t mh)
1036 {
1037 	mac_impl_t	*mip = (mac_impl_t *)mh;
1038 
1039 	/*
1040 	 * If the driver supports resource registration, call the driver to
1041 	 * ask it to register its resources.
1042 	 */
1043 	if (mip->mi_callbacks->mc_callbacks & MC_RESOURCES)
1044 		mip->mi_resources(mip->mi_driver);
1045 }
1046 
/*
 * Dispatch an M_IOCTL message to the driver's mc_ioctl() entry point,
 * NAKing on the driver's behalf if it implements no ioctls.  A
 * deprecation warning is logged for ndd(1M)-style ND_SET/ND_GET
 * ioctls on drivers that support the property interfaces.
 */
void
mac_ioctl(mac_handle_t mh, queue_t *wq, mblk_t *bp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		cmd;

	if (mip->mi_callbacks->mc_callbacks & (MC_SETPROP|MC_GETPROP)) {
		cmd = ((struct iocblk *)bp->b_rptr)->ioc_cmd;
		if (cmd == ND_SET || cmd == ND_GET) {
			/*
			 * ndd ioctls are Obsolete
			 */
			cmn_err(CE_WARN,
			    "The ndd commands are obsolete and may be removed "
			    "in a future release of Solaris. "
			    "Use dladm(1M) to manage driver tunables\n");
		}
	}
	/*
	 * Call the driver to handle the ioctl.  The driver may not support
	 * any ioctls, in which case we reply with a NAK on its behalf.
	 */
	if (mip->mi_callbacks->mc_callbacks & MC_IOCTL)
		mip->mi_ioctl(mip->mi_driver, wq, bp);
	else
		miocnak(wq, bp, 0, EINVAL);
}
1074 
/*
 * Select the transmit entry-point structure for a client.  The choice
 * depends on whether MAC-level promiscuous mode is on (loopback
 * variants are needed so promiscuous callbacks see transmitted
 * packets) and on whether a VNIC is layered on this MAC (non-VNIC
 * clients must go through the VNIC's tx path).  Returned pointer is
 * to storage embedded in the mac_impl_t, so it remains valid for the
 * life of the MAC.
 */
const mac_txinfo_t *
mac_do_tx_get(mac_handle_t mh, boolean_t is_vnic)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_txinfo_t	*mtp;

	/*
	 * Grab the lock to prevent us from racing with MAC_PROMISC being
	 * changed.  This is sufficient since MAC clients are careful to always
	 * call mac_txloop_add() prior to enabling MAC_PROMISC, and to disable
	 * MAC_PROMISC prior to calling mac_txloop_remove().
	 */
	rw_enter(&mip->mi_tx_lock, RW_READER);

	if (mac_promisc_get(mh, MAC_PROMISC)) {
		ASSERT(mip->mi_mtfp != NULL);
		if (mip->mi_vnic_present && !is_vnic) {
			mtp = &mip->mi_vnic_txloopinfo;
		} else {
			mtp = &mip->mi_txloopinfo;
		}
	} else {
		if (mip->mi_vnic_present && !is_vnic) {
			mtp = &mip->mi_vnic_txinfo;
		} else {
			/*
			 * Note that we cannot ASSERT() that mip->mi_mtfp is
			 * NULL, because to satisfy the above ASSERT(), we
			 * have to disable MAC_PROMISC prior to calling
			 * mac_txloop_remove().
			 */
			mtp = &mip->mi_txinfo;
		}
	}

	rw_exit(&mip->mi_tx_lock);
	return (mtp);
}
1113 
1114 /*
1115  * Invoked by VNIC to obtain the transmit entry point.
1116  */
1117 const mac_txinfo_t *
1118 mac_vnic_tx_get(mac_handle_t mh)
1119 {
1120 	return (mac_do_tx_get(mh, B_TRUE));
1121 }
1122 
1123 /*
1124  * Invoked by any non-VNIC client to obtain the transmit entry point.
1125  * If a VNIC is present, the VNIC transmit function provided by the VNIC
1126  * will be returned to the MAC client.
1127  */
1128 const mac_txinfo_t *
1129 mac_tx_get(mac_handle_t mh)
1130 {
1131 	return (mac_do_tx_get(mh, B_FALSE));
1132 }
1133 
1134 link_state_t
1135 mac_link_get(mac_handle_t mh)
1136 {
1137 	return (((mac_impl_t *)mh)->mi_linkstate);
1138 }
1139 
1140 mac_notify_handle_t
1141 mac_notify_add(mac_handle_t mh, mac_notify_t notify, void *arg)
1142 {
1143 	mac_impl_t		*mip = (mac_impl_t *)mh;
1144 	mac_notify_fn_t		*mnfp;
1145 
1146 	mnfp = kmem_zalloc(sizeof (mac_notify_fn_t), KM_SLEEP);
1147 	mnfp->mnf_fn = notify;
1148 	mnfp->mnf_arg = arg;
1149 
1150 	/*
1151 	 * Add it to the head of the 'notify' callback list.
1152 	 */
1153 	rw_enter(&mip->mi_notify_lock, RW_WRITER);
1154 	mnfp->mnf_nextp = mip->mi_mnfp;
1155 	mip->mi_mnfp = mnfp;
1156 	rw_exit(&mip->mi_notify_lock);
1157 
1158 	return ((mac_notify_handle_t)mnfp);
1159 }
1160 
/*
 * Unregister a notification callback added via mac_notify_add().
 * The handle must be one previously returned for this MAC (ASSERTed).
 * The entry is unlinked under the notify lock and freed.
 */
void
mac_notify_remove(mac_handle_t mh, mac_notify_handle_t mnh)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_notify_fn_t		*mnfp = (mac_notify_fn_t *)mnh;
	mac_notify_fn_t		**pp;
	mac_notify_fn_t		*p;

	/*
	 * Search the 'notify' callback list for the function closure.
	 */
	rw_enter(&mip->mi_notify_lock, RW_WRITER);
	for (pp = &(mip->mi_mnfp); (p = *pp) != NULL;
	    pp = &(p->mnf_nextp)) {
		if (p == mnfp)
			break;
	}
	ASSERT(p != NULL);

	/*
	 * Remove it from the list.  pp points at the predecessor's
	 * next pointer, so this unlinks in place.
	 */
	*pp = p->mnf_nextp;
	rw_exit(&mip->mi_notify_lock);

	/*
	 * Free it.
	 */
	kmem_free(mnfp, sizeof (mac_notify_fn_t));
}
1191 
1192 void
1193 mac_notify(mac_handle_t mh)
1194 {
1195 	mac_impl_t		*mip = (mac_impl_t *)mh;
1196 	mac_notify_type_t	type;
1197 
1198 	for (type = 0; type < MAC_NNOTE; type++)
1199 		i_mac_notify(mip, type);
1200 }
1201 
/*
 * Register a receive function for this mac.
 * More information on this function's interaction with mac_rx()
 * can be found atop mac_rx().
 *
 * The new callback is allocated, marked in-use, and published at the
 * head of the 'rx' list under mi_rx_lock.  is_active distinguishes
 * "active" receivers (see mac_active_rx_add()) from passive ones.
 * Returns a handle for mac_rx_remove().
 */
mac_rx_handle_t
mac_do_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg, boolean_t is_active)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_rx_fn_t	*mrfp;

	mrfp = kmem_zalloc(sizeof (mac_rx_fn_t), KM_SLEEP);
	mrfp->mrf_fn = rx;
	mrfp->mrf_arg = arg;
	mrfp->mrf_active = is_active;

	/*
	 * Add it to the head of the 'rx' callback list.
	 */
	rw_enter(&(mip->mi_rx_lock), RW_WRITER);

	/*
	 * mac_rx() will only call callbacks that are marked inuse.
	 */
	mrfp->mrf_inuse = B_TRUE;
	mrfp->mrf_nextp = mip->mi_mrfp;

	/*
	 * mac_rx() could be traversing the remainder of the list
	 * and miss the new callback we're adding here. This is not a problem
	 * because we do not guarantee the callback to take effect immediately
	 * after mac_rx_add() returns.
	 */
	mip->mi_mrfp = mrfp;
	rw_exit(&(mip->mi_rx_lock));

	return ((mac_rx_handle_t)mrfp);
}
1240 
/*
 * Register a (non-active) receive callback; see mac_do_rx_add().
 */
mac_rx_handle_t
mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
{
	return (mac_do_rx_add(mh, rx, arg, B_FALSE));
}
1246 
/*
 * Register a receive callback marked active (mrf_active set); such
 * callbacks are the only ones invoked by mac_active_rx().
 */
mac_rx_handle_t
mac_active_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
{
	return (mac_do_rx_add(mh, rx, arg, B_TRUE));
}
1252 
1253 /*
1254  * Unregister a receive function for this mac.
1255  * This function does not block if wait is B_FALSE. This is useful
1256  * for clients who call mac_rx_remove() from a non-blockable context.
1257  * More information on this function's interaction with mac_rx()
1258  * can be found atop mac_rx().
1259  */
void
mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh, boolean_t wait)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_rx_fn_t		*mrfp = (mac_rx_fn_t *)mrh;
	mac_rx_fn_t		**pp;
	mac_rx_fn_t		*p;

	/*
	 * Search the 'rx' callback list for the function closure.
	 */
	rw_enter(&mip->mi_rx_lock, RW_WRITER);
	for (pp = &(mip->mi_mrfp); (p = *pp) != NULL;
	    pp = &(p->mrf_nextp)) {
		if (p == mrfp)
			break;
	}
	ASSERT(p != NULL);

	/*
	 * If mac_rx() is running, mark callback for deletion
	 * and return (if wait is false), or wait until mac_rx()
	 * exits (if wait is true).
	 */
	if (mip->mi_rx_ref > 0) {
		DTRACE_PROBE1(defer_delete, mac_impl_t *, mip);
		/* Clearing mrf_inuse stops mac_rx() from invoking it. */
		p->mrf_inuse = B_FALSE;
		/* mi_rx_removed (under mi_lock) counts deferred deletions. */
		mutex_enter(&mip->mi_lock);
		mip->mi_rx_removed++;
		mutex_exit(&mip->mi_lock);

		/*
		 * The marked callback will be unlinked and freed by the
		 * last exiting mac_rx(); see mac_do_rx().
		 */
		rw_exit(&mip->mi_rx_lock);
		if (wait)
			mac_rx_remove_wait(mh);
		return;
	}

	/* Remove it from the list. */
	*pp = p->mrf_nextp;
	kmem_free(mrfp, sizeof (mac_rx_fn_t));
	rw_exit(&mip->mi_rx_lock);
}
1301 
1302 /*
1303  * Wait for all pending callback removals to be completed by mac_rx().
1304  * Note that if we call mac_rx_remove() immediately before this, there is no
1305  * guarantee we would wait *only* on the callback that we specified.
1306  * mac_rx_remove() could have been called by other threads and we would have
1307  * to wait for other marked callbacks to be removed as well.
1308  */
1309 void
1310 mac_rx_remove_wait(mac_handle_t mh)
1311 {
1312 	mac_impl_t	*mip = (mac_impl_t *)mh;
1313 
1314 	mutex_enter(&mip->mi_lock);
1315 	while (mip->mi_rx_removed > 0) {
1316 		DTRACE_PROBE1(need_wait, mac_impl_t *, mip);
1317 		cv_wait(&mip->mi_rx_cv, &mip->mi_lock);
1318 	}
1319 	mutex_exit(&mip->mi_lock);
1320 }
1321 
1322 mac_txloop_handle_t
1323 mac_txloop_add(mac_handle_t mh, mac_txloop_t tx, void *arg)
1324 {
1325 	mac_impl_t	*mip = (mac_impl_t *)mh;
1326 	mac_txloop_fn_t	*mtfp;
1327 
1328 	mtfp = kmem_zalloc(sizeof (mac_txloop_fn_t), KM_SLEEP);
1329 	mtfp->mtf_fn = tx;
1330 	mtfp->mtf_arg = arg;
1331 
1332 	/*
1333 	 * Add it to the head of the 'tx' callback list.
1334 	 */
1335 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1336 	mtfp->mtf_nextp = mip->mi_mtfp;
1337 	mip->mi_mtfp = mtfp;
1338 	rw_exit(&(mip->mi_tx_lock));
1339 
1340 	return ((mac_txloop_handle_t)mtfp);
1341 }
1342 
1343 /*
1344  * Unregister a transmit function for this mac.  This removes the function
1345  * from the list of transmit functions for this mac.
1346  */
1347 void
1348 mac_txloop_remove(mac_handle_t mh, mac_txloop_handle_t mth)
1349 {
1350 	mac_impl_t		*mip = (mac_impl_t *)mh;
1351 	mac_txloop_fn_t		*mtfp = (mac_txloop_fn_t *)mth;
1352 	mac_txloop_fn_t		**pp;
1353 	mac_txloop_fn_t		*p;
1354 
1355 	/*
1356 	 * Search the 'tx' callback list for the function.
1357 	 */
1358 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1359 	for (pp = &(mip->mi_mtfp); (p = *pp) != NULL; pp = &(p->mtf_nextp)) {
1360 		if (p == mtfp)
1361 			break;
1362 	}
1363 	ASSERT(p != NULL);
1364 
1365 	/* Remove it from the list. */
1366 	*pp = p->mtf_nextp;
1367 	kmem_free(mtfp, sizeof (mac_txloop_fn_t));
1368 	rw_exit(&(mip->mi_tx_lock));
1369 }
1370 
1371 void
1372 mac_resource_set(mac_handle_t mh, mac_resource_add_t add, void *arg)
1373 {
1374 	mac_impl_t		*mip = (mac_impl_t *)mh;
1375 
1376 	/*
1377 	 * Update the 'resource_add' callbacks.
1378 	 */
1379 	rw_enter(&(mip->mi_resource_lock), RW_WRITER);
1380 	mip->mi_resource_add = add;
1381 	mip->mi_resource_add_arg = arg;
1382 	rw_exit(&(mip->mi_resource_lock));
1383 }
1384 
1385 /*
1386  * Driver support functions.
1387  */
1388 
/*
 * Allocate a zeroed mac_register_t for a driver to fill in before
 * calling mac_register().  Returns NULL on version mismatch; free
 * with mac_free().
 */
mac_register_t *
mac_alloc(uint_t mac_version)
{
	mac_register_t *mregp;

	/*
	 * Make sure there isn't a version mismatch between the driver and
	 * the framework.  In the future, if multiple versions are
	 * supported, this check could become more sophisticated.
	 */
	if (mac_version != MAC_VERSION)
		return (NULL);

	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
	mregp->m_version = mac_version;
	return (mregp);
}
1406 
/*
 * Free a mac_register_t previously obtained from mac_alloc().
 */
void
mac_free(mac_register_t *mregp)
{
	kmem_free(mregp, sizeof (mac_register_t));
}
1412 
1413 /*
1414  * Allocate a minor number.
1415  */
1416 minor_t
1417 mac_minor_hold(boolean_t sleep)
1418 {
1419 	minor_t	minor;
1420 
1421 	/*
1422 	 * Grab a value from the arena.
1423 	 */
1424 	atomic_add_32(&minor_count, 1);
1425 
1426 	if (sleep)
1427 		minor = (uint_t)id_alloc(minor_ids);
1428 	else
1429 		minor = (uint_t)id_alloc_nosleep(minor_ids);
1430 
1431 	if (minor == 0) {
1432 		atomic_add_32(&minor_count, -1);
1433 		return (0);
1434 	}
1435 
1436 	return (minor);
1437 }
1438 
1439 /*
1440  * Release a previously allocated minor number.
1441  */
1442 void
1443 mac_minor_rele(minor_t minor)
1444 {
1445 	/*
1446 	 * Return the value to the arena.
1447 	 */
1448 	id_free(minor_ids, minor);
1449 	atomic_add_32(&minor_count, -1);
1450 }
1451 
/*
 * Return the mask of notifications this MAC cannot generate
 * (mi_unsup_note); non-zero only for legacy devices, which supply it
 * via the MAC_CAPAB_LEGACY capability (see mac_register()).
 */
uint32_t
mac_no_notification(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	return (mip->mi_unsup_note);
}
1458 
/*
 * Return B_TRUE if this MAC advertised the MAC_CAPAB_LEGACY capability
 * at registration time (see mac_register()).
 */
boolean_t
mac_is_legacy(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	return (mip->mi_legacy);
}
1465 
1466 /*
1467  * mac_register() is how drivers register new MACs with the GLDv3
1468  * framework.  The mregp argument is allocated by drivers using the
1469  * mac_alloc() function, and can be freed using mac_free() immediately upon
1470  * return from mac_register().  Upon success (0 return value), the mhp
1471  * opaque pointer becomes the driver's handle to its MAC interface, and is
1472  * the argument to all other mac module entry points.
1473  */
int
mac_register(mac_register_t *mregp, mac_handle_t *mhp)
{
	mac_impl_t		*mip;
	mactype_t		*mtype;
	int			err = EINVAL;
	struct devnames		*dnp = NULL;
	uint_t			instance;
	boolean_t		style1_created = B_FALSE;
	boolean_t		style2_created = B_FALSE;
	mac_capab_legacy_t	legacy;
	char			*driver;
	minor_t			minor = 0;

	/* Find the required MAC-Type plugin. */
	if ((mtype = i_mactype_getplugin(mregp->m_type_ident)) == NULL)
		return (EINVAL);

	/* Create a mac_impl_t to represent this MAC. */
	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);

	/*
	 * The mac is not ready for open yet.
	 */
	mip->mi_disabled = B_TRUE;

	/*
	 * When a mac is registered, the m_instance field can be set to:
	 *
	 *  0:	Get the mac's instance number from m_dip.
	 *	This is usually used for physical device dips.
	 *
	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
	 *	For example, when an aggregation is created with the key option,
	 *	"key" will be used as the instance number.
	 *
	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
	 *	This is often used when a MAC of a virtual link is registered
	 *	(e.g., aggregation when "key" is not specified, or vnic).
	 *
	 * Note that the instance number is used to derive the mi_minor field
	 * of mac_impl_t, which will then be used to derive the name of kstats
	 * and the devfs nodes.  The first 2 cases are needed to preserve
	 * backward compatibility.
	 */
	switch (mregp->m_instance) {
	case 0:
		instance = ddi_get_instance(mregp->m_dip);
		break;
	case ((uint_t)-1):
		minor = mac_minor_hold(B_TRUE);
		if (minor == 0) {
			err = ENOSPC;
			goto fail;
		}
		instance = minor - 1;
		break;
	default:
		instance = mregp->m_instance;
		if (instance >= MAC_MAX_MINOR) {
			err = EINVAL;
			goto fail;
		}
		break;
	}

	/* mi_minor is the instance shifted up by one. */
	mip->mi_minor = (minor_t)(instance + 1);
	mip->mi_dip = mregp->m_dip;

	driver = (char *)ddi_driver_name(mip->mi_dip);

	/* Construct the MAC name as <drvname><instance> */
	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
	    driver, instance);

	mip->mi_driver = mregp->m_driver;

	mip->mi_type = mtype;
	mip->mi_margin = mregp->m_margin;
	mip->mi_info.mi_media = mtype->mt_type;
	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
	/* Sanity check: the maximum SDU must exceed the minimum SDU. */
	if (mregp->m_max_sdu <= mregp->m_min_sdu)
		goto fail;
	mip->mi_sdu_min = mregp->m_min_sdu;
	mip->mi_sdu_max = mregp->m_max_sdu;
	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
	/*
	 * If the media supports a broadcast address, cache a pointer to it
	 * in the mac_info_t so that upper layers can use it.
	 */
	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;

	/*
	 * Copy the unicast source address into the mac_info_t, but only if
	 * the MAC-Type defines a non-zero address length.  We need to
	 * handle MAC-Types that have an address length of 0
	 * (point-to-point protocol MACs for example).
	 */
	if (mip->mi_type->mt_addr_length > 0) {
		if (mregp->m_src_addr == NULL)
			goto fail;
		mip->mi_info.mi_unicst_addr =
		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
		    mip->mi_type->mt_addr_length);

		/*
		 * Copy the fixed 'factory' MAC address from the immutable
		 * info.  This is taken to be the MAC address currently in
		 * use.
		 */
		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
		    mip->mi_type->mt_addr_length);
		/* Copy the destination address if one is provided. */
		if (mregp->m_dst_addr != NULL) {
			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
			    mip->mi_type->mt_addr_length);
		}
	} else if (mregp->m_src_addr != NULL) {
		/* A source address makes no sense for zero-length media. */
		goto fail;
	}

	/*
	 * The format of the m_pdata is specific to the plugin.  It is
	 * passed in as an argument to all of the plugin callbacks.  The
	 * driver can update this information by calling
	 * mac_pdata_update().
	 */
	if (mregp->m_pdata != NULL) {
		/*
		 * Verify that the plugin supports MAC plugin data and that
		 * the supplied data is valid.
		 */
		if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
			goto fail;
		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
		    mregp->m_pdata_size)) {
			goto fail;
		}
		mip->mi_pdata = kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
		bcopy(mregp->m_pdata, mip->mi_pdata, mregp->m_pdata_size);
		mip->mi_pdata_size = mregp->m_pdata_size;
	}

	/*
	 * Stash the driver callbacks into the mac_impl_t, but first sanity
	 * check to make sure all mandatory callbacks are set.
	 */
	if (mregp->m_callbacks->mc_getstat == NULL ||
	    mregp->m_callbacks->mc_start == NULL ||
	    mregp->m_callbacks->mc_stop == NULL ||
	    mregp->m_callbacks->mc_setpromisc == NULL ||
	    mregp->m_callbacks->mc_multicst == NULL ||
	    mregp->m_callbacks->mc_unicst == NULL ||
	    mregp->m_callbacks->mc_tx == NULL) {
		goto fail;
	}
	mip->mi_callbacks = mregp->m_callbacks;

	/*
	 * Set up the possible transmit routines.
	 */
	mip->mi_txinfo.mt_fn = mip->mi_tx;
	mip->mi_txinfo.mt_arg = mip->mi_driver;

	mip->mi_legacy = mac_capab_get((mac_handle_t)mip,
	    MAC_CAPAB_LEGACY, &legacy);

	if (mip->mi_legacy) {
		/*
		 * Legacy device. Messages being sent will be looped back
		 * by the underlying driver. Therefore the txloop function
		 * pointer is the same as the tx function pointer.
		 */
		mip->mi_txloopinfo.mt_fn = mip->mi_txinfo.mt_fn;
		mip->mi_txloopinfo.mt_arg = mip->mi_txinfo.mt_arg;
		mip->mi_unsup_note = legacy.ml_unsup_note;
		mip->mi_phy_dev = legacy.ml_dev;
	} else {
		/*
		 * Normal device. The framework needs to do the loopback.
		 */
		mip->mi_txloopinfo.mt_fn = mac_txloop;
		mip->mi_txloopinfo.mt_arg = mip;
		mip->mi_unsup_note = 0;
		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
		    ddi_get_instance(mip->mi_dip) + 1);
	}

	mip->mi_vnic_txinfo.mt_fn = mac_vnic_tx;
	mip->mi_vnic_txinfo.mt_arg = mip;

	mip->mi_vnic_txloopinfo.mt_fn = mac_vnic_txloop;
	mip->mi_vnic_txloopinfo.mt_arg = mip;

	/*
	 * Allocate a notification thread.
	 */
	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
	    mip, 0, &p0, TS_RUN, minclsyspri);
	if (mip->mi_notify_thread == NULL)
		goto fail;

	/*
	 * Initialize the kstats for this device.
	 */
	mac_stat_create(mip);

	/* set the gldv3 flag in dn_flags */
	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
	LOCK_DEV_OPS(&dnp->dn_lock);
	dnp->dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
	UNLOCK_DEV_OPS(&dnp->dn_lock);

	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
		/* Create a style-2 DLPI device */
		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
			goto fail;
		style2_created = B_TRUE;

		/* Create a style-1 DLPI device */
		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
			goto fail;
		style1_created = B_TRUE;
	}

	/* Publish the MAC in the global name hash. */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	if (mod_hash_insert(i_mac_impl_hash,
	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {

		rw_exit(&i_mac_impl_lock);
		err = EEXIST;
		goto fail;
	}

	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
	    (mac_impl_t *), mip);

	/*
	 * Mark the MAC to be ready for open.
	 */
	mip->mi_disabled = B_FALSE;

	rw_exit(&i_mac_impl_lock);

	atomic_inc_32(&i_mac_impl_count);

	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
	*mhp = (mac_handle_t)mip;
	return (0);

fail:
	/*
	 * Unwind whatever was set up above; the style1/style2 flags and
	 * 'minor' record exactly which steps completed, so each cleanup
	 * below is conditional.
	 */
	if (style1_created)
		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);

	if (style2_created)
		ddi_remove_minor_node(mip->mi_dip, driver);

	/* clean up notification thread */
	if (mip->mi_notify_thread != NULL) {
		mutex_enter(&mip->mi_notify_bits_lock);
		mip->mi_notify_bits = (1 << MAC_NNOTE);
		cv_broadcast(&mip->mi_notify_cv);
		while (mip->mi_notify_bits != 0)
			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
		mutex_exit(&mip->mi_notify_bits_lock);
	}

	if (mip->mi_info.mi_unicst_addr != NULL) {
		kmem_free(mip->mi_info.mi_unicst_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_info.mi_unicst_addr = NULL;
	}

	mac_stat_destroy(mip);

	if (mip->mi_type != NULL) {
		atomic_dec_32(&mip->mi_type->mt_ref);
		mip->mi_type = NULL;
	}

	if (mip->mi_pdata != NULL) {
		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
		mip->mi_pdata = NULL;
		mip->mi_pdata_size = 0;
	}

	if (minor != 0) {
		/* 'minor' is non-zero only for the m_instance == -1 case. */
		ASSERT(minor > MAC_MAX_MINOR);
		mac_minor_rele(minor);
	}

	kmem_cache_free(i_mac_impl_cachep, mip);
	return (err);
}
1771 
1772 int
1773 mac_disable(mac_handle_t mh)
1774 {
1775 	mac_impl_t		*mip = (mac_impl_t *)mh;
1776 
1777 	/*
1778 	 * See if there are any other references to this mac_t (e.g., VLAN's).
1779 	 * If not, set mi_disabled to prevent any new VLAN's from being
1780 	 * created while we're destroying this mac.
1781 	 */
1782 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1783 	if (mip->mi_ref > 0) {
1784 		rw_exit(&i_mac_impl_lock);
1785 		return (EBUSY);
1786 	}
1787 	mip->mi_disabled = B_TRUE;
1788 	rw_exit(&i_mac_impl_lock);
1789 	return (0);
1790 }
1791 
int
mac_unregister(mac_handle_t mh)
{
	int			err;
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mod_hash_val_t		val;
	mac_multicst_addr_t	*p, *nextp;
	mac_margin_req_t	*mmr, *nextmmr;

	/*
	 * See if there are any other references to this mac_t (e.g., VLAN's).
	 * If not, set mi_disabled to prevent any new VLAN's from being
	 * created while we're destroying this mac. Once mac_disable() returns
	 * 0, the rest of mac_unregister() stuff should continue without
	 * returning an error.
	 */
	if (!mip->mi_disabled) {
		if ((err = mac_disable(mh)) != 0)
			return (err);
	}

	/*
	 * Clean up notification thread (wait for it to exit).
	 */
	mutex_enter(&mip->mi_notify_bits_lock);
	mip->mi_notify_bits = (1 << MAC_NNOTE);
	cv_broadcast(&mip->mi_notify_cv);
	while (mip->mi_notify_bits != 0)
		cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
	mutex_exit(&mip->mi_notify_bits_lock);

	/*
	 * Remove the devfs nodes; mac_register() only created them for
	 * minors in the [1 .. MAC_MAX_MINOR] range.
	 */
	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
		ddi_remove_minor_node(mip->mi_dip,
		    (char *)ddi_driver_name(mip->mi_dip));
	}

	ASSERT(!mip->mi_activelink);

	mac_stat_destroy(mip);

	/* Remove this MAC from the global name hash. */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	(void) mod_hash_remove(i_mac_impl_hash,
	    (mod_hash_key_t)mip->mi_name, &val);
	ASSERT(mip == (mac_impl_t *)val);

	ASSERT(i_mac_impl_count > 0);
	atomic_dec_32(&i_mac_impl_count);
	rw_exit(&i_mac_impl_lock);

	if (mip->mi_pdata != NULL)
		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
	mip->mi_pdata = NULL;
	mip->mi_pdata_size = 0;

	/*
	 * Free the list of multicast addresses.
	 */
	for (p = mip->mi_mmap; p != NULL; p = nextp) {
		nextp = p->mma_nextp;
		kmem_free(p, sizeof (mac_multicst_addr_t));
	}
	mip->mi_mmap = NULL;

	/*
	 * Free the list of margin requests.
	 */
	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
		nextmmr = mmr->mmr_nextp;
		kmem_free(mmr, sizeof (mac_margin_req_t));
	}
	mip->mi_mmrp = NULL;

	mip->mi_linkstate = LINK_STATE_UNKNOWN;
	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
	mip->mi_info.mi_unicst_addr = NULL;

	atomic_dec_32(&mip->mi_type->mt_ref);
	mip->mi_type = NULL;

	/* Minors above MAC_MAX_MINOR were held from the arena; release. */
	if (mip->mi_minor > MAC_MAX_MINOR)
		mac_minor_rele(mip->mi_minor);

	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);

	kmem_cache_free(i_mac_impl_cachep, mip);

	return (0);
}
1881 
1882 /*
1883  * To avoid potential deadlocks, mac_rx() releases mi_rx_lock
1884  * before invoking its list of upcalls. This introduces races with
1885  * mac_rx_remove() and mac_rx_add(), who can potentially modify the
1886  * upcall list while mi_rx_lock is not being held. The race with
1887  * mac_rx_remove() is handled by incrementing mi_rx_ref upon entering
1888  * mac_rx(); a non-zero mi_rx_ref would tell mac_rx_remove()
1889  * to not modify the list but instead mark an upcall for deletion.
 * Before mac_rx() exits, mi_rx_ref is decremented and if it
1891  * is 0, the marked upcalls will be removed from the list and freed.
1892  * The race with mac_rx_add() is harmless because mac_rx_add() only
1893  * prepends to the list and since mac_rx() saves the list head
1894  * before releasing mi_rx_lock, any prepended upcall won't be seen
1895  * until the next packet chain arrives.
1896  *
1897  * To minimize lock contention between multiple parallel invocations
1898  * of mac_rx(), mi_rx_lock is acquired as a READER lock. The
1899  * use of atomic operations ensures the sanity of mi_rx_ref. mi_rx_lock
1900  * will be upgraded to WRITER mode when there are marked upcalls to be
1901  * cleaned.
1902  */
static void
mac_do_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain,
    boolean_t active_only)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mblk_t		*bp = mp_chain;
	mac_rx_fn_t	*mrfp;

	/*
	 * Call all registered receive functions.
	 */
	rw_enter(&mip->mi_rx_lock, RW_READER);
	if ((mrfp = mip->mi_mrfp) == NULL) {
		/* There are no registered receive functions. */
		freemsgchain(bp);
		rw_exit(&mip->mi_rx_lock);
		return;
	}
	/*
	 * A non-zero mi_rx_ref tells mac_rx_remove() to defer deletions
	 * instead of unlinking entries; see the block comment above.
	 */
	atomic_inc_32(&mip->mi_rx_ref);
	rw_exit(&mip->mi_rx_lock);

	/*
	 * Call registered receive functions.
	 */
	do {
		mblk_t *recv_bp;

		if (active_only && !mrfp->mrf_active) {
			mrfp = mrfp->mrf_nextp;
			if (mrfp == NULL) {
				/*
				 * We hit the last receiver, but it's not
				 * active.
				 */
				freemsgchain(bp);
			}
			continue;
		}

		/* Copy the chain unless this is the last callback. */
		recv_bp = (mrfp->mrf_nextp != NULL) ? copymsgchain(bp) : bp;
		if (recv_bp != NULL) {
			if (mrfp->mrf_inuse) {
				/*
				 * Send bp itself and keep the copy.
				 * If there's only one active receiver,
				 * it should get the original message,
				 * tagged with the hardware checksum flags.
				 */
				mrfp->mrf_fn(mrfp->mrf_arg, mrh, bp);
				bp = recv_bp;
			} else {
				/* Marked for deletion; drop the chain. */
				freemsgchain(recv_bp);
			}
		}

		mrfp = mrfp->mrf_nextp;
	} while (mrfp != NULL);

	rw_enter(&mip->mi_rx_lock, RW_READER);
	if (atomic_dec_32_nv(&mip->mi_rx_ref) == 0 && mip->mi_rx_removed > 0) {
		mac_rx_fn_t	**pp, *p;
		uint32_t	cnt = 0;

		DTRACE_PROBE1(delete_callbacks, mac_impl_t *, mip);

		/*
		 * Need to become exclusive before doing cleanup
		 */
		if (rw_tryupgrade(&mip->mi_rx_lock) == 0) {
			rw_exit(&mip->mi_rx_lock);
			rw_enter(&mip->mi_rx_lock, RW_WRITER);
		}

		/*
		 * We return if another thread has already entered and cleaned
		 * up the list.
		 */
		if (mip->mi_rx_ref > 0 || mip->mi_rx_removed == 0) {
			rw_exit(&mip->mi_rx_lock);
			return;
		}

		/*
		 * Free removed callbacks.
		 */
		pp = &mip->mi_mrfp;
		while (*pp != NULL) {
			if (!(*pp)->mrf_inuse) {
				p = *pp;
				*pp = (*pp)->mrf_nextp;
				kmem_free(p, sizeof (*p));
				cnt++;
				continue;
			}
			pp = &(*pp)->mrf_nextp;
		}

		/*
		 * Wake up mac_rx_remove_wait()
		 */
		mutex_enter(&mip->mi_lock);
		ASSERT(mip->mi_rx_removed == cnt);
		mip->mi_rx_removed = 0;
		cv_broadcast(&mip->mi_rx_cv);
		mutex_exit(&mip->mi_lock);
	}
	rw_exit(&mip->mi_rx_lock);
}
2011 
/*
 * Deliver a packet chain to all registered receive callbacks.
 */
void
mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	mac_do_rx(mh, mrh, mp_chain, B_FALSE);
}
2017 
2018 /*
2019  * Send a packet chain up to the receive callbacks which declared
2020  * themselves as being active.
2021  */
void
mac_active_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	/* arg is the mac_handle_t; only mrf_active callbacks are called. */
	mac_do_rx(arg, mrh, mp_chain, B_TRUE);
}
2027 
2028 /*
2029  * Function passed to the active client sharing a VNIC. This function
2030  * is returned by mac_tx_get() when a VNIC is present. It invokes
2031  * the VNIC transmit entry point which was specified by the VNIC when
2032  * it called mac_vnic_set(). The VNIC transmit entry point will
2033  * pass the packets to the local VNICs and/or to the underlying VNICs
2034  * if needed.
2035  */
static mblk_t *
mac_vnic_tx(void *arg, mblk_t *mp)
{
	mac_impl_t	*mip = arg;
	mac_txinfo_t	*mtfp;
	mac_vnic_tx_t	*mvt;

	/*
	 * There is a race between the notification of the VNIC
	 * addition and removal, and the processing of the VNIC notification
	 * by the MAC client. During this window, it is possible for
	 * an active MAC client to continue invoking mac_vnic_tx() while
	 * the VNIC has already been removed. So we cannot assume
	 * that mi_vnic_present will always be true when mac_vnic_tx()
	 * is invoked.
	 */
	rw_enter(&mip->mi_tx_lock, RW_READER);
	if (!mip->mi_vnic_present) {
		/* VNIC already removed; the chain cannot be delivered. */
		rw_exit(&mip->mi_tx_lock);
		freemsgchain(mp);
		return (NULL);
	}

	ASSERT(mip->mi_vnic_tx != NULL);
	mvt = mip->mi_vnic_tx;
	/* Hold mvt across the callback since mi_tx_lock is dropped. */
	MAC_VNIC_TXINFO_REFHOLD(mvt);
	rw_exit(&mip->mi_tx_lock);

	mtfp = &mvt->mv_txinfo;
	mtfp->mt_fn(mtfp->mt_arg, mp);

	MAC_VNIC_TXINFO_REFRELE(mvt);
	return (NULL);
}
2070 
2071 /*
2072  * Transmit function -- ONLY used when there are registered loopback listeners.
2073  */
mblk_t *
mac_do_txloop(void *arg, mblk_t *bp, boolean_t call_vnic)
{
	mac_impl_t	*mip = arg;
	mac_txloop_fn_t	*mtfp;
	mblk_t		*loop_bp, *resid_bp, *next_bp;

	if (call_vnic) {
		/*
		 * In promiscuous mode, a copy of the sent packet will
		 * be sent to the client's promiscuous receive entry
		 * points via mac_vnic_tx()->
		 * mac_active_rx_promisc()->mac_rx_default().
		 */
		return (mac_vnic_tx(arg, bp));
	}

	/* Transmit and loop back the chain one message at a time. */
	while (bp != NULL) {
		next_bp = bp->b_next;
		bp->b_next = NULL;

		/* Copy for the loopback listeners before transmitting. */
		if ((loop_bp = copymsg(bp)) == NULL)
			goto noresources;

		if ((resid_bp = mip->mi_tx(mip->mi_driver, bp)) != NULL) {
			/* Driver could not accept the message; back off. */
			ASSERT(resid_bp == bp);
			freemsg(loop_bp);
			goto noresources;
		}

		rw_enter(&mip->mi_tx_lock, RW_READER);
		mtfp = mip->mi_mtfp;
		while (mtfp != NULL && loop_bp != NULL) {
			bp = loop_bp;

			/* XXX counter bump if copymsg() fails? */
			if (mtfp->mtf_nextp != NULL)
				loop_bp = copymsg(bp);
			else
				loop_bp = NULL;

			mtfp->mtf_fn(mtfp->mtf_arg, bp);
			mtfp = mtfp->mtf_nextp;
		}
		rw_exit(&mip->mi_tx_lock);

		/*
		 * It's possible we've raced with the disabling of promiscuous
		 * mode, in which case we can discard our copy.
		 */
		if (loop_bp != NULL)
			freemsg(loop_bp);

		bp = next_bp;
	}

	return (NULL);

noresources:
	/* Reattach the unprocessed tail and hand it back to the caller. */
	bp->b_next = next_bp;
	return (bp);
}
2136 
/*
 * Loopback entry point installed for non-legacy devices by
 * mac_register(); see mac_do_txloop().
 */
mblk_t *
mac_txloop(void *arg, mblk_t *bp)
{
	return (mac_do_txloop(arg, bp, B_FALSE));
}
2142 
/*
 * As mac_txloop(), but routes the chain through the VNIC tx entry
 * point (call_vnic is B_TRUE).
 */
static mblk_t *
mac_vnic_txloop(void *arg, mblk_t *bp)
{
	return (mac_do_txloop(arg, bp, B_TRUE));
}
2148 
2149 void
2150 mac_link_update(mac_handle_t mh, link_state_t link)
2151 {
2152 	mac_impl_t	*mip = (mac_impl_t *)mh;
2153 
2154 	/*
2155 	 * Save the link state.
2156 	 */
2157 	mip->mi_linkstate = link;
2158 
2159 	/*
2160 	 * Send a MAC_NOTE_LINK notification.
2161 	 */
2162 	i_mac_notify(mip, MAC_NOTE_LINK);
2163 }
2164 
2165 void
2166 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
2167 {
2168 	mac_impl_t	*mip = (mac_impl_t *)mh;
2169 
2170 	if (mip->mi_type->mt_addr_length == 0)
2171 		return;
2172 
2173 	/*
2174 	 * If the address has not changed, do nothing.
2175 	 */
2176 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0)
2177 		return;
2178 
2179 	/*
2180 	 * Save the address.
2181 	 */
2182 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
2183 
2184 	/*
2185 	 * Send a MAC_NOTE_UNICST notification.
2186 	 */
2187 	i_mac_notify(mip, MAC_NOTE_UNICST);
2188 }
2189 
/*
 * Send a MAC_NOTE_TX notification for this MAC.
 */
void
mac_tx_update(mac_handle_t mh)
{
	/*
	 * Send a MAC_NOTE_TX notification.
	 */
	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_TX);
}
2198 
/*
 * Send a MAC_NOTE_RESOURCE notification for this MAC.
 */
void
mac_resource_update(mac_handle_t mh)
{
	/*
	 * Send a MAC_NOTE_RESOURCE notification.
	 */
	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_RESOURCE);
}
2207 
2208 mac_resource_handle_t
2209 mac_resource_add(mac_handle_t mh, mac_resource_t *mrp)
2210 {
2211 	mac_impl_t		*mip = (mac_impl_t *)mh;
2212 	mac_resource_handle_t	mrh;
2213 	mac_resource_add_t	add;
2214 	void			*arg;
2215 
2216 	rw_enter(&mip->mi_resource_lock, RW_READER);
2217 	add = mip->mi_resource_add;
2218 	arg = mip->mi_resource_add_arg;
2219 
2220 	if (add != NULL)
2221 		mrh = add(arg, mrp);
2222 	else
2223 		mrh = NULL;
2224 	rw_exit(&mip->mi_resource_lock);
2225 
2226 	return (mrh);
2227 }
2228 
2229 int
2230 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
2231 {
2232 	mac_impl_t	*mip = (mac_impl_t *)mh;
2233 
2234 	/*
2235 	 * Verify that the plugin supports MAC plugin data and that the
2236 	 * supplied data is valid.
2237 	 */
2238 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
2239 		return (EINVAL);
2240 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
2241 		return (EINVAL);
2242 
2243 	if (mip->mi_pdata != NULL)
2244 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
2245 
2246 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
2247 	bcopy(mac_pdata, mip->mi_pdata, dsize);
2248 	mip->mi_pdata_size = dsize;
2249 
2250 	/*
2251 	 * Since the MAC plugin data is used to construct MAC headers that
2252 	 * were cached in fast-path headers, we need to flush fast-path
2253 	 * information for links associated with this mac.
2254 	 */
2255 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
2256 	return (0);
2257 }
2258 
2259 void
2260 mac_multicst_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
2261     boolean_t add)
2262 {
2263 	mac_impl_t		*mip = (mac_impl_t *)mh;
2264 	mac_multicst_addr_t	*p;
2265 
2266 	/*
2267 	 * If no specific refresh function was given then default to the
2268 	 * driver's m_multicst entry point.
2269 	 */
2270 	if (refresh == NULL) {
2271 		refresh = mip->mi_multicst;
2272 		arg = mip->mi_driver;
2273 	}
2274 	ASSERT(refresh != NULL);
2275 
2276 	/*
2277 	 * Walk the multicast address list and call the refresh function for
2278 	 * each address.
2279 	 */
2280 	rw_enter(&(mip->mi_data_lock), RW_READER);
2281 	for (p = mip->mi_mmap; p != NULL; p = p->mma_nextp)
2282 		refresh(arg, add, p->mma_addr);
2283 	rw_exit(&(mip->mi_data_lock));
2284 }
2285 
2286 void
2287 mac_unicst_refresh(mac_handle_t mh, mac_unicst_t refresh, void *arg)
2288 {
2289 	mac_impl_t	*mip = (mac_impl_t *)mh;
2290 	/*
2291 	 * If no specific refresh function was given then default to the
2292 	 * driver's mi_unicst entry point.
2293 	 */
2294 	if (refresh == NULL) {
2295 		refresh = mip->mi_unicst;
2296 		arg = mip->mi_driver;
2297 	}
2298 	ASSERT(refresh != NULL);
2299 
2300 	/*
2301 	 * Call the refresh function with the current unicast address.
2302 	 */
2303 	refresh(arg, mip->mi_addr);
2304 }
2305 
2306 void
2307 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
2308 {
2309 	mac_impl_t	*mip = (mac_impl_t *)mh;
2310 
2311 	/*
2312 	 * If no specific refresh function was given then default to the
2313 	 * driver's m_promisc entry point.
2314 	 */
2315 	if (refresh == NULL) {
2316 		refresh = mip->mi_setpromisc;
2317 		arg = mip->mi_driver;
2318 	}
2319 	ASSERT(refresh != NULL);
2320 
2321 	/*
2322 	 * Call the refresh function with the current promiscuity.
2323 	 */
2324 	refresh(arg, (mip->mi_devpromisc != 0));
2325 }
2326 
2327 /*
2328  * The mac client requests that the mac not to change its margin size to
2329  * be less than the specified value.  If "current" is B_TRUE, then the client
2330  * requests the mac not to change its margin size to be smaller than the
2331  * current size. Further, return the current margin size value in this case.
2332  *
2333  * We keep every requested size in an ordered list from largest to smallest.
2334  */
int
mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_margin_req_t	**pp, *p;
	int			err = 0;

	rw_enter(&(mip->mi_data_lock), RW_WRITER);
	/* "current" means: pin the margin at its present value. */
	if (current)
		*marginp = mip->mi_margin;

	/*
	 * If the current margin value cannot satisfy the margin requested,
	 * return ENOTSUP directly.
	 */
	if (*marginp > mip->mi_margin) {
		err = ENOTSUP;
		goto done;
	}

	/*
	 * Check whether the given margin is already in the list. If so,
	 * bump the reference count.  The list is ordered from largest
	 * to smallest margin, so we can stop as soon as we see a value
	 * smaller than the one requested; at that point pp is the link
	 * where a new entry must be inserted to keep the list sorted.
	 */
	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
		if (p->mmr_margin == *marginp) {
			/*
			 * The margin requested is already in the list,
			 * so just bump the reference count.
			 */
			p->mmr_ref++;
			goto done;
		}
		if (p->mmr_margin < *marginp)
			break;
	}


	if ((p = kmem_zalloc(sizeof (mac_margin_req_t), KM_NOSLEEP)) == NULL) {
		err = ENOMEM;
		goto done;
	}

	/* Link the new request in at *pp, preserving the sorted order. */
	p->mmr_margin = *marginp;
	p->mmr_ref++;
	p->mmr_nextp = *pp;
	*pp = p;

done:
	rw_exit(&(mip->mi_data_lock));
	return (err);
}
2387 
2388 /*
2389  * The mac client requests to cancel its previous mac_margin_add() request.
2390  * We remove the requested margin size from the list.
2391  */
int
mac_margin_remove(mac_handle_t mh, uint32_t margin)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_margin_req_t	**pp, *p;
	int			err = 0;

	rw_enter(&(mip->mi_data_lock), RW_WRITER);
	/*
	 * Find the entry in the list for the given margin.  On exit
	 * from the loop, pp points at the link that references p, so
	 * the entry can be unlinked without re-walking the list.
	 */
	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
		if (p->mmr_margin == margin) {
			/* Only the last reference unlinks the entry. */
			if (--p->mmr_ref == 0)
				break;

			/*
			 * There is still a reference to this address so
			 * there's nothing more to do.
			 */
			goto done;
		}
	}

	/*
	 * We did not find an entry for the given margin.
	 */
	if (p == NULL) {
		err = ENOENT;
		goto done;
	}

	ASSERT(p->mmr_ref == 0);

	/*
	 * Remove it from the list.
	 */
	*pp = p->mmr_nextp;
	kmem_free(p, sizeof (mac_margin_req_t));
done:
	rw_exit(&(mip->mi_data_lock));
	return (err);
}
2435 
2436 /*
2437  * The mac client requests to get the mac's current margin value.
2438  */
void
mac_margin_get(mac_handle_t mh, uint32_t *marginp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/* Snapshot mi_margin under mi_data_lock for a consistent read. */
	rw_enter(&(mip->mi_data_lock), RW_READER);
	*marginp = mip->mi_margin;
	rw_exit(&(mip->mi_data_lock));
}
2448 
2449 boolean_t
2450 mac_margin_update(mac_handle_t mh, uint32_t margin)
2451 {
2452 	mac_impl_t	*mip = (mac_impl_t *)mh;
2453 	uint32_t	margin_needed = 0;
2454 
2455 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
2456 
2457 	if (mip->mi_mmrp != NULL)
2458 		margin_needed = mip->mi_mmrp->mmr_margin;
2459 
2460 	if (margin_needed <= margin)
2461 		mip->mi_margin = margin;
2462 
2463 	rw_exit(&(mip->mi_data_lock));
2464 
2465 	if (margin_needed <= margin)
2466 		i_mac_notify(mip, MAC_NOTE_MARGIN);
2467 
2468 	return (margin_needed <= margin);
2469 }
2470 
2471 boolean_t
2472 mac_do_active_set(mac_handle_t mh, boolean_t shareable)
2473 {
2474 	mac_impl_t *mip = (mac_impl_t *)mh;
2475 
2476 	mutex_enter(&mip->mi_activelink_lock);
2477 	if (mip->mi_activelink) {
2478 		mutex_exit(&mip->mi_activelink_lock);
2479 		return (B_FALSE);
2480 	}
2481 	mip->mi_activelink = B_TRUE;
2482 	mip->mi_shareable = shareable;
2483 	mutex_exit(&mip->mi_activelink_lock);
2484 	return (B_TRUE);
2485 }
2486 
/*
 * Called by MAC clients to take active ownership of the link.  By
 * default, active MAC clients cannot share the NIC with VNICs.
 * Returns B_FALSE if another client already holds the link active.
 */
boolean_t
mac_active_set(mac_handle_t mh)
{
	return (mac_do_active_set(mh, B_FALSE));
}
2496 
/*
 * Called by MAC clients which can share the NIC with VNICS, e.g. DLS.
 * Returns B_FALSE if another client already holds the link active.
 */
boolean_t
mac_active_shareable_set(mac_handle_t mh)
{
	return (mac_do_active_set(mh, B_TRUE));
}
2505 
/*
 * Release the active-link status previously acquired through
 * mac_active_set() or mac_active_shareable_set().  The caller must
 * currently hold the link active.
 */
void
mac_active_clear(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	mutex_enter(&mip->mi_activelink_lock);
	ASSERT(mip->mi_activelink);
	mip->mi_activelink = B_FALSE;
	mutex_exit(&mip->mi_activelink_lock);
}
2516 
/*
 * Install a VNIC on top of this MAC.  The VNIC supplies its own
 * transmit info and getcapab entry point, which the framework will
 * prefer over the underlying driver's (see mac_do_capab_get()).
 * Returns B_FALSE if a non-shareable active client already owns the
 * NIC; on success a MAC_NOTE_VNIC notification is generated.
 *
 * Lock order: mi_activelink_lock is taken before mi_tx_lock, so that
 * the active-client check and the VNIC installation happen atomically
 * with respect to mac_do_active_set() and the transmit path.
 */
boolean_t
mac_vnic_set(mac_handle_t mh, mac_txinfo_t *tx_info, mac_getcapab_t getcapab_fn,
    void *getcapab_arg)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_vnic_tx_t	*vnic_tx;

	mutex_enter(&mip->mi_activelink_lock);
	rw_enter(&mip->mi_tx_lock, RW_WRITER);
	ASSERT(!mip->mi_vnic_present);

	if (mip->mi_activelink && !mip->mi_shareable) {
		/*
		 * The NIC is already used by an active client which cannot
		 * share it with VNICs.
		 */
		rw_exit(&mip->mi_tx_lock);
		mutex_exit(&mip->mi_activelink_lock);
		return (B_FALSE);
	}

	/* NOTE(review): KM_SLEEP allocation while holding both locks. */
	vnic_tx = kmem_cache_alloc(mac_vnic_tx_cache, KM_SLEEP);
	vnic_tx->mv_refs = 0;
	vnic_tx->mv_txinfo = *tx_info;
	vnic_tx->mv_clearing = B_FALSE;

	mip->mi_vnic_present = B_TRUE;
	mip->mi_vnic_tx = vnic_tx;
	mip->mi_vnic_getcapab_fn = getcapab_fn;
	mip->mi_vnic_getcapab_arg = getcapab_arg;
	rw_exit(&mip->mi_tx_lock);
	mutex_exit(&mip->mi_activelink_lock);

	/* Notify clients that a VNIC is now layered on this MAC. */
	i_mac_notify(mip, MAC_NOTE_VNIC);
	return (B_TRUE);
}
2553 
/*
 * Remove the VNIC previously installed by mac_vnic_set().  After the
 * VNIC state is cleared under mi_tx_lock, wait for every in-flight
 * transmit call that still references the old mac_vnic_tx_t to drain
 * before freeing it.
 */
void
mac_vnic_clear(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	mac_vnic_tx_t	*vnic_tx;

	rw_enter(&mip->mi_tx_lock, RW_WRITER);
	ASSERT(mip->mi_vnic_present);
	mip->mi_vnic_present = B_FALSE;
	/*
	 * Setting mi_vnic_tx to NULL here under the lock guarantees
	 * that no new references to the current VNIC transmit structure
	 * will be taken by mac_vnic_tx(). This is a necessary condition
	 * for safely waiting for the reference count to drop to
	 * zero below.
	 */
	vnic_tx = mip->mi_vnic_tx;
	mip->mi_vnic_tx = NULL;
	mip->mi_vnic_getcapab_fn = NULL;
	mip->mi_vnic_getcapab_arg = NULL;
	rw_exit(&mip->mi_tx_lock);

	i_mac_notify(mip, MAC_NOTE_VNIC);

	/*
	 * Wait for all TX calls referencing the VNIC transmit
	 * entry point that was removed to complete.  mv_clearing is set
	 * under mv_lock; presumably the TX path signals mv_cv when it
	 * drops references while mv_clearing is set — see mac_vnic_tx().
	 */
	mutex_enter(&vnic_tx->mv_lock);
	vnic_tx->mv_clearing = B_TRUE;
	while (vnic_tx->mv_refs > 0)
		cv_wait(&vnic_tx->mv_cv, &vnic_tx->mv_lock);
	mutex_exit(&vnic_tx->mv_lock);
	kmem_cache_free(mac_vnic_tx_cache, vnic_tx);
}
2589 
2590 /*
2591  * mac_info_get() is used for retrieving the mac_info when a DL_INFO_REQ is
2592  * issued before a DL_ATTACH_REQ. we walk the i_mac_impl_hash table and find
2593  * the first mac_impl_t with a matching driver name; then we copy its mac_info_t
2594  * to the caller. we do all this with i_mac_impl_lock held so the mac_impl_t
2595  * cannot disappear while we are accessing it.
2596  */
/* State threaded through the mod_hash walk performed by mac_info_get(). */
typedef struct i_mac_info_state_s {
	const char	*mi_name;	/* in: driver name to match */
	mac_info_t	*mi_infop;	/* out: matching mac_info, or NULL */
} i_mac_info_state_t;
2601 
2602 /*ARGSUSED*/
2603 static uint_t
2604 i_mac_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2605 {
2606 	i_mac_info_state_t	*statep = arg;
2607 	mac_impl_t		*mip = (mac_impl_t *)val;
2608 
2609 	if (mip->mi_disabled)
2610 		return (MH_WALK_CONTINUE);
2611 
2612 	if (strcmp(statep->mi_name,
2613 	    ddi_driver_name(mip->mi_dip)) != 0)
2614 		return (MH_WALK_CONTINUE);
2615 
2616 	statep->mi_infop = &mip->mi_info;
2617 	return (MH_WALK_TERMINATE);
2618 }
2619 
2620 boolean_t
2621 mac_info_get(const char *name, mac_info_t *minfop)
2622 {
2623 	i_mac_info_state_t	state;
2624 
2625 	rw_enter(&i_mac_impl_lock, RW_READER);
2626 	state.mi_name = name;
2627 	state.mi_infop = NULL;
2628 	mod_hash_walk(i_mac_impl_hash, i_mac_info_walker, &state);
2629 	if (state.mi_infop == NULL) {
2630 		rw_exit(&i_mac_impl_lock);
2631 		return (B_FALSE);
2632 	}
2633 	*minfop = *state.mi_infop;
2634 	rw_exit(&i_mac_impl_lock);
2635 	return (B_TRUE);
2636 }
2637 
2638 boolean_t
2639 mac_do_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data,
2640     boolean_t is_vnic)
2641 {
2642 	mac_impl_t *mip = (mac_impl_t *)mh;
2643 
2644 	if (!is_vnic) {
2645 		rw_enter(&mip->mi_tx_lock, RW_READER);
2646 		if (mip->mi_vnic_present) {
2647 			boolean_t rv;
2648 
2649 			rv = mip->mi_vnic_getcapab_fn(mip->mi_vnic_getcapab_arg,
2650 			    cap, cap_data);
2651 			rw_exit(&mip->mi_tx_lock);
2652 			return (rv);
2653 		}
2654 		rw_exit(&mip->mi_tx_lock);
2655 	}
2656 
2657 	if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
2658 		return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
2659 	else
2660 		return (B_FALSE);
2661 }
2662 
/*
 * Capability query on behalf of a regular (non-VNIC) MAC client; a
 * VNIC layered on this MAC, if any, gets to answer first.
 */
boolean_t
mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
{
	return (mac_do_capab_get(mh, cap, cap_data, B_FALSE));
}
2668 
/*
 * Capability query issued by a VNIC itself: bypass the VNIC getcapab
 * hook and go straight to the underlying driver.
 */
boolean_t
mac_vnic_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
{
	return (mac_do_capab_get(mh, cap, cap_data, B_TRUE));
}
2674 
/*
 * Ask the MAC-type plugin whether "sap" is valid for this MAC; the
 * plugin returns through *bind_sap the SAP value to use at bind time.
 */
boolean_t
mac_sap_verify(mac_handle_t mh, uint32_t sap, uint32_t *bind_sap)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	return (mip->mi_type->mt_ops.mtops_sap_verify(sap, bind_sap,
	    mip->mi_pdata));
}
2682 
/*
 * Build a MAC header via the MAC-type plugin, using this MAC's
 * current unicast address as source, "daddr" as destination and "sap"
 * as the service access point.  "extra_len" is passed through to the
 * plugin (presumably extra space to reserve in the returned mblk —
 * semantics are plugin-defined).  Returns NULL on failure.
 */
mblk_t *
mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload,
    size_t extra_len)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, daddr, sap,
	    mip->mi_pdata, payload, extra_len));
}
2691 
/*
 * Parse the MAC header at the start of "mp" via the MAC-type plugin
 * and fill in *mhip.  Returns 0 on success or a plugin-defined errno.
 */
int
mac_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	return (mip->mi_type->mt_ops.mtops_header_info(mp, mip->mi_pdata,
	    mhip));
}
2699 
2700 mblk_t *
2701 mac_header_cook(mac_handle_t mh, mblk_t *mp)
2702 {
2703 	mac_impl_t	*mip = (mac_impl_t *)mh;
2704 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_COOK) {
2705 		if (DB_REF(mp) > 1) {
2706 			mblk_t *newmp = copymsg(mp);
2707 			if (newmp == NULL)
2708 				return (NULL);
2709 			freemsg(mp);
2710 			mp = newmp;
2711 		}
2712 		return (mip->mi_type->mt_ops.mtops_header_cook(mp,
2713 		    mip->mi_pdata));
2714 	}
2715 	return (mp);
2716 }
2717 
2718 mblk_t *
2719 mac_header_uncook(mac_handle_t mh, mblk_t *mp)
2720 {
2721 	mac_impl_t	*mip = (mac_impl_t *)mh;
2722 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_UNCOOK) {
2723 		if (DB_REF(mp) > 1) {
2724 			mblk_t *newmp = copymsg(mp);
2725 			if (newmp == NULL)
2726 				return (NULL);
2727 			freemsg(mp);
2728 			mp = newmp;
2729 		}
2730 		return (mip->mi_type->mt_ops.mtops_header_uncook(mp,
2731 		    mip->mi_pdata));
2732 	}
2733 	return (mp);
2734 }
2735 
/* Thin wrapper: hook a MAC driver's dev_ops into the DLD framework. */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	dld_init_ops(ops, name);
}
2741 
/* Thin wrapper: undo mac_init_ops() via the DLD framework. */
void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}
2747 
2748 /*
2749  * MAC Type Plugin functions.
2750  */
2751 
/*
 * Allocate a mactype_register_t for a MAC-type plugin to fill in and
 * pass to mactype_register().  Returns NULL on version mismatch.
 */
mactype_register_t *
mactype_alloc(uint_t mactype_version)
{
	mactype_register_t *mtrp;

	/*
	 * Make sure there isn't a version mismatch between the plugin and
	 * the framework.  In the future, if multiple versions are
	 * supported, this check could become more sophisticated.
	 */
	if (mactype_version != MACTYPE_VERSION)
		return (NULL);

	mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP);
	mtrp->mtr_version = mactype_version;
	return (mtrp);
}
2769 
/* Free a mactype_register_t obtained from mactype_alloc(). */
void
mactype_free(mactype_register_t *mtrp)
{
	kmem_free(mtrp, sizeof (mactype_register_t));
}
2775 
2776 int
2777 mactype_register(mactype_register_t *mtrp)
2778 {
2779 	mactype_t	*mtp;
2780 	mactype_ops_t	*ops = mtrp->mtr_ops;
2781 
2782 	/* Do some sanity checking before we register this MAC type. */
2783 	if (mtrp->mtr_ident == NULL || ops == NULL || mtrp->mtr_addrlen == 0)
2784 		return (EINVAL);
2785 
2786 	/*
2787 	 * Verify that all mandatory callbacks are set in the ops
2788 	 * vector.
2789 	 */
2790 	if (ops->mtops_unicst_verify == NULL ||
2791 	    ops->mtops_multicst_verify == NULL ||
2792 	    ops->mtops_sap_verify == NULL ||
2793 	    ops->mtops_header == NULL ||
2794 	    ops->mtops_header_info == NULL) {
2795 		return (EINVAL);
2796 	}
2797 
2798 	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
2799 	mtp->mt_ident = mtrp->mtr_ident;
2800 	mtp->mt_ops = *ops;
2801 	mtp->mt_type = mtrp->mtr_mactype;
2802 	mtp->mt_nativetype = mtrp->mtr_nativetype;
2803 	mtp->mt_addr_length = mtrp->mtr_addrlen;
2804 	if (mtrp->mtr_brdcst_addr != NULL) {
2805 		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
2806 		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
2807 		    mtrp->mtr_addrlen);
2808 	}
2809 
2810 	mtp->mt_stats = mtrp->mtr_stats;
2811 	mtp->mt_statcount = mtrp->mtr_statcount;
2812 
2813 	if (mod_hash_insert(i_mactype_hash,
2814 	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
2815 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2816 		kmem_free(mtp, sizeof (*mtp));
2817 		return (EEXIST);
2818 	}
2819 	return (0);
2820 }
2821 
2822 int
2823 mactype_unregister(const char *ident)
2824 {
2825 	mactype_t	*mtp;
2826 	mod_hash_val_t	val;
2827 	int 		err;
2828 
2829 	/*
2830 	 * Let's not allow MAC drivers to use this plugin while we're
2831 	 * trying to unregister it.  Holding i_mactype_lock also prevents a
2832 	 * plugin from unregistering while a MAC driver is attempting to
2833 	 * hold a reference to it in i_mactype_getplugin().
2834 	 */
2835 	mutex_enter(&i_mactype_lock);
2836 
2837 	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
2838 	    (mod_hash_val_t *)&mtp)) != 0) {
2839 		/* A plugin is trying to unregister, but it never registered. */
2840 		err = ENXIO;
2841 		goto done;
2842 	}
2843 
2844 	if (mtp->mt_ref != 0) {
2845 		err = EBUSY;
2846 		goto done;
2847 	}
2848 
2849 	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
2850 	ASSERT(err == 0);
2851 	if (err != 0) {
2852 		/* This should never happen, thus the ASSERT() above. */
2853 		err = EINVAL;
2854 		goto done;
2855 	}
2856 	ASSERT(mtp == (mactype_t *)val);
2857 
2858 	kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2859 	kmem_free(mtp, sizeof (mactype_t));
2860 done:
2861 	mutex_exit(&i_mactype_lock);
2862 	return (err);
2863 }
2864 
2865 int
2866 mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize)
2867 {
2868 	int err = ENOTSUP;
2869 	mac_impl_t *mip = (mac_impl_t *)mh;
2870 
2871 	if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
2872 		err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
2873 		    macprop->mp_name, macprop->mp_id, valsize, val);
2874 	}
2875 	return (err);
2876 }
2877 
2878 int
2879 mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize)
2880 {
2881 	int err = ENOTSUP;
2882 	mac_impl_t *mip = (mac_impl_t *)mh;
2883 
2884 	if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) {
2885 		err = mip->mi_callbacks->mc_getprop(mip->mi_driver,
2886 		    macprop->mp_name, macprop->mp_id, valsize, val);
2887 	}
2888 	return (err);
2889 }
2890 
2891 int
2892 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
2893 {
2894 	mac_impl_t	*mip = (mac_impl_t *)mh;
2895 
2896 	if (sdu_max <= mip->mi_sdu_min)
2897 		return (EINVAL);
2898 	mip->mi_sdu_max = sdu_max;
2899 
2900 	/* Send a MAC_NOTE_SDU_SIZE notification. */
2901 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
2902 	return (0);
2903 }
2904