xref: /illumos-gate/usr/src/uts/common/io/mac/mac.c (revision 57c40785)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 /*
29  * MAC Services Module
30  */
31 
32 #include <sys/types.h>
33 #include <sys/conf.h>
34 #include <sys/id_space.h>
35 #include <sys/esunddi.h>
36 #include <sys/stat.h>
37 #include <sys/mkdev.h>
38 #include <sys/stream.h>
39 #include <sys/strsun.h>
40 #include <sys/strsubr.h>
41 #include <sys/dlpi.h>
42 #include <sys/dls.h>
43 #include <sys/modhash.h>
44 #include <sys/vlan.h>
45 #include <sys/mac.h>
46 #include <sys/mac_impl.h>
47 #include <sys/dld.h>
48 #include <sys/modctl.h>
49 #include <sys/fs/dv_node.h>
50 #include <sys/thread.h>
51 #include <sys/proc.h>
52 #include <sys/callb.h>
53 #include <sys/cpuvar.h>
54 #include <sys/atomic.h>
55 #include <sys/sdt.h>
56 #include <inet/nd.h>
57 #include <sys/ethernet.h>
58 
/* Size passed to mod_hash_create_extended() for "mac_impl_hash". */
#define	IMPL_HASHSZ	67	/* prime */

/* kmem cache of mac_impl_t objects, created in mac_init(). */
static kmem_cache_t	*i_mac_impl_cachep;
/* String-keyed hash that mac_hold() searches by MAC name. */
static mod_hash_t	*i_mac_impl_hash;
/*
 * Held as writer by mac_hold()/mac_rele() and the exclusive variants while
 * they change mi_ref/mi_exclusive, and as reader by i_mac_notify().
 */
krwlock_t		i_mac_impl_lock;
/* mac_fini() returns EBUSY while this is non-zero. */
uint_t			i_mac_impl_count;
/* kmem cache of mac_vnic_tx_t objects, created in mac_init(). */
static kmem_cache_t	*mac_vnic_tx_cache;
/* Id space created in mac_init() spanning MAC_MAX_MINOR+1 .. MAXMIN32. */
static id_space_t	*minor_ids;
/* mac_fini() returns EBUSY while this is non-zero. */
static uint32_t		minor_count;

/* Module subdirectory handed to modload() by i_mactype_getplugin(). */
#define	MACTYPE_KMODDIR	"mac"
/* Size passed to mod_hash_create_extended() for "mactype_hash". */
#define	MACTYPE_HASHSZ	67
/* String-keyed hash that i_mactype_getplugin() searches by plugin name. */
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/* Forward declarations of file-local functions. */
static void i_mac_notify_thread(void *);
static mblk_t *mac_vnic_tx(void *, mblk_t *);
static mblk_t *mac_vnic_txloop(void *, mblk_t *);
static void   mac_register_priv_prop(mac_impl_t *, mac_priv_prop_t *, uint_t);
static void   mac_unregister_priv_prop(mac_impl_t *);
83 
84 /*
85  * Private functions.
86  */
87 
88 /*ARGSUSED*/
89 static int
90 i_mac_constructor(void *buf, void *arg, int kmflag)
91 {
92 	mac_impl_t	*mip = buf;
93 
94 	bzero(buf, sizeof (mac_impl_t));
95 
96 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
97 
98 	rw_init(&mip->mi_state_lock, NULL, RW_DRIVER, NULL);
99 	rw_init(&mip->mi_gen_lock, NULL, RW_DRIVER, NULL);
100 	rw_init(&mip->mi_data_lock, NULL, RW_DRIVER, NULL);
101 	rw_init(&mip->mi_notify_lock, NULL, RW_DRIVER, NULL);
102 	rw_init(&mip->mi_rx_lock, NULL, RW_DRIVER, NULL);
103 	rw_init(&mip->mi_tx_lock, NULL, RW_DRIVER, NULL);
104 	rw_init(&mip->mi_resource_lock, NULL, RW_DRIVER, NULL);
105 	mutex_init(&mip->mi_activelink_lock, NULL, MUTEX_DEFAULT, NULL);
106 	mutex_init(&mip->mi_notify_bits_lock, NULL, MUTEX_DRIVER, NULL);
107 	cv_init(&mip->mi_notify_cv, NULL, CV_DRIVER, NULL);
108 	mutex_init(&mip->mi_lock, NULL, MUTEX_DRIVER, NULL);
109 	cv_init(&mip->mi_rx_cv, NULL, CV_DRIVER, NULL);
110 	return (0);
111 }
112 
113 /*ARGSUSED*/
114 static void
115 i_mac_destructor(void *buf, void *arg)
116 {
117 	mac_impl_t	*mip = buf;
118 
119 	ASSERT(mip->mi_ref == 0);
120 	ASSERT(!mip->mi_exclusive);
121 	ASSERT(mip->mi_active == 0);
122 	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
123 	ASSERT(mip->mi_devpromisc == 0);
124 	ASSERT(mip->mi_promisc == 0);
125 	ASSERT(mip->mi_mmap == NULL);
126 	ASSERT(mip->mi_mmrp == NULL);
127 	ASSERT(mip->mi_mnfp == NULL);
128 	ASSERT(mip->mi_resource_add == NULL);
129 	ASSERT(mip->mi_ksp == NULL);
130 	ASSERT(mip->mi_kstat_count == 0);
131 	ASSERT(mip->mi_notify_bits == 0);
132 	ASSERT(mip->mi_notify_thread == NULL);
133 
134 	rw_destroy(&mip->mi_gen_lock);
135 	rw_destroy(&mip->mi_state_lock);
136 	rw_destroy(&mip->mi_data_lock);
137 	rw_destroy(&mip->mi_notify_lock);
138 	rw_destroy(&mip->mi_rx_lock);
139 	rw_destroy(&mip->mi_tx_lock);
140 	rw_destroy(&mip->mi_resource_lock);
141 	mutex_destroy(&mip->mi_activelink_lock);
142 	mutex_destroy(&mip->mi_notify_bits_lock);
143 	cv_destroy(&mip->mi_notify_cv);
144 	mutex_destroy(&mip->mi_lock);
145 	cv_destroy(&mip->mi_rx_cv);
146 }
147 
148 /*
149  * mac_vnic_tx_t kmem cache support functions.
150  */
151 
152 /* ARGSUSED */
153 static int
154 i_mac_vnic_tx_ctor(void *buf, void *arg, int mkflag)
155 {
156 	mac_vnic_tx_t *vnic_tx = buf;
157 
158 	bzero(buf, sizeof (mac_vnic_tx_t));
159 	mutex_init(&vnic_tx->mv_lock, NULL, MUTEX_DRIVER, NULL);
160 	cv_init(&vnic_tx->mv_cv, NULL, CV_DRIVER, NULL);
161 	return (0);
162 }
163 
164 /* ARGSUSED */
165 static void
166 i_mac_vnic_tx_dtor(void *buf, void *arg)
167 {
168 	mac_vnic_tx_t *vnic_tx = buf;
169 
170 	ASSERT(vnic_tx->mv_refs == 0);
171 	mutex_destroy(&vnic_tx->mv_lock);
172 	cv_destroy(&vnic_tx->mv_cv);
173 }
174 
175 static void
176 i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
177 {
178 	rw_enter(&i_mac_impl_lock, RW_READER);
179 	if (mip->mi_disabled)
180 		goto exit;
181 
182 	/*
183 	 * Guard against incorrect notifications.  (Running a newer
184 	 * mac client against an older implementation?)
185 	 */
186 	if (type >= MAC_NNOTE)
187 		goto exit;
188 
189 	mutex_enter(&mip->mi_notify_bits_lock);
190 	mip->mi_notify_bits |= (1 << type);
191 	cv_broadcast(&mip->mi_notify_cv);
192 	mutex_exit(&mip->mi_notify_bits_lock);
193 
194 exit:
195 	rw_exit(&i_mac_impl_lock);
196 }
197 
198 static void
199 i_mac_log_link_state(mac_impl_t *mip)
200 {
201 	/*
202 	 * If no change, then it is not interesting.
203 	 */
204 	if (mip->mi_lastlinkstate == mip->mi_linkstate)
205 		return;
206 
207 	switch (mip->mi_linkstate) {
208 	case LINK_STATE_UP:
209 		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
210 			char det[200];
211 
212 			mip->mi_type->mt_ops.mtops_link_details(det,
213 			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
214 
215 			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
216 		} else {
217 			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
218 		}
219 		break;
220 
221 	case LINK_STATE_DOWN:
222 		/*
223 		 * Only transitions from UP to DOWN are interesting
224 		 */
225 		if (mip->mi_lastlinkstate != LINK_STATE_UNKNOWN)
226 			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
227 		break;
228 
229 	case LINK_STATE_UNKNOWN:
230 		/*
231 		 * This case is normally not interesting.
232 		 */
233 		break;
234 	}
235 	mip->mi_lastlinkstate = mip->mi_linkstate;
236 }
237 
/*
 * Body of the per-MAC notification thread.  arg is the mac_impl_t whose
 * mi_notify_bits this thread services.  The thread sleeps on mi_notify_cv
 * until bits are posted, snapshots and clears them, and then invokes every
 * registered notify callback once for each notification type whose bit was
 * set.  A set bit at position MAC_NNOTE is treated as a request to exit.
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;

	CALLB_CPR_INIT(&cprinfo, &mip->mi_notify_bits_lock, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(&mip->mi_notify_bits_lock);
	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		/* Nothing pending: sleep until someone posts a bit. */
		bits = mip->mi_notify_bits;
		if (bits == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &mip->mi_notify_bits_lock);
			continue;
		}
		/* Consume the pending set; work continues from the snapshot. */
		mip->mi_notify_bits = 0;

		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_disabled);
			break;
		}

		/* Callbacks run without mi_notify_bits_lock held. */
		mutex_exit(&mip->mi_notify_bits_lock);

		/*
		 * Log link changes.
		 */
		if ((bits & (1 << MAC_NOTE_LINK)) != 0)
			i_mac_log_link_state(mip);

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			mac_notify_fn_t	*mnfp;

			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/*
			 * Walk the list of notifications.
			 */
			rw_enter(&mip->mi_notify_lock, RW_READER);
			for (mnfp = mip->mi_mnfp; mnfp != NULL;
			    mnfp = mnfp->mnf_nextp) {

				mnfp->mnf_fn(mnfp->mnf_arg, type);
			}
			rw_exit(&mip->mi_notify_lock);
		}

		/* Retake the lock before looking at the bits again. */
		mutex_enter(&mip->mi_notify_bits_lock);
	}

	/*
	 * Exit path; mi_notify_bits_lock is still held here.  Clearing
	 * mi_notify_thread and broadcasting lets a waiter see that the
	 * thread is gone.
	 */
	mip->mi_notify_thread = NULL;
	cv_broadcast(&mip->mi_notify_cv);

	/* NOTE(review): CALLB_CPR_EXIT presumably drops the lock - confirm. */
	CALLB_CPR_EXIT(&cprinfo);

	thread_exit();
}
307 
308 static mactype_t *
309 i_mactype_getplugin(const char *pname)
310 {
311 	mactype_t	*mtype = NULL;
312 	boolean_t	tried_modload = B_FALSE;
313 
314 	mutex_enter(&i_mactype_lock);
315 
316 find_registered_mactype:
317 	if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname,
318 	    (mod_hash_val_t *)&mtype) != 0) {
319 		if (!tried_modload) {
320 			/*
321 			 * If the plugin has not yet been loaded, then
322 			 * attempt to load it now.  If modload() succeeds,
323 			 * the plugin should have registered using
324 			 * mactype_register(), in which case we can go back
325 			 * and attempt to find it again.
326 			 */
327 			if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) {
328 				tried_modload = B_TRUE;
329 				goto find_registered_mactype;
330 			}
331 		}
332 	} else {
333 		/*
334 		 * Note that there's no danger that the plugin we've loaded
335 		 * could be unloaded between the modload() step and the
336 		 * reference count bump here, as we're holding
337 		 * i_mactype_lock, which mactype_unregister() also holds.
338 		 */
339 		atomic_inc_32(&mtype->mt_ref);
340 	}
341 
342 	mutex_exit(&i_mactype_lock);
343 	return (mtype);
344 }
345 
346 /*
347  * Module initialization functions.
348  */
349 
350 void
351 mac_init(void)
352 {
353 	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
354 	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
355 	    NULL, NULL, NULL, 0);
356 	ASSERT(i_mac_impl_cachep != NULL);
357 
358 	mac_vnic_tx_cache = kmem_cache_create("mac_vnic_tx_cache",
359 	    sizeof (mac_vnic_tx_t), 0, i_mac_vnic_tx_ctor, i_mac_vnic_tx_dtor,
360 	    NULL, NULL, NULL, 0);
361 	ASSERT(mac_vnic_tx_cache != NULL);
362 
363 	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
364 	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
365 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
366 	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);
367 	i_mac_impl_count = 0;
368 
369 	i_mactype_hash = mod_hash_create_extended("mactype_hash",
370 	    MACTYPE_HASHSZ,
371 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
372 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
373 
374 	/*
375 	 * Allocate an id space to manage minor numbers. The range of the
376 	 * space will be from MAC_MAX_MINOR+1 to MAXMIN32 (maximum legal
377 	 * minor number is MAXMIN, but id_t is type of integer and does not
378 	 * allow MAXMIN).
379 	 */
380 	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1, MAXMIN32);
381 	ASSERT(minor_ids != NULL);
382 	minor_count = 0;
383 }
384 
385 int
386 mac_fini(void)
387 {
388 	if (i_mac_impl_count > 0 || minor_count > 0)
389 		return (EBUSY);
390 
391 	id_space_destroy(minor_ids);
392 
393 	mod_hash_destroy_hash(i_mac_impl_hash);
394 	rw_destroy(&i_mac_impl_lock);
395 
396 	kmem_cache_destroy(i_mac_impl_cachep);
397 	kmem_cache_destroy(mac_vnic_tx_cache);
398 
399 	mod_hash_destroy_hash(i_mactype_hash);
400 	return (0);
401 }
402 
403 /*
404  * Client functions.
405  */
406 
407 static int
408 mac_hold(const char *macname, mac_impl_t **pmip)
409 {
410 	mac_impl_t	*mip;
411 	int		err;
412 
413 	/*
414 	 * Check the device name length to make sure it won't overflow our
415 	 * buffer.
416 	 */
417 	if (strlen(macname) >= MAXNAMELEN)
418 		return (EINVAL);
419 
420 	/*
421 	 * Look up its entry in the global hash table.
422 	 */
423 	rw_enter(&i_mac_impl_lock, RW_WRITER);
424 	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
425 	    (mod_hash_val_t *)&mip);
426 
427 	if (err != 0) {
428 		rw_exit(&i_mac_impl_lock);
429 		return (ENOENT);
430 	}
431 
432 	if (mip->mi_disabled) {
433 		rw_exit(&i_mac_impl_lock);
434 		return (ENOENT);
435 	}
436 
437 	if (mip->mi_exclusive) {
438 		rw_exit(&i_mac_impl_lock);
439 		return (EBUSY);
440 	}
441 
442 	mip->mi_ref++;
443 	rw_exit(&i_mac_impl_lock);
444 
445 	*pmip = mip;
446 	return (0);
447 }
448 
449 static void
450 mac_rele(mac_impl_t *mip)
451 {
452 	rw_enter(&i_mac_impl_lock, RW_WRITER);
453 	ASSERT(mip->mi_ref != 0);
454 	if (--mip->mi_ref == 0)
455 		ASSERT(!mip->mi_activelink);
456 	rw_exit(&i_mac_impl_lock);
457 }
458 
459 int
460 mac_hold_exclusive(mac_handle_t mh)
461 {
462 	mac_impl_t	*mip = (mac_impl_t *)mh;
463 
464 	/*
465 	 * Look up its entry in the global hash table.
466 	 */
467 	rw_enter(&i_mac_impl_lock, RW_WRITER);
468 	if (mip->mi_disabled) {
469 		rw_exit(&i_mac_impl_lock);
470 		return (ENOENT);
471 	}
472 
473 	if (mip->mi_ref != 0) {
474 		rw_exit(&i_mac_impl_lock);
475 		return (EBUSY);
476 	}
477 
478 	ASSERT(!mip->mi_exclusive);
479 
480 	mip->mi_ref++;
481 	mip->mi_exclusive = B_TRUE;
482 	rw_exit(&i_mac_impl_lock);
483 	return (0);
484 }
485 
486 void
487 mac_rele_exclusive(mac_handle_t mh)
488 {
489 	mac_impl_t	*mip = (mac_impl_t *)mh;
490 
491 	/*
492 	 * Look up its entry in the global hash table.
493 	 */
494 	rw_enter(&i_mac_impl_lock, RW_WRITER);
495 	ASSERT(mip->mi_ref == 1 && mip->mi_exclusive);
496 	mip->mi_ref--;
497 	mip->mi_exclusive = B_FALSE;
498 	rw_exit(&i_mac_impl_lock);
499 }
500 
501 int
502 mac_open(const char *macname, mac_handle_t *mhp)
503 {
504 	mac_impl_t	*mip;
505 	int		err;
506 
507 	/*
508 	 * Look up its entry in the global hash table.
509 	 */
510 	if ((err = mac_hold(macname, &mip)) != 0)
511 		return (err);
512 
513 	/*
514 	 * Hold the dip associated to the MAC to prevent it from being
515 	 * detached. For a softmac, its underlying dip is held by the
516 	 * mi_open() callback.
517 	 *
518 	 * This is done to be more tolerant with some defective drivers,
519 	 * which incorrectly handle mac_unregister() failure in their
520 	 * xxx_detach() routine. For example, some drivers ignore the
521 	 * failure of mac_unregister() and free all resources that
522 	 * that are needed for data transmition.
523 	 */
524 	e_ddi_hold_devi(mip->mi_dip);
525 
526 	rw_enter(&mip->mi_gen_lock, RW_WRITER);
527 
528 	if ((mip->mi_oref != 0) ||
529 	    !(mip->mi_callbacks->mc_callbacks & MC_OPEN)) {
530 		goto done;
531 	}
532 
533 	/*
534 	 * Note that we do not hold i_mac_impl_lock when calling the
535 	 * mc_open() callback function to avoid deadlock with the
536 	 * i_mac_notify() function.
537 	 */
538 	if ((err = mip->mi_open(mip->mi_driver)) != 0) {
539 		rw_exit(&mip->mi_gen_lock);
540 		ddi_release_devi(mip->mi_dip);
541 		mac_rele(mip);
542 		return (err);
543 	}
544 
545 done:
546 	mip->mi_oref++;
547 	rw_exit(&mip->mi_gen_lock);
548 	*mhp = (mac_handle_t)mip;
549 	return (0);
550 }
551 
552 int
553 mac_open_by_linkid(datalink_id_t linkid, mac_handle_t *mhp)
554 {
555 	dls_dl_handle_t	dlh;
556 	int		err;
557 
558 	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
559 		return (err);
560 
561 	if (dls_devnet_vid(dlh) != VLAN_ID_NONE) {
562 		err = EINVAL;
563 		goto done;
564 	}
565 
566 	dls_devnet_prop_task_wait(dlh);
567 
568 	err = mac_open(dls_devnet_mac(dlh), mhp);
569 
570 done:
571 	dls_devnet_rele_tmp(dlh);
572 	return (err);
573 }
574 
575 int
576 mac_open_by_linkname(const char *link, mac_handle_t *mhp)
577 {
578 	datalink_id_t	linkid;
579 	int		err;
580 
581 	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0)
582 		return (err);
583 	return (mac_open_by_linkid(linkid, mhp));
584 }
585 
586 void
587 mac_close(mac_handle_t mh)
588 {
589 	mac_impl_t	*mip = (mac_impl_t *)mh;
590 
591 	rw_enter(&mip->mi_gen_lock, RW_WRITER);
592 
593 	ASSERT(mip->mi_oref != 0);
594 	if (--mip->mi_oref == 0) {
595 		if ((mip->mi_callbacks->mc_callbacks & MC_CLOSE))
596 			mip->mi_close(mip->mi_driver);
597 	}
598 	rw_exit(&mip->mi_gen_lock);
599 
600 	ddi_release_devi(mip->mi_dip);
601 	mac_rele(mip);
602 }
603 
604 const mac_info_t *
605 mac_info(mac_handle_t mh)
606 {
607 	return (&((mac_impl_t *)mh)->mi_info);
608 }
609 
610 dev_info_t *
611 mac_devinfo_get(mac_handle_t mh)
612 {
613 	return (((mac_impl_t *)mh)->mi_dip);
614 }
615 
616 const char *
617 mac_name(mac_handle_t mh)
618 {
619 	return (((mac_impl_t *)mh)->mi_name);
620 }
621 
622 minor_t
623 mac_minor(mac_handle_t mh)
624 {
625 	return (((mac_impl_t *)mh)->mi_minor);
626 }
627 
628 uint64_t
629 mac_stat_get(mac_handle_t mh, uint_t stat)
630 {
631 	mac_impl_t	*mip = (mac_impl_t *)mh;
632 	uint64_t	val;
633 	int		ret;
634 
635 	/*
636 	 * The range of stat determines where it is maintained.  Stat
637 	 * values from 0 up to (but not including) MAC_STAT_MIN are
638 	 * mainteined by the mac module itself.  Everything else is
639 	 * maintained by the driver.
640 	 */
641 	if (stat < MAC_STAT_MIN) {
642 		/* These stats are maintained by the mac module itself. */
643 		switch (stat) {
644 		case MAC_STAT_LINK_STATE:
645 			return (mip->mi_linkstate);
646 		case MAC_STAT_LINK_UP:
647 			return (mip->mi_linkstate == LINK_STATE_UP);
648 		case MAC_STAT_PROMISC:
649 			return (mip->mi_devpromisc != 0);
650 		default:
651 			ASSERT(B_FALSE);
652 		}
653 	}
654 
655 	/*
656 	 * Call the driver to get the given statistic.
657 	 */
658 	ret = mip->mi_getstat(mip->mi_driver, stat, &val);
659 	if (ret != 0) {
660 		/*
661 		 * The driver doesn't support this statistic.  Get the
662 		 * statistic's default value.
663 		 */
664 		val = mac_stat_default(mip, stat);
665 	}
666 	return (val);
667 }
668 
669 int
670 mac_start(mac_handle_t mh)
671 {
672 	mac_impl_t	*mip = (mac_impl_t *)mh;
673 	int		err;
674 
675 	ASSERT(mip->mi_start != NULL);
676 
677 	rw_enter(&(mip->mi_state_lock), RW_WRITER);
678 
679 	/*
680 	 * Check whether the device is already started.
681 	 */
682 	if (mip->mi_active++ != 0) {
683 		/*
684 		 * It's already started so there's nothing more to do.
685 		 */
686 		err = 0;
687 		goto done;
688 	}
689 
690 	/*
691 	 * Start the device.
692 	 */
693 	if ((err = mip->mi_start(mip->mi_driver)) != 0)
694 		--mip->mi_active;
695 
696 done:
697 	rw_exit(&(mip->mi_state_lock));
698 	return (err);
699 }
700 
701 void
702 mac_stop(mac_handle_t mh)
703 {
704 	mac_impl_t	*mip = (mac_impl_t *)mh;
705 
706 	ASSERT(mip->mi_stop != NULL);
707 
708 	rw_enter(&(mip->mi_state_lock), RW_WRITER);
709 
710 	/*
711 	 * Check whether the device is still needed.
712 	 */
713 	ASSERT(mip->mi_active != 0);
714 	if (--mip->mi_active != 0) {
715 		/*
716 		 * It's still needed so there's nothing more to do.
717 		 */
718 		goto done;
719 	}
720 
721 	/*
722 	 * Stop the device.
723 	 */
724 	mip->mi_stop(mip->mi_driver);
725 
726 done:
727 	rw_exit(&(mip->mi_state_lock));
728 }
729 
730 int
731 mac_multicst_add(mac_handle_t mh, const uint8_t *addr)
732 {
733 	mac_impl_t		*mip = (mac_impl_t *)mh;
734 	mac_multicst_addr_t	**pp;
735 	mac_multicst_addr_t	*p;
736 	int			err;
737 
738 	ASSERT(mip->mi_multicst != NULL);
739 
740 	/*
741 	 * Verify the address.
742 	 */
743 	if ((err = mip->mi_type->mt_ops.mtops_multicst_verify(addr,
744 	    mip->mi_pdata)) != 0) {
745 		return (err);
746 	}
747 
748 	/*
749 	 * Check whether the given address is already enabled.
750 	 */
751 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
752 	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
753 		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
754 		    0) {
755 			/*
756 			 * The address is already enabled so just bump the
757 			 * reference count.
758 			 */
759 			p->mma_ref++;
760 			err = 0;
761 			goto done;
762 		}
763 	}
764 
765 	/*
766 	 * Allocate a new list entry.
767 	 */
768 	if ((p = kmem_zalloc(sizeof (mac_multicst_addr_t),
769 	    KM_NOSLEEP)) == NULL) {
770 		err = ENOMEM;
771 		goto done;
772 	}
773 
774 	/*
775 	 * Enable a new multicast address.
776 	 */
777 	if ((err = mip->mi_multicst(mip->mi_driver, B_TRUE, addr)) != 0) {
778 		kmem_free(p, sizeof (mac_multicst_addr_t));
779 		goto done;
780 	}
781 
782 	/*
783 	 * Add the address to the list of enabled addresses.
784 	 */
785 	bcopy(addr, p->mma_addr, mip->mi_type->mt_addr_length);
786 	p->mma_ref++;
787 	*pp = p;
788 
789 done:
790 	rw_exit(&(mip->mi_data_lock));
791 	return (err);
792 }
793 
794 int
795 mac_multicst_remove(mac_handle_t mh, const uint8_t *addr)
796 {
797 	mac_impl_t		*mip = (mac_impl_t *)mh;
798 	mac_multicst_addr_t	**pp;
799 	mac_multicst_addr_t	*p;
800 	int			err;
801 
802 	ASSERT(mip->mi_multicst != NULL);
803 
804 	/*
805 	 * Find the entry in the list for the given address.
806 	 */
807 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
808 	for (pp = &(mip->mi_mmap); (p = *pp) != NULL; pp = &(p->mma_nextp)) {
809 		if (bcmp(p->mma_addr, addr, mip->mi_type->mt_addr_length) ==
810 		    0) {
811 			if (--p->mma_ref == 0)
812 				break;
813 
814 			/*
815 			 * There is still a reference to this address so
816 			 * there's nothing more to do.
817 			 */
818 			err = 0;
819 			goto done;
820 		}
821 	}
822 
823 	/*
824 	 * We did not find an entry for the given address so it is not
825 	 * currently enabled.
826 	 */
827 	if (p == NULL) {
828 		err = ENOENT;
829 		goto done;
830 	}
831 	ASSERT(p->mma_ref == 0);
832 
833 	/*
834 	 * Disable the multicast address.
835 	 */
836 	if ((err = mip->mi_multicst(mip->mi_driver, B_FALSE, addr)) != 0) {
837 		p->mma_ref++;
838 		goto done;
839 	}
840 
841 	/*
842 	 * Remove it from the list.
843 	 */
844 	*pp = p->mma_nextp;
845 	kmem_free(p, sizeof (mac_multicst_addr_t));
846 
847 done:
848 	rw_exit(&(mip->mi_data_lock));
849 	return (err);
850 }
851 
852 /*
853  * mac_unicst_verify: Verifies the passed address. It fails
854  * if the passed address is a group address or has incorrect length.
855  */
856 boolean_t
857 mac_unicst_verify(mac_handle_t mh, const uint8_t *addr, uint_t len)
858 {
859 	mac_impl_t	*mip = (mac_impl_t *)mh;
860 
861 	/*
862 	 * Verify the address.
863 	 */
864 	if ((len != mip->mi_type->mt_addr_length) ||
865 	    (mip->mi_type->mt_ops.mtops_unicst_verify(addr,
866 	    mip->mi_pdata)) != 0) {
867 		return (B_FALSE);
868 	} else {
869 		return (B_TRUE);
870 	}
871 }
872 
873 int
874 mac_unicst_set(mac_handle_t mh, const uint8_t *addr)
875 {
876 	mac_impl_t	*mip = (mac_impl_t *)mh;
877 	int		err;
878 	boolean_t	notify = B_FALSE;
879 
880 	ASSERT(mip->mi_unicst != NULL);
881 
882 	/*
883 	 * Verify the address.
884 	 */
885 	if ((err = mip->mi_type->mt_ops.mtops_unicst_verify(addr,
886 	    mip->mi_pdata)) != 0) {
887 		return (err);
888 	}
889 
890 	/*
891 	 * Program the new unicast address.
892 	 */
893 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
894 
895 	/*
896 	 * If address doesn't change, do nothing.
897 	 * This check is necessary otherwise it may call into mac_unicst_set
898 	 * recursively.
899 	 */
900 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0)
901 		goto done;
902 
903 	if ((err = mip->mi_unicst(mip->mi_driver, addr)) != 0)
904 		goto done;
905 
906 	/*
907 	 * Save the address and flag that we need to send a notification.
908 	 */
909 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
910 	notify = B_TRUE;
911 
912 done:
913 	rw_exit(&(mip->mi_data_lock));
914 
915 	if (notify)
916 		i_mac_notify(mip, MAC_NOTE_UNICST);
917 
918 	return (err);
919 }
920 
921 void
922 mac_unicst_get(mac_handle_t mh, uint8_t *addr)
923 {
924 	mac_impl_t	*mip = (mac_impl_t *)mh;
925 
926 	/*
927 	 * Copy out the current unicast source address.
928 	 */
929 	rw_enter(&(mip->mi_data_lock), RW_READER);
930 	bcopy(mip->mi_addr, addr, mip->mi_type->mt_addr_length);
931 	rw_exit(&(mip->mi_data_lock));
932 }
933 
934 void
935 mac_dest_get(mac_handle_t mh, uint8_t *addr)
936 {
937 	mac_impl_t	*mip = (mac_impl_t *)mh;
938 
939 	/*
940 	 * Copy out the current destination address.
941 	 */
942 	rw_enter(&(mip->mi_data_lock), RW_READER);
943 	bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length);
944 	rw_exit(&(mip->mi_data_lock));
945 }
946 
947 int
948 mac_promisc_set(mac_handle_t mh, boolean_t on, mac_promisc_type_t ptype)
949 {
950 	mac_impl_t	*mip = (mac_impl_t *)mh;
951 	int		err = 0;
952 
953 	ASSERT(mip->mi_setpromisc != NULL);
954 	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);
955 
956 	/*
957 	 * Determine whether we should enable or disable promiscuous mode.
958 	 * For details on the distinction between "device promiscuous mode"
959 	 * and "MAC promiscuous mode", see PSARC/2005/289.
960 	 */
961 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
962 	if (on) {
963 		/*
964 		 * Enable promiscuous mode on the device if not yet enabled.
965 		 */
966 		if (mip->mi_devpromisc++ == 0) {
967 			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
968 			if (err != 0) {
969 				mip->mi_devpromisc--;
970 				goto done;
971 			}
972 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
973 		}
974 
975 		/*
976 		 * Enable promiscuous mode on the MAC if not yet enabled.
977 		 */
978 		if (ptype == MAC_PROMISC && mip->mi_promisc++ == 0)
979 			i_mac_notify(mip, MAC_NOTE_PROMISC);
980 	} else {
981 		if (mip->mi_devpromisc == 0) {
982 			err = EPROTO;
983 			goto done;
984 		}
985 		/*
986 		 * Disable promiscuous mode on the device if this is the last
987 		 * enabling.
988 		 */
989 		if (--mip->mi_devpromisc == 0) {
990 			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
991 			if (err != 0) {
992 				mip->mi_devpromisc++;
993 				goto done;
994 			}
995 			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
996 		}
997 
998 		/*
999 		 * Disable promiscuous mode on the MAC if this is the last
1000 		 * enabling.
1001 		 */
1002 		if (ptype == MAC_PROMISC && --mip->mi_promisc == 0)
1003 			i_mac_notify(mip, MAC_NOTE_PROMISC);
1004 	}
1005 
1006 done:
1007 	rw_exit(&(mip->mi_data_lock));
1008 	return (err);
1009 }
1010 
1011 boolean_t
1012 mac_promisc_get(mac_handle_t mh, mac_promisc_type_t ptype)
1013 {
1014 	mac_impl_t		*mip = (mac_impl_t *)mh;
1015 
1016 	ASSERT(ptype == MAC_DEVPROMISC || ptype == MAC_PROMISC);
1017 
1018 	/*
1019 	 * Return the current promiscuity.
1020 	 */
1021 	if (ptype == MAC_DEVPROMISC)
1022 		return (mip->mi_devpromisc != 0);
1023 	else
1024 		return (mip->mi_promisc != 0);
1025 }
1026 
1027 void
1028 mac_sdu_get(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu)
1029 {
1030 	mac_impl_t	*mip = (mac_impl_t *)mh;
1031 
1032 	if (min_sdu != NULL)
1033 		*min_sdu = mip->mi_sdu_min;
1034 	if (max_sdu != NULL)
1035 		*max_sdu = mip->mi_sdu_max;
1036 }
1037 
1038 void
1039 mac_resources(mac_handle_t mh)
1040 {
1041 	mac_impl_t	*mip = (mac_impl_t *)mh;
1042 
1043 	/*
1044 	 * If the driver supports resource registration, call the driver to
1045 	 * ask it to register its resources.
1046 	 */
1047 	if (mip->mi_callbacks->mc_callbacks & MC_RESOURCES)
1048 		mip->mi_resources(mip->mi_driver);
1049 }
1050 
1051 void
1052 mac_ioctl(mac_handle_t mh, queue_t *wq, mblk_t *bp)
1053 {
1054 	mac_impl_t	*mip = (mac_impl_t *)mh;
1055 	int cmd = ((struct iocblk *)bp->b_rptr)->ioc_cmd;
1056 
1057 	if ((cmd == ND_GET && (mip->mi_callbacks->mc_callbacks & MC_GETPROP)) ||
1058 	    (cmd == ND_SET && (mip->mi_callbacks->mc_callbacks & MC_SETPROP))) {
1059 		/*
1060 		 * If ndd props were registered, call them.
1061 		 * Note that ndd ioctls are Obsolete
1062 		 */
1063 		mac_ndd_ioctl(mip, wq, bp);
1064 		return;
1065 	}
1066 
1067 	/*
1068 	 * Call the driver to handle the ioctl.  The driver may not support
1069 	 * any ioctls, in which case we reply with a NAK on its behalf.
1070 	 */
1071 	if (mip->mi_callbacks->mc_callbacks & MC_IOCTL)
1072 		mip->mi_ioctl(mip->mi_driver, wq, bp);
1073 	else
1074 		miocnak(wq, bp, 0, EINVAL);
1075 }
1076 
1077 const mac_txinfo_t *
1078 mac_do_tx_get(mac_handle_t mh, boolean_t is_vnic)
1079 {
1080 	mac_impl_t	*mip = (mac_impl_t *)mh;
1081 	mac_txinfo_t	*mtp;
1082 
1083 	/*
1084 	 * Grab the lock to prevent us from racing with MAC_PROMISC being
1085 	 * changed.  This is sufficient since MAC clients are careful to always
1086 	 * call mac_txloop_add() prior to enabling MAC_PROMISC, and to disable
1087 	 * MAC_PROMISC prior to calling mac_txloop_remove().
1088 	 */
1089 	rw_enter(&mip->mi_tx_lock, RW_READER);
1090 
1091 	if (mac_promisc_get(mh, MAC_PROMISC)) {
1092 		ASSERT(mip->mi_mtfp != NULL);
1093 		if (mip->mi_vnic_present && !is_vnic) {
1094 			mtp = &mip->mi_vnic_txloopinfo;
1095 		} else {
1096 			mtp = &mip->mi_txloopinfo;
1097 		}
1098 	} else {
1099 		if (mip->mi_vnic_present && !is_vnic) {
1100 			mtp = &mip->mi_vnic_txinfo;
1101 		} else {
1102 			/*
1103 			 * Note that we cannot ASSERT() that mip->mi_mtfp is
1104 			 * NULL, because to satisfy the above ASSERT(), we
1105 			 * have to disable MAC_PROMISC prior to calling
1106 			 * mac_txloop_remove().
1107 			 */
1108 			mtp = &mip->mi_txinfo;
1109 		}
1110 	}
1111 
1112 	rw_exit(&mip->mi_tx_lock);
1113 	return (mtp);
1114 }
1115 
1116 /*
1117  * Invoked by VNIC to obtain the transmit entry point.
1118  */
1119 const mac_txinfo_t *
1120 mac_vnic_tx_get(mac_handle_t mh)
1121 {
1122 	return (mac_do_tx_get(mh, B_TRUE));
1123 }
1124 
1125 /*
1126  * Invoked by any non-VNIC client to obtain the transmit entry point.
1127  * If a VNIC is present, the VNIC transmit function provided by the VNIC
1128  * will be returned to the MAC client.
1129  */
1130 const mac_txinfo_t *
1131 mac_tx_get(mac_handle_t mh)
1132 {
1133 	return (mac_do_tx_get(mh, B_FALSE));
1134 }
1135 
1136 link_state_t
1137 mac_link_get(mac_handle_t mh)
1138 {
1139 	return (((mac_impl_t *)mh)->mi_linkstate);
1140 }
1141 
1142 mac_notify_handle_t
1143 mac_notify_add(mac_handle_t mh, mac_notify_t notify, void *arg)
1144 {
1145 	mac_impl_t		*mip = (mac_impl_t *)mh;
1146 	mac_notify_fn_t		*mnfp;
1147 
1148 	mnfp = kmem_zalloc(sizeof (mac_notify_fn_t), KM_SLEEP);
1149 	mnfp->mnf_fn = notify;
1150 	mnfp->mnf_arg = arg;
1151 
1152 	/*
1153 	 * Add it to the head of the 'notify' callback list.
1154 	 */
1155 	rw_enter(&mip->mi_notify_lock, RW_WRITER);
1156 	mnfp->mnf_nextp = mip->mi_mnfp;
1157 	mip->mi_mnfp = mnfp;
1158 	rw_exit(&mip->mi_notify_lock);
1159 
1160 	return ((mac_notify_handle_t)mnfp);
1161 }
1162 
1163 void
1164 mac_notify_remove(mac_handle_t mh, mac_notify_handle_t mnh)
1165 {
1166 	mac_impl_t		*mip = (mac_impl_t *)mh;
1167 	mac_notify_fn_t		*mnfp = (mac_notify_fn_t *)mnh;
1168 	mac_notify_fn_t		**pp;
1169 	mac_notify_fn_t		*p;
1170 
1171 	/*
1172 	 * Search the 'notify' callback list for the function closure.
1173 	 */
1174 	rw_enter(&mip->mi_notify_lock, RW_WRITER);
1175 	for (pp = &(mip->mi_mnfp); (p = *pp) != NULL;
1176 	    pp = &(p->mnf_nextp)) {
1177 		if (p == mnfp)
1178 			break;
1179 	}
1180 	ASSERT(p != NULL);
1181 
1182 	/*
1183 	 * Remove it from the list.
1184 	 */
1185 	*pp = p->mnf_nextp;
1186 	rw_exit(&mip->mi_notify_lock);
1187 
1188 	/*
1189 	 * Free it.
1190 	 */
1191 	kmem_free(mnfp, sizeof (mac_notify_fn_t));
1192 }
1193 
1194 void
1195 mac_notify(mac_handle_t mh)
1196 {
1197 	mac_impl_t		*mip = (mac_impl_t *)mh;
1198 	mac_notify_type_t	type;
1199 
1200 	for (type = 0; type < MAC_NNOTE; type++)
1201 		i_mac_notify(mip, type);
1202 }
1203 
1204 /*
1205  * Register a receive function for this mac.
1206  * More information on this function's interaction with mac_rx()
1207  * can be found atop mac_rx().
1208  */
1209 mac_rx_handle_t
1210 mac_do_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg, boolean_t is_active)
1211 {
1212 	mac_impl_t	*mip = (mac_impl_t *)mh;
1213 	mac_rx_fn_t	*mrfp;
1214 
1215 	mrfp = kmem_zalloc(sizeof (mac_rx_fn_t), KM_SLEEP);
1216 	mrfp->mrf_fn = rx;
1217 	mrfp->mrf_arg = arg;
1218 	mrfp->mrf_active = is_active;
1219 
1220 	/*
1221 	 * Add it to the head of the 'rx' callback list.
1222 	 */
1223 	rw_enter(&(mip->mi_rx_lock), RW_WRITER);
1224 
1225 	/*
1226 	 * mac_rx() will only call callbacks that are marked inuse.
1227 	 */
1228 	mrfp->mrf_inuse = B_TRUE;
1229 	mrfp->mrf_nextp = mip->mi_mrfp;
1230 
1231 	/*
1232 	 * mac_rx() could be traversing the remainder of the list
1233 	 * and miss the new callback we're adding here. This is not a problem
1234 	 * because we do not guarantee the callback to take effect immediately
1235 	 * after mac_rx_add() returns.
1236 	 */
1237 	mip->mi_mrfp = mrfp;
1238 	rw_exit(&(mip->mi_rx_lock));
1239 
1240 	return ((mac_rx_handle_t)mrfp);
1241 }
1242 
1243 mac_rx_handle_t
1244 mac_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
1245 {
1246 	return (mac_do_rx_add(mh, rx, arg, B_FALSE));
1247 }
1248 
1249 mac_rx_handle_t
1250 mac_active_rx_add(mac_handle_t mh, mac_rx_t rx, void *arg)
1251 {
1252 	return (mac_do_rx_add(mh, rx, arg, B_TRUE));
1253 }
1254 
1255 /*
1256  * Unregister a receive function for this mac.
1257  * This function does not block if wait is B_FALSE. This is useful
1258  * for clients who call mac_rx_remove() from a non-blockable context.
1259  * More information on this function's interaction with mac_rx()
1260  * can be found atop mac_rx().
1261  */
1262 void
1263 mac_rx_remove(mac_handle_t mh, mac_rx_handle_t mrh, boolean_t wait)
1264 {
1265 	mac_impl_t		*mip = (mac_impl_t *)mh;
1266 	mac_rx_fn_t		*mrfp = (mac_rx_fn_t *)mrh;
1267 	mac_rx_fn_t		**pp;
1268 	mac_rx_fn_t		*p;
1269 
1270 	/*
1271 	 * Search the 'rx' callback list for the function closure.
1272 	 */
1273 	rw_enter(&mip->mi_rx_lock, RW_WRITER);
1274 	for (pp = &(mip->mi_mrfp); (p = *pp) != NULL; pp = &(p->mrf_nextp)) {
1275 		if (p == mrfp)
1276 			break;
1277 	}
1278 	ASSERT(p != NULL);
1279 
1280 	/*
1281 	 * If mac_rx() is running, mark callback for deletion
1282 	 * and return (if wait is false), or wait until mac_rx()
1283 	 * exits (if wait is true).
1284 	 */
1285 	if (mip->mi_rx_ref > 0) {
1286 		DTRACE_PROBE1(defer_delete, mac_impl_t *, mip);
1287 		p->mrf_inuse = B_FALSE;
1288 		mutex_enter(&mip->mi_lock);
1289 		mip->mi_rx_removed++;
1290 		mutex_exit(&mip->mi_lock);
1291 
1292 		rw_exit(&mip->mi_rx_lock);
1293 		if (wait)
1294 			mac_rx_remove_wait(mh);
1295 		return;
1296 	}
1297 
1298 	/* Remove it from the list. */
1299 	*pp = p->mrf_nextp;
1300 	kmem_free(mrfp, sizeof (mac_rx_fn_t));
1301 	rw_exit(&mip->mi_rx_lock);
1302 }
1303 
1304 /*
1305  * Wait for all pending callback removals to be completed by mac_rx().
1306  * Note that if we call mac_rx_remove() immediately before this, there is no
1307  * guarantee we would wait *only* on the callback that we specified.
1308  * mac_rx_remove() could have been called by other threads and we would have
1309  * to wait for other marked callbacks to be removed as well.
1310  */
1311 void
1312 mac_rx_remove_wait(mac_handle_t mh)
1313 {
1314 	mac_impl_t	*mip = (mac_impl_t *)mh;
1315 
1316 	mutex_enter(&mip->mi_lock);
1317 	while (mip->mi_rx_removed > 0) {
1318 		DTRACE_PROBE1(need_wait, mac_impl_t *, mip);
1319 		cv_wait(&mip->mi_rx_cv, &mip->mi_lock);
1320 	}
1321 	mutex_exit(&mip->mi_lock);
1322 }
1323 
1324 mac_txloop_handle_t
1325 mac_txloop_add(mac_handle_t mh, mac_txloop_t tx, void *arg)
1326 {
1327 	mac_impl_t	*mip = (mac_impl_t *)mh;
1328 	mac_txloop_fn_t	*mtfp;
1329 
1330 	mtfp = kmem_zalloc(sizeof (mac_txloop_fn_t), KM_SLEEP);
1331 	mtfp->mtf_fn = tx;
1332 	mtfp->mtf_arg = arg;
1333 
1334 	/*
1335 	 * Add it to the head of the 'tx' callback list.
1336 	 */
1337 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1338 	mtfp->mtf_nextp = mip->mi_mtfp;
1339 	mip->mi_mtfp = mtfp;
1340 	rw_exit(&(mip->mi_tx_lock));
1341 
1342 	return ((mac_txloop_handle_t)mtfp);
1343 }
1344 
1345 /*
1346  * Unregister a transmit function for this mac.  This removes the function
1347  * from the list of transmit functions for this mac.
1348  */
1349 void
1350 mac_txloop_remove(mac_handle_t mh, mac_txloop_handle_t mth)
1351 {
1352 	mac_impl_t		*mip = (mac_impl_t *)mh;
1353 	mac_txloop_fn_t		*mtfp = (mac_txloop_fn_t *)mth;
1354 	mac_txloop_fn_t		**pp;
1355 	mac_txloop_fn_t		*p;
1356 
1357 	/*
1358 	 * Search the 'tx' callback list for the function.
1359 	 */
1360 	rw_enter(&(mip->mi_tx_lock), RW_WRITER);
1361 	for (pp = &(mip->mi_mtfp); (p = *pp) != NULL; pp = &(p->mtf_nextp)) {
1362 		if (p == mtfp)
1363 			break;
1364 	}
1365 	ASSERT(p != NULL);
1366 
1367 	/* Remove it from the list. */
1368 	*pp = p->mtf_nextp;
1369 	kmem_free(mtfp, sizeof (mac_txloop_fn_t));
1370 	rw_exit(&(mip->mi_tx_lock));
1371 }
1372 
1373 void
1374 mac_resource_set(mac_handle_t mh, mac_resource_add_t add, void *arg)
1375 {
1376 	mac_impl_t		*mip = (mac_impl_t *)mh;
1377 
1378 	/*
1379 	 * Update the 'resource_add' callbacks.
1380 	 */
1381 	rw_enter(&(mip->mi_resource_lock), RW_WRITER);
1382 	mip->mi_resource_add = add;
1383 	mip->mi_resource_add_arg = arg;
1384 	rw_exit(&(mip->mi_resource_lock));
1385 }
1386 
1387 /*
1388  * Driver support functions.
1389  */
1390 
1391 mac_register_t *
1392 mac_alloc(uint_t mac_version)
1393 {
1394 	mac_register_t *mregp;
1395 
1396 	/*
1397 	 * Make sure there isn't a version mismatch between the driver and
1398 	 * the framework.  In the future, if multiple versions are
1399 	 * supported, this check could become more sophisticated.
1400 	 */
1401 	if (mac_version != MAC_VERSION)
1402 		return (NULL);
1403 
1404 	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
1405 	mregp->m_version = mac_version;
1406 	return (mregp);
1407 }
1408 
1409 void
1410 mac_free(mac_register_t *mregp)
1411 {
1412 	kmem_free(mregp, sizeof (mac_register_t));
1413 }
1414 
1415 /*
1416  * Allocate a minor number.
1417  */
1418 minor_t
1419 mac_minor_hold(boolean_t sleep)
1420 {
1421 	minor_t	minor;
1422 
1423 	/*
1424 	 * Grab a value from the arena.
1425 	 */
1426 	atomic_add_32(&minor_count, 1);
1427 
1428 	if (sleep)
1429 		minor = (uint_t)id_alloc(minor_ids);
1430 	else
1431 		minor = (uint_t)id_alloc_nosleep(minor_ids);
1432 
1433 	if (minor == 0) {
1434 		atomic_add_32(&minor_count, -1);
1435 		return (0);
1436 	}
1437 
1438 	return (minor);
1439 }
1440 
1441 /*
1442  * Release a previously allocated minor number.
1443  */
1444 void
1445 mac_minor_rele(minor_t minor)
1446 {
1447 	/*
1448 	 * Return the value to the arena.
1449 	 */
1450 	id_free(minor_ids, minor);
1451 	atomic_add_32(&minor_count, -1);
1452 }
1453 
1454 uint32_t
1455 mac_no_notification(mac_handle_t mh)
1456 {
1457 	mac_impl_t *mip = (mac_impl_t *)mh;
1458 	return (mip->mi_unsup_note);
1459 }
1460 
1461 boolean_t
1462 mac_is_legacy(mac_handle_t mh)
1463 {
1464 	mac_impl_t *mip = (mac_impl_t *)mh;
1465 	return (mip->mi_legacy);
1466 }
1467 
1468 /*
1469  * mac_register() is how drivers register new MACs with the GLDv3
1470  * framework.  The mregp argument is allocated by drivers using the
1471  * mac_alloc() function, and can be freed using mac_free() immediately upon
1472  * return from mac_register().  Upon success (0 return value), the mhp
1473  * opaque pointer becomes the driver's handle to its MAC interface, and is
1474  * the argument to all other mac module entry points.
1475  */
1476 int
1477 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
1478 {
1479 	mac_impl_t		*mip;
1480 	mactype_t		*mtype;
1481 	int			err = EINVAL;
1482 	struct devnames		*dnp = NULL;
1483 	uint_t			instance;
1484 	boolean_t		style1_created = B_FALSE;
1485 	boolean_t		style2_created = B_FALSE;
1486 	mac_capab_legacy_t	legacy;
1487 	char			*driver;
1488 	minor_t			minor = 0;
1489 
1490 	/* Find the required MAC-Type plugin. */
1491 	if ((mtype = i_mactype_getplugin(mregp->m_type_ident)) == NULL)
1492 		return (EINVAL);
1493 
1494 	/* Create a mac_impl_t to represent this MAC. */
1495 	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
1496 
1497 	/*
1498 	 * The mac is not ready for open yet.
1499 	 */
1500 	mip->mi_disabled = B_TRUE;
1501 
1502 	/*
1503 	 * When a mac is registered, the m_instance field can be set to:
1504 	 *
1505 	 *  0:	Get the mac's instance number from m_dip.
1506 	 *	This is usually used for physical device dips.
1507 	 *
1508 	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
1509 	 *	For example, when an aggregation is created with the key option,
1510 	 *	"key" will be used as the instance number.
1511 	 *
1512 	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
1513 	 *	This is often used when a MAC of a virtual link is registered
1514 	 *	(e.g., aggregation when "key" is not specified, or vnic).
1515 	 *
1516 	 * Note that the instance number is used to derive the mi_minor field
1517 	 * of mac_impl_t, which will then be used to derive the name of kstats
1518 	 * and the devfs nodes.  The first 2 cases are needed to preserve
1519 	 * backward compatibility.
1520 	 */
1521 	switch (mregp->m_instance) {
1522 	case 0:
1523 		instance = ddi_get_instance(mregp->m_dip);
1524 		break;
1525 	case ((uint_t)-1):
1526 		minor = mac_minor_hold(B_TRUE);
1527 		if (minor == 0) {
1528 			err = ENOSPC;
1529 			goto fail;
1530 		}
1531 		instance = minor - 1;
1532 		break;
1533 	default:
1534 		instance = mregp->m_instance;
1535 		if (instance >= MAC_MAX_MINOR) {
1536 			err = EINVAL;
1537 			goto fail;
1538 		}
1539 		break;
1540 	}
1541 
1542 	mip->mi_minor = (minor_t)(instance + 1);
1543 	mip->mi_dip = mregp->m_dip;
1544 
1545 	driver = (char *)ddi_driver_name(mip->mi_dip);
1546 
1547 	/* Construct the MAC name as <drvname><instance> */
1548 	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
1549 	    driver, instance);
1550 
1551 	mip->mi_driver = mregp->m_driver;
1552 
1553 	mip->mi_type = mtype;
1554 	mip->mi_margin = mregp->m_margin;
1555 	mip->mi_info.mi_media = mtype->mt_type;
1556 	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
1557 	if (mregp->m_max_sdu <= mregp->m_min_sdu)
1558 		goto fail;
1559 	mip->mi_sdu_min = mregp->m_min_sdu;
1560 	mip->mi_sdu_max = mregp->m_max_sdu;
1561 	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
1562 	/*
1563 	 * If the media supports a broadcast address, cache a pointer to it
1564 	 * in the mac_info_t so that upper layers can use it.
1565 	 */
1566 	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
1567 
1568 	/*
1569 	 * Copy the unicast source address into the mac_info_t, but only if
1570 	 * the MAC-Type defines a non-zero address length.  We need to
1571 	 * handle MAC-Types that have an address length of 0
1572 	 * (point-to-point protocol MACs for example).
1573 	 */
1574 	if (mip->mi_type->mt_addr_length > 0) {
1575 		if (mregp->m_src_addr == NULL)
1576 			goto fail;
1577 		mip->mi_info.mi_unicst_addr =
1578 		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
1579 		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
1580 		    mip->mi_type->mt_addr_length);
1581 
1582 		/*
1583 		 * Copy the fixed 'factory' MAC address from the immutable
1584 		 * info.  This is taken to be the MAC address currently in
1585 		 * use.
1586 		 */
1587 		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
1588 		    mip->mi_type->mt_addr_length);
1589 		/* Copy the destination address if one is provided. */
1590 		if (mregp->m_dst_addr != NULL) {
1591 			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
1592 			    mip->mi_type->mt_addr_length);
1593 		}
1594 	} else if (mregp->m_src_addr != NULL) {
1595 		goto fail;
1596 	}
1597 
1598 	/*
1599 	 * The format of the m_pdata is specific to the plugin.  It is
1600 	 * passed in as an argument to all of the plugin callbacks.  The
1601 	 * driver can update this information by calling
1602 	 * mac_pdata_update().
1603 	 */
1604 	if (mregp->m_pdata != NULL) {
1605 		/*
1606 		 * Verify that the plugin supports MAC plugin data and that
1607 		 * the supplied data is valid.
1608 		 */
1609 		if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
1610 			goto fail;
1611 		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
1612 		    mregp->m_pdata_size)) {
1613 			goto fail;
1614 		}
1615 		mip->mi_pdata = kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
1616 		bcopy(mregp->m_pdata, mip->mi_pdata, mregp->m_pdata_size);
1617 		mip->mi_pdata_size = mregp->m_pdata_size;
1618 	}
1619 
1620 	/*
1621 	 * Register the private properties.
1622 	 */
1623 	mac_register_priv_prop(mip, mregp->m_priv_props,
1624 	    mregp->m_priv_prop_count);
1625 
1626 	/*
1627 	 * Stash the driver callbacks into the mac_impl_t, but first sanity
1628 	 * check to make sure all mandatory callbacks are set.
1629 	 */
1630 	if (mregp->m_callbacks->mc_getstat == NULL ||
1631 	    mregp->m_callbacks->mc_start == NULL ||
1632 	    mregp->m_callbacks->mc_stop == NULL ||
1633 	    mregp->m_callbacks->mc_setpromisc == NULL ||
1634 	    mregp->m_callbacks->mc_multicst == NULL ||
1635 	    mregp->m_callbacks->mc_unicst == NULL ||
1636 	    mregp->m_callbacks->mc_tx == NULL) {
1637 		goto fail;
1638 	}
1639 	mip->mi_callbacks = mregp->m_callbacks;
1640 
1641 	/*
1642 	 * Set up the possible transmit routines.
1643 	 */
1644 	mip->mi_txinfo.mt_fn = mip->mi_tx;
1645 	mip->mi_txinfo.mt_arg = mip->mi_driver;
1646 
1647 	mip->mi_legacy = mac_capab_get((mac_handle_t)mip,
1648 	    MAC_CAPAB_LEGACY, &legacy);
1649 
1650 	if (mip->mi_legacy) {
1651 		/*
1652 		 * Legacy device. Messages being sent will be looped back
1653 		 * by the underlying driver. Therefore the txloop function
1654 		 * pointer is the same as the tx function pointer.
1655 		 */
1656 		mip->mi_txloopinfo.mt_fn = mip->mi_txinfo.mt_fn;
1657 		mip->mi_txloopinfo.mt_arg = mip->mi_txinfo.mt_arg;
1658 		mip->mi_unsup_note = legacy.ml_unsup_note;
1659 		mip->mi_phy_dev = legacy.ml_dev;
1660 	} else {
1661 		/*
1662 		 * Normal device. The framework needs to do the loopback.
1663 		 */
1664 		mip->mi_txloopinfo.mt_fn = mac_txloop;
1665 		mip->mi_txloopinfo.mt_arg = mip;
1666 		mip->mi_unsup_note = 0;
1667 		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
1668 		    ddi_get_instance(mip->mi_dip) + 1);
1669 	}
1670 
1671 	mip->mi_vnic_txinfo.mt_fn = mac_vnic_tx;
1672 	mip->mi_vnic_txinfo.mt_arg = mip;
1673 
1674 	mip->mi_vnic_txloopinfo.mt_fn = mac_vnic_txloop;
1675 	mip->mi_vnic_txloopinfo.mt_arg = mip;
1676 
1677 	/*
1678 	 * Allocate a notification thread.
1679 	 */
1680 	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
1681 	    mip, 0, &p0, TS_RUN, minclsyspri);
1682 	if (mip->mi_notify_thread == NULL)
1683 		goto fail;
1684 
1685 	/*
1686 	 * Initialize the kstats for this device.
1687 	 */
1688 	mac_stat_create(mip);
1689 
1690 
1691 	/* set the gldv3 flag in dn_flags */
1692 	dnp = &devnamesp[ddi_driver_major(mip->mi_dip)];
1693 	LOCK_DEV_OPS(&dnp->dn_lock);
1694 	dnp->dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
1695 	UNLOCK_DEV_OPS(&dnp->dn_lock);
1696 
1697 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
1698 		/* Create a style-2 DLPI device */
1699 		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
1700 		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
1701 			goto fail;
1702 		style2_created = B_TRUE;
1703 
1704 		/* Create a style-1 DLPI device */
1705 		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
1706 		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
1707 			goto fail;
1708 		style1_created = B_TRUE;
1709 	}
1710 
1711 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1712 	if (mod_hash_insert(i_mac_impl_hash,
1713 	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
1714 
1715 		rw_exit(&i_mac_impl_lock);
1716 		err = EEXIST;
1717 		goto fail;
1718 	}
1719 
1720 	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
1721 	    (mac_impl_t *), mip);
1722 
1723 	/*
1724 	 * Mark the MAC to be ready for open.
1725 	 */
1726 	mip->mi_disabled = B_FALSE;
1727 
1728 	rw_exit(&i_mac_impl_lock);
1729 
1730 	atomic_inc_32(&i_mac_impl_count);
1731 
1732 	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
1733 	*mhp = (mac_handle_t)mip;
1734 	return (0);
1735 
1736 fail:
1737 	if (style1_created)
1738 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
1739 
1740 	if (style2_created)
1741 		ddi_remove_minor_node(mip->mi_dip, driver);
1742 
1743 	/* clean up notification thread */
1744 	if (mip->mi_notify_thread != NULL) {
1745 		mutex_enter(&mip->mi_notify_bits_lock);
1746 		mip->mi_notify_bits = (1 << MAC_NNOTE);
1747 		cv_broadcast(&mip->mi_notify_cv);
1748 		while (mip->mi_notify_bits != 0)
1749 			cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
1750 		mutex_exit(&mip->mi_notify_bits_lock);
1751 	}
1752 
1753 	if (mip->mi_info.mi_unicst_addr != NULL) {
1754 		kmem_free(mip->mi_info.mi_unicst_addr,
1755 		    mip->mi_type->mt_addr_length);
1756 		mip->mi_info.mi_unicst_addr = NULL;
1757 	}
1758 
1759 	mac_stat_destroy(mip);
1760 
1761 	if (mip->mi_type != NULL) {
1762 		atomic_dec_32(&mip->mi_type->mt_ref);
1763 		mip->mi_type = NULL;
1764 	}
1765 
1766 	if (mip->mi_pdata != NULL) {
1767 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1768 		mip->mi_pdata = NULL;
1769 		mip->mi_pdata_size = 0;
1770 	}
1771 
1772 	if (minor != 0) {
1773 		ASSERT(minor > MAC_MAX_MINOR);
1774 		mac_minor_rele(minor);
1775 	}
1776 
1777 	mac_unregister_priv_prop(mip);
1778 
1779 	kmem_cache_free(i_mac_impl_cachep, mip);
1780 	return (err);
1781 }
1782 
1783 int
1784 mac_disable(mac_handle_t mh)
1785 {
1786 	mac_impl_t		*mip = (mac_impl_t *)mh;
1787 
1788 	/*
1789 	 * See if there are any other references to this mac_t (e.g., VLAN's).
1790 	 * If not, set mi_disabled to prevent any new VLAN's from being
1791 	 * created while we're destroying this mac.
1792 	 */
1793 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1794 	if (mip->mi_ref > 0) {
1795 		rw_exit(&i_mac_impl_lock);
1796 		return (EBUSY);
1797 	}
1798 	mip->mi_disabled = B_TRUE;
1799 	rw_exit(&i_mac_impl_lock);
1800 	return (0);
1801 }
1802 
1803 int
1804 mac_unregister(mac_handle_t mh)
1805 {
1806 	int			err;
1807 	mac_impl_t		*mip = (mac_impl_t *)mh;
1808 	mod_hash_val_t		val;
1809 	mac_multicst_addr_t	*p, *nextp;
1810 	mac_margin_req_t	*mmr, *nextmmr;
1811 
1812 	/*
1813 	 * See if there are any other references to this mac_t (e.g., VLAN's).
1814 	 * If not, set mi_disabled to prevent any new VLAN's from being
1815 	 * created while we're destroying this mac. Once mac_disable() returns
1816 	 * 0, the rest of mac_unregister() stuff should continue without
1817 	 * returning an error.
1818 	 */
1819 	if (!mip->mi_disabled) {
1820 		if ((err = mac_disable(mh)) != 0)
1821 			return (err);
1822 	}
1823 
1824 	/*
1825 	 * Clean up notification thread (wait for it to exit).
1826 	 */
1827 	mutex_enter(&mip->mi_notify_bits_lock);
1828 	mip->mi_notify_bits = (1 << MAC_NNOTE);
1829 	cv_broadcast(&mip->mi_notify_cv);
1830 	while (mip->mi_notify_bits != 0)
1831 		cv_wait(&mip->mi_notify_cv, &mip->mi_notify_bits_lock);
1832 	mutex_exit(&mip->mi_notify_bits_lock);
1833 
1834 	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
1835 		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
1836 		ddi_remove_minor_node(mip->mi_dip,
1837 		    (char *)ddi_driver_name(mip->mi_dip));
1838 	}
1839 
1840 	ASSERT(!mip->mi_activelink);
1841 
1842 	mac_stat_destroy(mip);
1843 
1844 	rw_enter(&i_mac_impl_lock, RW_WRITER);
1845 	(void) mod_hash_remove(i_mac_impl_hash,
1846 	    (mod_hash_key_t)mip->mi_name, &val);
1847 	ASSERT(mip == (mac_impl_t *)val);
1848 
1849 	ASSERT(i_mac_impl_count > 0);
1850 	atomic_dec_32(&i_mac_impl_count);
1851 	rw_exit(&i_mac_impl_lock);
1852 
1853 	if (mip->mi_pdata != NULL)
1854 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
1855 	mip->mi_pdata = NULL;
1856 	mip->mi_pdata_size = 0;
1857 
1858 	/*
1859 	 * Free the list of multicast addresses.
1860 	 */
1861 	for (p = mip->mi_mmap; p != NULL; p = nextp) {
1862 		nextp = p->mma_nextp;
1863 		kmem_free(p, sizeof (mac_multicst_addr_t));
1864 	}
1865 	mip->mi_mmap = NULL;
1866 
1867 	/*
1868 	 * Free the list of margin request.
1869 	 */
1870 	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
1871 		nextmmr = mmr->mmr_nextp;
1872 		kmem_free(mmr, sizeof (mac_margin_req_t));
1873 	}
1874 	mip->mi_mmrp = NULL;
1875 
1876 	mip->mi_linkstate = LINK_STATE_UNKNOWN;
1877 	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
1878 	mip->mi_info.mi_unicst_addr = NULL;
1879 
1880 	atomic_dec_32(&mip->mi_type->mt_ref);
1881 	mip->mi_type = NULL;
1882 
1883 	if (mip->mi_minor > MAC_MAX_MINOR)
1884 		mac_minor_rele(mip->mi_minor);
1885 
1886 	mac_unregister_priv_prop(mip);
1887 
1888 	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
1889 
1890 	kmem_cache_free(i_mac_impl_cachep, mip);
1891 
1892 	return (0);
1893 }
1894 
1895 /*
1896  * To avoid potential deadlocks, mac_rx() releases mi_rx_lock
1897  * before invoking its list of upcalls. This introduces races with
1898  * mac_rx_remove() and mac_rx_add(), who can potentially modify the
1899  * upcall list while mi_rx_lock is not being held. The race with
1900  * mac_rx_remove() is handled by incrementing mi_rx_ref upon entering
1901  * mac_rx(); a non-zero mi_rx_ref would tell mac_rx_remove()
1902  * to not modify the list but instead mark an upcall for deletion.
1903  * before mac_rx() exits, mi_rx_ref is decremented and if it
1904  * is 0, the marked upcalls will be removed from the list and freed.
1905  * The race with mac_rx_add() is harmless because mac_rx_add() only
1906  * prepends to the list and since mac_rx() saves the list head
1907  * before releasing mi_rx_lock, any prepended upcall won't be seen
1908  * until the next packet chain arrives.
1909  *
1910  * To minimize lock contention between multiple parallel invocations
1911  * of mac_rx(), mi_rx_lock is acquired as a READER lock. The
1912  * use of atomic operations ensures the sanity of mi_rx_ref. mi_rx_lock
1913  * will be upgraded to WRITER mode when there are marked upcalls to be
1914  * cleaned.
1915  */
1916 static void
1917 mac_do_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain,
1918     boolean_t active_only)
1919 {
1920 	mac_impl_t	*mip = (mac_impl_t *)mh;
1921 	mblk_t		*bp = mp_chain;
1922 	mac_rx_fn_t	*mrfp;
1923 
1924 	/*
1925 	 * Call all registered receive functions.
1926 	 */
1927 	rw_enter(&mip->mi_rx_lock, RW_READER);
1928 	if ((mrfp = mip->mi_mrfp) == NULL) {
1929 		/* There are no registered receive functions. */
1930 		freemsgchain(bp);
1931 		rw_exit(&mip->mi_rx_lock);
1932 		return;
1933 	}
1934 	atomic_inc_32(&mip->mi_rx_ref);
1935 	rw_exit(&mip->mi_rx_lock);
1936 
1937 	/*
1938 	 * Call registered receive functions.
1939 	 */
1940 	do {
1941 		mblk_t *recv_bp;
1942 
1943 		if (active_only && !mrfp->mrf_active) {
1944 			mrfp = mrfp->mrf_nextp;
1945 			if (mrfp == NULL) {
1946 				/*
1947 				 * We hit the last receiver, but it's not
1948 				 * active.
1949 				 */
1950 				freemsgchain(bp);
1951 			}
1952 			continue;
1953 		}
1954 
1955 		recv_bp = (mrfp->mrf_nextp != NULL) ? copymsgchain(bp) : bp;
1956 		if (recv_bp != NULL) {
1957 			if (mrfp->mrf_inuse) {
1958 				/*
1959 				 * Send bp itself and keep the copy.
1960 				 * If there's only one active receiver,
1961 				 * it should get the original message,
1962 				 * tagged with the hardware checksum flags.
1963 				 */
1964 				mrfp->mrf_fn(mrfp->mrf_arg, mrh, bp);
1965 				bp = recv_bp;
1966 			} else {
1967 				freemsgchain(recv_bp);
1968 			}
1969 		}
1970 
1971 		mrfp = mrfp->mrf_nextp;
1972 	} while (mrfp != NULL);
1973 
1974 	rw_enter(&mip->mi_rx_lock, RW_READER);
1975 	if (atomic_dec_32_nv(&mip->mi_rx_ref) == 0 && mip->mi_rx_removed > 0) {
1976 		mac_rx_fn_t	**pp, *p;
1977 		uint32_t	cnt = 0;
1978 
1979 		DTRACE_PROBE1(delete_callbacks, mac_impl_t *, mip);
1980 
1981 		/*
1982 		 * Need to become exclusive before doing cleanup
1983 		 */
1984 		if (rw_tryupgrade(&mip->mi_rx_lock) == 0) {
1985 			rw_exit(&mip->mi_rx_lock);
1986 			rw_enter(&mip->mi_rx_lock, RW_WRITER);
1987 		}
1988 
1989 		/*
1990 		 * We return if another thread has already entered and cleaned
1991 		 * up the list.
1992 		 */
1993 		if (mip->mi_rx_ref > 0 || mip->mi_rx_removed == 0) {
1994 			rw_exit(&mip->mi_rx_lock);
1995 			return;
1996 		}
1997 
1998 		/*
1999 		 * Free removed callbacks.
2000 		 */
2001 		pp = &mip->mi_mrfp;
2002 		while (*pp != NULL) {
2003 			if (!(*pp)->mrf_inuse) {
2004 				p = *pp;
2005 				*pp = (*pp)->mrf_nextp;
2006 				kmem_free(p, sizeof (*p));
2007 				cnt++;
2008 				continue;
2009 			}
2010 			pp = &(*pp)->mrf_nextp;
2011 		}
2012 
2013 		/*
2014 		 * Wake up mac_rx_remove_wait()
2015 		 */
2016 		mutex_enter(&mip->mi_lock);
2017 		ASSERT(mip->mi_rx_removed == cnt);
2018 		mip->mi_rx_removed = 0;
2019 		cv_broadcast(&mip->mi_rx_cv);
2020 		mutex_exit(&mip->mi_lock);
2021 	}
2022 	rw_exit(&mip->mi_rx_lock);
2023 }
2024 
2025 void
2026 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
2027 {
2028 	mac_do_rx(mh, mrh, mp_chain, B_FALSE);
2029 }
2030 
2031 /*
2032  * Send a packet chain up to the receive callbacks which declared
2033  * themselves as being active.
2034  */
2035 void
2036 mac_active_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp_chain)
2037 {
2038 	mac_do_rx(arg, mrh, mp_chain, B_TRUE);
2039 }
2040 
2041 /*
2042  * Function passed to the active client sharing a VNIC. This function
2043  * is returned by mac_tx_get() when a VNIC is present. It invokes
2044  * the VNIC transmit entry point which was specified by the VNIC when
2045  * it called mac_vnic_set(). The VNIC transmit entry point will
2046  * pass the packets to the local VNICs and/or to the underlying VNICs
2047  * if needed.
2048  */
2049 static mblk_t *
2050 mac_vnic_tx(void *arg, mblk_t *mp)
2051 {
2052 	mac_impl_t	*mip = arg;
2053 	mac_txinfo_t	*mtfp;
2054 	mac_vnic_tx_t	*mvt;
2055 
2056 	/*
2057 	 * There is a race between the notification of the VNIC
2058 	 * addition and removal, and the processing of the VNIC notification
2059 	 * by the MAC client. During this window, it is possible for
2060 	 * an active MAC client to contine invoking mac_vnic_tx() while
2061 	 * the VNIC has already been removed. So we cannot assume
2062 	 * that mi_vnic_present will always be true when mac_vnic_tx()
2063 	 * is invoked.
2064 	 */
2065 	rw_enter(&mip->mi_tx_lock, RW_READER);
2066 	if (!mip->mi_vnic_present) {
2067 		rw_exit(&mip->mi_tx_lock);
2068 		freemsgchain(mp);
2069 		return (NULL);
2070 	}
2071 
2072 	ASSERT(mip->mi_vnic_tx != NULL);
2073 	mvt = mip->mi_vnic_tx;
2074 	MAC_VNIC_TXINFO_REFHOLD(mvt);
2075 	rw_exit(&mip->mi_tx_lock);
2076 
2077 	mtfp = &mvt->mv_txinfo;
2078 	mtfp->mt_fn(mtfp->mt_arg, mp);
2079 
2080 	MAC_VNIC_TXINFO_REFRELE(mvt);
2081 	return (NULL);
2082 }
2083 
2084 /*
2085  * Transmit function -- ONLY used when there are registered loopback listeners.
2086  */
2087 mblk_t *
2088 mac_do_txloop(void *arg, mblk_t *bp, boolean_t call_vnic)
2089 {
2090 	mac_impl_t	*mip = arg;
2091 	mac_txloop_fn_t	*mtfp;
2092 	mblk_t		*loop_bp, *resid_bp, *next_bp;
2093 
2094 	if (call_vnic) {
2095 		/*
2096 		 * In promiscous mode, a copy of the sent packet will
2097 		 * be sent to the client's promiscous receive entry
2098 		 * points via mac_vnic_tx()->
2099 		 * mac_active_rx_promisc()->mac_rx_default().
2100 		 */
2101 		return (mac_vnic_tx(arg, bp));
2102 	}
2103 
2104 	while (bp != NULL) {
2105 		next_bp = bp->b_next;
2106 		bp->b_next = NULL;
2107 
2108 		if ((loop_bp = copymsg(bp)) == NULL)
2109 			goto noresources;
2110 
2111 		if ((resid_bp = mip->mi_tx(mip->mi_driver, bp)) != NULL) {
2112 			ASSERT(resid_bp == bp);
2113 			freemsg(loop_bp);
2114 			goto noresources;
2115 		}
2116 
2117 		rw_enter(&mip->mi_tx_lock, RW_READER);
2118 		mtfp = mip->mi_mtfp;
2119 		while (mtfp != NULL && loop_bp != NULL) {
2120 			bp = loop_bp;
2121 
2122 			/* XXX counter bump if copymsg() fails? */
2123 			if (mtfp->mtf_nextp != NULL)
2124 				loop_bp = copymsg(bp);
2125 			else
2126 				loop_bp = NULL;
2127 
2128 			mtfp->mtf_fn(mtfp->mtf_arg, bp);
2129 			mtfp = mtfp->mtf_nextp;
2130 		}
2131 		rw_exit(&mip->mi_tx_lock);
2132 
2133 		/*
2134 		 * It's possible we've raced with the disabling of promiscuous
2135 		 * mode, in which case we can discard our copy.
2136 		 */
2137 		if (loop_bp != NULL)
2138 			freemsg(loop_bp);
2139 
2140 		bp = next_bp;
2141 	}
2142 
2143 	return (NULL);
2144 
2145 noresources:
2146 	bp->b_next = next_bp;
2147 	return (bp);
2148 }
2149 
2150 mblk_t *
2151 mac_txloop(void *arg, mblk_t *bp)
2152 {
2153 	return (mac_do_txloop(arg, bp, B_FALSE));
2154 }
2155 
2156 static mblk_t *
2157 mac_vnic_txloop(void *arg, mblk_t *bp)
2158 {
2159 	return (mac_do_txloop(arg, bp, B_TRUE));
2160 }
2161 
2162 void
2163 mac_link_update(mac_handle_t mh, link_state_t link)
2164 {
2165 	mac_impl_t	*mip = (mac_impl_t *)mh;
2166 
2167 	/*
2168 	 * Save the link state.
2169 	 */
2170 	mip->mi_linkstate = link;
2171 
2172 	/*
2173 	 * Send a MAC_NOTE_LINK notification.
2174 	 */
2175 	i_mac_notify(mip, MAC_NOTE_LINK);
2176 }
2177 
2178 void
2179 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
2180 {
2181 	mac_impl_t	*mip = (mac_impl_t *)mh;
2182 
2183 	if (mip->mi_type->mt_addr_length == 0)
2184 		return;
2185 
2186 	/*
2187 	 * If the address has not changed, do nothing.
2188 	 */
2189 	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0)
2190 		return;
2191 
2192 	/*
2193 	 * Save the address.
2194 	 */
2195 	bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
2196 
2197 	/*
2198 	 * Send a MAC_NOTE_UNICST notification.
2199 	 */
2200 	i_mac_notify(mip, MAC_NOTE_UNICST);
2201 }
2202 
2203 void
2204 mac_tx_update(mac_handle_t mh)
2205 {
2206 	/*
2207 	 * Send a MAC_NOTE_TX notification.
2208 	 */
2209 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_TX);
2210 }
2211 
2212 void
2213 mac_resource_update(mac_handle_t mh)
2214 {
2215 	/*
2216 	 * Send a MAC_NOTE_RESOURCE notification.
2217 	 */
2218 	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_RESOURCE);
2219 }
2220 
2221 mac_resource_handle_t
2222 mac_resource_add(mac_handle_t mh, mac_resource_t *mrp)
2223 {
2224 	mac_impl_t		*mip = (mac_impl_t *)mh;
2225 	mac_resource_handle_t	mrh;
2226 	mac_resource_add_t	add;
2227 	void			*arg;
2228 
2229 	rw_enter(&mip->mi_resource_lock, RW_READER);
2230 	add = mip->mi_resource_add;
2231 	arg = mip->mi_resource_add_arg;
2232 
2233 	if (add != NULL)
2234 		mrh = add(arg, mrp);
2235 	else
2236 		mrh = NULL;
2237 	rw_exit(&mip->mi_resource_lock);
2238 
2239 	return (mrh);
2240 }
2241 
2242 int
2243 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
2244 {
2245 	mac_impl_t	*mip = (mac_impl_t *)mh;
2246 
2247 	/*
2248 	 * Verify that the plugin supports MAC plugin data and that the
2249 	 * supplied data is valid.
2250 	 */
2251 	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
2252 		return (EINVAL);
2253 	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
2254 		return (EINVAL);
2255 
2256 	if (mip->mi_pdata != NULL)
2257 		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
2258 
2259 	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
2260 	bcopy(mac_pdata, mip->mi_pdata, dsize);
2261 	mip->mi_pdata_size = dsize;
2262 
2263 	/*
2264 	 * Since the MAC plugin data is used to construct MAC headers that
2265 	 * were cached in fast-path headers, we need to flush fast-path
2266 	 * information for links associated with this mac.
2267 	 */
2268 	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
2269 	return (0);
2270 }
2271 
2272 void
2273 mac_multicst_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
2274     boolean_t add)
2275 {
2276 	mac_impl_t		*mip = (mac_impl_t *)mh;
2277 	mac_multicst_addr_t	*p;
2278 
2279 	/*
2280 	 * If no specific refresh function was given then default to the
2281 	 * driver's m_multicst entry point.
2282 	 */
2283 	if (refresh == NULL) {
2284 		refresh = mip->mi_multicst;
2285 		arg = mip->mi_driver;
2286 	}
2287 	ASSERT(refresh != NULL);
2288 
2289 	/*
2290 	 * Walk the multicast address list and call the refresh function for
2291 	 * each address.
2292 	 */
2293 	rw_enter(&(mip->mi_data_lock), RW_READER);
2294 	for (p = mip->mi_mmap; p != NULL; p = p->mma_nextp)
2295 		refresh(arg, add, p->mma_addr);
2296 	rw_exit(&(mip->mi_data_lock));
2297 }
2298 
2299 void
2300 mac_unicst_refresh(mac_handle_t mh, mac_unicst_t refresh, void *arg)
2301 {
2302 	mac_impl_t	*mip = (mac_impl_t *)mh;
2303 	/*
2304 	 * If no specific refresh function was given then default to the
2305 	 * driver's mi_unicst entry point.
2306 	 */
2307 	if (refresh == NULL) {
2308 		refresh = mip->mi_unicst;
2309 		arg = mip->mi_driver;
2310 	}
2311 	ASSERT(refresh != NULL);
2312 
2313 	/*
2314 	 * Call the refresh function with the current unicast address.
2315 	 */
2316 	refresh(arg, mip->mi_addr);
2317 }
2318 
2319 void
2320 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
2321 {
2322 	mac_impl_t	*mip = (mac_impl_t *)mh;
2323 
2324 	/*
2325 	 * If no specific refresh function was given then default to the
2326 	 * driver's m_promisc entry point.
2327 	 */
2328 	if (refresh == NULL) {
2329 		refresh = mip->mi_setpromisc;
2330 		arg = mip->mi_driver;
2331 	}
2332 	ASSERT(refresh != NULL);
2333 
2334 	/*
2335 	 * Call the refresh function with the current promiscuity.
2336 	 */
2337 	refresh(arg, (mip->mi_devpromisc != 0));
2338 }
2339 
2340 /*
2341  * The mac client requests that the mac not to change its margin size to
2342  * be less than the specified value.  If "current" is B_TRUE, then the client
2343  * requests the mac not to change its margin size to be smaller than the
2344  * current size. Further, return the current margin size value in this case.
2345  *
2346  * We keep every requested size in an ordered list from largest to smallest.
2347  */
2348 int
2349 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current)
2350 {
2351 	mac_impl_t		*mip = (mac_impl_t *)mh;
2352 	mac_margin_req_t	**pp, *p;
2353 	int			err = 0;
2354 
2355 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
2356 	if (current)
2357 		*marginp = mip->mi_margin;
2358 
2359 	/*
2360 	 * If the current margin value cannot satisfy the margin requested,
2361 	 * return ENOTSUP directly.
2362 	 */
2363 	if (*marginp > mip->mi_margin) {
2364 		err = ENOTSUP;
2365 		goto done;
2366 	}
2367 
2368 	/*
2369 	 * Check whether the given margin is already in the list. If so,
2370 	 * bump the reference count.
2371 	 */
2372 	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
2373 		if (p->mmr_margin == *marginp) {
2374 			/*
2375 			 * The margin requested is already in the list,
2376 			 * so just bump the reference count.
2377 			 */
2378 			p->mmr_ref++;
2379 			goto done;
2380 		}
2381 		if (p->mmr_margin < *marginp)
2382 			break;
2383 	}
2384 
2385 
2386 	if ((p = kmem_zalloc(sizeof (mac_margin_req_t), KM_NOSLEEP)) == NULL) {
2387 		err = ENOMEM;
2388 		goto done;
2389 	}
2390 
2391 	p->mmr_margin = *marginp;
2392 	p->mmr_ref++;
2393 	p->mmr_nextp = *pp;
2394 	*pp = p;
2395 
2396 done:
2397 	rw_exit(&(mip->mi_data_lock));
2398 	return (err);
2399 }
2400 
2401 /*
2402  * The mac client requests to cancel its previous mac_margin_add() request.
2403  * We remove the requested margin size from the list.
2404  */
2405 int
2406 mac_margin_remove(mac_handle_t mh, uint32_t margin)
2407 {
2408 	mac_impl_t		*mip = (mac_impl_t *)mh;
2409 	mac_margin_req_t	**pp, *p;
2410 	int			err = 0;
2411 
2412 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
2413 	/*
2414 	 * Find the entry in the list for the given margin.
2415 	 */
2416 	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
2417 		if (p->mmr_margin == margin) {
2418 			if (--p->mmr_ref == 0)
2419 				break;
2420 
2421 			/*
2422 			 * There is still a reference to this address so
2423 			 * there's nothing more to do.
2424 			 */
2425 			goto done;
2426 		}
2427 	}
2428 
2429 	/*
2430 	 * We did not find an entry for the given margin.
2431 	 */
2432 	if (p == NULL) {
2433 		err = ENOENT;
2434 		goto done;
2435 	}
2436 
2437 	ASSERT(p->mmr_ref == 0);
2438 
2439 	/*
2440 	 * Remove it from the list.
2441 	 */
2442 	*pp = p->mmr_nextp;
2443 	kmem_free(p, sizeof (mac_margin_req_t));
2444 done:
2445 	rw_exit(&(mip->mi_data_lock));
2446 	return (err);
2447 }
2448 
2449 /*
2450  * The mac client requests to get the mac's current margin value.
2451  */
2452 void
2453 mac_margin_get(mac_handle_t mh, uint32_t *marginp)
2454 {
2455 	mac_impl_t	*mip = (mac_impl_t *)mh;
2456 
2457 	rw_enter(&(mip->mi_data_lock), RW_READER);
2458 	*marginp = mip->mi_margin;
2459 	rw_exit(&(mip->mi_data_lock));
2460 }
2461 
2462 boolean_t
2463 mac_margin_update(mac_handle_t mh, uint32_t margin)
2464 {
2465 	mac_impl_t	*mip = (mac_impl_t *)mh;
2466 	uint32_t	margin_needed = 0;
2467 
2468 	rw_enter(&(mip->mi_data_lock), RW_WRITER);
2469 
2470 	if (mip->mi_mmrp != NULL)
2471 		margin_needed = mip->mi_mmrp->mmr_margin;
2472 
2473 	if (margin_needed <= margin)
2474 		mip->mi_margin = margin;
2475 
2476 	rw_exit(&(mip->mi_data_lock));
2477 
2478 	if (margin_needed <= margin)
2479 		i_mac_notify(mip, MAC_NOTE_MARGIN);
2480 
2481 	return (margin_needed <= margin);
2482 }
2483 
2484 boolean_t
2485 mac_do_active_set(mac_handle_t mh, boolean_t shareable)
2486 {
2487 	mac_impl_t *mip = (mac_impl_t *)mh;
2488 
2489 	mutex_enter(&mip->mi_activelink_lock);
2490 	if (mip->mi_activelink) {
2491 		mutex_exit(&mip->mi_activelink_lock);
2492 		return (B_FALSE);
2493 	}
2494 	mip->mi_activelink = B_TRUE;
2495 	mip->mi_shareable = shareable;
2496 	mutex_exit(&mip->mi_activelink_lock);
2497 	return (B_TRUE);
2498 }
2499 
2500 /*
2501  * Called by MAC clients. By default, active MAC clients cannot
2502  * share the NIC with VNICs.
2503  */
2504 boolean_t
2505 mac_active_set(mac_handle_t mh)
2506 {
2507 	return (mac_do_active_set(mh, B_FALSE));
2508 }
2509 
2510 /*
2511  * Called by MAC clients which can share the NIC with VNICS, e.g. DLS.
2512  */
2513 boolean_t
2514 mac_active_shareable_set(mac_handle_t mh)
2515 {
2516 	return (mac_do_active_set(mh, B_TRUE));
2517 }
2518 
2519 void
2520 mac_active_clear(mac_handle_t mh)
2521 {
2522 	mac_impl_t *mip = (mac_impl_t *)mh;
2523 
2524 	mutex_enter(&mip->mi_activelink_lock);
2525 	ASSERT(mip->mi_activelink);
2526 	mip->mi_activelink = B_FALSE;
2527 	mutex_exit(&mip->mi_activelink_lock);
2528 }
2529 
2530 boolean_t
2531 mac_vnic_set(mac_handle_t mh, mac_txinfo_t *tx_info, mac_getcapab_t getcapab_fn,
2532     void *getcapab_arg)
2533 {
2534 	mac_impl_t	*mip = (mac_impl_t *)mh;
2535 	mac_vnic_tx_t	*vnic_tx;
2536 
2537 	mutex_enter(&mip->mi_activelink_lock);
2538 	rw_enter(&mip->mi_tx_lock, RW_WRITER);
2539 	ASSERT(!mip->mi_vnic_present);
2540 
2541 	if (mip->mi_activelink && !mip->mi_shareable) {
2542 		/*
2543 		 * The NIC is already used by an active client which cannot
2544 		 * share it with VNICs.
2545 		 */
2546 		rw_exit(&mip->mi_tx_lock);
2547 		mutex_exit(&mip->mi_activelink_lock);
2548 		return (B_FALSE);
2549 	}
2550 
2551 	vnic_tx = kmem_cache_alloc(mac_vnic_tx_cache, KM_SLEEP);
2552 	vnic_tx->mv_refs = 0;
2553 	vnic_tx->mv_txinfo = *tx_info;
2554 	vnic_tx->mv_clearing = B_FALSE;
2555 
2556 	mip->mi_vnic_present = B_TRUE;
2557 	mip->mi_vnic_tx = vnic_tx;
2558 	mip->mi_vnic_getcapab_fn = getcapab_fn;
2559 	mip->mi_vnic_getcapab_arg = getcapab_arg;
2560 	rw_exit(&mip->mi_tx_lock);
2561 	mutex_exit(&mip->mi_activelink_lock);
2562 
2563 	i_mac_notify(mip, MAC_NOTE_VNIC);
2564 	return (B_TRUE);
2565 }
2566 
2567 void
2568 mac_vnic_clear(mac_handle_t mh)
2569 {
2570 	mac_impl_t *mip = (mac_impl_t *)mh;
2571 	mac_vnic_tx_t	*vnic_tx;
2572 
2573 	rw_enter(&mip->mi_tx_lock, RW_WRITER);
2574 	ASSERT(mip->mi_vnic_present);
2575 	mip->mi_vnic_present = B_FALSE;
2576 	/*
2577 	 * Setting mi_vnic_tx to NULL here under the lock guarantees
2578 	 * that no new references to the current VNIC transmit structure
2579 	 * will be taken by mac_vnic_tx(). This is a necessary condition
2580 	 * for safely waiting for the reference count to drop to
2581 	 * zero below.
2582 	 */
2583 	vnic_tx = mip->mi_vnic_tx;
2584 	mip->mi_vnic_tx = NULL;
2585 	mip->mi_vnic_getcapab_fn = NULL;
2586 	mip->mi_vnic_getcapab_arg = NULL;
2587 	rw_exit(&mip->mi_tx_lock);
2588 
2589 	i_mac_notify(mip, MAC_NOTE_VNIC);
2590 
2591 	/*
2592 	 * Wait for all TX calls referencing the VNIC transmit
2593 	 * entry point that was removed to complete.
2594 	 */
2595 	mutex_enter(&vnic_tx->mv_lock);
2596 	vnic_tx->mv_clearing = B_TRUE;
2597 	while (vnic_tx->mv_refs > 0)
2598 		cv_wait(&vnic_tx->mv_cv, &vnic_tx->mv_lock);
2599 	mutex_exit(&vnic_tx->mv_lock);
2600 	kmem_cache_free(mac_vnic_tx_cache, vnic_tx);
2601 }
2602 
2603 /*
2604  * mac_info_get() is used for retrieving the mac_info when a DL_INFO_REQ is
2605  * issued before a DL_ATTACH_REQ. we walk the i_mac_impl_hash table and find
2606  * the first mac_impl_t with a matching driver name; then we copy its mac_info_t
2607  * to the caller. we do all this with i_mac_impl_lock held so the mac_impl_t
2608  * cannot disappear while we are accessing it.
2609  */
2610 typedef struct i_mac_info_state_s {
2611 	const char	*mi_name;
2612 	mac_info_t	*mi_infop;
2613 } i_mac_info_state_t;
2614 
2615 /*ARGSUSED*/
2616 static uint_t
2617 i_mac_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2618 {
2619 	i_mac_info_state_t	*statep = arg;
2620 	mac_impl_t		*mip = (mac_impl_t *)val;
2621 
2622 	if (mip->mi_disabled)
2623 		return (MH_WALK_CONTINUE);
2624 
2625 	if (strcmp(statep->mi_name,
2626 	    ddi_driver_name(mip->mi_dip)) != 0)
2627 		return (MH_WALK_CONTINUE);
2628 
2629 	statep->mi_infop = &mip->mi_info;
2630 	return (MH_WALK_TERMINATE);
2631 }
2632 
2633 boolean_t
2634 mac_info_get(const char *name, mac_info_t *minfop)
2635 {
2636 	i_mac_info_state_t	state;
2637 
2638 	rw_enter(&i_mac_impl_lock, RW_READER);
2639 	state.mi_name = name;
2640 	state.mi_infop = NULL;
2641 	mod_hash_walk(i_mac_impl_hash, i_mac_info_walker, &state);
2642 	if (state.mi_infop == NULL) {
2643 		rw_exit(&i_mac_impl_lock);
2644 		return (B_FALSE);
2645 	}
2646 	*minfop = *state.mi_infop;
2647 	rw_exit(&i_mac_impl_lock);
2648 	return (B_TRUE);
2649 }
2650 
2651 boolean_t
2652 mac_do_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data,
2653     boolean_t is_vnic)
2654 {
2655 	mac_impl_t *mip = (mac_impl_t *)mh;
2656 
2657 	if (!is_vnic) {
2658 		rw_enter(&mip->mi_tx_lock, RW_READER);
2659 		if (mip->mi_vnic_present) {
2660 			boolean_t rv;
2661 
2662 			rv = mip->mi_vnic_getcapab_fn(mip->mi_vnic_getcapab_arg,
2663 			    cap, cap_data);
2664 			rw_exit(&mip->mi_tx_lock);
2665 			return (rv);
2666 		}
2667 		rw_exit(&mip->mi_tx_lock);
2668 	}
2669 
2670 	if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
2671 		return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
2672 	else
2673 		return (B_FALSE);
2674 }
2675 
2676 boolean_t
2677 mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
2678 {
2679 	return (mac_do_capab_get(mh, cap, cap_data, B_FALSE));
2680 }
2681 
2682 boolean_t
2683 mac_vnic_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
2684 {
2685 	return (mac_do_capab_get(mh, cap, cap_data, B_TRUE));
2686 }
2687 
2688 boolean_t
2689 mac_sap_verify(mac_handle_t mh, uint32_t sap, uint32_t *bind_sap)
2690 {
2691 	mac_impl_t	*mip = (mac_impl_t *)mh;
2692 	return (mip->mi_type->mt_ops.mtops_sap_verify(sap, bind_sap,
2693 	    mip->mi_pdata));
2694 }
2695 
2696 mblk_t *
2697 mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload,
2698     size_t extra_len)
2699 {
2700 	mac_impl_t	*mip = (mac_impl_t *)mh;
2701 	return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, daddr, sap,
2702 	    mip->mi_pdata, payload, extra_len));
2703 }
2704 
2705 int
2706 mac_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
2707 {
2708 	mac_impl_t	*mip = (mac_impl_t *)mh;
2709 	return (mip->mi_type->mt_ops.mtops_header_info(mp, mip->mi_pdata,
2710 	    mhip));
2711 }
2712 
2713 mblk_t *
2714 mac_header_cook(mac_handle_t mh, mblk_t *mp)
2715 {
2716 	mac_impl_t	*mip = (mac_impl_t *)mh;
2717 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_COOK) {
2718 		if (DB_REF(mp) > 1) {
2719 			mblk_t *newmp = copymsg(mp);
2720 			if (newmp == NULL)
2721 				return (NULL);
2722 			freemsg(mp);
2723 			mp = newmp;
2724 		}
2725 		return (mip->mi_type->mt_ops.mtops_header_cook(mp,
2726 		    mip->mi_pdata));
2727 	}
2728 	return (mp);
2729 }
2730 
2731 mblk_t *
2732 mac_header_uncook(mac_handle_t mh, mblk_t *mp)
2733 {
2734 	mac_impl_t	*mip = (mac_impl_t *)mh;
2735 	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_UNCOOK) {
2736 		if (DB_REF(mp) > 1) {
2737 			mblk_t *newmp = copymsg(mp);
2738 			if (newmp == NULL)
2739 				return (NULL);
2740 			freemsg(mp);
2741 			mp = newmp;
2742 		}
2743 		return (mip->mi_type->mt_ops.mtops_header_uncook(mp,
2744 		    mip->mi_pdata));
2745 	}
2746 	return (mp);
2747 }
2748 
/*
 * Initialize a driver's dev_ops for use with the MAC framework.  This is a
 * pass-through to dld_init_ops() with the same arguments.
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	dld_init_ops(ops, name);
}
2754 
/*
 * Counterpart of mac_init_ops().  This is a pass-through to
 * dld_fini_ops() with the same argument.
 */
void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}
2760 
2761 /*
2762  * MAC Type Plugin functions.
2763  */
2764 
2765 mactype_register_t *
2766 mactype_alloc(uint_t mactype_version)
2767 {
2768 	mactype_register_t *mtrp;
2769 
2770 	/*
2771 	 * Make sure there isn't a version mismatch between the plugin and
2772 	 * the framework.  In the future, if multiple versions are
2773 	 * supported, this check could become more sophisticated.
2774 	 */
2775 	if (mactype_version != MACTYPE_VERSION)
2776 		return (NULL);
2777 
2778 	mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP);
2779 	mtrp->mtr_version = mactype_version;
2780 	return (mtrp);
2781 }
2782 
2783 void
2784 mactype_free(mactype_register_t *mtrp)
2785 {
2786 	kmem_free(mtrp, sizeof (mactype_register_t));
2787 }
2788 
2789 int
2790 mactype_register(mactype_register_t *mtrp)
2791 {
2792 	mactype_t	*mtp;
2793 	mactype_ops_t	*ops = mtrp->mtr_ops;
2794 
2795 	/* Do some sanity checking before we register this MAC type. */
2796 	if (mtrp->mtr_ident == NULL || ops == NULL)
2797 		return (EINVAL);
2798 
2799 	/*
2800 	 * Verify that all mandatory callbacks are set in the ops
2801 	 * vector.
2802 	 */
2803 	if (ops->mtops_unicst_verify == NULL ||
2804 	    ops->mtops_multicst_verify == NULL ||
2805 	    ops->mtops_sap_verify == NULL ||
2806 	    ops->mtops_header == NULL ||
2807 	    ops->mtops_header_info == NULL) {
2808 		return (EINVAL);
2809 	}
2810 
2811 	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
2812 	mtp->mt_ident = mtrp->mtr_ident;
2813 	mtp->mt_ops = *ops;
2814 	mtp->mt_type = mtrp->mtr_mactype;
2815 	mtp->mt_nativetype = mtrp->mtr_nativetype;
2816 	mtp->mt_addr_length = mtrp->mtr_addrlen;
2817 	if (mtrp->mtr_brdcst_addr != NULL) {
2818 		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
2819 		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
2820 		    mtrp->mtr_addrlen);
2821 	}
2822 
2823 	mtp->mt_stats = mtrp->mtr_stats;
2824 	mtp->mt_statcount = mtrp->mtr_statcount;
2825 
2826 	mtp->mt_mapping = mtrp->mtr_mapping;
2827 	mtp->mt_mappingcount = mtrp->mtr_mappingcount;
2828 
2829 	if (mod_hash_insert(i_mactype_hash,
2830 	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
2831 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2832 		kmem_free(mtp, sizeof (*mtp));
2833 		return (EEXIST);
2834 	}
2835 	return (0);
2836 }
2837 
2838 int
2839 mactype_unregister(const char *ident)
2840 {
2841 	mactype_t	*mtp;
2842 	mod_hash_val_t	val;
2843 	int 		err;
2844 
2845 	/*
2846 	 * Let's not allow MAC drivers to use this plugin while we're
2847 	 * trying to unregister it.  Holding i_mactype_lock also prevents a
2848 	 * plugin from unregistering while a MAC driver is attempting to
2849 	 * hold a reference to it in i_mactype_getplugin().
2850 	 */
2851 	mutex_enter(&i_mactype_lock);
2852 
2853 	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
2854 	    (mod_hash_val_t *)&mtp)) != 0) {
2855 		/* A plugin is trying to unregister, but it never registered. */
2856 		err = ENXIO;
2857 		goto done;
2858 	}
2859 
2860 	if (mtp->mt_ref != 0) {
2861 		err = EBUSY;
2862 		goto done;
2863 	}
2864 
2865 	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
2866 	ASSERT(err == 0);
2867 	if (err != 0) {
2868 		/* This should never happen, thus the ASSERT() above. */
2869 		err = EINVAL;
2870 		goto done;
2871 	}
2872 	ASSERT(mtp == (mactype_t *)val);
2873 
2874 	kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
2875 	kmem_free(mtp, sizeof (mactype_t));
2876 done:
2877 	mutex_exit(&i_mactype_lock);
2878 	return (err);
2879 }
2880 
2881 int
2882 mac_set_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize)
2883 {
2884 	int err = ENOTSUP;
2885 	mac_impl_t *mip = (mac_impl_t *)mh;
2886 
2887 	if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) {
2888 		err = mip->mi_callbacks->mc_setprop(mip->mi_driver,
2889 		    macprop->mp_name, macprop->mp_id, valsize, val);
2890 	}
2891 	return (err);
2892 }
2893 
2894 int
2895 mac_get_prop(mac_handle_t mh, mac_prop_t *macprop, void *val, uint_t valsize,
2896     uint_t *perm)
2897 {
2898 	int err = ENOTSUP;
2899 	mac_impl_t *mip = (mac_impl_t *)mh;
2900 	uint32_t sdu;
2901 	link_state_t link_state;
2902 
2903 	switch (macprop->mp_id) {
2904 	case MAC_PROP_MTU:
2905 		if (valsize < sizeof (sdu))
2906 			return (EINVAL);
2907 		if ((macprop->mp_flags & MAC_PROP_DEFAULT) == 0) {
2908 			mac_sdu_get(mh, NULL, &sdu);
2909 			bcopy(&sdu, val, sizeof (sdu));
2910 			if (mac_set_prop(mh, macprop, val, sizeof (sdu)) != 0)
2911 				*perm = MAC_PROP_PERM_READ;
2912 			else
2913 				*perm = MAC_PROP_PERM_RW;
2914 			return (0);
2915 		} else {
2916 			if (mip->mi_info.mi_media == DL_ETHER) {
2917 				sdu = ETHERMTU;
2918 				bcopy(&sdu, val, sizeof (sdu));
2919 				return (0);
2920 			}
2921 			/*
2922 			 * ask driver for its default.
2923 			 */
2924 			break;
2925 		}
2926 	case MAC_PROP_STATUS:
2927 		if (valsize < sizeof (link_state))
2928 			return (EINVAL);
2929 		*perm = MAC_PROP_PERM_READ;
2930 		link_state = mac_link_get(mh);
2931 		bcopy(&link_state, val, sizeof (link_state));
2932 		return (0);
2933 	default:
2934 		break;
2935 	}
2936 	if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) {
2937 		err = mip->mi_callbacks->mc_getprop(mip->mi_driver,
2938 		    macprop->mp_name, macprop->mp_id, macprop->mp_flags,
2939 		    valsize, val, perm);
2940 	}
2941 	return (err);
2942 }
2943 
2944 int
2945 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
2946 {
2947 	mac_impl_t	*mip = (mac_impl_t *)mh;
2948 
2949 	if (sdu_max <= mip->mi_sdu_min)
2950 		return (EINVAL);
2951 	mip->mi_sdu_max = sdu_max;
2952 
2953 	/* Send a MAC_NOTE_SDU_SIZE notification. */
2954 	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
2955 	return (0);
2956 }
2957 
2958 static void
2959 mac_register_priv_prop(mac_impl_t *mip, mac_priv_prop_t *mpp, uint_t nprop)
2960 {
2961 	mac_priv_prop_t *mpriv;
2962 
2963 	if (mpp == NULL)
2964 		return;
2965 
2966 	mpriv = kmem_zalloc(nprop * sizeof (*mpriv), KM_SLEEP);
2967 	(void) memcpy(mpriv, mpp, nprop * sizeof (*mpriv));
2968 	mip->mi_priv_prop = mpriv;
2969 	mip->mi_priv_prop_count = nprop;
2970 }
2971 
2972 static void
2973 mac_unregister_priv_prop(mac_impl_t *mip)
2974 {
2975 	mac_priv_prop_t	*mpriv;
2976 
2977 	mpriv = mip->mi_priv_prop;
2978 	if (mpriv != NULL) {
2979 		kmem_free(mpriv, mip->mi_priv_prop_count * sizeof (*mpriv));
2980 		mip->mi_priv_prop = NULL;
2981 	}
2982 	mip->mi_priv_prop_count = 0;
2983 }
2984