xref: /illumos-gate/usr/src/uts/common/inet/ipnet/ipnet.c (revision 257873cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * The ipnet device defined here provides access to packets at the IP layer. To
29  * provide access to packets at this layer it registers a callback function in
30  * the ip module and when there are open instances of the device ip will pass
31  * packets into the device. Packets from ip are passed on the input, output and
32  * loopback paths. Internally the module returns to ip as soon as possible by
33  * deferring processing using a taskq.
34  *
35  * Management of the devices in /dev/ipnet/ is handled by the devname
36  * filesystem and use of the neti interfaces.  This module registers for NIC
37  * events using the neti framework so that when IP interfaces are brought up,
38  * taken down etc. the ipnet module is notified and its view of the interfaces
39  * configured on the system adjusted.  On attach, the module gets an initial
40  * view of the system again using the neti framework but as it has already
41  * registered for IP interface events, it is still up-to-date with any changes.
42  */
43 
44 #include <sys/types.h>
45 #include <sys/conf.h>
46 #include <sys/cred.h>
47 #include <sys/stat.h>
48 #include <sys/ddi.h>
49 #include <sys/sunddi.h>
50 #include <sys/modctl.h>
51 #include <sys/dlpi.h>
52 #include <sys/strsun.h>
53 #include <sys/id_space.h>
54 #include <sys/kmem.h>
55 #include <sys/mkdev.h>
56 #include <sys/neti.h>
57 #include <net/if.h>
58 #include <sys/errno.h>
59 #include <sys/list.h>
60 #include <sys/ksynch.h>
61 #include <sys/hook_event.h>
62 #include <sys/stropts.h>
63 #include <sys/sysmacros.h>
64 #include <inet/ip.h>
65 #include <inet/ip_multi.h>
66 #include <inet/ip6.h>
67 #include <inet/ipnet.h>
68 
69 static struct module_info ipnet_minfo = {
70 	1,		/* mi_idnum */
71 	"ipnet",	/* mi_idname */
72 	0,		/* mi_minpsz */
73 	INFPSZ,		/* mi_maxpsz */
74 	2048,		/* mi_hiwat */
75 	0		/* mi_lowat */
76 };
77 
78 /*
79  * List to hold static view of ipnetif_t's on the system. This is needed to
80  * avoid holding the lock protecting the avl tree of ipnetif's over the
81  * callback into the dev filesystem.
82  */
83 typedef struct ipnetif_cbdata {
84 	char		ic_ifname[LIFNAMSIZ];
85 	dev_t		ic_dev;
86 	list_node_t	ic_next;
87 } ipnetif_cbdata_t;
88 
/*
 * Classification of an ipnet_addrp_t relative to one ipnet_t client
 * stream, used by ipnet_accept() when deciding whether a packet should be
 * delivered to that stream.
 */
typedef enum {
	IPNETADDR_MYADDR,	/* address configured on my ipnetif_t */
	IPNETADDR_MBCAST,	/* multicast or broadcast address */
	IPNETADDR_UNKNOWN	/* neither of the above */
} ipnet_addrtype_t;
99 
100 /* Argument used for the ipnet_nicevent_taskq callback. */
101 typedef struct ipnet_nicevent_s {
102 	nic_event_t		ipne_event;
103 	net_handle_t		ipne_protocol;
104 	netstackid_t		ipne_stackid;
105 	uint64_t		ipne_ifindex;
106 	uint64_t		ipne_lifindex;
107 	char			ipne_ifname[LIFNAMSIZ];
108 } ipnet_nicevent_t;
109 
110 static dev_info_t	*ipnet_dip;
111 static major_t		ipnet_major;
112 static ddi_taskq_t	*ipnet_taskq;		/* taskq for packets */
113 static ddi_taskq_t	*ipnet_nicevent_taskq;	/* taskq for NIC events */
114 static id_space_t	*ipnet_minor_space;
115 static const int	IPNET_MINOR_LO = 1; 	/* minor number for /dev/lo0 */
116 static const int 	IPNET_MINOR_MIN = 2; 	/* start of dynamic minors */
117 static dl_info_ack_t	ipnet_infoack = IPNET_INFO_ACK_INIT;
118 static ipnet_acceptfn_t	ipnet_accept, ipnet_loaccept;
119 
120 static void	ipnet_input(mblk_t *);
121 static int	ipnet_wput(queue_t *, mblk_t *);
122 static int	ipnet_rsrv(queue_t *);
123 static int	ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
124 static int	ipnet_close(queue_t *);
125 static void	ipnet_ioctl(queue_t *, mblk_t *);
126 static void	ipnet_iocdata(queue_t *, mblk_t *);
127 static void 	ipnet_wputnondata(queue_t *, mblk_t *);
128 static int	ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
129 static int	ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
130 static int	ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
131 static void	ipnet_inforeq(queue_t *q, mblk_t *mp);
132 static void	ipnet_bindreq(queue_t *q, mblk_t *mp);
133 static void	ipnet_unbindreq(queue_t *q, mblk_t *mp);
134 static void	ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
135 static void	ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
136 static int	ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
137 static void	ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
138 static int	ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
139 static void	ipnet_nicevent_task(void *);
140 static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *);
141 static void	ipnet_remove_if(ipnetif_t *, ipnet_stack_t *);
142 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
143 static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *);
144 static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *);
145 static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
146 static void	ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *);
147 static int	ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
148 static int 	ipnet_if_compare_name(const void *, const void *);
149 static int 	ipnet_if_compare_index(const void *, const void *);
150 static void	ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
151 static void	ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
152 static void	ipnetif_refhold(ipnetif_t *);
153 static void	ipnetif_refrele(ipnetif_t *);
154 static void	ipnet_walkers_inc(ipnet_stack_t *);
155 static void	ipnet_walkers_dec(ipnet_stack_t *);
156 static void	ipnet_register_netihook(ipnet_stack_t *);
157 static void	*ipnet_stack_init(netstackid_t, netstack_t *);
158 static void	ipnet_stack_fini(netstackid_t, void *);
159 
160 static struct qinit ipnet_rinit = {
161 	NULL,		/* qi_putp */
162 	ipnet_rsrv,	/* qi_srvp */
163 	ipnet_open,	/* qi_qopen */
164 	ipnet_close,	/* qi_qclose */
165 	NULL,		/* qi_qadmin */
166 	&ipnet_minfo,	/* qi_minfo */
167 };
168 
169 static struct qinit ipnet_winit = {
170 	ipnet_wput,	/* qi_putp */
171 	NULL,		/* qi_srvp */
172 	NULL,		/* qi_qopen */
173 	NULL,		/* qi_qclose */
174 	NULL,		/* qi_qadmin */
175 	&ipnet_minfo,	/* qi_minfo */
176 };
177 
178 static struct streamtab ipnet_info = {
179 	&ipnet_rinit, &ipnet_winit
180 };
181 
182 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
183     ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
184     ddi_quiesce_not_supported);
185 
186 static struct modldrv modldrv = {
187 	&mod_driverops,
188 	"STREAMS ipnet driver",
189 	&ipnet_ops
190 };
191 
192 static struct modlinkage modlinkage = {
193 	MODREV_1, &modldrv, NULL
194 };
195 
196 /*
197  * Walk the list of physical interfaces on the machine, for each
198  * interface create a new ipnetif_t and add any addresses to it. We
199  * need to do the walk twice, once for IPv4 and once for IPv6.
200  *
201  * The interfaces are destroyed as part of ipnet_stack_fini() for each
202  * stack.  Note that we cannot do this initialization in
203  * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
204  */
205 static int
206 ipnet_if_init(void)
207 {
208 	netstack_handle_t	nh;
209 	netstack_t		*ns;
210 	ipnet_stack_t		*ips;
211 	int			ret = 0;
212 
213 	netstack_next_init(&nh);
214 	while ((ns = netstack_next(&nh)) != NULL) {
215 		ips = ns->netstack_ipnet;
216 		if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) != 0)
217 			break;
218 		if ((ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE)) != 0)
219 			break;
220 	}
221 	netstack_next_fini(&nh);
222 	return (ret);
223 }
224 
225 /*
226  * Standard module entry points.
227  */
228 int
229 _init(void)
230 {
231 	int	ret;
232 
233 	if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
234 		return (ENODEV);
235 	ipnet_minor_space = id_space_create("ipnet_minor_space",
236 	    IPNET_MINOR_MIN, MAXMIN32);
237 	netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
238 	/*
239 	 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
240 	 * delivery of packets to clients.
241 	 */
242 	ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
243 	ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
244 	    1, TASKQ_DEFAULTPRI, 0);
245 	if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
246 		ret = ENOMEM;
247 		goto done;
248 	}
249 	if ((ret = ipnet_if_init()) == 0)
250 		ret = mod_install(&modlinkage);
251 done:
252 	if (ret != 0) {
253 		if (ipnet_taskq != NULL)
254 			ddi_taskq_destroy(ipnet_taskq);
255 		if (ipnet_nicevent_taskq != NULL)
256 			ddi_taskq_destroy(ipnet_nicevent_taskq);
257 		netstack_unregister(NS_IPNET);
258 		id_space_destroy(ipnet_minor_space);
259 	}
260 	return (ret);
261 }
262 
263 int
264 _fini(void)
265 {
266 	int err;
267 
268 	if ((err = mod_remove(&modlinkage)) != 0)
269 		return (err);
270 	ddi_taskq_destroy(ipnet_nicevent_taskq);
271 	ddi_taskq_destroy(ipnet_taskq);
272 	netstack_unregister(NS_IPNET);
273 	id_space_destroy(ipnet_minor_space);
274 	return (0);
275 }
276 
277 int
278 _info(struct modinfo *modinfop)
279 {
280 	return (mod_info(&modlinkage, modinfop));
281 }
282 
283 static void
284 ipnet_register_netihook(ipnet_stack_t *ips)
285 {
286 	int		ret;
287 	netstackid_t	stackid = ips->ips_netstack->netstack_stackid;
288 
289 	HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
290 	    ips);
291 
292 	/*
293 	 * The ipnet device depends on ip and is registered in the netstack
294 	 * framework after ip so the call to net_lookup_impl() cannot fail.
295 	 */
296 	ips->ips_ndv4 = net_protocol_lookup(stackid, NHF_INET);
297 	ips->ips_ndv6 = net_protocol_lookup(stackid, NHF_INET6);
298 
299 	ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
300 	    ips->ips_nicevents);
301 	if (ret != 0) {
302 		cmn_err(CE_WARN, "ipnet_register_netihook: net_register_hook() "
303 		    "failed for v4 stack instance %d: %d", stackid, ret);
304 	}
305 	ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
306 	    ips->ips_nicevents);
307 	if (ret != 0) {
308 		cmn_err(CE_WARN, "ipnet_register_netihook: net_register_hook() "
309 		    "failed for v6 stack instance %d: %d", stackid, ret);
310 	}
311 }
312 
313 /*
314  * This function is called on attach to build an initial view of the
315  * interfaces on the system. It will be called once for IPv4 and once
316  * for IPv6, although there is only one ipnet interface for both IPv4
317  * and IPv6 there are separate address lists.
318  */
319 static int
320 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
321 {
322 	phy_if_t		phyif;
323 	lif_if_t		lif;
324 	ipnetif_t		*ipnetif;
325 	char			name[LIFNAMSIZ];
326 	boolean_t		new_if = B_FALSE;
327 	uint64_t		ifflags;
328 	int			ret = 0;
329 
330 	/*
331 	 * Make sure we're not processing NIC events during the
332 	 * population of our interfaces and address lists.
333 	 */
334 	mutex_enter(&ips->ips_event_lock);
335 
336 	for (phyif = net_phygetnext(nd, 0); phyif != 0;
337 	    phyif = net_phygetnext(nd, phyif)) {
338 		if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
339 			continue;
340 		if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) {
341 			ipnetif = ipnet_create_if(name, phyif, ips);
342 			if (ipnetif == NULL) {
343 				ret = ENOMEM;
344 				goto done;
345 			}
346 			new_if = B_TRUE;
347 		}
348 		ipnetif->if_flags |=
349 		    isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
350 
351 		for (lif = net_lifgetnext(nd, phyif, 0); lif != 0;
352 		    lif = net_lifgetnext(nd, phyif, lif)) {
353 			/*
354 			 * Skip addresses that aren't up.  We'll add
355 			 * them when we receive an NE_LIF_UP event.
356 			 */
357 			if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
358 			    !(ifflags & IFF_UP))
359 				continue;
360 			/* Don't add it if we already have it. */
361 			if (ipnet_match_lif(ipnetif, lif, isv6) != NULL)
362 				continue;
363 			ipnet_add_ifaddr(lif, ipnetif, nd);
364 		}
365 		if (!new_if)
366 			ipnetif_refrele(ipnetif);
367 	}
368 
369 done:
370 	mutex_exit(&ips->ips_event_lock);
371 	return (ret);
372 }
373 
374 static int
375 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
376 {
377 	if (cmd != DDI_ATTACH)
378 		return (DDI_FAILURE);
379 
380 	if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
381 	    DDI_PSEUDO, 0) == DDI_FAILURE)
382 		return (DDI_FAILURE);
383 
384 	ipnet_dip = dip;
385 	return (DDI_SUCCESS);
386 }
387 
388 static int
389 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
390 {
391 	if (cmd != DDI_DETACH)
392 		return (DDI_FAILURE);
393 
394 	ASSERT(dip == ipnet_dip);
395 	ddi_remove_minor_node(ipnet_dip, NULL);
396 	ipnet_dip = NULL;
397 	return (DDI_SUCCESS);
398 }
399 
400 /* ARGSUSED */
401 static int
402 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
403 {
404 	int error = DDI_FAILURE;
405 
406 	switch (infocmd) {
407 	case DDI_INFO_DEVT2INSTANCE:
408 		*result = (void *)0;
409 		error = DDI_SUCCESS;
410 		break;
411 	case DDI_INFO_DEVT2DEVINFO:
412 		if (ipnet_dip != NULL) {
413 			*result = ipnet_dip;
414 			error = DDI_SUCCESS;
415 		}
416 		break;
417 	}
418 	return (error);
419 }
420 
421 /* ARGSUSED */
422 static int
423 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
424 {
425 	ipnet_t		*ipnet;
426 	netstack_t	*ns = NULL;
427 	ipnet_stack_t	*ips;
428 	int		err = 0;
429 	zoneid_t	zoneid = crgetzoneid(crp);
430 
431 	/*
432 	 * If the system is labeled, only the global zone is allowed to open
433 	 * IP observability nodes.
434 	 */
435 	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
436 		return (EACCES);
437 
438 	/* We don't support open as a module */
439 	if (sflag & MODOPEN)
440 		return (ENOTSUP);
441 
442 	/* This driver is self-cloning, we don't support re-open. */
443 	if (rq->q_ptr != NULL)
444 		return (EBUSY);
445 
446 	if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
447 		return (ENOMEM);
448 
449 	VERIFY((ns = netstack_find_by_cred(crp)) != NULL);
450 	ips = ns->netstack_ipnet;
451 
452 	rq->q_ptr = WR(rq)->q_ptr = ipnet;
453 	ipnet->ipnet_rq = rq;
454 	ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
455 	ipnet->ipnet_zoneid = zoneid;
456 	ipnet->ipnet_dlstate = DL_UNBOUND;
457 	ipnet->ipnet_sap = 0;
458 	ipnet->ipnet_ns = ns;
459 
460 	/*
461 	 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
462 	 * to be processed after ipnet_if is set and the ipnet_t has been
463 	 * inserted in the ips_str_list.
464 	 */
465 	mutex_enter(&ips->ips_event_lock);
466 	if (getminor(*dev) == IPNET_MINOR_LO) {
467 		ipnet->ipnet_flags |= IPNET_LOMODE;
468 		ipnet->ipnet_acceptfn = ipnet_loaccept;
469 	} else {
470 		ipnet->ipnet_acceptfn = ipnet_accept;
471 		ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips);
472 		if (ipnet->ipnet_if == NULL ||
473 		    !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) {
474 			err = ENODEV;
475 			goto done;
476 		}
477 	}
478 
479 	mutex_enter(&ips->ips_walkers_lock);
480 	while (ips->ips_walkers_cnt != 0)
481 		cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
482 	list_insert_head(&ips->ips_str_list, ipnet);
483 	*dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
484 	qprocson(rq);
485 
486 	/*
487 	 * Only register our callback if we're the first open client; we call
488 	 * unregister in close() for the last open client.
489 	 */
490 	if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
491 		ipobs_register_hook(ns, ipnet_input);
492 	mutex_exit(&ips->ips_walkers_lock);
493 
494 done:
495 	mutex_exit(&ips->ips_event_lock);
496 	if (err != 0) {
497 		netstack_rele(ns);
498 		id_free(ipnet_minor_space, ipnet->ipnet_minor);
499 		if (ipnet->ipnet_if != NULL)
500 			ipnetif_refrele(ipnet->ipnet_if);
501 		kmem_free(ipnet, sizeof (*ipnet));
502 	}
503 	return (err);
504 }
505 
506 static int
507 ipnet_close(queue_t *rq)
508 {
509 	ipnet_t		*ipnet = rq->q_ptr;
510 	ipnet_stack_t	*ips = ipnet->ipnet_ns->netstack_ipnet;
511 
512 	if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
513 		ipnet_leave_allmulti(ipnet->ipnet_if, ips);
514 	if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
515 		ipnet_leave_allmulti(ipnet->ipnet_if, ips);
516 
517 	mutex_enter(&ips->ips_walkers_lock);
518 	while (ips->ips_walkers_cnt != 0)
519 		cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
520 
521 	qprocsoff(rq);
522 
523 	list_remove(&ips->ips_str_list, ipnet);
524 	if (ipnet->ipnet_if != NULL)
525 		ipnetif_refrele(ipnet->ipnet_if);
526 	id_free(ipnet_minor_space, ipnet->ipnet_minor);
527 	kmem_free(ipnet, sizeof (*ipnet));
528 
529 	if (list_is_empty(&ips->ips_str_list))
530 		ipobs_unregister_hook(ips->ips_netstack, ipnet_input);
531 
532 	mutex_exit(&ips->ips_walkers_lock);
533 	netstack_rele(ips->ips_netstack);
534 	return (0);
535 }
536 
537 static int
538 ipnet_wput(queue_t *q, mblk_t *mp)
539 {
540 	switch (mp->b_datap->db_type) {
541 	case M_FLUSH:
542 		if (*mp->b_rptr & FLUSHW) {
543 			flushq(q, FLUSHDATA);
544 			*mp->b_rptr &= ~FLUSHW;
545 		}
546 		if (*mp->b_rptr & FLUSHR)
547 			qreply(q, mp);
548 		else
549 			freemsg(mp);
550 		break;
551 	case M_PROTO:
552 	case M_PCPROTO:
553 		ipnet_wputnondata(q, mp);
554 		break;
555 	case M_IOCTL:
556 		ipnet_ioctl(q, mp);
557 		break;
558 	case M_IOCDATA:
559 		ipnet_iocdata(q, mp);
560 		break;
561 	default:
562 		freemsg(mp);
563 		break;
564 	}
565 	return (0);
566 }
567 
568 static int
569 ipnet_rsrv(queue_t *q)
570 {
571 	mblk_t *mp;
572 
573 	while ((mp = getq(q)) != NULL) {
574 		ASSERT(DB_TYPE(mp) == M_DATA);
575 		if (canputnext(q)) {
576 			putnext(q, mp);
577 		} else {
578 			(void) putbq(q, mp);
579 			break;
580 		}
581 	}
582 	return (0);
583 }
584 
585 static void
586 ipnet_ioctl(queue_t *q, mblk_t *mp)
587 {
588 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
589 
590 	switch (iocp->ioc_cmd) {
591 	case DLIOCRAW:
592 		miocack(q, mp, 0, 0);
593 		break;
594 	case DLIOCIPNETINFO:
595 		if (iocp->ioc_count == TRANSPARENT) {
596 			mcopyin(mp, NULL, sizeof (uint_t), NULL);
597 			qreply(q, mp);
598 			break;
599 		}
600 		/* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */
601 	default:
602 		miocnak(q, mp, 0, EINVAL);
603 		break;
604 	}
605 }
606 
607 static void
608 ipnet_iocdata(queue_t *q, mblk_t *mp)
609 {
610 	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
611 	ipnet_t		*ipnet = q->q_ptr;
612 
613 	switch (iocp->ioc_cmd) {
614 	case DLIOCIPNETINFO:
615 		if (*(int *)mp->b_cont->b_rptr == 1)
616 			ipnet->ipnet_flags |= IPNET_INFO;
617 		else if (*(int *)mp->b_cont->b_rptr == 0)
618 			ipnet->ipnet_flags &= ~IPNET_INFO;
619 		else
620 			goto iocnak;
621 		miocack(q, mp, 0, DL_IPNETINFO_VERSION);
622 		break;
623 	default:
624 	iocnak:
625 		miocnak(q, mp, 0, EINVAL);
626 		break;
627 	}
628 }
629 
630 static void
631 ipnet_wputnondata(queue_t *q, mblk_t *mp)
632 {
633 	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
634 	t_uscalar_t		prim = dlp->dl_primitive;
635 
636 	switch (prim) {
637 	case DL_INFO_REQ:
638 		ipnet_inforeq(q, mp);
639 		break;
640 	case DL_UNBIND_REQ:
641 		ipnet_unbindreq(q, mp);
642 		break;
643 	case DL_BIND_REQ:
644 		ipnet_bindreq(q, mp);
645 		break;
646 	case DL_PROMISCON_REQ:
647 		ipnet_dlpromisconreq(q, mp);
648 		break;
649 	case DL_PROMISCOFF_REQ:
650 		ipnet_dlpromiscoffreq(q, mp);
651 		break;
652 	case DL_UNITDATA_REQ:
653 	case DL_DETACH_REQ:
654 	case DL_PHYS_ADDR_REQ:
655 	case DL_SET_PHYS_ADDR_REQ:
656 	case DL_ENABMULTI_REQ:
657 	case DL_DISABMULTI_REQ:
658 	case DL_ATTACH_REQ:
659 		dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0);
660 		break;
661 	default:
662 		dlerrorack(q, mp, prim, DL_BADPRIM, 0);
663 		break;
664 	}
665 }
666 
667 static void
668 ipnet_inforeq(queue_t *q, mblk_t *mp)
669 {
670 	dl_info_ack_t	*dlip;
671 	size_t		size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
672 
673 	if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
674 		dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0);
675 		return;
676 	}
677 
678 	if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
679 		return;
680 
681 	dlip = (dl_info_ack_t *)mp->b_rptr;
682 	*dlip = ipnet_infoack;
683 	qreply(q, mp);
684 }
685 
686 static void
687 ipnet_bindreq(queue_t *q, mblk_t *mp)
688 {
689 	union   DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
690 	int32_t sap;
691 	ipnet_t	*ipnet = q->q_ptr;
692 
693 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
694 		dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
695 		return;
696 	}
697 
698 	sap = dlp->bind_req.dl_sap;
699 	if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) {
700 		dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
701 	} else {
702 		ipnet->ipnet_sap = sap;
703 		ipnet->ipnet_dlstate = DL_IDLE;
704 		dlbindack(q, mp, sap, 0, 0, 0, 0);
705 	}
706 }
707 
708 static void
709 ipnet_unbindreq(queue_t *q, mblk_t *mp)
710 {
711 	ipnet_t	*ipnet = q->q_ptr;
712 
713 	if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
714 		dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
715 		return;
716 	}
717 
718 	if (ipnet->ipnet_dlstate != DL_IDLE) {
719 		dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
720 	} else {
721 		ipnet->ipnet_dlstate = DL_UNBOUND;
722 		ipnet->ipnet_sap = 0;
723 		dlokack(q, mp, DL_UNBIND_REQ);
724 	}
725 }
726 
727 static void
728 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp)
729 {
730 	ipnet_t		*ipnet = q->q_ptr;
731 	t_uscalar_t	level;
732 	int		err;
733 
734 	if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
735 		dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
736 		return;
737 	}
738 
739 	if (ipnet->ipnet_flags & IPNET_LOMODE) {
740 		dlokack(q, mp, DL_PROMISCON_REQ);
741 		return;
742 	}
743 
744 	level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
745 	if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
746 		if ((err = ipnet_join_allmulti(ipnet->ipnet_if,
747 		    ipnet->ipnet_ns->netstack_ipnet)) != 0) {
748 			dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err);
749 			return;
750 		}
751 	}
752 
753 	switch (level) {
754 	case DL_PROMISC_PHYS:
755 		ipnet->ipnet_flags |= IPNET_PROMISC_PHYS;
756 		break;
757 	case DL_PROMISC_SAP:
758 		ipnet->ipnet_flags |= IPNET_PROMISC_SAP;
759 		break;
760 	case DL_PROMISC_MULTI:
761 		ipnet->ipnet_flags |= IPNET_PROMISC_MULTI;
762 		break;
763 	default:
764 		dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
765 		return;
766 	}
767 
768 	dlokack(q, mp, DL_PROMISCON_REQ);
769 }
770 
771 static void
772 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp)
773 {
774 	ipnet_t		*ipnet = q->q_ptr;
775 	t_uscalar_t	level;
776 	uint16_t	orig_ipnet_flags = ipnet->ipnet_flags;
777 
778 	if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
779 		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
780 		return;
781 	}
782 
783 	if (ipnet->ipnet_flags & IPNET_LOMODE) {
784 		dlokack(q, mp, DL_PROMISCOFF_REQ);
785 		return;
786 	}
787 
788 	level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
789 	switch (level) {
790 	case DL_PROMISC_PHYS:
791 		if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
792 			ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS;
793 		break;
794 	case DL_PROMISC_SAP:
795 		if (ipnet->ipnet_flags & IPNET_PROMISC_SAP)
796 			ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP;
797 		break;
798 	case DL_PROMISC_MULTI:
799 		if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
800 			ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI;
801 		break;
802 	default:
803 		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
804 		return;
805 	}
806 
807 	if (orig_ipnet_flags == ipnet->ipnet_flags) {
808 		dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
809 		return;
810 	}
811 
812 	if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
813 		ipnet_leave_allmulti(ipnet->ipnet_if,
814 		    ipnet->ipnet_ns->netstack_ipnet);
815 	}
816 
817 	dlokack(q, mp, DL_PROMISCOFF_REQ);
818 }
819 
820 static int
821 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
822 {
823 	int		err = 0;
824 	ip_stack_t	*ipst = ips->ips_netstack->netstack_ip;
825 	uint64_t	index = ipnetif->if_index;
826 
827 	mutex_enter(&ips->ips_event_lock);
828 	if (ipnetif->if_multicnt == 0) {
829 		ASSERT((ipnetif->if_flags &
830 		    (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0);
831 		if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) {
832 			err = ip_join_allmulti(index, B_FALSE, ipst);
833 			if (err != 0)
834 				goto done;
835 			ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI;
836 		}
837 		if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) {
838 			err = ip_join_allmulti(index, B_TRUE, ipst);
839 			if (err != 0 &&
840 			    (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) {
841 				(void) ip_leave_allmulti(index, B_FALSE, ipst);
842 				ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
843 				goto done;
844 			}
845 			ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI;
846 		}
847 	}
848 	ipnetif->if_multicnt++;
849 
850 done:
851 	mutex_exit(&ips->ips_event_lock);
852 	return (err);
853 }
854 
855 static void
856 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
857 {
858 	int		err;
859 	ip_stack_t	*ipst = ips->ips_netstack->netstack_ip;
860 	uint64_t	index = ipnetif->if_index;
861 
862 	mutex_enter(&ips->ips_event_lock);
863 	ASSERT(ipnetif->if_multicnt != 0);
864 	if (--ipnetif->if_multicnt == 0) {
865 		if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
866 			err = ip_leave_allmulti(index, B_FALSE, ipst);
867 			ASSERT(err == 0 || err == ENODEV);
868 			ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
869 		}
870 		if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) {
871 			err = ip_leave_allmulti(index, B_TRUE, ipst);
872 			ASSERT(err == 0 || err == ENODEV);
873 			ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI;
874 		}
875 	}
876 	mutex_exit(&ips->ips_event_lock);
877 }
878 
879 static mblk_t *
880 ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp)
881 {
882 	mblk_t		*dlhdr;
883 	dl_ipnetinfo_t	*dl;
884 
885 	if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) {
886 		freemsg(mp);
887 		return (NULL);
888 	}
889 	dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
890 	dl->dli_version = DL_IPNETINFO_VERSION;
891 	dl->dli_len = htons(sizeof (*dl));
892 	dl->dli_ipver = ihd->ihd_ipver;
893 	dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc);
894 	dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst);
895 	dlhdr->b_wptr += sizeof (*dl);
896 	dlhdr->b_cont = mp;
897 
898 	return (dlhdr);
899 }
900 
901 static ipnet_addrtype_t
902 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
903 {
904 	list_t			*list;
905 	ipnetif_t		*ipnetif = ipnet->ipnet_if;
906 	ipnetif_addr_t		*ifaddr;
907 	ipnet_addrtype_t	addrtype = IPNETADDR_UNKNOWN;
908 
909 	/* First check if the address is multicast or limited broadcast. */
910 	switch (addr->iap_family) {
911 	case AF_INET:
912 		if (CLASSD(*(addr->iap_addr4)) ||
913 		    *(addr->iap_addr4) == INADDR_BROADCAST)
914 			return (IPNETADDR_MBCAST);
915 		break;
916 	case AF_INET6:
917 		if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6))
918 			return (IPNETADDR_MBCAST);
919 		break;
920 	}
921 
922 	/*
923 	 * Walk the address list to see if the address belongs to our
924 	 * interface or is one of our subnet broadcast addresses.
925 	 */
926 	mutex_enter(&ipnetif->if_addr_lock);
927 	list = (addr->iap_family == AF_INET) ?
928 	    &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list;
929 	for (ifaddr = list_head(list);
930 	    ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN;
931 	    ifaddr = list_next(list, ifaddr)) {
932 		/*
933 		 * If we're not in the global zone, then only look at
934 		 * addresses in our zone.
935 		 */
936 		if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
937 		    ipnet->ipnet_zoneid != ifaddr->ifa_zone)
938 			continue;
939 		switch (addr->iap_family) {
940 		case AF_INET:
941 			if (ifaddr->ifa_ip4addr != INADDR_ANY &&
942 			    *(addr->iap_addr4) == ifaddr->ifa_ip4addr)
943 				addrtype = IPNETADDR_MYADDR;
944 			else if (ifaddr->ifa_brdaddr != INADDR_ANY &&
945 			    *(addr->iap_addr4) == ifaddr->ifa_brdaddr)
946 				addrtype = IPNETADDR_MBCAST;
947 			break;
948 		case AF_INET6:
949 			if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6,
950 			    &ifaddr->ifa_ip6addr))
951 				addrtype = IPNETADDR_MYADDR;
952 			break;
953 		}
954 	}
955 	mutex_exit(&ipnetif->if_addr_lock);
956 
957 	return (addrtype);
958 }
959 
960 /*
961  * Verify if the packet contained in ihd should be passed up to the
962  * ipnet client stream.
963  */
964 static boolean_t
965 ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
966     ipnet_addrp_t *dst)
967 {
968 	uint64_t		ifindex = ipnet->ipnet_if->if_index;
969 	ipnet_addrtype_t	srctype, dsttype;
970 
971 	srctype = ipnet_get_addrtype(ipnet, src);
972 	dsttype = ipnet_get_addrtype(ipnet, dst);
973 
974 	/*
975 	 * Do not allow an ipnet stream to see packets that are not from or to
976 	 * its zone.  The exception is when zones are using the shared stack
977 	 * model.  In this case, streams in the global zone have visibility
978 	 * into other shared-stack zones, and broadcast and multicast traffic
979 	 * is visible by all zones in the stack.
980 	 */
981 	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
982 	    dsttype != IPNETADDR_MBCAST) {
983 		if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
984 		    ipnet->ipnet_zoneid != ihd->ihd_zdst)
985 			return (B_FALSE);
986 	}
987 
988 	/*
989 	 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
990 	 * packet's IP version.
991 	 */
992 	if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
993 	    ipnet->ipnet_sap != ihd->ihd_ipver)
994 		return (B_FALSE);
995 
996 	/* If the destination address is ours, then accept the packet. */
997 	if (dsttype == IPNETADDR_MYADDR)
998 		return (B_TRUE);
999 
1000 	/*
1001 	 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1002 	 * sent or received on the interface we're observing, or packets that
1003 	 * have our source address (this allows us to see packets we send).
1004 	 */
1005 	if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) {
1006 		if (ihd->ihd_ifindex == ifindex || srctype == IPNETADDR_MYADDR)
1007 			return (B_TRUE);
1008 	}
1009 
1010 	/*
1011 	 * We accept multicast and broadcast packets transmitted or received
1012 	 * on the interface we're observing.
1013 	 */
1014 	if (dsttype == IPNETADDR_MBCAST && ihd->ihd_ifindex == ifindex)
1015 		return (B_TRUE);
1016 
1017 	return (B_FALSE);
1018 }
1019 
1020 /*
1021  * Verify if the packet contained in ihd should be passed up to the ipnet
1022  * client stream that's in IPNET_LOMODE.
1023  */
1024 /* ARGSUSED */
1025 static boolean_t
1026 ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
1027     ipnet_addrp_t *dst)
1028 {
1029 	if (ihd->ihd_htype != IPOBS_HOOK_LOCAL)
1030 		return (B_FALSE);
1031 
1032 	/*
1033 	 * An ipnet stream must not see packets that are not from/to its zone.
1034 	 */
1035 	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
1036 		if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
1037 		    ipnet->ipnet_zoneid != ihd->ihd_zdst)
1038 			return (B_FALSE);
1039 	}
1040 
1041 	return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver);
1042 }
1043 
1044 static void
1045 ipnet_dispatch(void *arg)
1046 {
1047 	mblk_t			*mp = arg;
1048 	ipobs_hook_data_t	*ihd = (ipobs_hook_data_t *)mp->b_rptr;
1049 	ipnet_t			*ipnet;
1050 	mblk_t			*netmp;
1051 	list_t			*list;
1052 	ipnet_stack_t		*ips = ihd->ihd_stack->netstack_ipnet;
1053 	ipnet_addrp_t		src, dst;
1054 
1055 	if (ihd->ihd_ipver == IPV4_VERSION) {
1056 		src.iap_family = dst.iap_family = AF_INET;
1057 		src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src;
1058 		dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst;
1059 	} else {
1060 		src.iap_family = dst.iap_family = AF_INET6;
1061 		src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src;
1062 		dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst;
1063 	}
1064 
1065 	ipnet_walkers_inc(ips);
1066 
1067 	list = &ips->ips_str_list;
1068 	for (ipnet = list_head(list); ipnet != NULL;
1069 	    ipnet = list_next(list, ipnet)) {
1070 		if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst))
1071 			continue;
1072 
1073 		if (list_next(list, ipnet) == NULL) {
1074 			netmp = ihd->ihd_mp;
1075 			ihd->ihd_mp = NULL;
1076 		} else {
1077 			if ((netmp = dupmsg(ihd->ihd_mp)) == NULL &&
1078 			    (netmp = copymsg(ihd->ihd_mp)) == NULL) {
1079 				atomic_inc_64(&ips->ips_drops);
1080 				continue;
1081 			}
1082 		}
1083 
1084 		if (ipnet->ipnet_flags & IPNET_INFO) {
1085 			if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) {
1086 				atomic_inc_64(&ips->ips_drops);
1087 				continue;
1088 			}
1089 		}
1090 
1091 		if (ipnet->ipnet_rq->q_first == NULL &&
1092 		    canputnext(ipnet->ipnet_rq)) {
1093 			putnext(ipnet->ipnet_rq, netmp);
1094 		} else if (canput(ipnet->ipnet_rq)) {
1095 			(void) putq(ipnet->ipnet_rq, netmp);
1096 		} else {
1097 			freemsg(netmp);
1098 			atomic_inc_64(&ips->ips_drops);
1099 		}
1100 	}
1101 
1102 	ipnet_walkers_dec(ips);
1103 
1104 	freemsg(ihd->ihd_mp);
1105 	freemsg(mp);
1106 }
1107 
1108 static void
1109 ipnet_input(mblk_t *mp)
1110 {
1111 	ipobs_hook_data_t  *ihd = (ipobs_hook_data_t *)mp->b_rptr;
1112 
1113 	if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
1114 	    DDI_SUCCESS) {
1115 		atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops);
1116 		freemsg(ihd->ihd_mp);
1117 		freemsg(mp);
1118 	}
1119 }
1120 
1121 /*
1122  * Create a new ipnetif_t and new minor node for it.  If creation is
1123  * successful the new ipnetif_t is inserted into an avl_tree
1124  * containing ipnetif's for this stack instance.
1125  */
1126 static ipnetif_t *
1127 ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips)
1128 {
1129 	ipnetif_t	*ipnetif;
1130 	avl_index_t	where = 0;
1131 	minor_t		ifminor;
1132 
1133 	/*
1134 	 * Because ipnet_create_if() can be called from a NIC event
1135 	 * callback, it should not block.
1136 	 */
1137 	ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
1138 	if (ifminor == (minor_t)-1)
1139 		return (NULL);
1140 	if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) {
1141 		id_free(ipnet_minor_space, ifminor);
1142 		return (NULL);
1143 	}
1144 
1145 	(void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
1146 	ipnetif->if_index = index;
1147 
1148 	mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
1149 	list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
1150 	    offsetof(ipnetif_addr_t, ifa_link));
1151 	list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
1152 	    offsetof(ipnetif_addr_t, ifa_link));
1153 	ipnetif->if_dev = makedevice(ipnet_major, ifminor);
1154 	mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
1155 	ipnetif->if_refcnt = 1;
1156 
1157 	mutex_enter(&ips->ips_avl_lock);
1158 	VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
1159 	avl_insert(&ips->ips_avl_by_index, ipnetif, where);
1160 	VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
1161 	avl_insert(&ips->ips_avl_by_name, ipnetif, where);
1162 	mutex_exit(&ips->ips_avl_lock);
1163 
1164 	return (ipnetif);
1165 }
1166 
1167 static void
1168 ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1169 {
1170 	ipnet_t	*ipnet;
1171 
1172 	ipnet_walkers_inc(ips);
1173 	/* Send a SIGHUP to all open streams associated with this ipnetif. */
1174 	for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL;
1175 	    ipnet = list_next(&ips->ips_str_list, ipnet)) {
1176 		if (ipnet->ipnet_if == ipnetif)
1177 			(void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1178 	}
1179 	ipnet_walkers_dec(ips);
1180 	mutex_enter(&ips->ips_avl_lock);
1181 	avl_remove(&ips->ips_avl_by_index, ipnetif);
1182 	avl_remove(&ips->ips_avl_by_name, ipnetif);
1183 	mutex_exit(&ips->ips_avl_lock);
1184 	/* Release the reference we implicitly held in ipnet_create_if(). */
1185 	ipnetif_refrele(ipnetif);
1186 }
1187 
1188 static void
1189 ipnet_purge_addrlist(list_t *addrlist)
1190 {
1191 	ipnetif_addr_t *ifa;
1192 
1193 	while ((ifa = list_head(addrlist)) != NULL) {
1194 		list_remove(addrlist, ifa);
1195 		kmem_free(ifa, sizeof (*ifa));
1196 	}
1197 }
1198 
1199 static void
1200 ipnet_free_if(ipnetif_t *ipnetif)
1201 {
1202 	ASSERT(ipnetif->if_refcnt == 0);
1203 
1204 	/* Remove IPv4/v6 address lists from the ipnetif */
1205 	ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
1206 	list_destroy(&ipnetif->if_ip4addr_list);
1207 	ipnet_purge_addrlist(&ipnetif->if_ip6addr_list);
1208 	list_destroy(&ipnetif->if_ip6addr_list);
1209 	mutex_destroy(&ipnetif->if_addr_lock);
1210 	mutex_destroy(&ipnetif->if_reflock);
1211 	id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
1212 	kmem_free(ipnetif, sizeof (*ipnetif));
1213 }
1214 
1215 /*
1216  * Create an ipnetif_addr_t with the given logical interface id (lif)
1217  * and add it to the supplied ipnetif.  The lif is the netinfo
1218  * representation of logical interface id, and we use this id to match
1219  * incoming netinfo events against our lists of addresses.
1220  */
1221 static void
1222 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
1223 {
1224 	ipnetif_addr_t		*ifaddr;
1225 	zoneid_t		zoneid;
1226 	struct sockaddr_in	bcast;
1227 	struct sockaddr_storage	addr;
1228 	net_ifaddr_t		type = NA_ADDRESS;
1229 	uint64_t		phyif = ipnetif->if_index;
1230 
1231 	if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
1232 	    net_getlifzone(nd, phyif, lif, &zoneid) != 0)
1233 		return;
1234 	if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
1235 		return;
1236 
1237 	ifaddr->ifa_zone = zoneid;
1238 	ifaddr->ifa_id = lif;
1239 
1240 	switch (addr.ss_family) {
1241 	case AF_INET:
1242 		ifaddr->ifa_ip4addr =
1243 		    ((struct sockaddr_in *)&addr)->sin_addr.s_addr;
1244 		/*
1245 		 * Try and get the broadcast address.  Note that it's okay for
1246 		 * an interface to not have a broadcast address, so we don't
1247 		 * fail the entire operation if net_getlifaddr() fails here.
1248 		 */
1249 		type = NA_BROADCAST;
1250 		if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0)
1251 			ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr;
1252 		break;
1253 	case AF_INET6:
1254 		ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr;
1255 		break;
1256 	}
1257 
1258 	mutex_enter(&ipnetif->if_addr_lock);
1259 	list_insert_tail(addr.ss_family == AF_INET ?
1260 	    &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
1261 	mutex_exit(&ipnetif->if_addr_lock);
1262 }
1263 
1264 static void
1265 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
1266 {
1267 	mutex_enter(&ipnetif->if_addr_lock);
1268 	list_remove(isv6 ?
1269 	    &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
1270 	mutex_exit(&ipnetif->if_addr_lock);
1271 	kmem_free(ifaddr, sizeof (*ifaddr));
1272 }
1273 
1274 static void
1275 ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips,
1276     boolean_t isv6)
1277 {
1278 	ipnetif_t	*ipnetif;
1279 	boolean_t	refrele_needed = B_TRUE;
1280 
1281 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) {
1282 		ipnetif = ipnet_create_if(ifname, ifindex, ips);
1283 		refrele_needed = B_FALSE;
1284 	}
1285 	if (ipnetif != NULL) {
1286 		ipnetif->if_flags |=
1287 		    isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
1288 	}
1289 
1290 	if (ipnetif->if_multicnt != 0) {
1291 		if (ip_join_allmulti(ifindex, isv6,
1292 		    ips->ips_netstack->netstack_ip) == 0) {
1293 			ipnetif->if_flags |=
1294 			    isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI;
1295 		}
1296 	}
1297 
1298 	if (refrele_needed)
1299 		ipnetif_refrele(ipnetif);
1300 }
1301 
1302 static void
1303 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
1304 {
1305 	ipnetif_t	*ipnetif;
1306 
1307 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
1308 		return;
1309 
1310 	mutex_enter(&ipnetif->if_addr_lock);
1311 	ipnet_purge_addrlist(isv6 ?
1312 	    &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list);
1313 	mutex_exit(&ipnetif->if_addr_lock);
1314 
1315 	/*
1316 	 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1317 	 * separate NE_UNPLUMB events for IPv4 and IPv6.  We remove the ipnetif
1318 	 * if both IPv4 and IPv6 interfaces have been unplumbed.
1319 	 */
1320 	ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
1321 	if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
1322 		ipnet_remove_if(ipnetif, ips);
1323 	ipnetif_refrele(ipnetif);
1324 }
1325 
1326 static void
1327 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
1328     ipnet_stack_t *ips, boolean_t isv6)
1329 {
1330 	ipnetif_t	*ipnetif;
1331 	ipnetif_addr_t	*ifaddr;
1332 
1333 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
1334 		return;
1335 	if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
1336 		/*
1337 		 * We must have missed a NE_LIF_DOWN event.  Delete this
1338 		 * ifaddr and re-create it.
1339 		 */
1340 		ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1341 	}
1342 
1343 	ipnet_add_ifaddr(lifindex, ipnetif, nd);
1344 	ipnetif_refrele(ipnetif);
1345 }
1346 
1347 static void
1348 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
1349     boolean_t isv6)
1350 {
1351 	ipnetif_t	*ipnetif;
1352 	ipnetif_addr_t	*ifaddr;
1353 
1354 	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
1355 		return;
1356 	if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
1357 		ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1358 	ipnetif_refrele(ipnetif);
1359 	/*
1360 	 * Make sure that open streams on this ipnetif are still allowed to
1361 	 * have it open.
1362 	 */
1363 	ipnet_if_zonecheck(ipnetif, ips);
1364 }
1365 
1366 /*
1367  * This callback from the NIC event framework dispatches a taskq as the event
1368  * handlers may block.
1369  */
1370 /* ARGSUSED */
1371 static int
1372 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg)
1373 {
1374 	ipnet_stack_t		*ips = arg;
1375 	hook_nic_event_t	*hn = (hook_nic_event_t *)info;
1376 	ipnet_nicevent_t	*ipne;
1377 
1378 	if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL)
1379 		return (0);
1380 	ipne->ipne_event = hn->hne_event;
1381 	ipne->ipne_protocol = hn->hne_protocol;
1382 	ipne->ipne_stackid = ips->ips_netstack->netstack_stackid;
1383 	ipne->ipne_ifindex = hn->hne_nic;
1384 	ipne->ipne_lifindex = hn->hne_lif;
1385 	if (hn->hne_datalen != 0) {
1386 		(void) strlcpy(ipne->ipne_ifname, hn->hne_data,
1387 		    sizeof (ipne->ipne_ifname));
1388 	}
1389 	(void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task,
1390 	    ipne, DDI_NOSLEEP);
1391 	return (0);
1392 }
1393 
1394 static void
1395 ipnet_nicevent_task(void *arg)
1396 {
1397 	ipnet_nicevent_t	*ipne = arg;
1398 	netstack_t		*ns;
1399 	ipnet_stack_t		*ips;
1400 	boolean_t		isv6;
1401 
1402 	if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL)
1403 		goto done;
1404 	ips = ns->netstack_ipnet;
1405 	isv6 = (ipne->ipne_protocol == ips->ips_ndv6);
1406 
1407 	mutex_enter(&ips->ips_event_lock);
1408 	switch (ipne->ipne_event) {
1409 	case NE_PLUMB:
1410 		ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips,
1411 		    isv6);
1412 		break;
1413 	case NE_UNPLUMB:
1414 		ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
1415 		break;
1416 	case NE_LIF_UP:
1417 		ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex,
1418 		    ipne->ipne_protocol, ips, isv6);
1419 		break;
1420 	case NE_LIF_DOWN:
1421 		ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips,
1422 		    isv6);
1423 		break;
1424 	default:
1425 		break;
1426 	}
1427 	mutex_exit(&ips->ips_event_lock);
1428 done:
1429 	if (ns != NULL)
1430 		netstack_rele(ns);
1431 	kmem_free(ipne, sizeof (ipnet_nicevent_t));
1432 }
1433 
1434 dev_t
1435 ipnet_if_getdev(char *name, zoneid_t zoneid)
1436 {
1437 	netstack_t	*ns;
1438 	ipnet_stack_t	*ips;
1439 	ipnetif_t	*ipnetif;
1440 	dev_t		dev = (dev_t)-1;
1441 
1442 	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1443 		return (dev);
1444 	if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1445 		return (dev);
1446 
1447 	ips = ns->netstack_ipnet;
1448 	mutex_enter(&ips->ips_avl_lock);
1449 	if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
1450 		if (ipnet_if_in_zone(ipnetif, zoneid, ips))
1451 			dev = ipnetif->if_dev;
1452 	}
1453 	mutex_exit(&ips->ips_avl_lock);
1454 	netstack_rele(ns);
1455 
1456 	return (dev);
1457 }
1458 
1459 static ipnetif_t *
1460 ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips)
1461 {
1462 	ipnetif_t	*ipnetif;
1463 
1464 	mutex_enter(&ips->ips_avl_lock);
1465 	if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL)
1466 		ipnetif_refhold(ipnetif);
1467 	mutex_exit(&ips->ips_avl_lock);
1468 	return (ipnetif);
1469 }
1470 
1471 static ipnetif_t *
1472 ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips)
1473 {
1474 	ipnetif_t	*ipnetif;
1475 	avl_tree_t	*tree;
1476 
1477 	mutex_enter(&ips->ips_avl_lock);
1478 	tree = &ips->ips_avl_by_index;
1479 	for (ipnetif = avl_first(tree); ipnetif != NULL;
1480 	    ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) {
1481 		if (ipnetif->if_dev == dev) {
1482 			ipnetif_refhold(ipnetif);
1483 			break;
1484 		}
1485 	}
1486 	mutex_exit(&ips->ips_avl_lock);
1487 	return (ipnetif);
1488 }
1489 
1490 static ipnetif_addr_t *
1491 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
1492 {
1493 	ipnetif_addr_t	*ifaddr;
1494 	list_t		*list;
1495 
1496 	mutex_enter(&ipnetif->if_addr_lock);
1497 	list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
1498 	for (ifaddr = list_head(list); ifaddr != NULL;
1499 	    ifaddr = list_next(list, ifaddr)) {
1500 		if (lid == ifaddr->ifa_id)
1501 			break;
1502 	}
1503 	mutex_exit(&ipnetif->if_addr_lock);
1504 	return (ifaddr);
1505 }
1506 
1507 /* ARGSUSED */
1508 static void *
1509 ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
1510 {
1511 	ipnet_stack_t	*ips;
1512 
1513 	ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
1514 	ips->ips_netstack = ns;
1515 	mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
1516 	avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index,
1517 	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
1518 	avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name,
1519 	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
1520 	mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
1521 	cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
1522 	list_create(&ips->ips_str_list, sizeof (ipnet_t),
1523 	    offsetof(ipnet_t, ipnet_next));
1524 	ipnet_register_netihook(ips);
1525 	return (ips);
1526 }
1527 
1528 /* ARGSUSED */
1529 static void
1530 ipnet_stack_fini(netstackid_t stackid, void *arg)
1531 {
1532 	ipnet_stack_t	*ips = arg;
1533 	ipnetif_t	*ipnetif, *nipnetif;
1534 
1535 	if (ips->ips_ndv4 != NULL) {
1536 		VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
1537 		    ips->ips_nicevents) == 0);
1538 		VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
1539 	}
1540 	if (ips->ips_ndv6 != NULL) {
1541 		VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS,
1542 		    ips->ips_nicevents) == 0);
1543 		VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
1544 	}
1545 	hook_free(ips->ips_nicevents);
1546 
1547 	for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1548 	    ipnetif = nipnetif) {
1549 		nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
1550 		ipnet_remove_if(ipnetif, ips);
1551 	}
1552 	avl_destroy(&ips->ips_avl_by_index);
1553 	avl_destroy(&ips->ips_avl_by_name);
1554 	mutex_destroy(&ips->ips_avl_lock);
1555 	mutex_destroy(&ips->ips_walkers_lock);
1556 	cv_destroy(&ips->ips_walkers_cv);
1557 	list_destroy(&ips->ips_str_list);
1558 	kmem_free(ips, sizeof (*ips));
1559 }
1560 
1561 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1562 static boolean_t
1563 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
1564 {
1565 	ipnetif_addr_t *ifa;
1566 
1567 	for (ifa = list_head(addrlist); ifa != NULL;
1568 	    ifa = list_next(addrlist, ifa)) {
1569 		if (ifa->ifa_zone == zoneid)
1570 			return (B_TRUE);
1571 	}
1572 	return (B_FALSE);
1573 }
1574 
1575 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1576 static boolean_t
1577 ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
1578 {
1579 	int ret;
1580 
1581 	/*
1582 	 * The global zone has visibility into all interfaces in the global
1583 	 * stack, and exclusive stack zones have visibility into all
1584 	 * interfaces in their stack.
1585 	 */
1586 	if (zoneid == GLOBAL_ZONEID ||
1587 	    ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1588 		return (B_TRUE);
1589 
1590 	/*
1591 	 * Shared-stack zones only have visibility for interfaces that have
1592 	 * addresses in their zone.
1593 	 */
1594 	mutex_enter(&ipnetif->if_addr_lock);
1595 	ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) ||
1596 	    ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid);
1597 	mutex_exit(&ipnetif->if_addr_lock);
1598 	return (ret);
1599 }
1600 
1601 /*
1602  * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1603  * still be allowed to have it open.  A given ipnet_t may no longer be allowed
1604  * to have an ipnetif open if there are no longer any addresses that belong to
1605  * the ipnetif in the ipnet_t's non-global shared-stack zoneid.  If that's the
1606  * case, send the ipnet_t an M_HANGUP.
1607  */
1608 static void
1609 ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1610 {
1611 	list_t	*strlist = &ips->ips_str_list;
1612 	ipnet_t	*ipnet;
1613 
1614 	ipnet_walkers_inc(ips);
1615 	for (ipnet = list_head(strlist); ipnet != NULL;
1616 	    ipnet = list_next(strlist, ipnet)) {
1617 		if (ipnet->ipnet_if != ipnetif)
1618 			continue;
1619 		if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
1620 			(void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1621 	}
1622 	ipnet_walkers_dec(ips);
1623 }
1624 
1625 void
1626 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
1627 {
1628 	ipnetif_t 		*ipnetif;
1629 	list_t			cbdata;
1630 	ipnetif_cbdata_t	*cbnode;
1631 	netstack_t		*ns;
1632 	ipnet_stack_t		*ips;
1633 
1634 	/*
1635 	 * On labeled systems, non-global zones shouldn't see anything
1636 	 * in /dev/ipnet.
1637 	 */
1638 	if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1639 		return;
1640 
1641 	if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1642 		return;
1643 
1644 	ips = ns->netstack_ipnet;
1645 	list_create(&cbdata, sizeof (ipnetif_cbdata_t),
1646 	    offsetof(ipnetif_cbdata_t, ic_next));
1647 
1648 	mutex_enter(&ips->ips_avl_lock);
1649 	for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1650 	    ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
1651 		if (!ipnet_if_in_zone(ipnetif, zoneid, ips))
1652 			continue;
1653 		cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
1654 		(void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
1655 		cbnode->ic_dev = ipnetif->if_dev;
1656 		list_insert_head(&cbdata, cbnode);
1657 	}
1658 	mutex_exit(&ips->ips_avl_lock);
1659 
1660 	while ((cbnode = list_head(&cbdata)) != NULL) {
1661 		cb(cbnode->ic_ifname, arg, cbnode->ic_dev);
1662 		list_remove(&cbdata, cbnode);
1663 		kmem_free(cbnode, sizeof (ipnetif_cbdata_t));
1664 	}
1665 	list_destroy(&cbdata);
1666 	netstack_rele(ns);
1667 }
1668 
1669 static int
1670 ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp)
1671 {
1672 	int64_t index1 = *((int64_t *)index_ptr);
1673 	int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
1674 
1675 	return (SIGNOF(index2 - index1));
1676 }
1677 
1678 static int
1679 ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp)
1680 {
1681 	int res;
1682 
1683 	res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
1684 	return (SIGNOF(res));
1685 }
1686 
1687 static void
1688 ipnetif_refhold(ipnetif_t *ipnetif)
1689 {
1690 	mutex_enter(&ipnetif->if_reflock);
1691 	ipnetif->if_refcnt++;
1692 	mutex_exit(&ipnetif->if_reflock);
1693 }
1694 
1695 static void
1696 ipnetif_refrele(ipnetif_t *ipnetif)
1697 {
1698 	mutex_enter(&ipnetif->if_reflock);
1699 	ASSERT(ipnetif->if_refcnt != 0);
1700 	if (--ipnetif->if_refcnt == 0)
1701 		ipnet_free_if(ipnetif);
1702 	else
1703 		mutex_exit(&ipnetif->if_reflock);
1704 }
1705 
1706 static void
1707 ipnet_walkers_inc(ipnet_stack_t *ips)
1708 {
1709 	mutex_enter(&ips->ips_walkers_lock);
1710 	ips->ips_walkers_cnt++;
1711 	mutex_exit(&ips->ips_walkers_lock);
1712 }
1713 
1714 static void
1715 ipnet_walkers_dec(ipnet_stack_t *ips)
1716 {
1717 	mutex_enter(&ips->ips_walkers_lock);
1718 	ASSERT(ips->ips_walkers_cnt != 0);
1719 	if (--ips->ips_walkers_cnt == 0)
1720 		cv_broadcast(&ips->ips_walkers_cv);
1721 	mutex_exit(&ips->ips_walkers_lock);
1722 }
1723