xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_multi.c (revision 3db86aab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/strsun.h>
34 #include <sys/ddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/zone.h>
37 
38 #include <sys/param.h>
39 #include <sys/socket.h>
40 #include <sys/sockio.h>
41 #include <net/if.h>
42 #include <sys/systm.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <net/if_dl.h>
46 #include <netinet/ip6.h>
47 #include <netinet/icmp6.h>
48 
49 #include <inet/common.h>
50 #include <inet/mi.h>
51 #include <inet/nd.h>
52 #include <inet/arp.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ndp.h>
57 #include <inet/ip_multi.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ipsec_impl.h>
60 #include <inet/sctp_ip.h>
61 #include <inet/ip_listutils.h>
62 #include <inet/udp_impl.h>
63 
64 /* igmpv3/mldv2 source filter manipulation */
65 static void	ilm_bld_flists(conn_t *conn, void *arg);
66 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
67     slist_t *flist);
68 
69 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
70     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
71     int orig_ifindex, zoneid_t zoneid);
72 static void	ilm_delete(ilm_t *ilm);
73 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
74 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
75 static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
76     const in6_addr_t *v6group, int index);
77 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
78     ipif_t *ipif);
79 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
80     mcast_record_t fmode, ipaddr_t src);
81 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
82     mcast_record_t fmode, const in6_addr_t *v6src);
83 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
84 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
85     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
86 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
87     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
88 static void	conn_ilg_reap(conn_t *connp);
89 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
90     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
91 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
92     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
93     const in6_addr_t *v6src);
94 
95 /*
96  * MT notes:
97  *
98  * Multicast joins operate on both the ilg and ilm structures. Multiple
99  * threads operating on a conn (socket) trying to do multicast joins
100  * need to synchronize when operating on the ilg. Multiple threads
101  * potentially operating on different conns (socket endpoints) trying to
102  * do multicast joins could eventually end up trying to manipulate the
103  * ilm simultaneously and need to synchronize on the access to the ilm.
104  * Both are amenable to standard Solaris MT techniques, but it would be
105  * complex to handle a failover or failback which needs to manipulate
106  * ilg/ilms if an application can also simultaneously join/leave
107  * multicast groups. Hence multicast join/leave also go through the ipsq_t
108  * serialization.
109  *
110  * Multicast joins and leaves are single-threaded per phyint/IPMP group
111  * using the ipsq serialization mechanism.
112  *
113  * An ilm is an IP data structure used to track multicast join/leave.
114  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
115  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
116  * referencing the ilm. ilms are created / destroyed only as writer. ilms
117  * are not passed around, instead they are looked up and used under the
118  * ill_lock or as writer. So we don't need a dynamic refcount of the number
119  * of threads holding reference to an ilm.
120  *
121  * Multicast Join operation:
122  *
123  * The first step is to determine the ipif (v4) or ill (v6) on which
124  * the join operation is to be done. The join is done after becoming
125  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
126  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
127  * Multiple threads can attempt to join simultaneously on different ipif/ill
128  * on the same conn. In this case the ipsq serialization does not help in
129  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
130  * The conn_lock also protects all the ilg_t members.
131  *
132  * Leave operation.
133  *
134  * Similar to the join operation, the first step is to determine the ipif
135  * or ill (v6) on which the leave operation is to be done. The leave operation
136  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
137  * As with join, ilg modification is done under the protection of the conn lock.
138  */
139 
/*
 * IPSQ_ENTER_IPIF / IPSQ_ENTER_ILL
 *
 * Try to become exclusive on the ipsq of the given ipif (or ill) by
 * calling ipsq_try_enter(), storing the result in `ipsq'.  If that call
 * returns NULL, the reference on the ipif (or ill) is released and the
 * *enclosing function* returns EINPROGRESS.
 *
 * These macros expand to several statements and contain a `return', so
 * they may only be used as a complete statement inside a braced block of
 * a function returning int; never as the unbraced body of an if/else/loop.
 * The statement shape is kept as-is for compatibility with existing call
 * sites; every argument is parenthesized in the expansion.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}

#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, (ill), CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}

/*
 * Leave the ipsq, if one was entered.  Written as a complete if/else so
 * that an `else' following the macro at a call site can never be captured
 * by the macro's own `if'.
 */
#define	IPSQ_EXIT(ipsq)						\
	if ((ipsq) == NULL) {					\
	} else							\
		ipsq_exit((ipsq), B_TRUE, B_TRUE);

/*
 * Walker accounting for conn_ilg.  ILG_WALKER_RELE calls conn_ilg_reap()
 * when the count drops to zero.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)

#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}
170 
171 static void
172 conn_ilg_reap(conn_t *connp)
173 {
174 	int	to;
175 	int	from;
176 
177 	ASSERT(MUTEX_HELD(&connp->conn_lock));
178 
179 	to = 0;
180 	from = 0;
181 	while (from < connp->conn_ilg_inuse) {
182 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
183 			FREE_SLIST(connp->conn_ilg[from].ilg_filter);
184 			from++;
185 			continue;
186 		}
187 		if (to != from)
188 			connp->conn_ilg[to] = connp->conn_ilg[from];
189 		to++;
190 		from++;
191 	}
192 
193 	connp->conn_ilg_inuse = to;
194 
195 	if (connp->conn_ilg_inuse == 0) {
196 		mi_free((char *)connp->conn_ilg);
197 		connp->conn_ilg = NULL;
198 		cv_broadcast(&connp->conn_refcv);
199 	}
200 }
201 
202 #define	GETSTRUCT(structure, number)	\
203 	((structure *)mi_zalloc(sizeof (structure) * (number)))
204 
205 #define	ILG_ALLOC_CHUNK	16
206 
207 /*
208  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
209  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
210  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
211  * returned ilg).  Returns NULL on failure (ENOMEM).
212  *
213  * Assumes connp->conn_lock is held.
214  */
215 static ilg_t *
216 conn_ilg_alloc(conn_t *connp)
217 {
218 	ilg_t *new;
219 	int curcnt;
220 
221 	ASSERT(MUTEX_HELD(&connp->conn_lock));
222 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
223 
224 	if (connp->conn_ilg == NULL) {
225 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
226 		if (connp->conn_ilg == NULL)
227 			return (NULL);
228 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
229 		connp->conn_ilg_inuse = 0;
230 	}
231 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
232 		curcnt = connp->conn_ilg_allocated;
233 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
234 		if (new == NULL)
235 			return (NULL);
236 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
237 		mi_free((char *)connp->conn_ilg);
238 		connp->conn_ilg = new;
239 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
240 	}
241 
242 	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
243 }
244 
245 typedef struct ilm_fbld_s {
246 	ilm_t		*fbld_ilm;
247 	int		fbld_in_cnt;
248 	int		fbld_ex_cnt;
249 	slist_t		fbld_in;
250 	slist_t		fbld_ex;
251 	boolean_t	fbld_in_overflow;
252 } ilm_fbld_t;
253 
254 static void
255 ilm_bld_flists(conn_t *conn, void *arg)
256 {
257 	int i;
258 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
259 	ilm_t *ilm = fbld->fbld_ilm;
260 	in6_addr_t *v6group = &ilm->ilm_v6addr;
261 
262 	if (conn->conn_ilg_inuse == 0)
263 		return;
264 
265 	/*
266 	 * Since we can't break out of the ipcl_walk once started, we still
267 	 * have to look at every conn.  But if we've already found one
268 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
269 	 * ilgs--that will be our state.
270 	 */
271 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
272 		return;
273 
274 	/*
275 	 * Check this conn's ilgs to see if any are interested in our
276 	 * ilm (group, interface match).  If so, update the master
277 	 * include and exclude lists we're building in the fbld struct
278 	 * with this ilg's filter info.
279 	 */
280 	mutex_enter(&conn->conn_lock);
281 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
282 		ilg_t *ilg = &conn->conn_ilg[i];
283 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
284 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
285 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
286 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
287 				fbld->fbld_in_cnt++;
288 				if (!fbld->fbld_in_overflow)
289 					l_union_in_a(&fbld->fbld_in,
290 					    ilg->ilg_filter,
291 					    &fbld->fbld_in_overflow);
292 			} else {
293 				fbld->fbld_ex_cnt++;
294 				/*
295 				 * On the first exclude list, don't try to do
296 				 * an intersection, as the master exclude list
297 				 * is intentionally empty.  If the master list
298 				 * is still empty on later iterations, that
299 				 * means we have at least one ilg with an empty
300 				 * exclude list, so that should be reflected
301 				 * when we take the intersection.
302 				 */
303 				if (fbld->fbld_ex_cnt == 1) {
304 					if (ilg->ilg_filter != NULL)
305 						l_copy(ilg->ilg_filter,
306 						    &fbld->fbld_ex);
307 				} else {
308 					l_intersection_in_a(&fbld->fbld_ex,
309 					    ilg->ilg_filter);
310 				}
311 			}
312 			/* there will only be one match, so break now. */
313 			break;
314 		}
315 	}
316 	mutex_exit(&conn->conn_lock);
317 }
318 
319 static void
320 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
321 {
322 	ilm_fbld_t fbld;
323 
324 	fbld.fbld_ilm = ilm;
325 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
326 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
327 	fbld.fbld_in_overflow = B_FALSE;
328 
329 	/* first, construct our master include and exclude lists */
330 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld);
331 
332 	/* now use those master lists to generate the interface filter */
333 
334 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
335 	if (fbld.fbld_in_overflow) {
336 		*fmode = MODE_IS_EXCLUDE;
337 		flist->sl_numsrc = 0;
338 		return;
339 	}
340 
341 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
342 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
343 		*fmode = MODE_IS_INCLUDE;
344 		flist->sl_numsrc = 0;
345 		return;
346 	}
347 
348 	/*
349 	 * If there are no exclude lists, then the interface filter
350 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
351 	 * exclude list makes the interface filter EXCLUDE, with its
352 	 * filter list equal to (fbld_ex - fbld_in).
353 	 */
354 	if (fbld.fbld_ex_cnt == 0) {
355 		*fmode = MODE_IS_INCLUDE;
356 		l_copy(&fbld.fbld_in, flist);
357 	} else {
358 		*fmode = MODE_IS_EXCLUDE;
359 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
360 	}
361 }
362 
363 /*
364  * If the given interface has failed, choose a new one to join on so
365  * that we continue to receive packets.  ilg_orig_ifindex remembers
366  * what the application used to join on so that we know the ilg to
367  * delete even though we change the ill here.  Callers will store the
368  * ilg returned from this function in ilg_ill.  Thus when we receive
369  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
370  *
371  * This function must be called as writer so we can walk the group
372  * list and examine flags without holding a lock.
373  */
374 ill_t *
375 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
376 {
377 	ill_t	*till;
378 	ill_group_t *illgrp = ill->ill_group;
379 
380 	ASSERT(IAM_WRITER_ILL(ill));
381 
382 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
383 		return (ill);
384 
385 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
386 		return (ill);
387 
388 	till = illgrp->illgrp_ill;
389 	while (till != NULL &&
390 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
391 		till = till->ill_group_next;
392 	}
393 	if (till != NULL)
394 		return (till);
395 
396 	return (ill);
397 }
398 
399 static int
400 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
401     boolean_t isv6)
402 {
403 	mcast_record_t fmode;
404 	slist_t *flist;
405 	boolean_t fdefault;
406 	char buf[INET6_ADDRSTRLEN];
407 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
408 
409 	/*
410 	 * There are several cases where the ilm's filter state
411 	 * defaults to (EXCLUDE, NULL):
412 	 *	- we've had previous joins without associated ilgs
413 	 *	- this join has no associated ilg
414 	 *	- the ilg's filter state is (EXCLUDE, NULL)
415 	 */
416 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
417 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
418 
419 	/* attempt mallocs (if needed) before doing anything else */
420 	if ((flist = l_alloc()) == NULL)
421 		return (ENOMEM);
422 	if (!fdefault && ilm->ilm_filter == NULL) {
423 		ilm->ilm_filter = l_alloc();
424 		if (ilm->ilm_filter == NULL) {
425 			l_free(flist);
426 			return (ENOMEM);
427 		}
428 	}
429 
430 	if (ilgstat != ILGSTAT_CHANGE)
431 		ilm->ilm_refcnt++;
432 
433 	if (ilgstat == ILGSTAT_NONE)
434 		ilm->ilm_no_ilg_cnt++;
435 
436 	/*
437 	 * Determine new filter state.  If it's not the default
438 	 * (EXCLUDE, NULL), we must walk the conn list to find
439 	 * any ilgs interested in this group, and re-build the
440 	 * ilm filter.
441 	 */
442 	if (fdefault) {
443 		fmode = MODE_IS_EXCLUDE;
444 		flist->sl_numsrc = 0;
445 	} else {
446 		ilm_gen_filter(ilm, &fmode, flist);
447 	}
448 
449 	/* make sure state actually changed; nothing to do if not. */
450 	if ((ilm->ilm_fmode == fmode) &&
451 	    !lists_are_different(ilm->ilm_filter, flist)) {
452 		l_free(flist);
453 		return (0);
454 	}
455 
456 	/* send the state change report */
457 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
458 		if (isv6)
459 			mld_statechange(ilm, fmode, flist);
460 		else
461 			igmp_statechange(ilm, fmode, flist);
462 	}
463 
464 	/* update the ilm state */
465 	ilm->ilm_fmode = fmode;
466 	if (flist->sl_numsrc > 0)
467 		l_copy(flist, ilm->ilm_filter);
468 	else
469 		CLEAR_SLIST(ilm->ilm_filter);
470 
471 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
472 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
473 
474 	l_free(flist);
475 	return (0);
476 }
477 
478 static int
479 ilm_update_del(ilm_t *ilm, boolean_t isv6)
480 {
481 	mcast_record_t fmode;
482 	slist_t *flist;
483 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
484 
485 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
486 	    ilm->ilm_refcnt));
487 
488 	if ((flist = l_alloc()) == NULL)
489 		return (ENOMEM);
490 
491 	/*
492 	 * If present, the ilg in question has already either been
493 	 * updated or removed from our list; so all we need to do
494 	 * now is walk the list to update the ilm filter state.
495 	 *
496 	 * Skip the list walk if we have any no-ilg joins, which
497 	 * cause the filter state to revert to (EXCLUDE, NULL).
498 	 */
499 	if (ilm->ilm_no_ilg_cnt != 0) {
500 		fmode = MODE_IS_EXCLUDE;
501 		flist->sl_numsrc = 0;
502 	} else {
503 		ilm_gen_filter(ilm, &fmode, flist);
504 	}
505 
506 	/* check to see if state needs to be updated */
507 	if ((ilm->ilm_fmode == fmode) &&
508 	    (!lists_are_different(ilm->ilm_filter, flist))) {
509 		l_free(flist);
510 		return (0);
511 	}
512 
513 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
514 		if (isv6)
515 			mld_statechange(ilm, fmode, flist);
516 		else
517 			igmp_statechange(ilm, fmode, flist);
518 	}
519 
520 	ilm->ilm_fmode = fmode;
521 	if (flist->sl_numsrc > 0) {
522 		if (ilm->ilm_filter == NULL) {
523 			ilm->ilm_filter = l_alloc();
524 			if (ilm->ilm_filter == NULL) {
525 				char buf[INET6_ADDRSTRLEN];
526 				ip1dbg(("ilm_update_del: failed to alloc ilm "
527 				    "filter; no source filtering for %s on %s",
528 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
529 				    buf, sizeof (buf)), ill->ill_name));
530 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
531 				l_free(flist);
532 				return (0);
533 			}
534 		}
535 		l_copy(flist, ilm->ilm_filter);
536 	} else {
537 		CLEAR_SLIST(ilm->ilm_filter);
538 	}
539 
540 	l_free(flist);
541 	return (0);
542 }
543 
544 /*
545  * INADDR_ANY means all multicast addresses. This is only used
546  * by the multicast router.
547  * INADDR_ANY is stored as IPv6 unspecified addr.
548  */
549 int
550 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
551     mcast_record_t ilg_fmode, slist_t *ilg_flist)
552 {
553 	ill_t	*ill = ipif->ipif_ill;
554 	ilm_t 	*ilm;
555 	in6_addr_t v6group;
556 	int	ret;
557 
558 	ASSERT(IAM_WRITER_IPIF(ipif));
559 
560 	if (!CLASSD(group) && group != INADDR_ANY)
561 		return (EINVAL);
562 
563 	/*
564 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
565 	 */
566 	if (group == INADDR_ANY)
567 		v6group = ipv6_all_zeros;
568 	else
569 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
570 
571 	ilm = ilm_lookup_ipif(ipif, group);
572 	if (ilm != NULL)
573 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
574 
575 	/*
576 	 * ilms are associated with ipifs in IPv4. It moves with the
577 	 * ipif if the ipif moves to a new ill when the interface
578 	 * fails. Thus we really don't check whether the ipif_ill
579 	 * has failed like in IPv6. If it has FAILED the ipif
580 	 * will move (daemon will move it) and hence the ilm, if the
581 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
582 	 * we continue to receive in the same place even if the
583 	 * interface fails.
584 	 */
585 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
586 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
587 	if (ilm == NULL)
588 		return (ENOMEM);
589 
590 	if (group == INADDR_ANY) {
591 		/*
592 		 * Check how many ipif's have members in this group -
593 		 * if more then one we should not tell the driver to join
594 		 * this time
595 		 */
596 		if (ilm_numentries_v6(ill, &v6group) > 1)
597 			return (0);
598 		if (ill->ill_group == NULL)
599 			ret = ip_join_allmulti(ipif);
600 		else
601 			ret = ill_nominate_mcast_rcv(ill->ill_group);
602 		if (ret != 0)
603 			ilm_delete(ilm);
604 		return (ret);
605 	}
606 
607 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
608 		igmp_joingroup(ilm);
609 
610 	if (ilm_numentries_v6(ill, &v6group) > 1)
611 		return (0);
612 
613 	ret = ip_ll_addmulti_v6(ipif, &v6group);
614 	if (ret != 0)
615 		ilm_delete(ilm);
616 	return (ret);
617 }
618 
619 /*
620  * The unspecified address means all multicast addresses.
621  * This is only used by the multicast router.
622  *
623  * ill identifies the interface to join on; it may not match the
624  * interface requested by the application of a failover has taken
625  * place.  orig_ifindex always identifies the interface requested
626  * by the app.
627  *
628  * ilgstat tells us if there's an ilg associated with this join,
629  * and if so, if it's a new ilg or a change to an existing one.
630  * ilg_fmode and ilg_flist give us the current filter state of
631  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
632  */
633 int
634 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
635     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
636     slist_t *ilg_flist)
637 {
638 	ilm_t	*ilm;
639 	int	ret;
640 
641 	ASSERT(IAM_WRITER_ILL(ill));
642 
643 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
644 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
645 		return (EINVAL);
646 	}
647 
648 	/*
649 	 * An ilm is uniquely identified by the tuple of (group, ill,
650 	 * orig_ill).  group is the multicast group address, ill is
651 	 * the interface on which it is currently joined, and orig_ill
652 	 * is the interface on which the application requested the
653 	 * join.  orig_ill and ill are the same unless orig_ill has
654 	 * failed over.
655 	 *
656 	 * Both orig_ill and ill are required, which means we may have
657 	 * 2 ilms on an ill for the same group, but with different
658 	 * orig_ills.  These must be kept separate, so that when failback
659 	 * occurs, the appropriate ilms are moved back to their orig_ill
660 	 * without disrupting memberships on the ill to which they had
661 	 * been moved.
662 	 *
663 	 * In order to track orig_ill, we store orig_ifindex in the
664 	 * ilm and ilg.
665 	 */
666 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
667 	if (ilm != NULL)
668 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
669 
670 	/*
671 	 * We need to remember where the application really wanted
672 	 * to join. This will be used later if we want to failback
673 	 * to the original interface.
674 	 */
675 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
676 	    ilg_flist, orig_ifindex, zoneid);
677 	if (ilm == NULL)
678 		return (ENOMEM);
679 
680 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
681 		/*
682 		 * Check how many ipif's that have members in this group -
683 		 * if more then one we should not tell the driver to join
684 		 * this time
685 		 */
686 		if (ilm_numentries_v6(ill, v6group) > 1)
687 			return (0);
688 		if (ill->ill_group == NULL)
689 			ret = ip_join_allmulti(ill->ill_ipif);
690 		else
691 			ret = ill_nominate_mcast_rcv(ill->ill_group);
692 
693 		if (ret != 0)
694 			ilm_delete(ilm);
695 		return (ret);
696 	}
697 
698 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
699 		mld_joingroup(ilm);
700 
701 	/*
702 	 * If we have more then one we should not tell the driver
703 	 * to join this time.
704 	 */
705 	if (ilm_numentries_v6(ill, v6group) > 1)
706 		return (0);
707 
708 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
709 	if (ret != 0)
710 		ilm_delete(ilm);
711 	return (ret);
712 }
713 
714 /*
715  * Send a multicast request to the driver for enabling multicast reception
716  * for v6groupp address. The caller has already checked whether it is
717  * appropriate to send one or not.
718  */
719 int
720 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
721 {
722 	mblk_t	*mp;
723 	uint32_t addrlen, addroff;
724 	char	group_buf[INET6_ADDRSTRLEN];
725 
726 	ASSERT(IAM_WRITER_ILL(ill));
727 
728 	/*
729 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
730 	 * on.
731 	 */
732 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
733 	    &addrlen, &addroff);
734 	if (!mp)
735 		return (ENOMEM);
736 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
737 		ipaddr_t v4group;
738 
739 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
740 		/*
741 		 * NOTE!!!
742 		 * The "addroff" passed in here was calculated by
743 		 * ill_create_dl(), and will be used by ill_create_squery()
744 		 * to perform some twisted coding magic. It is the offset
745 		 * into the dl_xxx_req of the hw addr. Here, it will be
746 		 * added to b_wptr - b_rptr to create a magic number that
747 		 * is not an offset into this squery mblk.
748 		 * The actual hardware address will be accessed only in the
749 		 * dl_xxx_req, not in the squery. More importantly,
750 		 * that hardware address can *only* be accessed in this
751 		 * mblk chain by calling mi_offset_param_c(), which uses
752 		 * the magic number in the squery hw offset field to go
753 		 * to the *next* mblk (the dl_xxx_req), subtract the
754 		 * (b_wptr - b_rptr), and find the actual offset into
755 		 * the dl_xxx_req.
756 		 * Any method that depends on using the
757 		 * offset field in the dl_disabmulti_req or squery
758 		 * to find either hardware address will similarly fail.
759 		 *
760 		 * Look in ar_entry_squery() in arp.c to see how this offset
761 		 * is used.
762 		 */
763 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
764 		if (!mp)
765 			return (ENOMEM);
766 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
767 		    inet_ntop(AF_INET6, v6groupp, group_buf,
768 		    sizeof (group_buf)),
769 		    ill->ill_name));
770 		putnext(ill->ill_rq, mp);
771 	} else {
772 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on"
773 		    " %s\n",
774 		    inet_ntop(AF_INET6, v6groupp, group_buf,
775 		    sizeof (group_buf)),
776 		    ill->ill_name));
777 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
778 	}
779 	return (0);
780 }
781 
782 /*
783  * Send a multicast request to the driver for enabling multicast
784  * membership for v6group if appropriate.
785  */
786 static int
787 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
788 {
789 	ill_t	*ill = ipif->ipif_ill;
790 
791 	ASSERT(IAM_WRITER_IPIF(ipif));
792 
793 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
794 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
795 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
796 		return (0);	/* Must be IRE_IF_NORESOLVER */
797 	}
798 
799 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
800 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
801 		return (0);
802 	}
803 	if (ill->ill_ipif_up_count == 0) {
804 		/*
805 		 * Nobody there. All multicast addresses will be re-joined
806 		 * when we get the DL_BIND_ACK bringing the interface up.
807 		 */
808 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
809 		return (0);
810 	}
811 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
812 }
813 
814 /*
815  * INADDR_ANY means all multicast addresses. This is only used
816  * by the multicast router.
817  * INADDR_ANY is stored as the IPv6 unspecifed addr.
818  */
819 int
820 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
821 {
822 	ill_t	*ill = ipif->ipif_ill;
823 	ilm_t *ilm;
824 	in6_addr_t v6group;
825 	int	ret;
826 
827 	ASSERT(IAM_WRITER_IPIF(ipif));
828 
829 	if (!CLASSD(group) && group != INADDR_ANY)
830 		return (EINVAL);
831 
832 	/*
833 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
834 	 */
835 	if (group == INADDR_ANY)
836 		v6group = ipv6_all_zeros;
837 	else
838 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
839 
840 	/*
841 	 * Look for a match on the ipif.
842 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
843 	 */
844 	ilm = ilm_lookup_ipif(ipif, group);
845 	if (ilm == NULL)
846 		return (ENOENT);
847 
848 	/* Update counters */
849 	if (no_ilg)
850 		ilm->ilm_no_ilg_cnt--;
851 
852 	if (leaving)
853 		ilm->ilm_refcnt--;
854 
855 	if (ilm->ilm_refcnt > 0)
856 		return (ilm_update_del(ilm, B_FALSE));
857 
858 	if (group == INADDR_ANY) {
859 		ilm_delete(ilm);
860 		/*
861 		 * Check how many ipif's that have members in this group -
862 		 * if there are still some left then don't tell the driver
863 		 * to drop it.
864 		 */
865 		if (ilm_numentries_v6(ill, &v6group) != 0)
866 			return (0);
867 
868 		/*
869 		 * If we never joined, then don't leave.  This can happen
870 		 * if we're in an IPMP group, since only one ill per IPMP
871 		 * group receives all multicast packets.
872 		 */
873 		if (!ill->ill_join_allmulti) {
874 			ASSERT(ill->ill_group != NULL);
875 			return (0);
876 		}
877 
878 		ret = ip_leave_allmulti(ipif);
879 		if (ill->ill_group != NULL)
880 			(void) ill_nominate_mcast_rcv(ill->ill_group);
881 		return (ret);
882 	}
883 
884 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
885 		igmp_leavegroup(ilm);
886 
887 	ilm_delete(ilm);
888 	/*
889 	 * Check how many ipif's that have members in this group -
890 	 * if there are still some left then don't tell the driver
891 	 * to drop it.
892 	 */
893 	if (ilm_numentries_v6(ill, &v6group) != 0)
894 		return (0);
895 	return (ip_ll_delmulti_v6(ipif, &v6group));
896 }
897 
898 /*
899  * The unspecified address means all multicast addresses.
900  * This is only used by the multicast router.
901  */
902 int
903 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
904     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
905 {
906 	ipif_t	*ipif;
907 	ilm_t *ilm;
908 	int	ret;
909 
910 	ASSERT(IAM_WRITER_ILL(ill));
911 
912 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
913 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
914 		return (EINVAL);
915 
916 	/*
917 	 * Look for a match on the ill.
918 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
919 	 *
920 	 * Similar to ip_addmulti_v6, we should always look using
921 	 * the orig_ifindex.
922 	 *
923 	 * 1) If orig_ifindex is different from ill's ifindex
924 	 *    we should have an ilm with orig_ifindex created in
925 	 *    ip_addmulti_v6. We should delete that here.
926 	 *
927 	 * 2) If orig_ifindex is same as ill's ifindex, we should
928 	 *    not delete the ilm that is temporarily here because of
929 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
930 	 *    different from ill's ifindex.
931 	 *
932 	 * Thus, always lookup using orig_ifindex.
933 	 */
934 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
935 	if (ilm == NULL)
936 		return (ENOENT);
937 
938 	ASSERT(ilm->ilm_ill == ill);
939 
940 	ipif = ill->ill_ipif;
941 
942 	/* Update counters */
943 	if (no_ilg)
944 		ilm->ilm_no_ilg_cnt--;
945 
946 	if (leaving)
947 		ilm->ilm_refcnt--;
948 
949 	if (ilm->ilm_refcnt > 0)
950 		return (ilm_update_del(ilm, B_TRUE));
951 
952 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
953 		ilm_delete(ilm);
954 		/*
955 		 * Check how many ipif's that have members in this group -
956 		 * if there are still some left then don't tell the driver
957 		 * to drop it.
958 		 */
959 		if (ilm_numentries_v6(ill, v6group) != 0)
960 			return (0);
961 
962 		/*
963 		 * If we never joined, then don't leave.  This can happen
964 		 * if we're in an IPMP group, since only one ill per IPMP
965 		 * group receives all multicast packets.
966 		 */
967 		if (!ill->ill_join_allmulti) {
968 			ASSERT(ill->ill_group != NULL);
969 			return (0);
970 		}
971 
972 		ret = ip_leave_allmulti(ipif);
973 		if (ill->ill_group != NULL)
974 			(void) ill_nominate_mcast_rcv(ill->ill_group);
975 		return (ret);
976 	}
977 
978 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
979 		mld_leavegroup(ilm);
980 
981 	ilm_delete(ilm);
982 	/*
983 	 * Check how many ipif's that have members in this group -
984 	 * if there are still some left then don't tell the driver
985 	 * to drop it.
986 	 */
987 	if (ilm_numentries_v6(ill, v6group) != 0)
988 		return (0);
989 	return (ip_ll_delmulti_v6(ipif, v6group));
990 }
991 
992 /*
993  * Send a multicast request to the driver for disabling multicast reception
994  * for v6groupp address. The caller has already checked whether it is
995  * appropriate to send one or not.
996  */
997 int
998 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
999 {
1000 	mblk_t	*mp;
1001 	char	group_buf[INET6_ADDRSTRLEN];
1002 	uint32_t	addrlen, addroff;
1003 
1004 	ASSERT(IAM_WRITER_ILL(ill));
1005 	/*
1006 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1007 	 * on.
1008 	 */
1009 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1010 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1011 
1012 	if (!mp)
1013 		return (ENOMEM);
1014 
1015 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1016 		ipaddr_t v4group;
1017 
1018 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1019 		/*
1020 		 * NOTE!!!
1021 		 * The "addroff" passed in here was calculated by
1022 		 * ill_create_dl(), and will be used by ill_create_squery()
1023 		 * to perform some twisted coding magic. It is the offset
1024 		 * into the dl_xxx_req of the hw addr. Here, it will be
1025 		 * added to b_wptr - b_rptr to create a magic number that
1026 		 * is not an offset into this mblk.
1027 		 *
1028 		 * Please see the comment in ip_ll_send)enabmulti_req()
1029 		 * for a complete explanation.
1030 		 *
1031 		 * Look in ar_entry_squery() in arp.c to see how this offset
1032 		 * is used.
1033 		 */
1034 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1035 		if (!mp)
1036 			return (ENOMEM);
1037 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1038 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1039 		    sizeof (group_buf)),
1040 		    ill->ill_name));
1041 		putnext(ill->ill_rq, mp);
1042 	} else {
1043 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on"
1044 		    " %s\n",
1045 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1046 		    sizeof (group_buf)),
1047 		    ill->ill_name));
1048 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1049 	}
1050 	return (0);
1051 }
1052 
1053 /*
1054  * Send a multicast request to the driver for disabling multicast
1055  * membership for v6group if appropriate.
1056  */
1057 static int
1058 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1059 {
1060 	ill_t	*ill = ipif->ipif_ill;
1061 
1062 	ASSERT(IAM_WRITER_IPIF(ipif));
1063 
1064 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1065 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1066 		return (0);	/* Must be IRE_IF_NORESOLVER */
1067 	}
1068 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1069 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1070 		return (0);
1071 	}
1072 	if (ill->ill_ipif_up_count == 0) {
1073 		/*
1074 		 * Nobody there. All multicast addresses will be re-joined
1075 		 * when we get the DL_BIND_ACK bringing the interface up.
1076 		 */
1077 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1078 		return (0);
1079 	}
1080 	return (ip_ll_send_disabmulti_req(ill, v6group));
1081 }
1082 
1083 /*
1084  * Make the driver pass up all multicast packets
1085  *
1086  * With ill groups, the caller makes sure that there is only
1087  * one ill joining the allmulti group.
1088  */
1089 int
1090 ip_join_allmulti(ipif_t *ipif)
1091 {
1092 	ill_t	*ill = ipif->ipif_ill;
1093 	mblk_t	*mp;
1094 	uint32_t	addrlen, addroff;
1095 
1096 	ASSERT(IAM_WRITER_IPIF(ipif));
1097 
1098 	if (ill->ill_ipif_up_count == 0) {
1099 		/*
1100 		 * Nobody there. All multicast addresses will be re-joined
1101 		 * when we get the DL_BIND_ACK bringing the interface up.
1102 		 */
1103 		return (0);
1104 	}
1105 
1106 	ASSERT(!ill->ill_join_allmulti);
1107 
1108 	/*
1109 	 * Create a DL_PROMISCON_REQ message and send it directly to
1110 	 * the DLPI provider.  We don't need to do this for certain
1111 	 * media types for which we never need to turn promiscuous
1112 	 * mode on.
1113 	 */
1114 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1115 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1116 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1117 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1118 		if (mp == NULL)
1119 			return (ENOMEM);
1120 		putnext(ill->ill_wq, mp);
1121 	}
1122 
1123 	mutex_enter(&ill->ill_lock);
1124 	ill->ill_join_allmulti = B_TRUE;
1125 	mutex_exit(&ill->ill_lock);
1126 	return (0);
1127 }
1128 
1129 /*
1130  * Make the driver stop passing up all multicast packets
1131  *
1132  * With ill groups, we need to nominate some other ill as
1133  * this ipif->ipif_ill is leaving the group.
1134  */
1135 int
1136 ip_leave_allmulti(ipif_t *ipif)
1137 {
1138 	ill_t	*ill = ipif->ipif_ill;
1139 	mblk_t	*mp;
1140 	uint32_t	addrlen, addroff;
1141 
1142 	ASSERT(IAM_WRITER_IPIF(ipif));
1143 
1144 	if (ill->ill_ipif_up_count == 0) {
1145 		/*
1146 		 * Nobody there. All multicast addresses will be re-joined
1147 		 * when we get the DL_BIND_ACK bringing the interface up.
1148 		 */
1149 		return (0);
1150 	}
1151 
1152 	ASSERT(ill->ill_join_allmulti);
1153 
1154 	/*
1155 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1156 	 * the DLPI provider.  We don't need to do this for certain
1157 	 * media types for which we never need to turn promiscuous
1158 	 * mode on.
1159 	 */
1160 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1161 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1162 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1163 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1164 		if (mp == NULL)
1165 			return (ENOMEM);
1166 		putnext(ill->ill_wq, mp);
1167 	}
1168 
1169 	mutex_enter(&ill->ill_lock);
1170 	ill->ill_join_allmulti = B_FALSE;
1171 	mutex_exit(&ill->ill_lock);
1172 	return (0);
1173 }
1174 
1175 /*
1176  * Copy mp_orig and pass it in as a local message.
1177  */
1178 void
1179 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1180     zoneid_t zoneid)
1181 {
1182 	mblk_t	*mp;
1183 	mblk_t	*ipsec_mp;
1184 
1185 	if (DB_TYPE(mp_orig) == M_DATA &&
1186 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1187 		uint_t hdrsz;
1188 
1189 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1190 		    sizeof (udpha_t);
1191 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1192 
1193 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1194 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1195 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1196 			mp->b_wptr += hdrsz;
1197 			mp->b_cont = mp_orig;
1198 			mp_orig->b_rptr += hdrsz;
1199 			if (MBLKL(mp_orig) == 0) {
1200 				mp->b_cont = mp_orig->b_cont;
1201 				mp_orig->b_cont = NULL;
1202 				freeb(mp_orig);
1203 			}
1204 		} else if (mp != NULL) {
1205 			freeb(mp);
1206 			mp = NULL;
1207 		}
1208 	} else {
1209 		mp = ip_copymsg(mp_orig);
1210 	}
1211 
1212 	if (mp == NULL)
1213 		return;
1214 	if (DB_TYPE(mp) == M_CTL) {
1215 		ipsec_mp = mp;
1216 		mp = mp->b_cont;
1217 	} else {
1218 		ipsec_mp = mp;
1219 	}
1220 	ip_wput_local(q, ill, (ipha_t *)mp->b_rptr, ipsec_mp, NULL,
1221 	    fanout_flags, zoneid);
1222 }
1223 
1224 static area_t	ip_aresq_template = {
1225 	AR_ENTRY_SQUERY,		/* cmd */
1226 	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
1227 	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
1228 	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
1229 	sizeof (area_t),			/* proto addr offset */
1230 	IP_ADDR_LEN,			/* proto addr_length */
1231 	0,				/* proto mask offset */
1232 	/* Rest is initialized when used */
1233 	0,				/* flags */
1234 	0,				/* hw addr offset */
1235 	0,				/* hw addr length */
1236 };
1237 
1238 static mblk_t *
1239 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1240     uint32_t addroff, mblk_t *mp_tail)
1241 {
1242 	mblk_t	*mp;
1243 	area_t	*area;
1244 
1245 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1246 				(caddr_t)&ipaddr);
1247 	if (!mp) {
1248 		freemsg(mp_tail);
1249 		return (NULL);
1250 	}
1251 	area = (area_t *)mp->b_rptr;
1252 	area->area_hw_addr_length = addrlen;
1253 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1254 	/*
1255 	 * NOTE!
1256 	 *
1257 	 * The area_hw_addr_offset, as can be seen, does not hold the
1258 	 * actual hardware address offset. Rather, it holds the offset
1259 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1260 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1261 	 * mi_offset_paramc() to find the hardware address in the
1262 	 * *second* mblk (dl_xxx_req), not this mblk.
1263 	 *
1264 	 * Using mi_offset_paramc() is thus the *only* way to access
1265 	 * the dl_xxx_hw address.
1266 	 *
1267 	 * The squery hw address should *not* be accessed.
1268 	 *
1269 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1270 	 */
1271 
1272 	mp->b_cont = mp_tail;
1273 	return (mp);
1274 }
1275 
1276 /*
1277  * Create a dlpi message with room for phys+sap. When we come back in
1278  * ip_wput_ctl() we will strip the sap for those primitives which
1279  * only need a physical address.
1280  */
1281 static mblk_t *
1282 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1283     uint32_t *addr_lenp, uint32_t *addr_offp)
1284 {
1285 	mblk_t	*mp;
1286 	uint32_t	hw_addr_length;
1287 	char		*cp;
1288 	uint32_t	offset;
1289 	uint32_t 	size;
1290 
1291 	*addr_lenp = *addr_offp = 0;
1292 
1293 	hw_addr_length = ill->ill_phys_addr_length;
1294 	if (!hw_addr_length) {
1295 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1296 		return (NULL);
1297 	}
1298 
1299 	size = length;
1300 	switch (dl_primitive) {
1301 	case DL_ENABMULTI_REQ:
1302 	case DL_DISABMULTI_REQ:
1303 		size += hw_addr_length;
1304 		break;
1305 	case DL_PROMISCON_REQ:
1306 	case DL_PROMISCOFF_REQ:
1307 		break;
1308 	default:
1309 		return (NULL);
1310 	}
1311 	mp = allocb(size, BPRI_HI);
1312 	if (!mp)
1313 		return (NULL);
1314 	mp->b_wptr += size;
1315 	mp->b_datap->db_type = M_PROTO;
1316 
1317 	cp = (char *)mp->b_rptr;
1318 	offset = length;
1319 
1320 	switch (dl_primitive) {
1321 	case DL_ENABMULTI_REQ: {
1322 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1323 
1324 		dl->dl_primitive = dl_primitive;
1325 		dl->dl_addr_offset = offset;
1326 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1327 		*addr_offp = offset;
1328 		break;
1329 	}
1330 	case DL_DISABMULTI_REQ: {
1331 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1332 
1333 		dl->dl_primitive = dl_primitive;
1334 		dl->dl_addr_offset = offset;
1335 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1336 		*addr_offp = offset;
1337 		break;
1338 	}
1339 	case DL_PROMISCON_REQ:
1340 	case DL_PROMISCOFF_REQ: {
1341 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1342 
1343 		dl->dl_primitive = dl_primitive;
1344 		dl->dl_level = DL_PROMISC_MULTI;
1345 		break;
1346 	}
1347 	}
1348 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1349 		*addr_lenp, *addr_offp));
1350 	return (mp);
1351 }
1352 
1353 void
1354 ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1355 {
1356 	ill_t	*ill = (ill_t *)q->q_ptr;
1357 	mblk_t	*mp = mp_orig;
1358 	area_t	*area;
1359 
1360 	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1361 	if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) ||
1362 	    mp->b_cont == NULL) {
1363 		putnext(q, mp);
1364 		return;
1365 	}
1366 	area = (area_t *)mp->b_rptr;
1367 	if (area->area_cmd != AR_ENTRY_SQUERY) {
1368 		putnext(q, mp);
1369 		return;
1370 	}
1371 	mp = mp->b_cont;
1372 	/*
1373 	 * Update dl_addr_length and dl_addr_offset for primitives that
1374 	 * have physical addresses as opposed to full saps
1375 	 */
1376 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1377 	case DL_ENABMULTI_REQ:
1378 		/* Track the state if this is the first enabmulti */
1379 		if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN)
1380 			ill->ill_dlpi_multicast_state = IDMS_INPROGRESS;
1381 		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1382 		break;
1383 	case DL_DISABMULTI_REQ:
1384 		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1385 		break;
1386 	default:
1387 		ip1dbg(("ip_wput_ctl: default\n"));
1388 		break;
1389 	}
1390 	freeb(mp_orig);
1391 	putnext(q, mp);
1392 }
1393 
1394 /*
1395  * Rejoin any groups which have been explicitly joined by the application (we
1396  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1397  * bringing the interface down).  Note that because groups can be joined and
1398  * left while an interface is down, this may not be the same set of groups
1399  * that we left in ill_leave_multicast().
1400  */
1401 void
1402 ill_recover_multicast(ill_t *ill)
1403 {
1404 	ilm_t	*ilm;
1405 	char    addrbuf[INET6_ADDRSTRLEN];
1406 
1407 	ASSERT(IAM_WRITER_ILL(ill));
1408 
1409 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1410 		/*
1411 		 * Check how many ipif's that have members in this group -
1412 		 * if more then one we make sure that this entry is first
1413 		 * in the list.
1414 		 */
1415 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1416 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1417 			continue;
1418 		ip1dbg(("ill_recover_multicast: %s\n",
1419 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1420 		    sizeof (addrbuf))));
1421 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1422 			if (ill->ill_group == NULL) {
1423 				(void) ip_join_allmulti(ill->ill_ipif);
1424 			} else {
1425 				/*
1426 				 * We don't want to join on this ill,
1427 				 * if somebody else in the group has
1428 				 * already been nominated.
1429 				 */
1430 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1431 			}
1432 		} else {
1433 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1434 			    &ilm->ilm_v6addr);
1435 		}
1436 	}
1437 }
1438 
1439 /*
1440  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1441  * that were explicitly joined.  Note that both these functions could be
1442  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1443  * and DL_ENABMULTI_REQ messages when an interface is down.
1444  */
1445 void
1446 ill_leave_multicast(ill_t *ill)
1447 {
1448 	ilm_t	*ilm;
1449 	char    addrbuf[INET6_ADDRSTRLEN];
1450 
1451 	ASSERT(IAM_WRITER_ILL(ill));
1452 
1453 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1454 		/*
1455 		 * Check how many ipif's that have members in this group -
1456 		 * if more then one we make sure that this entry is first
1457 		 * in the list.
1458 		 */
1459 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1460 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1461 			continue;
1462 		ip1dbg(("ill_leave_multicast: %s\n",
1463 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1464 		    sizeof (addrbuf))));
1465 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1466 			(void) ip_leave_allmulti(ill->ill_ipif);
1467 			/*
1468 			 * If we were part of an IPMP group, then
1469 			 * ill_handoff_responsibility() has already
1470 			 * nominated a new member (so we don't).
1471 			 */
1472 			ASSERT(ill->ill_group == NULL);
1473 		} else {
1474 			(void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr);
1475 		}
1476 	}
1477 }
1478 
1479 /*
1480  * Find an ilm for matching the ill and which has the source in its
1481  * INCLUDE list or does not have it in its EXCLUDE list
1482  */
1483 ilm_t *
1484 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src)
1485 {
1486 	in6_addr_t	v6group, v6src;
1487 
1488 	/*
1489 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
1490 	 */
1491 	if (group == INADDR_ANY)
1492 		v6group = ipv6_all_zeros;
1493 	else
1494 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1495 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
1496 
1497 	return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src));
1498 }
1499 
1500 ilm_t *
1501 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group,
1502     const in6_addr_t *v6src)
1503 {
1504 	ilm_t	*ilm;
1505 	boolean_t isinlist;
1506 	int	i, numsrc;
1507 
1508 	/*
1509 	 * If the source is in any ilm's INCLUDE list, or if
1510 	 * it is not in any ilm's EXCLUDE list, we have a hit.
1511 	 */
1512 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1513 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1514 
1515 			isinlist = B_FALSE;
1516 			numsrc = (ilm->ilm_filter == NULL) ?
1517 			    0 : ilm->ilm_filter->sl_numsrc;
1518 			for (i = 0; i < numsrc; i++) {
1519 				if (IN6_ARE_ADDR_EQUAL(v6src,
1520 				    &ilm->ilm_filter->sl_addr[i])) {
1521 					isinlist = B_TRUE;
1522 					break;
1523 				}
1524 			}
1525 			if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) ||
1526 			    (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE))
1527 				return (ilm);
1528 			else
1529 				return (NULL);
1530 		}
1531 	}
1532 	return (NULL);
1533 }
1534 
1535 
1536 /* Find an ilm for matching the ill */
1537 ilm_t *
1538 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1539 {
1540 	in6_addr_t	v6group;
1541 
1542 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1543 	    IAM_WRITER_ILL(ill));
1544 	/*
1545 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1546 	 */
1547 	if (group == INADDR_ANY)
1548 		v6group = ipv6_all_zeros;
1549 	else
1550 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1551 
1552 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1553 }
1554 
1555 /*
1556  * Find an ilm for matching the ill. All the ilm lookup functions
1557  * ignore ILM_DELETED ilms. These have been logically deleted, and
1558  * igmp and linklayer disable multicast have been done. Only mi_free
1559  * yet to be done. Still there in the list due to ilm_walkers. The
1560  * last walker will release it.
1561  */
1562 ilm_t *
1563 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1564 {
1565 	ilm_t	*ilm;
1566 
1567 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1568 	    IAM_WRITER_ILL(ill));
1569 
1570 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1571 		if (ilm->ilm_flags & ILM_DELETED)
1572 			continue;
1573 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1574 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1575 			return (ilm);
1576 	}
1577 	return (NULL);
1578 }
1579 
1580 ilm_t *
1581 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1582     zoneid_t zoneid)
1583 {
1584 	ilm_t *ilm;
1585 
1586 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1587 	    IAM_WRITER_ILL(ill));
1588 
1589 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1590 		if (ilm->ilm_flags & ILM_DELETED)
1591 			continue;
1592 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1593 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1594 		    ilm->ilm_orig_ifindex == index) {
1595 			return (ilm);
1596 		}
1597 	}
1598 	return (NULL);
1599 }
1600 
1601 ilm_t *
1602 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid)
1603 {
1604 	in6_addr_t	v6group;
1605 
1606 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1607 	    IAM_WRITER_ILL(ill));
1608 	/*
1609 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1610 	 */
1611 	if (group == INADDR_ANY)
1612 		v6group = ipv6_all_zeros;
1613 	else
1614 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1615 
1616 	return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid));
1617 }
1618 
1619 /*
1620  * Found an ilm for the ipif. Only needed for IPv4 which does
1621  * ipif specific socket options.
1622  */
1623 ilm_t *
1624 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1625 {
1626 	ill_t	*ill = ipif->ipif_ill;
1627 	ilm_t	*ilm;
1628 	in6_addr_t	v6group;
1629 
1630 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1631 	    IAM_WRITER_ILL(ill));
1632 
1633 	/*
1634 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1635 	 */
1636 	if (group == INADDR_ANY)
1637 		v6group = ipv6_all_zeros;
1638 	else
1639 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1640 
1641 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1642 		if (ilm->ilm_flags & ILM_DELETED)
1643 			continue;
1644 		if (ilm->ilm_ipif == ipif &&
1645 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1646 			return (ilm);
1647 	}
1648 	return (NULL);
1649 }
1650 
1651 /*
1652  * How many members on this ill?
1653  */
1654 int
1655 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1656 {
1657 	ilm_t	*ilm;
1658 	int i = 0;
1659 
1660 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1661 	    IAM_WRITER_ILL(ill));
1662 
1663 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1664 		if (ilm->ilm_flags & ILM_DELETED)
1665 			continue;
1666 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1667 			i++;
1668 		}
1669 	}
1670 	return (i);
1671 }
1672 
1673 /* Caller guarantees that the group is not already on the list */
1674 static ilm_t *
1675 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1676     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1677     zoneid_t zoneid)
1678 {
1679 	ill_t	*ill = ipif->ipif_ill;
1680 	ilm_t	*ilm;
1681 	ilm_t	*ilm_cur;
1682 	ilm_t	**ilm_ptpn;
1683 
1684 	ASSERT(IAM_WRITER_IPIF(ipif));
1685 
1686 	ilm = GETSTRUCT(ilm_t, 1);
1687 	if (ilm == NULL)
1688 		return (NULL);
1689 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1690 		ilm->ilm_filter = l_alloc();
1691 		if (ilm->ilm_filter == NULL) {
1692 			mi_free(ilm);
1693 			return (NULL);
1694 		}
1695 	}
1696 	ilm->ilm_v6addr = *v6group;
1697 	ilm->ilm_refcnt = 1;
1698 	ilm->ilm_zoneid = zoneid;
1699 	ilm->ilm_timer = INFINITY;
1700 	ilm->ilm_rtx.rtx_timer = INFINITY;
1701 
1702 	/*
1703 	 * IPv4 Multicast groups are joined using ipif.
1704 	 * IPv6 Multicast groups are joined using ill.
1705 	 */
1706 	if (ill->ill_isv6) {
1707 		ilm->ilm_ill = ill;
1708 		ilm->ilm_ipif = NULL;
1709 	} else {
1710 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1711 		ilm->ilm_ipif = ipif;
1712 		ilm->ilm_ill = NULL;
1713 	}
1714 	/*
1715 	 * After this if ilm moves to a new ill, we don't change
1716 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1717 	 * it has been moved. Indexes don't match even when the application
1718 	 * wants to join on a FAILED/INACTIVE interface because we choose
1719 	 * a new interface to join in. This is considered as an implicit
1720 	 * move.
1721 	 */
1722 	ilm->ilm_orig_ifindex = orig_ifindex;
1723 
1724 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1725 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1726 
1727 	/*
1728 	 * Grab lock to give consistent view to readers
1729 	 */
1730 	mutex_enter(&ill->ill_lock);
1731 	/*
1732 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1733 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1734 	 * sending duplicates up when two applications in the same zone join the
1735 	 * same group on different logical interfaces.
1736 	 */
1737 	ilm_cur = ill->ill_ilm;
1738 	ilm_ptpn = &ill->ill_ilm;
1739 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1740 		ilm_ptpn = &ilm_cur->ilm_next;
1741 		ilm_cur = ilm_cur->ilm_next;
1742 	}
1743 	ilm->ilm_next = ilm_cur;
1744 	*ilm_ptpn = ilm;
1745 
1746 	/*
1747 	 * If we have an associated ilg, use its filter state; if not,
1748 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1749 	 */
1750 	if (ilgstat != ILGSTAT_NONE) {
1751 		if (!SLIST_IS_EMPTY(ilg_flist))
1752 			l_copy(ilg_flist, ilm->ilm_filter);
1753 		ilm->ilm_fmode = ilg_fmode;
1754 	} else {
1755 		ilm->ilm_no_ilg_cnt = 1;
1756 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1757 	}
1758 
1759 	mutex_exit(&ill->ill_lock);
1760 	return (ilm);
1761 }
1762 
1763 void
1764 ilm_walker_cleanup(ill_t *ill)
1765 {
1766 	ilm_t	**ilmp;
1767 	ilm_t	*ilm;
1768 
1769 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1770 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1771 
1772 	ilmp = &ill->ill_ilm;
1773 	while (*ilmp != NULL) {
1774 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1775 			ilm = *ilmp;
1776 			*ilmp = ilm->ilm_next;
1777 			FREE_SLIST(ilm->ilm_filter);
1778 			FREE_SLIST(ilm->ilm_pendsrcs);
1779 			FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1780 			FREE_SLIST(ilm->ilm_rtx.rtx_block);
1781 			mi_free((char *)ilm);
1782 		} else {
1783 			ilmp = &(*ilmp)->ilm_next;
1784 		}
1785 	}
1786 	ill->ill_ilm_cleanup_reqd = 0;
1787 }
1788 
1789 /*
1790  * Unlink ilm and free it.
1791  */
1792 static void
1793 ilm_delete(ilm_t *ilm)
1794 {
1795 	ill_t	*ill;
1796 	ilm_t	**ilmp;
1797 
1798 	if (ilm->ilm_ipif != NULL) {
1799 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1800 		ASSERT(ilm->ilm_ill == NULL);
1801 		ill = ilm->ilm_ipif->ipif_ill;
1802 		ASSERT(!ill->ill_isv6);
1803 	} else {
1804 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1805 		ASSERT(ilm->ilm_ipif == NULL);
1806 		ill = ilm->ilm_ill;
1807 		ASSERT(ill->ill_isv6);
1808 	}
1809 	/*
1810 	 * Delete under lock protection so that readers don't stumble
1811 	 * on bad ilm_next
1812 	 */
1813 	mutex_enter(&ill->ill_lock);
1814 	if (ill->ill_ilm_walker_cnt != 0) {
1815 		ilm->ilm_flags |= ILM_DELETED;
1816 		ill->ill_ilm_cleanup_reqd = 1;
1817 		mutex_exit(&ill->ill_lock);
1818 		return;
1819 	}
1820 
1821 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1822 				;
1823 	*ilmp = ilm->ilm_next;
1824 	mutex_exit(&ill->ill_lock);
1825 
1826 	FREE_SLIST(ilm->ilm_filter);
1827 	FREE_SLIST(ilm->ilm_pendsrcs);
1828 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1829 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1830 	mi_free((char *)ilm);
1831 }
1832 
1833 /* Free all ilms for this ipif */
1834 void
1835 ilm_free(ipif_t *ipif)
1836 {
1837 	ill_t	*ill = ipif->ipif_ill;
1838 	ilm_t	*ilm;
1839 	ilm_t	 *next_ilm;
1840 
1841 	ASSERT(IAM_WRITER_IPIF(ipif));
1842 
1843 	for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) {
1844 		next_ilm = ilm->ilm_next;
1845 		if (ilm->ilm_ipif == ipif)
1846 			ilm_delete(ilm);
1847 	}
1848 }
1849 
1850 /*
1851  * Looks up the appropriate ipif given a v4 multicast group and interface
1852  * address.  On success, returns 0, with *ipifpp pointing to the found
1853  * struct.  On failure, returns an errno and *ipifpp is NULL.
1854  */
1855 int
1856 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1857     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1858 {
1859 	ipif_t *ipif;
1860 	int err = 0;
1861 	zoneid_t zoneid;
1862 
1863 	if (!CLASSD(group) || CLASSD(src)) {
1864 		return (EINVAL);
1865 	}
1866 	*ipifpp = NULL;
1867 
1868 	zoneid = IPCL_ZONEID(connp);
1869 
1870 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1871 	if (ifaddr != INADDR_ANY) {
1872 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1873 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1874 		if (err != 0 && err != EINPROGRESS)
1875 			err = EADDRNOTAVAIL;
1876 	} else if (ifindexp != NULL && *ifindexp != 0) {
1877 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1878 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1879 	} else {
1880 		ipif = ipif_lookup_group(group, zoneid);
1881 		if (ipif == NULL)
1882 			return (EADDRNOTAVAIL);
1883 	}
1884 	if (ipif == NULL)
1885 		return (err);
1886 
1887 	*ipifpp = ipif;
1888 	return (0);
1889 }
1890 
1891 /*
1892  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1893  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1894  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1895  * an errno and *illpp and *ipifpp are undefined.
1896  */
1897 int
1898 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1899     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1900     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1901 {
1902 	boolean_t src_unspec;
1903 	ill_t *ill = NULL;
1904 	ipif_t *ipif = NULL;
1905 	int err;
1906 	zoneid_t zoneid = connp->conn_zoneid;
1907 	queue_t *wq = CONNP_TO_WQ(connp);
1908 
1909 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1910 
1911 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1912 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1913 			return (EINVAL);
1914 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1915 		if (src_unspec) {
1916 			*v4src = INADDR_ANY;
1917 		} else {
1918 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1919 		}
1920 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1921 			return (EINVAL);
1922 		*ipifpp = NULL;
1923 		*isv6 = B_FALSE;
1924 	} else {
1925 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1926 			return (EINVAL);
1927 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1928 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1929 			return (EINVAL);
1930 		}
1931 		*illpp = NULL;
1932 		*isv6 = B_TRUE;
1933 	}
1934 
1935 	if (ifindex == 0) {
1936 		if (*isv6)
1937 			ill = ill_lookup_group_v6(v6group, zoneid);
1938 		else
1939 			ipif = ipif_lookup_group(*v4group, zoneid);
1940 		if (ill == NULL && ipif == NULL)
1941 			return (EADDRNOTAVAIL);
1942 	} else {
1943 		if (*isv6) {
1944 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1945 			    wq, first_mp, func, &err);
1946 			if (ill != NULL &&
1947 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1948 				ill_refrele(ill);
1949 				ill = NULL;
1950 				err = EADDRNOTAVAIL;
1951 			}
1952 		} else {
1953 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1954 			    zoneid, wq, first_mp, func, &err);
1955 		}
1956 		if (ill == NULL && ipif == NULL)
1957 			return (err);
1958 	}
1959 
1960 	*ipifpp = ipif;
1961 	*illpp = ill;
1962 	return (0);
1963 }
1964 
1965 static int
1966 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1967     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1968 {
1969 	ilg_t *ilg;
1970 	int i, numsrc, fmode, outsrcs;
1971 	struct sockaddr_in *sin;
1972 	struct sockaddr_in6 *sin6;
1973 	struct in_addr *addrp;
1974 	slist_t *fp;
1975 	boolean_t is_v4only_api;
1976 
1977 	mutex_enter(&connp->conn_lock);
1978 
1979 	ilg = ilg_lookup_ipif(connp, grp, ipif);
1980 	if (ilg == NULL) {
1981 		mutex_exit(&connp->conn_lock);
1982 		return (EADDRNOTAVAIL);
1983 	}
1984 
1985 	if (gf == NULL) {
1986 		ASSERT(imsf != NULL);
1987 		ASSERT(!isv4mapped);
1988 		is_v4only_api = B_TRUE;
1989 		outsrcs = imsf->imsf_numsrc;
1990 	} else {
1991 		ASSERT(imsf == NULL);
1992 		is_v4only_api = B_FALSE;
1993 		outsrcs = gf->gf_numsrc;
1994 	}
1995 
1996 	/*
1997 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1998 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1999 	 * So we need to translate here.
2000 	 */
2001 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2002 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2003 	if ((fp = ilg->ilg_filter) == NULL) {
2004 		numsrc = 0;
2005 	} else {
2006 		for (i = 0; i < outsrcs; i++) {
2007 			if (i == fp->sl_numsrc)
2008 				break;
2009 			if (isv4mapped) {
2010 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2011 				sin6->sin6_family = AF_INET6;
2012 				sin6->sin6_addr = fp->sl_addr[i];
2013 			} else {
2014 				if (is_v4only_api) {
2015 					addrp = &imsf->imsf_slist[i];
2016 				} else {
2017 					sin = (struct sockaddr_in *)
2018 					    &gf->gf_slist[i];
2019 					sin->sin_family = AF_INET;
2020 					addrp = &sin->sin_addr;
2021 				}
2022 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2023 			}
2024 		}
2025 		numsrc = fp->sl_numsrc;
2026 	}
2027 
2028 	if (is_v4only_api) {
2029 		imsf->imsf_numsrc = numsrc;
2030 		imsf->imsf_fmode = fmode;
2031 	} else {
2032 		gf->gf_numsrc = numsrc;
2033 		gf->gf_fmode = fmode;
2034 	}
2035 
2036 	mutex_exit(&connp->conn_lock);
2037 
2038 	return (0);
2039 }
2040 
2041 static int
2042 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2043     const struct in6_addr *grp, ill_t *ill)
2044 {
2045 	ilg_t *ilg;
2046 	int i;
2047 	struct sockaddr_storage *sl;
2048 	struct sockaddr_in6 *sin6;
2049 	slist_t *fp;
2050 
2051 	mutex_enter(&connp->conn_lock);
2052 
2053 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2054 	if (ilg == NULL) {
2055 		mutex_exit(&connp->conn_lock);
2056 		return (EADDRNOTAVAIL);
2057 	}
2058 
2059 	/*
2060 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2061 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2062 	 * So we need to translate here.
2063 	 */
2064 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2065 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2066 	if ((fp = ilg->ilg_filter) == NULL) {
2067 		gf->gf_numsrc = 0;
2068 	} else {
2069 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2070 			if (i == fp->sl_numsrc)
2071 				break;
2072 			sin6 = (struct sockaddr_in6 *)sl;
2073 			sin6->sin6_family = AF_INET6;
2074 			sin6->sin6_addr = fp->sl_addr[i];
2075 		}
2076 		gf->gf_numsrc = fp->sl_numsrc;
2077 	}
2078 
2079 	mutex_exit(&connp->conn_lock);
2080 
2081 	return (0);
2082 }
2083 
2084 static int
2085 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2086     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2087 {
2088 	ilg_t *ilg;
2089 	int i, err, insrcs, infmode, new_fmode;
2090 	struct sockaddr_in *sin;
2091 	struct sockaddr_in6 *sin6;
2092 	struct in_addr *addrp;
2093 	slist_t *orig_filter = NULL;
2094 	slist_t *new_filter = NULL;
2095 	mcast_record_t orig_fmode;
2096 	boolean_t leave_grp, is_v4only_api;
2097 	ilg_stat_t ilgstat;
2098 
2099 	if (gf == NULL) {
2100 		ASSERT(imsf != NULL);
2101 		ASSERT(!isv4mapped);
2102 		is_v4only_api = B_TRUE;
2103 		insrcs = imsf->imsf_numsrc;
2104 		infmode = imsf->imsf_fmode;
2105 	} else {
2106 		ASSERT(imsf == NULL);
2107 		is_v4only_api = B_FALSE;
2108 		insrcs = gf->gf_numsrc;
2109 		infmode = gf->gf_fmode;
2110 	}
2111 
2112 	/* Make sure we can handle the source list */
2113 	if (insrcs > MAX_FILTER_SIZE)
2114 		return (ENOBUFS);
2115 
2116 	/*
2117 	 * setting the filter to (INCLUDE, NULL) is treated
2118 	 * as a request to leave the group.
2119 	 */
2120 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2121 
2122 	ASSERT(IAM_WRITER_IPIF(ipif));
2123 
2124 	mutex_enter(&connp->conn_lock);
2125 
2126 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2127 	if (ilg == NULL) {
2128 		/*
2129 		 * if the request was actually to leave, and we
2130 		 * didn't find an ilg, there's nothing to do.
2131 		 */
2132 		if (!leave_grp)
2133 			ilg = conn_ilg_alloc(connp);
2134 		if (leave_grp || ilg == NULL) {
2135 			mutex_exit(&connp->conn_lock);
2136 			return (leave_grp ? 0 : ENOMEM);
2137 		}
2138 		ilgstat = ILGSTAT_NEW;
2139 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2140 		ilg->ilg_ipif = ipif;
2141 		ilg->ilg_ill = NULL;
2142 		ilg->ilg_orig_ifindex = 0;
2143 	} else if (leave_grp) {
2144 		ilg_delete(connp, ilg, NULL);
2145 		mutex_exit(&connp->conn_lock);
2146 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2147 		return (0);
2148 	} else {
2149 		ilgstat = ILGSTAT_CHANGE;
2150 		/* Preserve existing state in case ip_addmulti() fails */
2151 		orig_fmode = ilg->ilg_fmode;
2152 		if (ilg->ilg_filter == NULL) {
2153 			orig_filter = NULL;
2154 		} else {
2155 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2156 			if (orig_filter == NULL) {
2157 				mutex_exit(&connp->conn_lock);
2158 				return (ENOMEM);
2159 			}
2160 		}
2161 	}
2162 
2163 	/*
2164 	 * Alloc buffer to copy new state into (see below) before
2165 	 * we make any changes, so we can bail if it fails.
2166 	 */
2167 	if ((new_filter = l_alloc()) == NULL) {
2168 		mutex_exit(&connp->conn_lock);
2169 		err = ENOMEM;
2170 		goto free_and_exit;
2171 	}
2172 
2173 	if (insrcs == 0) {
2174 		CLEAR_SLIST(ilg->ilg_filter);
2175 	} else {
2176 		slist_t *fp;
2177 		if (ilg->ilg_filter == NULL) {
2178 			fp = l_alloc();
2179 			if (fp == NULL) {
2180 				if (ilgstat == ILGSTAT_NEW)
2181 					ilg_delete(connp, ilg, NULL);
2182 				mutex_exit(&connp->conn_lock);
2183 				err = ENOMEM;
2184 				goto free_and_exit;
2185 			}
2186 		} else {
2187 			fp = ilg->ilg_filter;
2188 		}
2189 		for (i = 0; i < insrcs; i++) {
2190 			if (isv4mapped) {
2191 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2192 				fp->sl_addr[i] = sin6->sin6_addr;
2193 			} else {
2194 				if (is_v4only_api) {
2195 					addrp = &imsf->imsf_slist[i];
2196 				} else {
2197 					sin = (struct sockaddr_in *)
2198 					    &gf->gf_slist[i];
2199 					addrp = &sin->sin_addr;
2200 				}
2201 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2202 			}
2203 		}
2204 		fp->sl_numsrc = insrcs;
2205 		ilg->ilg_filter = fp;
2206 	}
2207 	/*
2208 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2209 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2210 	 * So we need to translate here.
2211 	 */
2212 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2213 		    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2214 
2215 	/*
2216 	 * Save copy of ilg's filter state to pass to other functions,
2217 	 * so we can release conn_lock now.
2218 	 */
2219 	new_fmode = ilg->ilg_fmode;
2220 	l_copy(ilg->ilg_filter, new_filter);
2221 
2222 	mutex_exit(&connp->conn_lock);
2223 
2224 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2225 	if (err != 0) {
2226 		/*
2227 		 * Restore the original filter state, or delete the
2228 		 * newly-created ilg.  We need to look up the ilg
2229 		 * again, though, since we've not been holding the
2230 		 * conn_lock.
2231 		 */
2232 		mutex_enter(&connp->conn_lock);
2233 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2234 		ASSERT(ilg != NULL);
2235 		if (ilgstat == ILGSTAT_NEW) {
2236 			ilg_delete(connp, ilg, NULL);
2237 		} else {
2238 			ilg->ilg_fmode = orig_fmode;
2239 			if (SLIST_IS_EMPTY(orig_filter)) {
2240 				CLEAR_SLIST(ilg->ilg_filter);
2241 			} else {
2242 				/*
2243 				 * We didn't free the filter, even if we
2244 				 * were trying to make the source list empty;
2245 				 * so if orig_filter isn't empty, the ilg
2246 				 * must still have a filter alloc'd.
2247 				 */
2248 				l_copy(orig_filter, ilg->ilg_filter);
2249 			}
2250 		}
2251 		mutex_exit(&connp->conn_lock);
2252 	}
2253 
2254 free_and_exit:
2255 	l_free(orig_filter);
2256 	l_free(new_filter);
2257 
2258 	return (err);
2259 }
2260 
2261 static int
2262 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2263     const struct in6_addr *grp, ill_t *ill)
2264 {
2265 	ilg_t *ilg;
2266 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2267 	slist_t *orig_filter = NULL;
2268 	slist_t *new_filter = NULL;
2269 	struct sockaddr_storage *sl;
2270 	struct sockaddr_in6 *sin6;
2271 	boolean_t leave_grp;
2272 	ilg_stat_t ilgstat;
2273 
2274 	/* Make sure we can handle the source list */
2275 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2276 		return (ENOBUFS);
2277 
2278 	/*
2279 	 * setting the filter to (INCLUDE, NULL) is treated
2280 	 * as a request to leave the group.
2281 	 */
2282 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2283 
2284 	ASSERT(IAM_WRITER_ILL(ill));
2285 
2286 	/*
2287 	 * Use the ifindex to do the lookup.  We can't use the ill
2288 	 * directly because ilg_ill could point to a different ill
2289 	 * if things have moved.
2290 	 */
2291 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2292 
2293 	mutex_enter(&connp->conn_lock);
2294 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2295 	if (ilg == NULL) {
2296 		/*
2297 		 * if the request was actually to leave, and we
2298 		 * didn't find an ilg, there's nothing to do.
2299 		 */
2300 		if (!leave_grp)
2301 			ilg = conn_ilg_alloc(connp);
2302 		if (leave_grp || ilg == NULL) {
2303 			mutex_exit(&connp->conn_lock);
2304 			return (leave_grp ? 0 : ENOMEM);
2305 		}
2306 		ilgstat = ILGSTAT_NEW;
2307 		ilg->ilg_v6group = *grp;
2308 		ilg->ilg_ipif = NULL;
2309 		/*
2310 		 * Choose our target ill to join on. This might be
2311 		 * different from the ill we've been given if it's
2312 		 * currently down and part of a group.
2313 		 *
2314 		 * new ill is not refheld; we are writer.
2315 		 */
2316 		ill = ip_choose_multi_ill(ill, grp);
2317 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2318 		ilg->ilg_ill = ill;
2319 		/*
2320 		 * Remember the index that we joined on, so that we can
2321 		 * successfully delete them later on and also search for
2322 		 * duplicates if the application wants to join again.
2323 		 */
2324 		ilg->ilg_orig_ifindex = orig_ifindex;
2325 	} else if (leave_grp) {
2326 		/*
2327 		 * Use the ilg's current ill for the deletion,
2328 		 * we might have failed over.
2329 		 */
2330 		ill = ilg->ilg_ill;
2331 		ilg_delete(connp, ilg, NULL);
2332 		mutex_exit(&connp->conn_lock);
2333 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2334 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2335 		return (0);
2336 	} else {
2337 		ilgstat = ILGSTAT_CHANGE;
2338 		/*
2339 		 * The current ill might be different from the one we were
2340 		 * asked to join on (if failover has occurred); we should
2341 		 * join on the ill stored in the ilg.  The original ill
2342 		 * is noted in ilg_orig_ifindex, which matched our request.
2343 		 */
2344 		ill = ilg->ilg_ill;
2345 		/* preserve existing state in case ip_addmulti() fails */
2346 		orig_fmode = ilg->ilg_fmode;
2347 		if (ilg->ilg_filter == NULL) {
2348 			orig_filter = NULL;
2349 		} else {
2350 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2351 			if (orig_filter == NULL) {
2352 				mutex_exit(&connp->conn_lock);
2353 				return (ENOMEM);
2354 			}
2355 		}
2356 	}
2357 
2358 	/*
2359 	 * Alloc buffer to copy new state into (see below) before
2360 	 * we make any changes, so we can bail if it fails.
2361 	 */
2362 	if ((new_filter = l_alloc()) == NULL) {
2363 		mutex_exit(&connp->conn_lock);
2364 		err = ENOMEM;
2365 		goto free_and_exit;
2366 	}
2367 
2368 	if (gf->gf_numsrc == 0) {
2369 		CLEAR_SLIST(ilg->ilg_filter);
2370 	} else {
2371 		slist_t *fp;
2372 		if (ilg->ilg_filter == NULL) {
2373 			fp = l_alloc();
2374 			if (fp == NULL) {
2375 				if (ilgstat == ILGSTAT_NEW)
2376 					ilg_delete(connp, ilg, NULL);
2377 				mutex_exit(&connp->conn_lock);
2378 				err = ENOMEM;
2379 				goto free_and_exit;
2380 			}
2381 		} else {
2382 			fp = ilg->ilg_filter;
2383 		}
2384 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2385 			sin6 = (struct sockaddr_in6 *)sl;
2386 			fp->sl_addr[i] = sin6->sin6_addr;
2387 		}
2388 		fp->sl_numsrc = gf->gf_numsrc;
2389 		ilg->ilg_filter = fp;
2390 	}
2391 	/*
2392 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2393 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2394 	 * So we need to translate here.
2395 	 */
2396 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2397 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2398 
2399 	/*
2400 	 * Save copy of ilg's filter state to pass to other functions,
2401 	 * so we can release conn_lock now.
2402 	 */
2403 	new_fmode = ilg->ilg_fmode;
2404 	l_copy(ilg->ilg_filter, new_filter);
2405 
2406 	mutex_exit(&connp->conn_lock);
2407 
2408 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2409 	    ilgstat, new_fmode, new_filter);
2410 	if (err != 0) {
2411 		/*
2412 		 * Restore the original filter state, or delete the
2413 		 * newly-created ilg.  We need to look up the ilg
2414 		 * again, though, since we've not been holding the
2415 		 * conn_lock.
2416 		 */
2417 		mutex_enter(&connp->conn_lock);
2418 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2419 		ASSERT(ilg != NULL);
2420 		if (ilgstat == ILGSTAT_NEW) {
2421 			ilg_delete(connp, ilg, NULL);
2422 		} else {
2423 			ilg->ilg_fmode = orig_fmode;
2424 			if (SLIST_IS_EMPTY(orig_filter)) {
2425 				CLEAR_SLIST(ilg->ilg_filter);
2426 			} else {
2427 				/*
2428 				 * We didn't free the filter, even if we
2429 				 * were trying to make the source list empty;
2430 				 * so if orig_filter isn't empty, the ilg
2431 				 * must still have a filter alloc'd.
2432 				 */
2433 				l_copy(orig_filter, ilg->ilg_filter);
2434 			}
2435 		}
2436 		mutex_exit(&connp->conn_lock);
2437 	}
2438 
2439 free_and_exit:
2440 	l_free(orig_filter);
2441 	l_free(new_filter);
2442 
2443 	return (err);
2444 }
2445 
2446 /*
2447  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2448  */
2449 /* ARGSUSED */
2450 int
2451 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2452     ip_ioctl_cmd_t *ipip, void *ifreq)
2453 {
2454 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2455 	/* existence verified in ip_wput_nondata() */
2456 	mblk_t *data_mp = mp->b_cont->b_cont;
2457 	int datalen, err, cmd, minsize;
2458 	int expsize = 0;
2459 	conn_t *connp;
2460 	boolean_t isv6, is_v4only_api, getcmd;
2461 	struct sockaddr_in *gsin;
2462 	struct sockaddr_in6 *gsin6;
2463 	ipaddr_t v4grp;
2464 	in6_addr_t v6grp;
2465 	struct group_filter *gf = NULL;
2466 	struct ip_msfilter *imsf = NULL;
2467 	mblk_t *ndp;
2468 
2469 	if (data_mp->b_cont != NULL) {
2470 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2471 			return (ENOMEM);
2472 		freemsg(data_mp);
2473 		data_mp = ndp;
2474 		mp->b_cont->b_cont = data_mp;
2475 	}
2476 
2477 	cmd = iocp->ioc_cmd;
2478 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2479 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2480 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2481 	datalen = MBLKL(data_mp);
2482 
2483 	if (datalen < minsize)
2484 		return (EINVAL);
2485 
2486 	/*
2487 	 * now we know we have at least have the initial structure,
2488 	 * but need to check for the source list array.
2489 	 */
2490 	if (is_v4only_api) {
2491 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2492 		isv6 = B_FALSE;
2493 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2494 	} else {
2495 		gf = (struct group_filter *)data_mp->b_rptr;
2496 		if (gf->gf_group.ss_family == AF_INET6) {
2497 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2498 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2499 		} else {
2500 			isv6 = B_FALSE;
2501 		}
2502 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2503 	}
2504 	if (datalen < expsize)
2505 		return (EINVAL);
2506 
2507 	connp = Q_TO_CONN(q);
2508 
2509 	/* operation not supported on the virtual network interface */
2510 	if (IS_VNI(ipif->ipif_ill))
2511 		return (EINVAL);
2512 
2513 	if (isv6) {
2514 		ill_t *ill = ipif->ipif_ill;
2515 		ill_refhold(ill);
2516 
2517 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2518 		v6grp = gsin6->sin6_addr;
2519 		if (getcmd)
2520 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2521 		else
2522 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2523 
2524 		ill_refrele(ill);
2525 	} else {
2526 		boolean_t isv4mapped = B_FALSE;
2527 		if (is_v4only_api) {
2528 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2529 		} else {
2530 			if (gf->gf_group.ss_family == AF_INET) {
2531 				gsin = (struct sockaddr_in *)&gf->gf_group;
2532 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2533 			} else {
2534 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2535 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2536 				    v4grp);
2537 				isv4mapped = B_TRUE;
2538 			}
2539 		}
2540 		if (getcmd)
2541 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2542 			    isv4mapped);
2543 		else
2544 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2545 			    isv4mapped);
2546 	}
2547 
2548 	return (err);
2549 }
2550 
2551 /*
2552  * Finds the ipif based on information in the ioctl headers.  Needed to make
2553  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2554  * ioctls prior to calling the ioctl's handler function).  Somewhat analogous
2555  * to ip_extract_lifreq_cmn() and ip_extract_tunreq().
2556  */
2557 int
2558 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func)
2559 {
2560 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2561 	int cmd = iocp->ioc_cmd, err = 0;
2562 	conn_t *connp;
2563 	ipif_t *ipif;
2564 	/* caller has verified this mblk exists */
2565 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2566 	struct ip_msfilter *imsf;
2567 	struct group_filter *gf;
2568 	ipaddr_t v4addr, v4grp;
2569 	in6_addr_t v6grp;
2570 	uint32_t index;
2571 	zoneid_t zoneid;
2572 
2573 	connp = Q_TO_CONN(q);
2574 	zoneid = connp->conn_zoneid;
2575 
2576 	/* don't allow multicast operations on a tcp conn */
2577 	if (IPCL_IS_TCP(connp))
2578 		return (ENOPROTOOPT);
2579 
2580 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2581 		/* don't allow v4-specific ioctls on v6 socket */
2582 		if (connp->conn_af_isv6)
2583 			return (EAFNOSUPPORT);
2584 
2585 		imsf = (struct ip_msfilter *)dbuf;
2586 		v4addr = imsf->imsf_interface.s_addr;
2587 		v4grp = imsf->imsf_multiaddr.s_addr;
2588 		if (v4addr == INADDR_ANY) {
2589 			ipif = ipif_lookup_group(v4grp, zoneid);
2590 			if (ipif == NULL)
2591 				err = EADDRNOTAVAIL;
2592 		} else {
2593 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2594 			    func, &err);
2595 		}
2596 	} else {
2597 		boolean_t isv6 = B_FALSE;
2598 		gf = (struct group_filter *)dbuf;
2599 		index = gf->gf_interface;
2600 		if (gf->gf_group.ss_family == AF_INET6) {
2601 			struct sockaddr_in6 *sin6;
2602 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2603 			v6grp = sin6->sin6_addr;
2604 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2605 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2606 			else
2607 				isv6 = B_TRUE;
2608 		} else if (gf->gf_group.ss_family == AF_INET) {
2609 			struct sockaddr_in *sin;
2610 			sin = (struct sockaddr_in *)&gf->gf_group;
2611 			v4grp = sin->sin_addr.s_addr;
2612 		} else {
2613 			return (EAFNOSUPPORT);
2614 		}
2615 		if (index == 0) {
2616 			if (isv6)
2617 				ipif = ipif_lookup_group_v6(&v6grp, zoneid);
2618 			else
2619 				ipif = ipif_lookup_group(v4grp, zoneid);
2620 			if (ipif == NULL)
2621 				err = EADDRNOTAVAIL;
2622 		} else {
2623 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2624 			    q, mp, func, &err);
2625 		}
2626 	}
2627 
2628 	*ipifpp = ipif;
2629 	return (err);
2630 }
2631 
2632 /*
2633  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2634  * in in two stages, as the first copyin tells us the size of the attached
2635  * source buffer.  This function is called by ip_wput_nondata() after the
2636  * first copyin has completed; it figures out how big the second stage
2637  * needs to be, and kicks it off.
2638  *
2639  * In some cases (numsrc < 2), the second copyin is not needed as the
2640  * first one gets a complete structure containing 1 source addr.
2641  *
2642  * The function returns 0 if a second copyin has been started (i.e. there's
2643  * no more work to be done right now), or 1 if the second copyin is not
2644  * needed and ip_wput_nondata() can continue its processing.
2645  */
2646 int
2647 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2648 {
2649 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2650 	int cmd = iocp->ioc_cmd;
2651 	/* validity of this checked in ip_wput_nondata() */
2652 	mblk_t *mp1 = mp->b_cont->b_cont;
2653 	int copysize = 0;
2654 	int offset;
2655 
2656 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2657 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2658 		if (gf->gf_numsrc >= 2) {
2659 			offset = sizeof (struct group_filter);
2660 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2661 		}
2662 	} else {
2663 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2664 		if (imsf->imsf_numsrc >= 2) {
2665 			offset = sizeof (struct ip_msfilter);
2666 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2667 		}
2668 	}
2669 	if (copysize > 0) {
2670 		mi_copyin_n(q, mp, offset, copysize);
2671 		return (0);
2672 	}
2673 	return (1);
2674 }
2675 
2676 /*
2677  * Handle the following optmgmt:
2678  *	IP_ADD_MEMBERSHIP		must not have joined already
2679  *	MCAST_JOIN_GROUP		must not have joined already
2680  *	IP_BLOCK_SOURCE			must have joined already
2681  *	MCAST_BLOCK_SOURCE		must have joined already
2682  *	IP_JOIN_SOURCE_GROUP		may have joined already
2683  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2684  *
2685  * fmode and src parameters may be used to determine which option is
2686  * being set, as follows (the IP_* and MCAST_* versions of each option
2687  * are functionally equivalent):
2688  *	opt			fmode			src
2689  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2690  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2691  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2692  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2693  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2694  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2695  *
2696  * Changing the filter mode is not allowed; if a matching ilg already
2697  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2698  *
2699  * Verifies that there is a source address of appropriate scope for
2700  * the group; if not, EADDRNOTAVAIL is returned.
2701  *
2702  * The interface to be used may be identified by an address or by an
2703  * index.  A pointer to the index is passed; if it is NULL, use the
2704  * address, otherwise, use the index.
2705  */
2706 int
2707 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2708     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2709     mblk_t *first_mp)
2710 {
2711 	ipif_t	*ipif;
2712 	ipsq_t	*ipsq;
2713 	int err = 0;
2714 	ill_t	*ill;
2715 
2716 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2717 	    ip_restart_optmgmt, &ipif);
2718 	if (err != 0) {
2719 		if (err != EINPROGRESS) {
2720 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2721 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2722 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2723 		}
2724 		return (err);
2725 	}
2726 	ASSERT(ipif != NULL);
2727 
2728 	ill = ipif->ipif_ill;
2729 	/* Operation not supported on a virtual network interface */
2730 	if (IS_VNI(ill)) {
2731 		ipif_refrele(ipif);
2732 		return (EINVAL);
2733 	}
2734 
2735 	if (checkonly) {
2736 		/*
2737 		 * do not do operation, just pretend to - new T_CHECK
2738 		 * semantics. The error return case above if encountered
2739 		 * considered a good enough "check" here.
2740 		 */
2741 		ipif_refrele(ipif);
2742 		return (0);
2743 	}
2744 
2745 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2746 	    NEW_OP);
2747 
2748 	/* unspecified source addr => no source filtering */
2749 	err = ilg_add(connp, group, ipif, fmode, src);
2750 
2751 	IPSQ_EXIT(ipsq);
2752 
2753 	ipif_refrele(ipif);
2754 	return (err);
2755 }
2756 
2757 /*
2758  * Handle the following optmgmt:
2759  *	IPV6_JOIN_GROUP			must not have joined already
2760  *	MCAST_JOIN_GROUP		must not have joined already
2761  *	MCAST_BLOCK_SOURCE		must have joined already
2762  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2763  *
2764  * fmode and src parameters may be used to determine which option is
2765  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2766  * are functionally equivalent):
2767  *	opt			fmode			v6src
2768  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2769  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2770  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2771  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2772  *
2773  * Changing the filter mode is not allowed; if a matching ilg already
2774  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2775  *
2776  * Verifies that there is a source address of appropriate scope for
2777  * the group; if not, EADDRNOTAVAIL is returned.
2778  *
2779  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2780  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2781  * v6src is also v4-mapped.
2782  */
2783 int
2784 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2785     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2786     const in6_addr_t *v6src, mblk_t *first_mp)
2787 {
2788 	ill_t *ill;
2789 	ipif_t	*ipif;
2790 	char buf[INET6_ADDRSTRLEN];
2791 	ipaddr_t v4group, v4src;
2792 	boolean_t isv6;
2793 	ipsq_t	*ipsq;
2794 	int	err;
2795 
2796 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2797 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2798 	if (err != 0) {
2799 		if (err != EINPROGRESS) {
2800 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2801 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2802 			    sizeof (buf)), ifindex));
2803 		}
2804 		return (err);
2805 	}
2806 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2807 
2808 	/* operation is not supported on the virtual network interface */
2809 	if (isv6) {
2810 		if (IS_VNI(ill)) {
2811 			ill_refrele(ill);
2812 			return (EINVAL);
2813 		}
2814 	} else {
2815 		if (IS_VNI(ipif->ipif_ill)) {
2816 			ipif_refrele(ipif);
2817 			return (EINVAL);
2818 		}
2819 	}
2820 
2821 	if (checkonly) {
2822 		/*
2823 		 * do not do operation, just pretend to - new T_CHECK
2824 		 * semantics. The error return case above if encountered
2825 		 * considered a good enough "check" here.
2826 		 */
2827 		if (isv6)
2828 			ill_refrele(ill);
2829 		else
2830 			ipif_refrele(ipif);
2831 		return (0);
2832 	}
2833 
2834 	if (!isv6) {
2835 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2836 		    ipsq, NEW_OP);
2837 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2838 		IPSQ_EXIT(ipsq);
2839 		ipif_refrele(ipif);
2840 	} else {
2841 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2842 		    ipsq, NEW_OP);
2843 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2844 		IPSQ_EXIT(ipsq);
2845 		ill_refrele(ill);
2846 	}
2847 
2848 	return (err);
2849 }
2850 
2851 static int
2852 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2853     mcast_record_t fmode, ipaddr_t src)
2854 {
2855 	ilg_t	*ilg;
2856 	in6_addr_t v6src;
2857 	boolean_t leaving = B_FALSE;
2858 
2859 	ASSERT(IAM_WRITER_IPIF(ipif));
2860 
2861 	/*
2862 	 * The ilg is valid only while we hold the conn lock. Once we drop
2863 	 * the lock, another thread can locate another ilg on this connp,
2864 	 * but on a different ipif, and delete it, and cause the ilg array
2865 	 * to be reallocated and copied. Hence do the ilg_delete before
2866 	 * dropping the lock.
2867 	 */
2868 	mutex_enter(&connp->conn_lock);
2869 	ilg = ilg_lookup_ipif(connp, group, ipif);
2870 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2871 		mutex_exit(&connp->conn_lock);
2872 		return (EADDRNOTAVAIL);
2873 	}
2874 
2875 	/*
2876 	 * Decide if we're actually deleting the ilg or just removing a
2877 	 * source filter address; if just removing an addr, make sure we
2878 	 * aren't trying to change the filter mode, and that the addr is
2879 	 * actually in our filter list already.  If we're removing the
2880 	 * last src in an include list, just delete the ilg.
2881 	 */
2882 	if (src == INADDR_ANY) {
2883 		v6src = ipv6_all_zeros;
2884 		leaving = B_TRUE;
2885 	} else {
2886 		int err = 0;
2887 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2888 		if (fmode != ilg->ilg_fmode)
2889 			err = EINVAL;
2890 		else if (ilg->ilg_filter == NULL ||
2891 		    !list_has_addr(ilg->ilg_filter, &v6src))
2892 			err = EADDRNOTAVAIL;
2893 		if (err != 0) {
2894 			mutex_exit(&connp->conn_lock);
2895 			return (err);
2896 		}
2897 		if (fmode == MODE_IS_INCLUDE &&
2898 		    ilg->ilg_filter->sl_numsrc == 1) {
2899 			v6src = ipv6_all_zeros;
2900 			leaving = B_TRUE;
2901 		}
2902 	}
2903 
2904 	ilg_delete(connp, ilg, &v6src);
2905 	mutex_exit(&connp->conn_lock);
2906 
2907 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2908 	return (0);
2909 }
2910 
2911 static int
2912 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2913     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2914 {
2915 	ilg_t	*ilg;
2916 	ill_t	*ilg_ill;
2917 	uint_t	ilg_orig_ifindex;
2918 	boolean_t leaving = B_TRUE;
2919 
2920 	ASSERT(IAM_WRITER_ILL(ill));
2921 
2922 	/*
2923 	 * Use the index that we originally used to join. We can't
2924 	 * use the ill directly because ilg_ill could point to
2925 	 * a new ill if things have moved.
2926 	 */
2927 	mutex_enter(&connp->conn_lock);
2928 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2929 	    ill->ill_phyint->phyint_ifindex);
2930 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2931 		mutex_exit(&connp->conn_lock);
2932 		return (EADDRNOTAVAIL);
2933 	}
2934 
2935 	/*
2936 	 * Decide if we're actually deleting the ilg or just removing a
2937 	 * source filter address; if just removing an addr, make sure we
2938 	 * aren't trying to change the filter mode, and that the addr is
2939 	 * actually in our filter list already.  If we're removing the
2940 	 * last src in an include list, just delete the ilg.
2941 	 */
2942 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2943 		int err = 0;
2944 		if (fmode != ilg->ilg_fmode)
2945 			err = EINVAL;
2946 		else if (ilg->ilg_filter == NULL ||
2947 		    !list_has_addr(ilg->ilg_filter, v6src))
2948 			err = EADDRNOTAVAIL;
2949 		if (err != 0) {
2950 			mutex_exit(&connp->conn_lock);
2951 			return (err);
2952 		}
2953 		if (fmode == MODE_IS_INCLUDE &&
2954 		    ilg->ilg_filter->sl_numsrc == 1)
2955 			v6src = NULL;
2956 		else
2957 			leaving = B_FALSE;
2958 	}
2959 
2960 	ilg_ill = ilg->ilg_ill;
2961 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2962 	ilg_delete(connp, ilg, v6src);
2963 	mutex_exit(&connp->conn_lock);
2964 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2965 	    connp->conn_zoneid, B_FALSE, leaving);
2966 
2967 	return (0);
2968 }
2969 
2970 /*
2971  * Handle the following optmgmt:
2972  *	IP_DROP_MEMBERSHIP		will leave
2973  *	MCAST_LEAVE_GROUP		will leave
2974  *	IP_UNBLOCK_SOURCE		will not leave
2975  *	MCAST_UNBLOCK_SOURCE		will not leave
2976  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2977  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2978  *
2979  * fmode and src parameters may be used to determine which option is
2980  * being set, as follows (the IP_* and MCAST_* versions of each option
2981  * are functionally equivalent):
2982  *	opt			 fmode			src
2983  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
2984  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
2985  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2986  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2987  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
2988  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
2989  *
2990  * Changing the filter mode is not allowed; if a matching ilg already
2991  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2992  *
2993  * The interface to be used may be identified by an address or by an
2994  * index.  A pointer to the index is passed; if it is NULL, use the
2995  * address, otherwise, use the index.
2996  */
2997 int
2998 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2999     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3000     mblk_t *first_mp)
3001 {
3002 	ipif_t	*ipif;
3003 	ipsq_t	*ipsq;
3004 	int	err;
3005 	ill_t	*ill;
3006 
3007 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3008 	    ip_restart_optmgmt, &ipif);
3009 	if (err != 0) {
3010 		if (err != EINPROGRESS) {
3011 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3012 			    "0x%x, ifaddr 0x%x\n",
3013 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3014 		}
3015 		return (err);
3016 	}
3017 	ASSERT(ipif != NULL);
3018 
3019 	ill = ipif->ipif_ill;
3020 	/* Operation not supported on a virtual network interface */
3021 	if (IS_VNI(ill)) {
3022 		ipif_refrele(ipif);
3023 		return (EINVAL);
3024 	}
3025 
3026 	if (checkonly) {
3027 		/*
3028 		 * do not do operation, just pretend to - new T_CHECK
3029 		 * semantics. The error return case above if encountered
3030 		 * considered a good enough "check" here.
3031 		 */
3032 		ipif_refrele(ipif);
3033 		return (0);
3034 	}
3035 
3036 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3037 	    NEW_OP);
3038 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3039 	IPSQ_EXIT(ipsq);
3040 
3041 	ipif_refrele(ipif);
3042 	return (err);
3043 }
3044 
3045 /*
3046  * Handle the following optmgmt:
3047  *	IPV6_LEAVE_GROUP		will leave
3048  *	MCAST_LEAVE_GROUP		will leave
3049  *	MCAST_UNBLOCK_SOURCE		will not leave
3050  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3051  *
3052  * fmode and src parameters may be used to determine which option is
3053  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3054  * are functionally equivalent):
3055  *	opt			 fmode			v6src
3056  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3057  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3058  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3059  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3060  *
3061  * Changing the filter mode is not allowed; if a matching ilg already
3062  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3063  *
3064  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3065  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3066  * v6src is also v4-mapped.
3067  */
3068 int
3069 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3070     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3071     const in6_addr_t *v6src, mblk_t *first_mp)
3072 {
3073 	ill_t *ill;
3074 	ipif_t	*ipif;
3075 	char	buf[INET6_ADDRSTRLEN];
3076 	ipaddr_t v4group, v4src;
3077 	boolean_t isv6;
3078 	ipsq_t	*ipsq;
3079 	int	err;
3080 
3081 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3082 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3083 	if (err != 0) {
3084 		if (err != EINPROGRESS) {
3085 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3086 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3087 			    sizeof (buf)), ifindex));
3088 		}
3089 		return (err);
3090 	}
3091 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3092 
3093 	/* operation is not supported on the virtual network interface */
3094 	if (isv6) {
3095 		if (IS_VNI(ill)) {
3096 			ill_refrele(ill);
3097 			return (EINVAL);
3098 		}
3099 	} else {
3100 		if (IS_VNI(ipif->ipif_ill)) {
3101 			ipif_refrele(ipif);
3102 			return (EINVAL);
3103 		}
3104 	}
3105 
3106 	if (checkonly) {
3107 		/*
3108 		 * do not do operation, just pretend to - new T_CHECK
3109 		 * semantics. The error return case above if encountered
3110 		 * considered a good enough "check" here.
3111 		 */
3112 		if (isv6)
3113 			ill_refrele(ill);
3114 		else
3115 			ipif_refrele(ipif);
3116 		return (0);
3117 	}
3118 
3119 	if (!isv6) {
3120 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3121 		    ipsq, NEW_OP);
3122 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3123 		    v4src);
3124 		IPSQ_EXIT(ipsq);
3125 		ipif_refrele(ipif);
3126 	} else {
3127 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3128 		    ipsq, NEW_OP);
3129 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3130 		    v6src);
3131 		IPSQ_EXIT(ipsq);
3132 		ill_refrele(ill);
3133 	}
3134 
3135 	return (err);
3136 }
3137 
3138 /*
3139  * Group mgmt for upper conn that passes things down
3140  * to the interface multicast list (and DLPI)
3141  * These routines can handle new style options that specify an interface name
3142  * as opposed to an interface address (needed for general handling of
3143  * unnumbered interfaces.)
3144  */
3145 
3146 /*
3147  * Add a group to an upper conn group data structure and pass things down
3148  * to the interface multicast list (and DLPI)
3149  */
3150 static int
3151 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3152     ipaddr_t src)
3153 {
3154 	int	error = 0;
3155 	ill_t	*ill;
3156 	ilg_t	*ilg;
3157 	ilg_stat_t ilgstat;
3158 	slist_t	*new_filter = NULL;
3159 	int	new_fmode;
3160 
3161 	ASSERT(IAM_WRITER_IPIF(ipif));
3162 
3163 	ill = ipif->ipif_ill;
3164 
3165 	if (!(ill->ill_flags & ILLF_MULTICAST))
3166 		return (EADDRNOTAVAIL);
3167 
3168 	/*
3169 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3170 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3171 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3172 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3173 	 * but both operations happen on the same conn.
3174 	 */
3175 	mutex_enter(&connp->conn_lock);
3176 	ilg = ilg_lookup_ipif(connp, group, ipif);
3177 
3178 	/*
3179 	 * Depending on the option we're handling, may or may not be okay
3180 	 * if group has already been added.  Figure out our rules based
3181 	 * on fmode and src params.  Also make sure there's enough room
3182 	 * in the filter if we're adding a source to an existing filter.
3183 	 */
3184 	if (src == INADDR_ANY) {
3185 		/* we're joining for all sources, must not have joined */
3186 		if (ilg != NULL)
3187 			error = EADDRINUSE;
3188 	} else {
3189 		if (fmode == MODE_IS_EXCLUDE) {
3190 			/* (excl {addr}) => block source, must have joined */
3191 			if (ilg == NULL)
3192 				error = EADDRNOTAVAIL;
3193 		}
3194 		/* (incl {addr}) => join source, may have joined */
3195 
3196 		if (ilg != NULL &&
3197 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3198 			error = ENOBUFS;
3199 	}
3200 	if (error != 0) {
3201 		mutex_exit(&connp->conn_lock);
3202 		return (error);
3203 	}
3204 
3205 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3206 
3207 	/*
3208 	 * Alloc buffer to copy new state into (see below) before
3209 	 * we make any changes, so we can bail if it fails.
3210 	 */
3211 	if ((new_filter = l_alloc()) == NULL) {
3212 		mutex_exit(&connp->conn_lock);
3213 		return (ENOMEM);
3214 	}
3215 
3216 	if (ilg == NULL) {
3217 		ilgstat = ILGSTAT_NEW;
3218 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3219 			mutex_exit(&connp->conn_lock);
3220 			l_free(new_filter);
3221 			return (ENOMEM);
3222 		}
3223 		if (src != INADDR_ANY) {
3224 			ilg->ilg_filter = l_alloc();
3225 			if (ilg->ilg_filter == NULL) {
3226 				ilg_delete(connp, ilg, NULL);
3227 				mutex_exit(&connp->conn_lock);
3228 				l_free(new_filter);
3229 				return (ENOMEM);
3230 			}
3231 			ilg->ilg_filter->sl_numsrc = 1;
3232 			IN6_IPADDR_TO_V4MAPPED(src,
3233 			    &ilg->ilg_filter->sl_addr[0]);
3234 		}
3235 		if (group == INADDR_ANY) {
3236 			ilg->ilg_v6group = ipv6_all_zeros;
3237 		} else {
3238 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3239 		}
3240 		ilg->ilg_ipif = ipif;
3241 		ilg->ilg_ill = NULL;
3242 		ilg->ilg_orig_ifindex = 0;
3243 		ilg->ilg_fmode = fmode;
3244 	} else {
3245 		int index;
3246 		in6_addr_t v6src;
3247 		ilgstat = ILGSTAT_CHANGE;
3248 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3249 			mutex_exit(&connp->conn_lock);
3250 			l_free(new_filter);
3251 			return (EINVAL);
3252 		}
3253 		if (ilg->ilg_filter == NULL) {
3254 			ilg->ilg_filter = l_alloc();
3255 			if (ilg->ilg_filter == NULL) {
3256 				mutex_exit(&connp->conn_lock);
3257 				l_free(new_filter);
3258 				return (ENOMEM);
3259 			}
3260 		}
3261 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3262 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3263 			mutex_exit(&connp->conn_lock);
3264 			l_free(new_filter);
3265 			return (EADDRNOTAVAIL);
3266 		}
3267 		index = ilg->ilg_filter->sl_numsrc++;
3268 		ilg->ilg_filter->sl_addr[index] = v6src;
3269 	}
3270 
3271 	/*
3272 	 * Save copy of ilg's filter state to pass to other functions,
3273 	 * so we can release conn_lock now.
3274 	 */
3275 	new_fmode = ilg->ilg_fmode;
3276 	l_copy(ilg->ilg_filter, new_filter);
3277 
3278 	mutex_exit(&connp->conn_lock);
3279 
3280 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3281 	if (error != 0) {
3282 		/*
3283 		 * Need to undo what we did before calling ip_addmulti()!
3284 		 * Must look up the ilg again since we've not been holding
3285 		 * conn_lock.
3286 		 */
3287 		in6_addr_t v6src;
3288 		if (ilgstat == ILGSTAT_NEW)
3289 			v6src = ipv6_all_zeros;
3290 		else
3291 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3292 		mutex_enter(&connp->conn_lock);
3293 		ilg = ilg_lookup_ipif(connp, group, ipif);
3294 		ASSERT(ilg != NULL);
3295 		ilg_delete(connp, ilg, &v6src);
3296 		mutex_exit(&connp->conn_lock);
3297 		l_free(new_filter);
3298 		return (error);
3299 	}
3300 
3301 	l_free(new_filter);
3302 	return (0);
3303 }
3304 
3305 static int
3306 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3307     mcast_record_t fmode, const in6_addr_t *v6src)
3308 {
3309 	int	error = 0;
3310 	int	orig_ifindex;
3311 	ilg_t	*ilg;
3312 	ilg_stat_t ilgstat;
3313 	slist_t	*new_filter = NULL;
3314 	int	new_fmode;
3315 
3316 	ASSERT(IAM_WRITER_ILL(ill));
3317 
3318 	if (!(ill->ill_flags & ILLF_MULTICAST))
3319 		return (EADDRNOTAVAIL);
3320 
3321 	/*
3322 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3323 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3324 	 * and hme1 map to different ipsq's, but both operations happen
3325 	 * on the same conn.
3326 	 */
3327 	mutex_enter(&connp->conn_lock);
3328 
3329 	/*
3330 	 * Use the ifindex to do the lookup. We can't use the ill
3331 	 * directly because ilg_ill could point to a different ill if
3332 	 * things have moved.
3333 	 */
3334 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3335 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3336 
3337 	/*
3338 	 * Depending on the option we're handling, may or may not be okay
3339 	 * if group has already been added.  Figure out our rules based
3340 	 * on fmode and src params.  Also make sure there's enough room
3341 	 * in the filter if we're adding a source to an existing filter.
3342 	 */
3343 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3344 		/* we're joining for all sources, must not have joined */
3345 		if (ilg != NULL)
3346 			error = EADDRINUSE;
3347 	} else {
3348 		if (fmode == MODE_IS_EXCLUDE) {
3349 			/* (excl {addr}) => block source, must have joined */
3350 			if (ilg == NULL)
3351 				error = EADDRNOTAVAIL;
3352 		}
3353 		/* (incl {addr}) => join source, may have joined */
3354 
3355 		if (ilg != NULL &&
3356 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3357 			error = ENOBUFS;
3358 	}
3359 	if (error != 0) {
3360 		mutex_exit(&connp->conn_lock);
3361 		return (error);
3362 	}
3363 
3364 	/*
3365 	 * Alloc buffer to copy new state into (see below) before
3366 	 * we make any changes, so we can bail if it fails.
3367 	 */
3368 	if ((new_filter = l_alloc()) == NULL) {
3369 		mutex_exit(&connp->conn_lock);
3370 		return (ENOMEM);
3371 	}
3372 
3373 	if (ilg == NULL) {
3374 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3375 			mutex_exit(&connp->conn_lock);
3376 			l_free(new_filter);
3377 			return (ENOMEM);
3378 		}
3379 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3380 			ilg->ilg_filter = l_alloc();
3381 			if (ilg->ilg_filter == NULL) {
3382 				ilg_delete(connp, ilg, NULL);
3383 				mutex_exit(&connp->conn_lock);
3384 				l_free(new_filter);
3385 				return (ENOMEM);
3386 			}
3387 			ilg->ilg_filter->sl_numsrc = 1;
3388 			ilg->ilg_filter->sl_addr[0] = *v6src;
3389 		}
3390 		ilgstat = ILGSTAT_NEW;
3391 		ilg->ilg_v6group = *v6group;
3392 		ilg->ilg_fmode = fmode;
3393 		ilg->ilg_ipif = NULL;
3394 		/*
3395 		 * Choose our target ill to join on. This might be different
3396 		 * from the ill we've been given if it's currently down and
3397 		 * part of a group.
3398 		 *
3399 		 * new ill is not refheld; we are writer.
3400 		 */
3401 		ill = ip_choose_multi_ill(ill, v6group);
3402 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3403 		ilg->ilg_ill = ill;
3404 		/*
3405 		 * Remember the orig_ifindex that we joined on, so that we
3406 		 * can successfully delete them later on and also search
3407 		 * for duplicates if the application wants to join again.
3408 		 */
3409 		ilg->ilg_orig_ifindex = orig_ifindex;
3410 	} else {
3411 		int index;
3412 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3413 			mutex_exit(&connp->conn_lock);
3414 			l_free(new_filter);
3415 			return (EINVAL);
3416 		}
3417 		if (ilg->ilg_filter == NULL) {
3418 			ilg->ilg_filter = l_alloc();
3419 			if (ilg->ilg_filter == NULL) {
3420 				mutex_exit(&connp->conn_lock);
3421 				l_free(new_filter);
3422 				return (ENOMEM);
3423 			}
3424 		}
3425 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3426 			mutex_exit(&connp->conn_lock);
3427 			l_free(new_filter);
3428 			return (EADDRNOTAVAIL);
3429 		}
3430 		ilgstat = ILGSTAT_CHANGE;
3431 		index = ilg->ilg_filter->sl_numsrc++;
3432 		ilg->ilg_filter->sl_addr[index] = *v6src;
3433 		/*
3434 		 * The current ill might be different from the one we were
3435 		 * asked to join on (if failover has occurred); we should
3436 		 * join on the ill stored in the ilg.  The original ill
3437 		 * is noted in ilg_orig_ifindex, which matched our request.
3438 		 */
3439 		ill = ilg->ilg_ill;
3440 	}
3441 
3442 	/*
3443 	 * Save copy of ilg's filter state to pass to other functions,
3444 	 * so we can release conn_lock now.
3445 	 */
3446 	new_fmode = ilg->ilg_fmode;
3447 	l_copy(ilg->ilg_filter, new_filter);
3448 
3449 	mutex_exit(&connp->conn_lock);
3450 
3451 	/*
3452 	 * Now update the ill. We wait to do this until after the ilg
3453 	 * has been updated because we need to update the src filter
3454 	 * info for the ill, which involves looking at the status of
3455 	 * all the ilgs associated with this group/interface pair.
3456 	 */
3457 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3458 	    ilgstat, new_fmode, new_filter);
3459 	if (error != 0) {
3460 		/*
3461 		 * But because we waited, we have to undo the ilg update
3462 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3463 		 * again, since we've not been holding conn_lock.
3464 		 */
3465 		in6_addr_t delsrc =
3466 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3467 		mutex_enter(&connp->conn_lock);
3468 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3469 		ASSERT(ilg != NULL);
3470 		ilg_delete(connp, ilg, &delsrc);
3471 		mutex_exit(&connp->conn_lock);
3472 		l_free(new_filter);
3473 		return (error);
3474 	}
3475 
3476 	l_free(new_filter);
3477 
3478 	return (0);
3479 }
3480 
3481 /*
3482  * Find an IPv4 ilg matching group, ill and source
3483  */
3484 ilg_t *
3485 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3486 {
3487 	in6_addr_t v6group, v6src;
3488 	int i;
3489 	boolean_t isinlist;
3490 	ilg_t *ilg;
3491 	ipif_t *ipif;
3492 	ill_t *ilg_ill;
3493 
3494 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3495 
3496 	/*
3497 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3498 	 */
3499 	if (group == INADDR_ANY)
3500 		v6group = ipv6_all_zeros;
3501 	else
3502 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3503 
3504 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3505 		/* ilg_ipif is NULL for v6; skip them */
3506 		ilg = &connp->conn_ilg[i];
3507 		if ((ipif = ilg->ilg_ipif) == NULL)
3508 			continue;
3509 		ASSERT(ilg->ilg_ill == NULL);
3510 		ilg_ill = ipif->ipif_ill;
3511 		ASSERT(!ilg_ill->ill_isv6);
3512 		if (ilg_ill == ill &&
3513 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3514 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3515 				/* no source filter, so this is a match */
3516 				return (ilg);
3517 			}
3518 			break;
3519 		}
3520 	}
3521 	if (i == connp->conn_ilg_inuse)
3522 		return (NULL);
3523 
3524 	/*
3525 	 * we have an ilg with matching ill and group; but
3526 	 * the ilg has a source list that we must check.
3527 	 */
3528 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3529 	isinlist = B_FALSE;
3530 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3531 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3532 			isinlist = B_TRUE;
3533 			break;
3534 		}
3535 	}
3536 
3537 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3538 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3539 		return (ilg);
3540 
3541 	return (NULL);
3542 }
3543 
3544 /*
3545  * Find an IPv6 ilg matching group, ill, and source
3546  */
3547 ilg_t *
3548 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3549     const in6_addr_t *v6src, ill_t *ill)
3550 {
3551 	int i;
3552 	boolean_t isinlist;
3553 	ilg_t *ilg;
3554 	ill_t *ilg_ill;
3555 
3556 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3557 
3558 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3559 		ilg = &connp->conn_ilg[i];
3560 		if ((ilg_ill = ilg->ilg_ill) == NULL)
3561 			continue;
3562 		ASSERT(ilg->ilg_ipif == NULL);
3563 		ASSERT(ilg_ill->ill_isv6);
3564 		if (ilg_ill == ill &&
3565 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3566 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3567 				/* no source filter, so this is a match */
3568 				return (ilg);
3569 			}
3570 			break;
3571 		}
3572 	}
3573 	if (i == connp->conn_ilg_inuse)
3574 		return (NULL);
3575 
3576 	/*
3577 	 * we have an ilg with matching ill and group; but
3578 	 * the ilg has a source list that we must check.
3579 	 */
3580 	isinlist = B_FALSE;
3581 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3582 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3583 			isinlist = B_TRUE;
3584 			break;
3585 		}
3586 	}
3587 
3588 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3589 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3590 		return (ilg);
3591 
3592 	return (NULL);
3593 }
3594 
3595 /*
3596  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3597  * This is useful when the interface fails and we have moved
3598  * to a new ill, but still would like to locate using the index
3599  * that we originally used to join. Used only for IPv6 currently.
3600  */
3601 static ilg_t *
3602 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3603 {
3604 	ilg_t	*ilg;
3605 	int	i;
3606 
3607 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3608 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3609 		ilg = &connp->conn_ilg[i];
3610 		/* ilg_ill is NULL for V4. Skip them */
3611 		if (ilg->ilg_ill == NULL)
3612 			continue;
3613 		/* ilg_ipif is NULL for V6 */
3614 		ASSERT(ilg->ilg_ipif == NULL);
3615 		ASSERT(ilg->ilg_orig_ifindex != 0);
3616 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3617 		    ilg->ilg_orig_ifindex == ifindex) {
3618 			return (ilg);
3619 		}
3620 	}
3621 	return (NULL);
3622 }
3623 
3624 /*
3625  * Find an IPv6 ilg matching group and ill
3626  */
3627 ilg_t *
3628 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3629 {
3630 	ilg_t	*ilg;
3631 	int	i;
3632 	ill_t 	*mem_ill;
3633 
3634 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3635 
3636 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3637 		ilg = &connp->conn_ilg[i];
3638 		if ((mem_ill = ilg->ilg_ill) == NULL)
3639 			continue;
3640 		ASSERT(ilg->ilg_ipif == NULL);
3641 		ASSERT(mem_ill->ill_isv6);
3642 		if (mem_ill == ill &&
3643 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3644 			return (ilg);
3645 	}
3646 	return (NULL);
3647 }
3648 
3649 /*
3650  * Find an IPv4 ilg matching group and ipif
3651  */
3652 static ilg_t *
3653 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3654 {
3655 	in6_addr_t v6group;
3656 	int	i;
3657 
3658 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3659 	ASSERT(!ipif->ipif_ill->ill_isv6);
3660 
3661 	if (group == INADDR_ANY)
3662 		v6group = ipv6_all_zeros;
3663 	else
3664 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3665 
3666 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3667 		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
3668 		    &v6group) &&
3669 		    connp->conn_ilg[i].ilg_ipif == ipif)
3670 			return (&connp->conn_ilg[i]);
3671 	}
3672 	return (NULL);
3673 }
3674 
3675 /*
3676  * If a source address is passed in (src != NULL and src is not
3677  * unspecified), remove the specified src addr from the given ilg's
3678  * filter list, else delete the ilg.
3679  */
3680 static void
3681 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3682 {
3683 	int	i;
3684 
3685 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3686 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3687 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3688 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3689 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3690 
3691 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3692 		if (connp->conn_ilg_walker_cnt != 0) {
3693 			ilg->ilg_flags |= ILG_DELETED;
3694 			return;
3695 		}
3696 
3697 		FREE_SLIST(ilg->ilg_filter);
3698 
3699 		i = ilg - &connp->conn_ilg[0];
3700 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3701 
3702 		/* Move other entries up one step */
3703 		connp->conn_ilg_inuse--;
3704 		for (; i < connp->conn_ilg_inuse; i++)
3705 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3706 
3707 		if (connp->conn_ilg_inuse == 0) {
3708 			mi_free((char *)connp->conn_ilg);
3709 			connp->conn_ilg = NULL;
3710 			cv_broadcast(&connp->conn_refcv);
3711 		}
3712 	} else {
3713 		l_remove(ilg->ilg_filter, src);
3714 	}
3715 }
3716 
3717 /*
3718  * Called from conn close. No new ilg can be added or removed.
3719  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3720  * will return error if conn has started closing.
3721  */
3722 void
3723 ilg_delete_all(conn_t *connp)
3724 {
3725 	int	i;
3726 	ipif_t	*ipif = NULL;
3727 	ill_t	*ill = NULL;
3728 	ilg_t	*ilg;
3729 	in6_addr_t v6group;
3730 	boolean_t success;
3731 	ipsq_t	*ipsq;
3732 	int	orig_ifindex;
3733 
3734 	mutex_enter(&connp->conn_lock);
3735 retry:
3736 	ILG_WALKER_HOLD(connp);
3737 	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3738 		ilg = &connp->conn_ilg[i];
3739 		/*
3740 		 * Since this walk is not atomic (we drop the
3741 		 * conn_lock and wait in ipsq_enter) we need
3742 		 * to check for the ILG_DELETED flag.
3743 		 */
3744 		if (ilg->ilg_flags & ILG_DELETED) {
3745 			/* Go to the next ilg */
3746 			i--;
3747 			continue;
3748 		}
3749 		v6group = ilg->ilg_v6group;
3750 
3751 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3752 			ipif = ilg->ilg_ipif;
3753 			ill = ipif->ipif_ill;
3754 		} else {
3755 			ipif = NULL;
3756 			ill = ilg->ilg_ill;
3757 		}
3758 		/*
3759 		 * We may not be able to refhold the ill if the ill/ipif
3760 		 * is changing. But we need to make sure that the ill will
3761 		 * not vanish. So we just bump up the ill_waiter count.
3762 		 * If we are unable to do even that, then the ill is closing,
3763 		 * in which case the unplumb thread will handle the cleanup,
3764 		 * and we move on to the next ilg.
3765 		 */
3766 		if (!ill_waiter_inc(ill)) {
3767 			/* Go to the next ilg */
3768 			i--;
3769 			continue;
3770 		}
3771 		mutex_exit(&connp->conn_lock);
3772 		/*
3773 		 * To prevent deadlock between ill close which waits inside
3774 		 * the perimeter, and conn close, ipsq_enter returns error,
3775 		 * the moment ILL_CONDEMNED is set, in which case ill close
3776 		 * takes responsibility to cleanup the ilgs. Note that we
3777 		 * have not yet set condemned flag, otherwise the conn can't
3778 		 * be refheld for cleanup by those routines and it would be
3779 		 * a mutual deadlock.
3780 		 */
3781 		success = ipsq_enter(ill, B_FALSE);
3782 		ipsq = ill->ill_phyint->phyint_ipsq;
3783 		ill_waiter_dcr(ill);
3784 		mutex_enter(&connp->conn_lock);
3785 		if (!success) {
3786 			/* Go to the next ilg */
3787 			i--;
3788 			continue;
3789 		}
3790 
3791 		/*
3792 		 * Make sure that nothing has changed under. For eg.
3793 		 * a failover/failback can change ilg_ill while we were
3794 		 * waiting to become exclusive above
3795 		 */
3796 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3797 			ipif = ilg->ilg_ipif;
3798 			ill = ipif->ipif_ill;
3799 		} else {
3800 			ipif = NULL;
3801 			ill = ilg->ilg_ill;
3802 		}
3803 		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3804 			/*
3805 			 * The ilg has changed under us probably due
3806 			 * to a failover or unplumb. Retry on the same ilg.
3807 			 */
3808 			mutex_exit(&connp->conn_lock);
3809 			ipsq_exit(ipsq, B_TRUE, B_TRUE);
3810 			mutex_enter(&connp->conn_lock);
3811 			continue;
3812 		}
3813 		v6group = ilg->ilg_v6group;
3814 		orig_ifindex = ilg->ilg_orig_ifindex;
3815 		ilg_delete(connp, ilg, NULL);
3816 		mutex_exit(&connp->conn_lock);
3817 
3818 		if (ipif != NULL)
3819 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3820 			    B_FALSE, B_TRUE);
3821 
3822 		else
3823 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3824 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3825 
3826 		ipsq_exit(ipsq, B_TRUE, B_TRUE);
3827 		mutex_enter(&connp->conn_lock);
3828 		/* Go to the next ilg */
3829 		i--;
3830 	}
3831 	ILG_WALKER_RELE(connp);
3832 
3833 	/* If any ill was skipped above wait and retry */
3834 	if (connp->conn_ilg_inuse != 0) {
3835 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3836 		goto retry;
3837 	}
3838 	mutex_exit(&connp->conn_lock);
3839 }
3840 
3841 /*
3842  * Called from ill close by ipcl_walk for clearing conn_ilg and
3843  * conn_multicast_ipif for a given ipif. conn is held by caller.
3844  * Note that ipcl_walk only walks conns that are not yet condemned.
3845  * condemned conns can't be refheld. For this reason, conn must become clean
3846  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3847  * condemned flag.
3848  */
3849 static void
3850 conn_delete_ipif(conn_t *connp, caddr_t arg)
3851 {
3852 	ipif_t	*ipif = (ipif_t *)arg;
3853 	int	i;
3854 	char	group_buf1[INET6_ADDRSTRLEN];
3855 	char	group_buf2[INET6_ADDRSTRLEN];
3856 	ipaddr_t group;
3857 	ilg_t	*ilg;
3858 
3859 	/*
3860 	 * Even though conn_ilg_inuse can change while we are in this loop,
3861 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3862 	 * be created or deleted for this connp, on this ill, since this ill
3863 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3864 	 */
3865 	mutex_enter(&connp->conn_lock);
3866 
3867 	/*
3868 	 * Increment the walker count, so that ilg repacking does not
3869 	 * occur while we are in the loop.
3870 	 */
3871 	ILG_WALKER_HOLD(connp);
3872 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3873 		ilg = &connp->conn_ilg[i];
3874 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3875 			continue;
3876 		/*
3877 		 * ip_close cannot be cleaning this ilg at the same time.
3878 		 * since it also has to execute in this ill's perimeter which
3879 		 * we are now holding. Only a clean conn can be condemned.
3880 		 */
3881 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3882 
3883 		/* Blow away the membership */
3884 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3885 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3886 		    group_buf1, sizeof (group_buf1)),
3887 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3888 		    group_buf2, sizeof (group_buf2)),
3889 		    ipif->ipif_ill->ill_name));
3890 
3891 		/* ilg_ipif is NULL for V6, so we won't be here */
3892 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3893 
3894 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3895 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3896 		mutex_exit(&connp->conn_lock);
3897 
3898 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3899 		mutex_enter(&connp->conn_lock);
3900 	}
3901 
3902 	/*
3903 	 * If we are the last walker, need to physically delete the
3904 	 * ilgs and repack.
3905 	 */
3906 	ILG_WALKER_RELE(connp);
3907 
3908 	if (connp->conn_multicast_ipif == ipif) {
3909 		/* Revert to late binding */
3910 		connp->conn_multicast_ipif = NULL;
3911 	}
3912 	mutex_exit(&connp->conn_lock);
3913 
3914 	conn_delete_ire(connp, (caddr_t)ipif);
3915 }
3916 
3917 /*
3918  * Called from ill close by ipcl_walk for clearing conn_ilg and
3919  * conn_multicast_ill for a given ill. conn is held by caller.
3920  * Note that ipcl_walk only walks conns that are not yet condemned.
3921  * condemned conns can't be refheld. For this reason, conn must become clean
3922  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3923  * condemned flag.
3924  */
3925 static void
3926 conn_delete_ill(conn_t *connp, caddr_t arg)
3927 {
3928 	ill_t	*ill = (ill_t *)arg;
3929 	int	i;
3930 	char	group_buf[INET6_ADDRSTRLEN];
3931 	in6_addr_t v6group;
3932 	int	orig_ifindex;
3933 	ilg_t	*ilg;
3934 
3935 	/*
3936 	 * Even though conn_ilg_inuse can change while we are in this loop,
3937 	 * no new ilgs can be created/deleted for this connp, on this
3938 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3939 	 * in this cleanup.
3940 	 */
3941 	mutex_enter(&connp->conn_lock);
3942 
3943 	/*
3944 	 * Increment the walker count, so that ilg repacking does not
3945 	 * occur while we are in the loop.
3946 	 */
3947 	ILG_WALKER_HOLD(connp);
3948 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3949 		ilg = &connp->conn_ilg[i];
3950 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3951 			/*
3952 			 * ip_close cannot be cleaning this ilg at the same
3953 			 * time, since it also has to execute in this ill's
3954 			 * perimeter which we are now holding. Only a clean
3955 			 * conn can be condemned.
3956 			 */
3957 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3958 
3959 			/* Blow away the membership */
3960 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3961 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3962 			    group_buf, sizeof (group_buf)),
3963 			    ill->ill_name));
3964 
3965 			v6group = ilg->ilg_v6group;
3966 			orig_ifindex = ilg->ilg_orig_ifindex;
3967 			ilg_delete(connp, ilg, NULL);
3968 			mutex_exit(&connp->conn_lock);
3969 
3970 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3971 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3972 			mutex_enter(&connp->conn_lock);
3973 		}
3974 	}
3975 	/*
3976 	 * If we are the last walker, need to physically delete the
3977 	 * ilgs and repack.
3978 	 */
3979 	ILG_WALKER_RELE(connp);
3980 
3981 	if (connp->conn_multicast_ill == ill) {
3982 		/* Revert to late binding */
3983 		connp->conn_multicast_ill = NULL;
3984 		connp->conn_orig_multicast_ifindex = 0;
3985 	}
3986 	mutex_exit(&connp->conn_lock);
3987 }
3988 
3989 /*
3990  * Called when an ipif is unplumbed to make sure that there are no
3991  * dangling conn references to that ipif.
3992  * Handles ilg_ipif and conn_multicast_ipif
3993  */
3994 void
3995 reset_conn_ipif(ipif)
3996 	ipif_t	*ipif;
3997 {
3998 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif);
3999 }
4000 
4001 /*
4002  * Called when an ill is unplumbed to make sure that there are no
4003  * dangling conn references to that ill.
4004  * Handles ilg_ill, conn_multicast_ill.
4005  */
4006 void
4007 reset_conn_ill(ill_t *ill)
4008 {
4009 	ipcl_walk(conn_delete_ill, (caddr_t)ill);
4010 }
4011 
4012 #ifdef DEBUG
4013 /*
4014  * Walk functions walk all the interfaces in the system to make
4015  * sure that there is no refernece to the ipif or ill that is
4016  * going away.
4017  */
4018 int
4019 ilm_walk_ill(ill_t *ill)
4020 {
4021 	int cnt = 0;
4022 	ill_t *till;
4023 	ilm_t *ilm;
4024 	ill_walk_context_t ctx;
4025 
4026 	rw_enter(&ill_g_lock, RW_READER);
4027 	till = ILL_START_WALK_ALL(&ctx);
4028 	for (; till != NULL; till = ill_next(&ctx, till)) {
4029 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4030 			if (ilm->ilm_ill == ill) {
4031 				cnt++;
4032 			}
4033 		}
4034 	}
4035 	rw_exit(&ill_g_lock);
4036 
4037 	return (cnt);
4038 }
4039 
4040 /*
4041  * This function is called before the ipif is freed.
4042  */
4043 int
4044 ilm_walk_ipif(ipif_t *ipif)
4045 {
4046 	int cnt = 0;
4047 	ill_t *till;
4048 	ilm_t *ilm;
4049 	ill_walk_context_t ctx;
4050 
4051 	till = ILL_START_WALK_ALL(&ctx);
4052 	for (; till != NULL; till = ill_next(&ctx, till)) {
4053 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4054 			if (ilm->ilm_ipif == ipif) {
4055 					cnt++;
4056 			}
4057 		}
4058 	}
4059 	return (cnt);
4060 }
4061 #endif
4062