xref: /netbsd/sys/netinet6/mld6.c (revision d57ba21c)
1 /*	$NetBSD: mld6.c,v 1.78 2017/01/16 07:33:36 ryo Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.78 2017/01/16 07:33:36 ryo Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 
123 #include <net/if.h>
124 
125 #include <netinet/in.h>
126 #include <netinet/in_var.h>
127 #include <netinet6/in6_var.h>
128 #include <netinet/ip6.h>
129 #include <netinet6/ip6_var.h>
130 #include <netinet6/scope6_var.h>
131 #include <netinet/icmp6.h>
132 #include <netinet6/icmp6_private.h>
133 #include <netinet6/mld6_var.h>
134 
135 #include <net/net_osdep.h>
136 
137 
138 /*
139  * This structure is used to keep track of in6_multi chains which belong to
140  * deleted interface addresses.
141  */
142 static LIST_HEAD(, multi6_kludge) in6_mk = LIST_HEAD_INITIALIZER(in6_mk);
143 
144 struct multi6_kludge {
145 	LIST_ENTRY(multi6_kludge) mk_entry;
146 	struct ifnet *mk_ifp;
147 	struct in6_multihead mk_head;
148 };
149 
150 
151 /*
152  * Protocol constants
153  */
154 
155 /*
156  * time between repetitions of a node's initial report of interest in a
157  * multicast address(in seconds)
158  */
159 #define MLD_UNSOLICITED_REPORT_INTERVAL	10
160 
161 static struct ip6_pktopts ip6_opts;
162 
163 static void mld_start_listening(struct in6_multi *);
164 static void mld_stop_listening(struct in6_multi *);
165 
166 static struct mld_hdr * mld_allocbuf(struct mbuf **, int, struct in6_multi *,
167 	int);
168 static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
169 static void mld_starttimer(struct in6_multi *);
170 static void mld_stoptimer(struct in6_multi *);
171 static u_long mld_timerresid(struct in6_multi *);
172 
173 void
174 mld_init(void)
175 {
176 	static u_int8_t hbh_buf[8];
177 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
178 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
179 
180 	/* ip6h_nxt will be fill in later */
181 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
182 
183 	/* XXX: grotty hard coding... */
184 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
185 	hbh_buf[3] = 0;
186 	hbh_buf[4] = IP6OPT_RTALERT;
187 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
188 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
189 
190 	ip6_opts.ip6po_hbh = hbh;
191 	/* We will specify the hoplimit by a multicast option. */
192 	ip6_opts.ip6po_hlim = -1;
193 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
194 }
195 
196 static void
197 mld_starttimer(struct in6_multi *in6m)
198 {
199 	struct timeval now;
200 
201 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
202 
203 	microtime(&now);
204 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
205 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
206 	    (in6m->in6m_timer % hz) * (1000000 / hz);
207 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
208 		in6m->in6m_timer_expire.tv_sec++;
209 		in6m->in6m_timer_expire.tv_usec -= 1000000;
210 	}
211 
212 	/* start or restart the timer */
213 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
214 }
215 
216 static void
217 mld_stoptimer(struct in6_multi *in6m)
218 {
219 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
220 		return;
221 
222 	callout_stop(&in6m->in6m_timer_ch);
223 
224 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
225 }
226 
227 static void
228 mld_timeo(void *arg)
229 {
230 	struct in6_multi *in6m = arg;
231 
232 	mutex_enter(softnet_lock);
233 	KERNEL_LOCK(1, NULL);
234 
235 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
236 		goto out;
237 
238 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
239 
240 	switch (in6m->in6m_state) {
241 	case MLD_REPORTPENDING:
242 		mld_start_listening(in6m);
243 		break;
244 	default:
245 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
246 		break;
247 	}
248 
249 out:
250 	KERNEL_UNLOCK_ONE(NULL);
251 	mutex_exit(softnet_lock);
252 }
253 
254 static u_long
255 mld_timerresid(struct in6_multi *in6m)
256 {
257 	struct timeval now, diff;
258 
259 	microtime(&now);
260 
261 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
262 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
263 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
264 		return (0);
265 	}
266 	diff = in6m->in6m_timer_expire;
267 	diff.tv_sec -= now.tv_sec;
268 	diff.tv_usec -= now.tv_usec;
269 	if (diff.tv_usec < 0) {
270 		diff.tv_sec--;
271 		diff.tv_usec += 1000000;
272 	}
273 
274 	/* return the remaining time in milliseconds */
275 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
276 }
277 
278 static void
279 mld_start_listening(struct in6_multi *in6m)
280 {
281 	struct in6_addr all_in6;
282 
283 	/*
284 	 * RFC2710 page 10:
285 	 * The node never sends a Report or Done for the link-scope all-nodes
286 	 * address.
287 	 * MLD messages are never sent for multicast addresses whose scope is 0
288 	 * (reserved) or 1 (node-local).
289 	 */
290 	all_in6 = in6addr_linklocal_allnodes;
291 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
292 		/* XXX: this should not happen! */
293 		in6m->in6m_timer = 0;
294 		in6m->in6m_state = MLD_OTHERLISTENER;
295 	}
296 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
297 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
298 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
299 		in6m->in6m_state = MLD_OTHERLISTENER;
300 	} else {
301 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
302 		in6m->in6m_timer = cprng_fast32() %
303 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
304 		in6m->in6m_state = MLD_IREPORTEDLAST;
305 
306 		mld_starttimer(in6m);
307 	}
308 }
309 
310 static void
311 mld_stop_listening(struct in6_multi *in6m)
312 {
313 	struct in6_addr allnode, allrouter;
314 
315 	allnode = in6addr_linklocal_allnodes;
316 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
317 		/* XXX: this should not happen! */
318 		return;
319 	}
320 	allrouter = in6addr_linklocal_allrouters;
321 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
322 		/* XXX impossible */
323 		return;
324 	}
325 
326 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
327 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
328 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
329 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
330 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
331 	}
332 }
333 
334 void
335 mld_input(struct mbuf *m, int off)
336 {
337 	struct ip6_hdr *ip6;
338 	struct mld_hdr *mldh;
339 	struct ifnet *ifp;
340 	struct in6_multi *in6m = NULL;
341 	struct in6_addr mld_addr, all_in6;
342 	struct in6_ifaddr *ia;
343 	u_long timer = 0;	/* timer value in the MLD query header */
344 	int s;
345 
346 	ifp = m_get_rcvif(m, &s);
347 	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
348 	if (mldh == NULL) {
349 		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
350 		goto out_nodrop;
351 	}
352 
353 	/* source address validation */
354 	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
355 	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
356 		/*
357 		 * RFC3590 allows the IPv6 unspecified address as the source
358 		 * address of MLD report and done messages.  However, as this
359 		 * same document says, this special rule is for snooping
360 		 * switches and the RFC requires routers to discard MLD packets
361 		 * with the unspecified source address.  The RFC only talks
362 		 * about hosts receiving an MLD query or report in Security
363 		 * Considerations, but this is probably the correct intention.
364 		 * RFC3590 does not talk about other cases than link-local and
365 		 * the unspecified source addresses, but we believe the same
366 		 * rule should be applied.
367 		 * As a result, we only allow link-local addresses as the
368 		 * source address; otherwise, simply discard the packet.
369 		 */
370 #if 0
371 		/*
372 		 * XXX: do not log in an input path to avoid log flooding,
373 		 * though RFC3590 says "SHOULD log" if the source of a query
374 		 * is the unspecified address.
375 		 */
376 		char ip6bufs[INET6_ADDRSTRLEN];
377 		char ip6bufm[INET6_ADDRSTRLEN];
378 		log(LOG_INFO,
379 		    "mld_input: src %s is not link-local (grp=%s)\n",
380 		    ip6_sprintf(ip6bufs,&ip6->ip6_src),
381 		    ip6_sprintf(ip6bufm, &mldh->mld_addr));
382 #endif
383 		goto out;
384 	}
385 
386 	/*
387 	 * make a copy for local work (in6_setscope() may modify the 1st arg)
388 	 */
389 	mld_addr = mldh->mld_addr;
390 	if (in6_setscope(&mld_addr, ifp, NULL)) {
391 		/* XXX: this should not happen! */
392 		goto out;
393 	}
394 
395 	/*
396 	 * In the MLD specification, there are 3 states and a flag.
397 	 *
398 	 * In Non-Listener state, we simply don't have a membership record.
399 	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
400 	 * In Idle Listener state, our timer is not running
401 	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
402 	 *
403 	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
404 	 * we have heard a report from another member, or MLD_IREPORTEDLAST
405 	 * if we sent the last report.
406 	 */
407 	switch (mldh->mld_type) {
408 	case MLD_LISTENER_QUERY: {
409 		struct psref psref;
410 
411 		if (ifp->if_flags & IFF_LOOPBACK)
412 			break;
413 
414 		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
415 		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
416 			break;	/* print error or log stat? */
417 
418 		all_in6 = in6addr_linklocal_allnodes;
419 		if (in6_setscope(&all_in6, ifp, NULL)) {
420 			/* XXX: this should not happen! */
421 			break;
422 		}
423 
424 		/*
425 		 * - Start the timers in all of our membership records
426 		 *   that the query applies to for the interface on
427 		 *   which the query arrived excl. those that belong
428 		 *   to the "all-nodes" group (ff02::1).
429 		 * - Restart any timer that is already running but has
430 		 *   a value longer than the requested timeout.
431 		 * - Use the value specified in the query message as
432 		 *   the maximum timeout.
433 		 */
434 		timer = ntohs(mldh->mld_maxdelay);
435 
436 		ia = in6_get_ia_from_ifp_psref(ifp, &psref);
437 		if (ia == NULL)
438 			break;
439 
440 		/* The following operations may sleep */
441 		m_put_rcvif(ifp, &s);
442 		ifp = NULL;
443 
444 		LIST_FOREACH(in6m, &ia->ia6_multiaddrs, in6m_entry) {
445 			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
446 			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
447 			    IPV6_ADDR_SCOPE_LINKLOCAL)
448 				continue;
449 
450 			if (in6m->in6m_state == MLD_REPORTPENDING)
451 				continue; /* we are not yet ready */
452 
453 			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
454 			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
455 				continue;
456 
457 			if (timer == 0) {
458 				/* send a report immediately */
459 				mld_stoptimer(in6m);
460 				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
461 				in6m->in6m_state = MLD_IREPORTEDLAST;
462 			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
463 			    mld_timerresid(in6m) > timer) {
464 				in6m->in6m_timer =
465 				   1 + (cprng_fast32() % timer) * hz / 1000;
466 				mld_starttimer(in6m);
467 			}
468 		}
469 		ia6_release(ia, &psref);
470 		break;
471 	    }
472 
473 	case MLD_LISTENER_REPORT:
474 		/*
475 		 * For fast leave to work, we have to know that we are the
476 		 * last person to send a report for this group.  Reports
477 		 * can potentially get looped back if we are a multicast
478 		 * router, so discard reports sourced by me.
479 		 * Note that it is impossible to check IFF_LOOPBACK flag of
480 		 * ifp for this purpose, since ip6_mloopback pass the physical
481 		 * interface to looutput.
482 		 */
483 		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
484 			break;
485 
486 		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
487 			break;
488 
489 		/*
490 		 * If we belong to the group being reported, stop
491 		 * our timer for that group.
492 		 */
493 		IN6_LOOKUP_MULTI(mld_addr, ifp, in6m);
494 		if (in6m) {
495 			mld_stoptimer(in6m); /* transit to idle state */
496 			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
497 		}
498 		break;
499 	default:		/* this is impossible */
500 #if 0
501 		/*
502 		 * this case should be impossible because of filtering in
503 		 * icmp6_input().  But we explicitly disabled this part
504 		 * just in case.
505 		 */
506 		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
507 #endif
508 		break;
509 	}
510 
511 out:
512 	m_freem(m);
513 out_nodrop:
514 	m_put_rcvif(ifp, &s);
515 }
516 
517 static void
518 mld_sendpkt(struct in6_multi *in6m, int type,
519 	const struct in6_addr *dst)
520 {
521 	struct mbuf *mh;
522 	struct mld_hdr *mldh;
523 	struct ip6_hdr *ip6 = NULL;
524 	struct ip6_moptions im6o;
525 	struct in6_ifaddr *ia = NULL;
526 	struct ifnet *ifp = in6m->in6m_ifp;
527 	int ignflags;
528 	struct psref psref;
529 	int bound;
530 
531 	/*
532 	 * At first, find a link local address on the outgoing interface
533 	 * to use as the source address of the MLD packet.
534 	 * We do not reject tentative addresses for MLD report to deal with
535 	 * the case where we first join a link-local address.
536 	 */
537 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
538 	bound = curlwp_bind();
539 	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
540 	if (ia == NULL) {
541 		curlwp_bindx(bound);
542 		return;
543 	}
544 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
545 		ia6_release(ia, &psref);
546 		ia = NULL;
547 	}
548 
549 	/* Allocate two mbufs to store IPv6 header and MLD header */
550 	mldh = mld_allocbuf(&mh, sizeof(struct mld_hdr), in6m, type);
551 	if (mldh == NULL) {
552 		ia6_release(ia, &psref);
553 		curlwp_bindx(bound);
554 		return;
555 	}
556 
557 	/* fill src/dst here */
558  	ip6 = mtod(mh, struct ip6_hdr *);
559  	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
560  	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
561 	ia6_release(ia, &psref);
562 	curlwp_bindx(bound);
563 
564 	mldh->mld_addr = in6m->in6m_addr;
565 	in6_clearscope(&mldh->mld_addr); /* XXX */
566 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
567 	    sizeof(struct mld_hdr));
568 
569 	/* construct multicast option */
570 	memset(&im6o, 0, sizeof(im6o));
571 	im6o.im6o_multicast_if_index = if_get_index(ifp);
572 	im6o.im6o_multicast_hlim = 1;
573 
574 	/*
575 	 * Request loopback of the report if we are acting as a multicast
576 	 * router, so that the process-level routing daemon can hear it.
577 	 */
578 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
579 
580 	/* increment output statictics */
581 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
582 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
583 	switch (type) {
584 	case MLD_LISTENER_QUERY:
585 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
586 		break;
587 	case MLD_LISTENER_REPORT:
588 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
589 		break;
590 	case MLD_LISTENER_DONE:
591 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
592 		break;
593 	}
594 
595 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
596 	    &im6o, NULL, NULL);
597 }
598 
599 static struct mld_hdr *
600 mld_allocbuf(struct mbuf **mh, int len, struct in6_multi *in6m,
601     int type)
602 {
603 	struct mbuf *md;
604 	struct mld_hdr *mldh;
605 	struct ip6_hdr *ip6;
606 
607 	/*
608 	 * Allocate mbufs to store ip6 header and MLD header.
609 	 * We allocate 2 mbufs and make chain in advance because
610 	 * it is more convenient when inserting the hop-by-hop option later.
611 	 */
612 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
613 	if (*mh == NULL)
614 		return NULL;
615 	MGET(md, M_DONTWAIT, MT_DATA);
616 	if (md == NULL) {
617 		m_free(*mh);
618 		*mh = NULL;
619 		return NULL;
620 	}
621 	(*mh)->m_next = md;
622 	md->m_next = NULL;
623 
624 	m_reset_rcvif((*mh));
625 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + len;
626 	(*mh)->m_len = sizeof(struct ip6_hdr);
627 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
628 
629 	/* fill in the ip6 header */
630 	ip6 = mtod(*mh, struct ip6_hdr *);
631 	memset(ip6, 0, sizeof(*ip6));
632 	ip6->ip6_flow = 0;
633 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
634 	ip6->ip6_vfc |= IPV6_VERSION;
635 	/* ip6_plen will be set later */
636 	ip6->ip6_nxt = IPPROTO_ICMPV6;
637 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
638 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
639 
640 	/* fill in the MLD header as much as possible */
641 	md->m_len = len;
642 	mldh = mtod(md, struct mld_hdr *);
643 	memset(mldh, 0, len);
644 	mldh->mld_type = type;
645 	return mldh;
646 }
647 
648 /*
649  * Add an address to the list of IP6 multicast addresses for a given interface.
650  */
651 struct	in6_multi *
652 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
653 	int *errorp, int timer)
654 {
655 	struct	in6_ifaddr *ia;
656 	struct	sockaddr_in6 sin6;
657 	struct	in6_multi *in6m;
658 	int	s = splsoftnet();
659 
660 	*errorp = 0;
661 
662 	/*
663 	 * See if address already in list.
664 	 */
665 	IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
666 	if (in6m != NULL) {
667 		/*
668 		 * Found it; just increment the refrence count.
669 		 */
670 		in6m->in6m_refcount++;
671 	} else {
672 		int _s;
673 		/*
674 		 * New address; allocate a new multicast record
675 		 * and link it into the interface's multicast list.
676 		 */
677 		in6m = (struct in6_multi *)
678 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
679 		if (in6m == NULL) {
680 			splx(s);
681 			*errorp = ENOBUFS;
682 			return (NULL);
683 		}
684 
685 		in6m->in6m_addr = *maddr6;
686 		in6m->in6m_ifp = ifp;
687 		in6m->in6m_refcount = 1;
688 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
689 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
690 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
691 
692 		_s = pserialize_read_enter();
693 		ia = in6_get_ia_from_ifp(ifp);
694 		if (ia == NULL) {
695 			pserialize_read_exit(_s);
696 			callout_destroy(&in6m->in6m_timer_ch);
697 			free(in6m, M_IPMADDR);
698 			splx(s);
699 			*errorp = EADDRNOTAVAIL; /* appropriate? */
700 			return (NULL);
701 		}
702 		in6m->in6m_ia = ia;
703 		ifaref(&ia->ia_ifa); /* gain a reference */
704 		/* FIXME NOMPSAFE: need to lock */
705 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
706 		pserialize_read_exit(_s);
707 
708 		/*
709 		 * Ask the network driver to update its multicast reception
710 		 * filter appropriately for the new address.
711 		 */
712 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
713 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
714 		if (*errorp) {
715 			callout_destroy(&in6m->in6m_timer_ch);
716 			LIST_REMOVE(in6m, in6m_entry);
717 			free(in6m, M_IPMADDR);
718 			ifafree(&ia->ia_ifa);
719 			splx(s);
720 			return (NULL);
721 		}
722 
723 		in6m->in6m_timer = timer;
724 		if (in6m->in6m_timer > 0) {
725 			in6m->in6m_state = MLD_REPORTPENDING;
726 			mld_starttimer(in6m);
727 
728 			splx(s);
729 			return (in6m);
730 		}
731 
732 		/*
733 		 * Let MLD6 know that we have joined a new IP6 multicast
734 		 * group.
735 		 */
736 		mld_start_listening(in6m);
737 	}
738 	splx(s);
739 	return (in6m);
740 }
741 
742 /*
743  * Delete a multicast address record.
744  */
745 void
746 in6_delmulti(struct in6_multi *in6m)
747 {
748 	struct	sockaddr_in6 sin6;
749 	struct	in6_ifaddr *ia;
750 	int	s = splsoftnet();
751 
752 	mld_stoptimer(in6m);
753 
754 	if (--in6m->in6m_refcount == 0) {
755 		int _s;
756 
757 		/*
758 		 * No remaining claims to this record; let MLD6 know
759 		 * that we are leaving the multicast group.
760 		 */
761 		mld_stop_listening(in6m);
762 
763 		/*
764 		 * Unlink from list.
765 		 */
766 		LIST_REMOVE(in6m, in6m_entry);
767 		if (in6m->in6m_ia != NULL) {
768 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
769 			in6m->in6m_ia = NULL;
770 		}
771 
772 		/*
773 		 * Delete all references of this multicasting group from
774 		 * the membership arrays
775 		 */
776 		_s = pserialize_read_enter();
777 		IN6_ADDRLIST_READER_FOREACH(ia) {
778 			struct in6_multi_mship *imm;
779 			LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
780 				if (imm->i6mm_maddr == in6m)
781 					imm->i6mm_maddr = NULL;
782 			}
783 		}
784 		pserialize_read_exit(_s);
785 
786 		/*
787 		 * Notify the network driver to update its multicast
788 		 * reception filter.
789 		 */
790 		sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
791 		if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
792 
793 		/* Tell mld_timeo we're halting the timer */
794 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
795 #ifdef NET_MPSAFE
796 		callout_halt(&in6m->in6m_timer_ch, NULL);
797 #else
798 		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
799 #endif
800 		callout_destroy(&in6m->in6m_timer_ch);
801 
802 		free(in6m, M_IPMADDR);
803 	}
804 	splx(s);
805 }
806 
807 
808 struct in6_multi_mship *
809 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
810 	int *errorp, int timer)
811 {
812 	struct in6_multi_mship *imm;
813 
814 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
815 	if (imm == NULL) {
816 		*errorp = ENOBUFS;
817 		return NULL;
818 	}
819 
820 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
821 	if (!imm->i6mm_maddr) {
822 		/* *errorp is already set */
823 		free(imm, M_IPMADDR);
824 		return NULL;
825 	}
826 	return imm;
827 }
828 
829 int
830 in6_leavegroup(struct in6_multi_mship *imm)
831 {
832 
833 	if (imm->i6mm_maddr) {
834 		in6_delmulti(imm->i6mm_maddr);
835 	}
836 	free(imm, M_IPMADDR);
837 	return 0;
838 }
839 
840 
841 /*
842  * Multicast address kludge:
843  * If there were any multicast addresses attached to this interface address,
844  * either move them to another address on this interface, or save them until
845  * such time as this interface is reconfigured for IPv6.
846  */
847 void
848 in6_savemkludge(struct in6_ifaddr *oia)
849 {
850 	struct in6_ifaddr *ia;
851 	struct in6_multi *in6m;
852 	int s;
853 
854 	s = pserialize_read_enter();
855 	ia = in6_get_ia_from_ifp(oia->ia_ifp);
856 	if (ia) {	/* there is another address */
857 		KASSERT(ia != oia);
858 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
859 			LIST_REMOVE(in6m, in6m_entry);
860 			ifaref(&ia->ia_ifa);
861 			ifafree(&in6m->in6m_ia->ia_ifa);
862 			in6m->in6m_ia = ia;
863 			/* FIXME NOMPSAFE: need to lock */
864 			LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
865 		}
866 	} else {	/* last address on this if deleted, save */
867 		struct multi6_kludge *mk;
868 
869 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
870 			if (mk->mk_ifp == oia->ia_ifp)
871 				break;
872 		}
873 		if (mk == NULL) /* this should not happen! */
874 			panic("in6_savemkludge: no kludge space");
875 
876 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
877 			LIST_REMOVE(in6m, in6m_entry);
878 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
879 			in6m->in6m_ia = NULL;
880 			LIST_INSERT_HEAD(&mk->mk_head, in6m, in6m_entry);
881 		}
882 	}
883 	pserialize_read_exit(s);
884 }
885 
886 /*
887  * Continuation of multicast address hack:
888  * If there was a multicast group list previously saved for this interface,
889  * then we re-attach it to the first address configured on the i/f.
890  */
891 void
892 in6_restoremkludge(struct in6_ifaddr *ia, struct ifnet *ifp)
893 {
894 	struct multi6_kludge *mk;
895 	struct in6_multi *in6m;
896 
897 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
898 		if (mk->mk_ifp == ifp)
899 			break;
900 	}
901 	if (mk == NULL)
902 		return;
903 	while ((in6m = LIST_FIRST(&mk->mk_head)) != NULL) {
904 		LIST_REMOVE(in6m, in6m_entry);
905 		in6m->in6m_ia = ia;
906 		ifaref(&ia->ia_ifa);
907 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
908 	}
909 }
910 
911 /*
912  * Allocate space for the kludge at interface initialization time.
913  * Formerly, we dynamically allocated the space in in6_savemkludge() with
914  * malloc(M_WAITOK).  However, it was wrong since the function could be called
915  * under an interrupt context (software timer on address lifetime expiration).
916  * Also, we cannot just give up allocating the strucutre, since the group
917  * membership structure is very complex and we need to keep it anyway.
918  * Of course, this function MUST NOT be called under an interrupt context.
919  * Specifically, it is expected to be called only from in6_ifattach(), though
920  * it is a global function.
921  */
922 void
923 in6_createmkludge(struct ifnet *ifp)
924 {
925 	struct multi6_kludge *mk;
926 
927 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
928 		/* If we've already had one, do not allocate. */
929 		if (mk->mk_ifp == ifp)
930 			return;
931 	}
932 
933 	mk = malloc(sizeof(*mk), M_IPMADDR, M_ZERO|M_WAITOK);
934 
935 	LIST_INIT(&mk->mk_head);
936 	mk->mk_ifp = ifp;
937 	LIST_INSERT_HEAD(&in6_mk, mk, mk_entry);
938 }
939 
940 void
941 in6_purgemkludge(struct ifnet *ifp)
942 {
943 	struct multi6_kludge *mk;
944 	struct in6_multi *in6m, *next;
945 
946 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
947 		if (mk->mk_ifp == ifp)
948 			break;
949 	}
950 	if (mk == NULL)
951 		return;
952 
953 	/* leave from all multicast groups joined */
954 	for (in6m = LIST_FIRST(&mk->mk_head); in6m != NULL; in6m = next) {
955 		next = LIST_NEXT(in6m, in6m_entry);
956 		in6_delmulti(in6m);
957 	}
958 	LIST_REMOVE(mk, mk_entry);
959 	free(mk, M_IPMADDR);
960 }
961 
962 static int
963 in6_mkludge_sysctl(SYSCTLFN_ARGS)
964 {
965 	struct multi6_kludge *mk;
966 	struct in6_multi *in6m;
967 	int error;
968 	uint32_t tmp;
969 	size_t written;
970 
971 	if (namelen != 1)
972 		return EINVAL;
973 
974 	if (oldp == NULL) {
975 		*oldlenp = 0;
976 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
977 			if (mk->mk_ifp->if_index == name[0])
978 				continue;
979 			LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
980 				*oldlenp += sizeof(struct in6_addr) +
981 				    sizeof(uint32_t);
982 			}
983 		}
984 		return 0;
985 	}
986 
987 	error = 0;
988 	written = 0;
989 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
990 		if (mk->mk_ifp->if_index == name[0])
991 			continue;
992 		LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
993 			if (written + sizeof(struct in6_addr) +
994 			    sizeof(uint32_t) > *oldlenp)
995 				goto done;
996 			error = sysctl_copyout(l, &in6m->in6m_addr,
997 			    oldp, sizeof(struct in6_addr));
998 			if (error)
999 				goto done;
1000 			oldp = (char *)oldp + sizeof(struct in6_addr);
1001 			written += sizeof(struct in6_addr);
1002 			tmp = in6m->in6m_refcount;
1003 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1004 			if (error)
1005 				goto done;
1006 			oldp = (char *)oldp + sizeof(tmp);
1007 			written += sizeof(tmp);
1008 		}
1009 	}
1010 
1011 done:
1012 	*oldlenp = written;
1013 	return error;
1014 }
1015 
1016 static int
1017 in6_multicast_sysctl(SYSCTLFN_ARGS)
1018 {
1019 	struct ifnet *ifp;
1020 	struct ifaddr *ifa;
1021 	struct in6_ifaddr *ifa6;
1022 	struct in6_multi *in6m;
1023 	uint32_t tmp;
1024 	int error;
1025 	size_t written;
1026 	struct psref psref, psref_ia;
1027 	int bound, s;
1028 
1029 	if (namelen != 1)
1030 		return EINVAL;
1031 
1032 	bound = curlwp_bind();
1033 	ifp = if_get_byindex(name[0], &psref);
1034 	if (ifp == NULL) {
1035 		curlwp_bindx(bound);
1036 		return ENODEV;
1037 	}
1038 
1039 	if (oldp == NULL) {
1040 		*oldlenp = 0;
1041 		s = pserialize_read_enter();
1042 		IFADDR_READER_FOREACH(ifa, ifp) {
1043 			if (ifa->ifa_addr->sa_family != AF_INET6)
1044 				continue;
1045 			ifa6 = (struct in6_ifaddr *)ifa;
1046 			LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
1047 				*oldlenp += 2 * sizeof(struct in6_addr) +
1048 				    sizeof(uint32_t);
1049 			}
1050 		}
1051 		pserialize_read_exit(s);
1052 		if_put(ifp, &psref);
1053 		curlwp_bindx(bound);
1054 		return 0;
1055 	}
1056 
1057 	error = 0;
1058 	written = 0;
1059 	s = pserialize_read_enter();
1060 	IFADDR_READER_FOREACH(ifa, ifp) {
1061 		if (ifa->ifa_addr->sa_family != AF_INET6)
1062 			continue;
1063 
1064 		ifa_acquire(ifa, &psref_ia);
1065 		pserialize_read_exit(s);
1066 
1067 		ifa6 = (struct in6_ifaddr *)ifa;
1068 		LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
1069 			if (written + 2 * sizeof(struct in6_addr) +
1070 			    sizeof(uint32_t) > *oldlenp)
1071 				goto done;
1072 			error = sysctl_copyout(l, &ifa6->ia_addr.sin6_addr,
1073 			    oldp, sizeof(struct in6_addr));
1074 			if (error)
1075 				goto done;
1076 			oldp = (char *)oldp + sizeof(struct in6_addr);
1077 			written += sizeof(struct in6_addr);
1078 			error = sysctl_copyout(l, &in6m->in6m_addr,
1079 			    oldp, sizeof(struct in6_addr));
1080 			if (error)
1081 				goto done;
1082 			oldp = (char *)oldp + sizeof(struct in6_addr);
1083 			written += sizeof(struct in6_addr);
1084 			tmp = in6m->in6m_refcount;
1085 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1086 			if (error)
1087 				goto done;
1088 			oldp = (char *)oldp + sizeof(tmp);
1089 			written += sizeof(tmp);
1090 		}
1091 
1092 		s = pserialize_read_enter();
1093 		ifa_release(ifa, &psref_ia);
1094 	}
1095 	pserialize_read_exit(s);
1096 done:
1097 	ifa_release(ifa, &psref_ia);
1098 	if_put(ifp, &psref);
1099 	curlwp_bindx(bound);
1100 	*oldlenp = written;
1101 	return error;
1102 }
1103 
1104 void
1105 in6_sysctl_multicast_setup(struct sysctllog **clog)
1106 {
1107 
1108 	sysctl_createv(clog, 0, NULL, NULL,
1109 		       CTLFLAG_PERMANENT,
1110 		       CTLTYPE_NODE, "inet6", NULL,
1111 		       NULL, 0, NULL, 0,
1112 		       CTL_NET, PF_INET6, CTL_EOL);
1113 
1114 	sysctl_createv(clog, 0, NULL, NULL,
1115 		       CTLFLAG_PERMANENT,
1116 		       CTLTYPE_NODE, "multicast",
1117 		       SYSCTL_DESCR("Multicast information"),
1118 		       in6_multicast_sysctl, 0, NULL, 0,
1119 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1120 
1121 	sysctl_createv(clog, 0, NULL, NULL,
1122 		       CTLFLAG_PERMANENT,
1123 		       CTLTYPE_NODE, "multicast_kludge",
1124 		       SYSCTL_DESCR("multicast kludge information"),
1125 		       in6_mkludge_sysctl, 0, NULL, 0,
1126 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1127 }
1128