xref: /openbsd/sys/netinet6/ip6_output.c (revision 8529ddd3)
1 /*	$OpenBSD: ip6_output.c,v 1.171 2015/05/13 10:42:47 jsg Exp $	*/
2 /*	$KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62  */
63 
64 #include "pf.h"
65 
66 #include <sys/param.h>
67 #include <sys/malloc.h>
68 #include <sys/mbuf.h>
69 #include <sys/errno.h>
70 #include <sys/protosw.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #include <sys/proc.h>
74 #include <sys/systm.h>
75 
76 #include <net/if.h>
77 #include <net/if_var.h>
78 #include <net/if_enc.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 #include <netinet/ip.h>
83 #include <netinet/in_pcb.h>
84 #include <netinet/udp.h>
85 #include <netinet/tcp.h>
86 
87 #include <netinet/ip_var.h>
88 #include <netinet/tcp_timer.h>
89 #include <netinet/tcp_var.h>
90 #include <netinet/udp_var.h>
91 
92 #include <netinet6/in6_var.h>
93 #include <netinet/ip6.h>
94 #include <netinet/icmp6.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet6/nd6.h>
97 #include <netinet6/ip6protosw.h>
98 
99 #include <crypto/idgen.h>
100 
101 #if NPF > 0
102 #include <net/pfvar.h>
103 #endif
104 
105 #ifdef IPSEC
106 #include <netinet/ip_ipsp.h>
107 #include <netinet/ip_ah.h>
108 #include <netinet/ip_esp.h>
109 #endif /* IPSEC */
110 
/*
 * Bookkeeping for the mbufs that make up an outgoing packet while
 * ip6_output() assembles it: the mbuf holding the IPv6 header plus one
 * mbuf per optional extension header.  A NULL member means that header
 * is not present.
 */
struct ip6_exthdrs {
	struct mbuf	*ip6e_ip6;	/* IPv6 header */
	struct mbuf	*ip6e_hbh;	/* hop-by-hop options header */
	struct mbuf	*ip6e_dest1;	/* destination options, 1st part */
	struct mbuf	*ip6e_rthdr;	/* routing header */
	struct mbuf	*ip6e_dest2;	/* destination options, 2nd part */
};
118 
119 int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int, int);
120 int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *);
121 int ip6_getpcbopt(struct ip6_pktopts *, int, struct mbuf **);
122 int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int, int,
123 	int, int);
124 int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
125 int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
126 int ip6_copyexthdr(struct mbuf **, caddr_t, int);
127 int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
128 	struct ip6_frag **);
129 int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
130 int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
131 int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
132 	struct ifnet *, struct in6_addr *, u_long *, int *);
133 int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
134 static __inline u_int16_t __attribute__((__unused__))
135     in6_cksum_phdr(const struct in6_addr *, const struct in6_addr *,
136     u_int32_t, u_int32_t);
137 void in6_delayed_cksum(struct mbuf *, u_int8_t);
138 
139 /* Context for non-repeating IDs */
140 struct idgen32_ctx ip6_id_ctx;
141 
142 /*
143  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
144  * header (with pri, len, nxt, hlim, src, dst).
145  * This function may modify ver and hlim only.
146  * The mbuf chain containing the packet will be freed.
147  * The mbuf opt, if present, will not be freed.
148  *
149  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
150  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
151  * which is rt_rmx.rmx_mtu.
152  *
153  * ifpp - XXX: just for statistics
154  */
155 int
156 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
157     int flags, struct ip6_moptions *im6o, struct ifnet **ifpp,
158     struct inpcb *inp)
159 {
160 	struct ip6_hdr *ip6;
161 	struct ifnet *ifp;
162 	struct mbuf *m = m0;
163 	int hlen, tlen;
164 	struct route_in6 ip6route;
165 	struct rtentry *rt = NULL;
166 	struct sockaddr_in6 *dst, dstsock;
167 	int error = 0;
168 	u_long mtu;
169 	int alwaysfrag, dontfrag;
170 	u_int16_t src_scope, dst_scope;
171 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
172 	struct ip6_exthdrs exthdrs;
173 	struct in6_addr finaldst;
174 	struct route_in6 *ro_pmtu = NULL;
175 	int hdrsplit = 0;
176 	u_int8_t sproto = 0;
177 #ifdef IPSEC
178 	struct m_tag *mtag;
179 	union sockaddr_union sdst;
180 	struct tdb_ident *tdbi;
181 	u_int32_t sspi;
182 	struct tdb *tdb;
183 #if NPF > 0
184 	struct ifnet *encif;
185 #endif
186 #endif /* IPSEC */
187 
188 #ifdef IPSEC
189 	if (inp && (inp->inp_flags & INP_IPV6) == 0)
190 		panic("ip6_output: IPv4 pcb is passed");
191 #endif /* IPSEC */
192 
193 	ip6 = mtod(m, struct ip6_hdr *);
194 	finaldst = ip6->ip6_dst;
195 
196 #define MAKE_EXTHDR(hp, mp)						\
197     do {								\
198 	if (hp) {							\
199 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
200 		error = ip6_copyexthdr((mp), (caddr_t)(hp), 		\
201 		    ((eh)->ip6e_len + 1) << 3);				\
202 		if (error)						\
203 			goto freehdrs;					\
204 	}								\
205     } while (0)
206 
207 	bzero(&exthdrs, sizeof(exthdrs));
208 
209 	if (opt) {
210 		/* Hop-by-Hop options header */
211 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
212 		/* Destination options header(1st part) */
213 		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
214 		/* Routing header */
215 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
216 		/* Destination options header(2nd part) */
217 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
218 	}
219 
220 #ifdef IPSEC
221 	if (!ipsec_in_use && !inp)
222 		goto done_spd;
223 
224 	/*
225 	 * Check if there was an outgoing SA bound to the flow
226 	 * from a transport protocol.
227 	 */
228 	ip6 = mtod(m, struct ip6_hdr *);
229 
230 	/* Do we have any pending SAs to apply ? */
231 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
232 	if (mtag != NULL) {
233 #ifdef DIAGNOSTIC
234 		if (mtag->m_tag_len != sizeof (struct tdb_ident))
235 			panic("ip6_output: tag of length %hu (should be %zu",
236 			    mtag->m_tag_len, sizeof (struct tdb_ident));
237 #endif
238 		tdbi = (struct tdb_ident *)(mtag + 1);
239 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto);
240 		if (tdb == NULL)
241 			error = -EINVAL;
242 		m_tag_delete(m, mtag);
243 	} else
244 		tdb = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr),
245 		    &error, IPSP_DIRECTION_OUT, NULL, inp, 0);
246 
247 	if (tdb == NULL) {
248 		if (error == 0) {
249 		        /*
250 			 * No IPsec processing required, we'll just send the
251 			 * packet out.
252 			 */
253 		        sproto = 0;
254 
255 			/* Fall through to routing/multicast handling */
256 		} else {
257 		        /*
258 			 * -EINVAL is used to indicate that the packet should
259 			 * be silently dropped, typically because we've asked
260 			 * key management for an SA.
261 			 */
262 		        if (error == -EINVAL) /* Should silently drop packet */
263 				error = 0;
264 
265 			goto freehdrs;
266 		}
267 	} else {
268 		/* Loop detection */
269 		for (mtag = m_tag_first(m); mtag != NULL;
270 		    mtag = m_tag_next(m, mtag)) {
271 			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE)
272 				continue;
273 			tdbi = (struct tdb_ident *)(mtag + 1);
274 			if (tdbi->spi == tdb->tdb_spi &&
275 			    tdbi->proto == tdb->tdb_sproto &&
276 			    tdbi->rdomain == tdb->tdb_rdomain &&
277 			    !bcmp(&tdbi->dst, &tdb->tdb_dst,
278 			    sizeof(union sockaddr_union))) {
279 				sproto = 0; /* mark as no-IPsec-needed */
280 				goto done_spd;
281 			}
282 		}
283 
284 	        /* We need to do IPsec */
285 	        bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
286 		sspi = tdb->tdb_spi;
287 		sproto = tdb->tdb_sproto;
288 	}
289 
290 	/* Fall through to the routing/multicast handling code */
291  done_spd:
292 #endif /* IPSEC */
293 
294 	/*
295 	 * Calculate the total length of the extension header chain.
296 	 * Keep the length of the unfragmentable part for fragmentation.
297 	 */
298 	optlen = 0;
299 	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
300 	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
301 	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
302 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
303 	/* NOTE: we don't add AH/ESP length here. do that later. */
304 	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
305 
306 	/*
307 	 * If we need IPsec, or there is at least one extension header,
308 	 * separate IP6 header from the payload.
309 	 */
310 	if ((sproto || optlen) && !hdrsplit) {
311 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
312 			m = NULL;
313 			goto freehdrs;
314 		}
315 		m = exthdrs.ip6e_ip6;
316 		hdrsplit++;
317 	}
318 
319 	/* adjust pointer */
320 	ip6 = mtod(m, struct ip6_hdr *);
321 
322 	/* adjust mbuf packet header length */
323 	m->m_pkthdr.len += optlen;
324 	plen = m->m_pkthdr.len - sizeof(*ip6);
325 
326 	/* If this is a jumbo payload, insert a jumbo payload option. */
327 	if (plen > IPV6_MAXPACKET) {
328 		if (!hdrsplit) {
329 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
330 				m = NULL;
331 				goto freehdrs;
332 			}
333 			m = exthdrs.ip6e_ip6;
334 			hdrsplit++;
335 		}
336 		/* adjust pointer */
337 		ip6 = mtod(m, struct ip6_hdr *);
338 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
339 			goto freehdrs;
340 		ip6->ip6_plen = 0;
341 	} else
342 		ip6->ip6_plen = htons(plen);
343 
344 	/*
345 	 * Concatenate headers and fill in next header fields.
346 	 * Here we have, on "m"
347 	 *	IPv6 payload
348 	 * and we insert headers accordingly.  Finally, we should be getting:
349 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
350 	 *
351 	 * during the header composing process, "m" points to IPv6 header.
352 	 * "mprev" points to an extension header prior to esp.
353 	 */
354 	{
355 		u_char *nexthdrp = &ip6->ip6_nxt;
356 		struct mbuf *mprev = m;
357 
358 		/*
359 		 * we treat dest2 specially.  this makes IPsec processing
360 		 * much easier.  the goal here is to make mprev point the
361 		 * mbuf prior to dest2.
362 		 *
363 		 * result: IPv6 dest2 payload
364 		 * m and mprev will point to IPv6 header.
365 		 */
366 		if (exthdrs.ip6e_dest2) {
367 			if (!hdrsplit)
368 				panic("assumption failed: hdr not split");
369 			exthdrs.ip6e_dest2->m_next = m->m_next;
370 			m->m_next = exthdrs.ip6e_dest2;
371 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
372 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
373 		}
374 
375 #define MAKE_CHAIN(m, mp, p, i)\
376     do {\
377 	if (m) {\
378 		if (!hdrsplit) \
379 			panic("assumption failed: hdr not split"); \
380 		*mtod((m), u_char *) = *(p);\
381 		*(p) = (i);\
382 		p = mtod((m), u_char *);\
383 		(m)->m_next = (mp)->m_next;\
384 		(mp)->m_next = (m);\
385 		(mp) = (m);\
386 	}\
387     } while (0)
388 		/*
389 		 * result: IPv6 hbh dest1 rthdr dest2 payload
390 		 * m will point to IPv6 header.  mprev will point to the
391 		 * extension header prior to dest2 (rthdr in the above case).
392 		 */
393 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
394 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
395 		    IPPROTO_DSTOPTS);
396 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
397 		    IPPROTO_ROUTING);
398 	}
399 
400 	/*
401 	 * If there is a routing header, replace the destination address field
402 	 * with the first hop of the routing header.
403 	 */
404 	if (exthdrs.ip6e_rthdr) {
405 		struct ip6_rthdr *rh;
406 		struct ip6_rthdr0 *rh0;
407 		struct in6_addr *addr;
408 
409 		rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
410 		    struct ip6_rthdr *));
411 		switch (rh->ip6r_type) {
412 		case IPV6_RTHDR_TYPE_0:
413 			 rh0 = (struct ip6_rthdr0 *)rh;
414 			 addr = (struct in6_addr *)(rh0 + 1);
415 			 ip6->ip6_dst = addr[0];
416 			 bcopy(&addr[1], &addr[0],
417 			     sizeof(struct in6_addr) * (rh0->ip6r0_segleft - 1));
418 			 addr[rh0->ip6r0_segleft - 1] = finaldst;
419 			 break;
420 		default:	/* is it possible? */
421 			 error = EINVAL;
422 			 goto bad;
423 		}
424 	}
425 
426 	/* Source address validation */
427 	if (!(flags & IPV6_UNSPECSRC) &&
428 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
429 		/*
430 		 * XXX: we can probably assume validation in the caller, but
431 		 * we explicitly check the address here for safety.
432 		 */
433 		error = EOPNOTSUPP;
434 		ip6stat.ip6s_badscope++;
435 		goto bad;
436 	}
437 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
438 		error = EOPNOTSUPP;
439 		ip6stat.ip6s_badscope++;
440 		goto bad;
441 	}
442 
443 	ip6stat.ip6s_localout++;
444 
445 	/*
446 	 * Route packet.
447 	 */
448 #if NPF > 0
449 reroute:
450 #endif
451 
452 	/* initialize cached route */
453 	if (ro == 0) {
454 		ro = &ip6route;
455 		bzero((caddr_t)ro, sizeof(*ro));
456 	}
457 	ro_pmtu = ro;
458 	if (opt && opt->ip6po_rthdr)
459 		ro = &opt->ip6po_route;
460 	dst = &ro->ro_dst;
461 
462 	/*
463 	 * if specified, try to fill in the traffic class field.
464 	 * do not override if a non-zero value is already set.
465 	 * we check the diffserv field and the ecn field separately.
466 	 */
467 	if (opt && opt->ip6po_tclass >= 0) {
468 		int mask = 0;
469 
470 		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
471 			mask |= 0xfc;
472 		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
473 			mask |= 0x03;
474 		if (mask != 0)
475 			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
476 	}
477 
478 	/* fill in or override the hop limit field, if necessary. */
479 	if (opt && opt->ip6po_hlim != -1)
480 		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
481 	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
482 		if (im6o != NULL)
483 			ip6->ip6_hlim = im6o->im6o_hlim;
484 		else
485 			ip6->ip6_hlim = ip6_defmcasthlim;
486 	}
487 
488 #ifdef IPSEC
489 	/*
490 	 * Check if the packet needs encapsulation.
491 	 * ipsp_process_packet will never come back to here.
492 	 */
493 	if (sproto != 0) {
494 		/*
495 		 * XXX what should we do if ip6_hlim == 0 and the
496 		 * packet gets tunneled?
497 		 */
498 
499 		tdb = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
500 		    sspi, &sdst, sproto);
501 		if (tdb == NULL) {
502 			error = EHOSTUNREACH;
503 			m_freem(m);
504 			goto done;
505 		}
506 
507 #if NPF > 0
508 		if ((encif = enc_getif(tdb->tdb_rdomain,
509 		    tdb->tdb_tap)) == NULL ||
510 		    pf_test(AF_INET6, PF_OUT, encif, &m, NULL) != PF_PASS) {
511 			error = EHOSTUNREACH;
512 			m_freem(m);
513 			goto done;
514 		}
515 		if (m == NULL)
516 			goto done;
517 		ip6 = mtod(m, struct ip6_hdr *);
518 		/*
519 		 * PF_TAG_REROUTE handling or not...
520 		 * Packet is entering IPsec so the routing is
521 		 * already overruled by the IPsec policy.
522 		 * Until now the change was not reconsidered.
523 		 * What's the behaviour?
524 		 */
525 #endif
526 		in6_proto_cksum_out(m, encif);
527 
528 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
529 
530 		/* Callee frees mbuf */
531 		/*
532 		 * if we are source-routing, do not attempt to tunnel the
533 		 * packet just because ip6_dst is different from what tdb has.
534 		 * XXX
535 		 */
536 		error = ipsp_process_packet(m, tdb, AF_INET6,
537 		    exthdrs.ip6e_rthdr ? 1 : 0);
538 
539 		return error;  /* Nothing more to be done */
540 	}
541 #endif /* IPSEC */
542 
543 	bzero(&dstsock, sizeof(dstsock));
544 	dstsock.sin6_family = AF_INET6;
545 	dstsock.sin6_addr = ip6->ip6_dst;
546 	dstsock.sin6_len = sizeof(dstsock);
547 	ro->ro_tableid = m->m_pkthdr.ph_rtableid;
548 	if ((error = in6_selectroute(&dstsock, opt, im6o, ro, &ifp,
549 	    &rt, m->m_pkthdr.ph_rtableid)) != 0) {
550 		switch (error) {
551 		case EHOSTUNREACH:
552 			ip6stat.ip6s_noroute++;
553 			break;
554 		case EADDRNOTAVAIL:
555 		default:
556 			break;	/* XXX statistics? */
557 		}
558 		if (ifp != NULL)
559 			in6_ifstat_inc(ifp, ifs6_out_discard);
560 		goto bad;
561 	}
562 	if (rt == NULL) {
563 		/*
564 		 * If in6_selectroute() does not return a route entry,
565 		 * dst may not have been updated.
566 		 */
567 		*dst = dstsock;	/* XXX */
568 	}
569 
570 	/*
571 	 * then rt (for unicast) and ifp must be non-NULL valid values.
572 	 */
573 	if (rt)
574 		rt->rt_use++;
575 
576 	if ((flags & IPV6_FORWARDING) == 0) {
577 		/* XXX: the FORWARDING flag can be set for mrouting. */
578 		in6_ifstat_inc(ifp, ifs6_out_request);
579 	}
580 
581 	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
582 		if (opt && opt->ip6po_nextroute.ro_rt) {
583 			/*
584 			 * The nexthop is explicitly specified by the
585 			 * application.  We assume the next hop is an IPv6
586 			 * address.
587 			 */
588 			dst = satosin6(opt->ip6po_nexthop);
589 		} else if ((rt->rt_flags & RTF_GATEWAY))
590 			dst = satosin6(rt->rt_gateway);
591 	}
592 
593 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
594 		/* Unicast */
595 
596 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
597 	} else {
598 		/* Multicast */
599 		struct	in6_multi *in6m;
600 
601 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
602 
603 		in6_ifstat_inc(ifp, ifs6_out_mcast);
604 
605 		/*
606 		 * Confirm that the outgoing interface supports multicast.
607 		 */
608 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
609 			ip6stat.ip6s_noroute++;
610 			in6_ifstat_inc(ifp, ifs6_out_discard);
611 			error = ENETUNREACH;
612 			goto bad;
613 		}
614 		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
615 		if (in6m != NULL &&
616 		    (im6o == NULL || im6o->im6o_loop)) {
617 			/*
618 			 * If we belong to the destination multicast group
619 			 * on the outgoing interface, and the caller did not
620 			 * forbid loopback, loop back a copy.
621 			 */
622 			ip6_mloopback(ifp, m, dst);
623 		} else {
624 			/*
625 			 * If we are acting as a multicast router, perform
626 			 * multicast forwarding as if the packet had just
627 			 * arrived on the interface to which we are about
628 			 * to send.  The multicast forwarding function
629 			 * recursively calls this function, using the
630 			 * IPV6_FORWARDING flag to prevent infinite recursion.
631 			 *
632 			 * Multicasts that are looped back by ip6_mloopback(),
633 			 * above, will be forwarded by the ip6_input() routine,
634 			 * if necessary.
635 			 */
636 #ifdef MROUTING
637 			if (ip6_mforwarding && ip6_mrouter &&
638 			    (flags & IPV6_FORWARDING) == 0) {
639 				if (ip6_mforward(ip6, ifp, m) != 0) {
640 					m_freem(m);
641 					goto done;
642 				}
643 			}
644 #endif
645 		}
646 		/*
647 		 * Multicasts with a hoplimit of zero may be looped back,
648 		 * above, but must not be transmitted on a network.
649 		 * Also, multicasts addressed to the loopback interface
650 		 * are not sent -- the above call to ip6_mloopback() will
651 		 * loop back a copy if this host actually belongs to the
652 		 * destination group on the loopback interface.
653 		 */
654 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
655 		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
656 			m_freem(m);
657 			goto done;
658 		}
659 	}
660 
661 	/*
662 	 * If this packet is going trough a loopback interface we wont
663 	 * be able to restore its scope ID using the interface index.
664 	 */
665 	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
666 		if (ifp->if_flags & IFF_LOOPBACK)
667 			src_scope = ip6->ip6_src.s6_addr16[1];
668 		ip6->ip6_src.s6_addr16[1] = 0;
669 	}
670 	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
671 		if (ifp->if_flags & IFF_LOOPBACK)
672 			dst_scope = ip6->ip6_dst.s6_addr16[1];
673 		ip6->ip6_dst.s6_addr16[1] = 0;
674 	}
675 
676 	/*
677 	 * Fill the outgoing interface to tell the upper layer
678 	 * to increment per-interface statistics.
679 	 */
680 	if (ifpp)
681 		*ifpp = ifp;
682 
683 	/* Determine path MTU. */
684 	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
685 	    &alwaysfrag)) != 0)
686 		goto bad;
687 
688 	/*
689 	 * The caller of this function may specify to use the minimum MTU
690 	 * in some cases.
691 	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
692 	 * setting.  The logic is a bit complicated; by default, unicast
693 	 * packets will follow path MTU while multicast packets will be sent at
694 	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
695 	 * including unicast ones will be sent at the minimum MTU.  Multicast
696 	 * packets will always be sent at the minimum MTU unless
697 	 * IP6PO_MINMTU_DISABLE is explicitly specified.
698 	 * See RFC 3542 for more details.
699 	 */
700 	if (mtu > IPV6_MMTU) {
701 		if ((flags & IPV6_MINMTU))
702 			mtu = IPV6_MMTU;
703 		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
704 			mtu = IPV6_MMTU;
705 		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
706 			 (opt == NULL ||
707 			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
708 			mtu = IPV6_MMTU;
709 		}
710 	}
711 
712 	/*
713 	 * If the outgoing packet contains a hop-by-hop options header,
714 	 * it must be examined and processed even by the source node.
715 	 * (RFC 2460, section 4.)
716 	 */
717 	if (exthdrs.ip6e_hbh) {
718 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
719 		u_int32_t dummy1; /* XXX unused */
720 		u_int32_t dummy2; /* XXX unused */
721 
722 		/*
723 		 *  XXX: if we have to send an ICMPv6 error to the sender,
724 		 *       we need the M_LOOP flag since icmp6_error() expects
725 		 *       the IPv6 and the hop-by-hop options header are
726 		 *       continuous unless the flag is set.
727 		 */
728 		m->m_flags |= M_LOOP;
729 		m->m_pkthdr.rcvif = ifp;
730 		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
731 		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
732 		    &dummy1, &dummy2) < 0) {
733 			/* m was already freed at this point */
734 			error = EINVAL;/* better error? */
735 			goto done;
736 		}
737 		m->m_flags &= ~M_LOOP; /* XXX */
738 		m->m_pkthdr.rcvif = NULL;
739 	}
740 
741 #if NPF > 0
742 	if (pf_test(AF_INET6, PF_OUT, ifp, &m, NULL) != PF_PASS) {
743 		error = EHOSTUNREACH;
744 		m_freem(m);
745 		goto done;
746 	}
747 	if (m == NULL)
748 		goto done;
749 	ip6 = mtod(m, struct ip6_hdr *);
750 	if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) ==
751 	    (PF_TAG_REROUTE | PF_TAG_GENERATED)) {
752 		/* already rerun the route lookup, go on */
753 		m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE);
754 	} else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) {
755 		/* tag as generated to skip over pf_test on rerun */
756 		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
757 		finaldst = ip6->ip6_dst;
758 		ro = NULL;
759 		goto reroute;
760 	}
761 #endif
762 
763 	/*
764 	 * If the packet is not going on the wire it can be destinated
765 	 * to any local address.  In this case do not clear its scopes
766 	 * to let ip6_input() find a matching local route.
767 	 */
768 	if (ifp->if_flags & IFF_LOOPBACK) {
769 		if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
770 			ip6->ip6_src.s6_addr16[1] = src_scope;
771 		if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
772 			ip6->ip6_dst.s6_addr16[1] = dst_scope;
773 	}
774 
775 	in6_proto_cksum_out(m, ifp);
776 
777 	/*
778 	 * Send the packet to the outgoing interface.
779 	 * If necessary, do IPv6 fragmentation before sending.
780 	 *
781 	 * the logic here is rather complex:
782 	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
783 	 * 1-a: send as is if tlen <= path mtu
784 	 * 1-b: fragment if tlen > path mtu
785 	 *
786 	 * 2: if user asks us not to fragment (dontfrag == 1)
787 	 * 2-a: send as is if tlen <= interface mtu
788 	 * 2-b: error if tlen > interface mtu
789 	 *
790 	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
791 	 *      always fragment
792 	 *
793 	 * 4: if dontfrag == 1 && alwaysfrag == 1
794 	 *      error, as we cannot handle this conflicting request
795 	 */
796 	tlen = m->m_pkthdr.len;
797 
798 	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
799 		dontfrag = 1;
800 	else
801 		dontfrag = 0;
802 	if (dontfrag && alwaysfrag) {	/* case 4 */
803 		/* conflicting request - can't transmit */
804 		error = EMSGSIZE;
805 		goto bad;
806 	}
807 	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
808 		/*
809 		 * Even if the DONTFRAG option is specified, we cannot send the
810 		 * packet when the data length is larger than the MTU of the
811 		 * outgoing interface.
812 		 * Notify the error by sending IPV6_PATHMTU ancillary data as
813 		 * well as returning an error code (the latter is not described
814 		 * in the API spec.)
815 		 */
816 #if 0
817 		u_int32_t mtu32;
818 		struct ip6ctlparam ip6cp;
819 
820 		mtu32 = (u_int32_t)mtu;
821 		bzero(&ip6cp, sizeof(ip6cp));
822 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
823 		pfctlinput2(PRC_MSGSIZE, sin6tosa(&ro_pmtu->ro_dst),
824 		    (void *)&ip6cp);
825 #endif
826 
827 		error = EMSGSIZE;
828 		goto bad;
829 	}
830 
831 	/*
832 	 * transmit packet without fragmentation
833 	 */
834 	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
835 		error = nd6_output(ifp, m, dst, ro->ro_rt);
836 		goto done;
837 	}
838 
839 	/*
840 	 * try to fragment the packet.  case 1-b and 3
841 	 */
842 	if (mtu < IPV6_MMTU) {
843 		/* path MTU cannot be less than IPV6_MMTU */
844 		error = EMSGSIZE;
845 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
846 		goto bad;
847 	} else if (ip6->ip6_plen == 0) {
848 		/* jumbo payload cannot be fragmented */
849 		error = EMSGSIZE;
850 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
851 		goto bad;
852 	} else {
853 		u_char nextproto;
854 #if 0
855 		struct ip6ctlparam ip6cp;
856 		u_int32_t mtu32;
857 #endif
858 
859 		/*
860 		 * Too large for the destination or interface;
861 		 * fragment if possible.
862 		 * Must be able to put at least 8 bytes per fragment.
863 		 */
864 		hlen = unfragpartlen;
865 		if (mtu > IPV6_MAXPACKET)
866 			mtu = IPV6_MAXPACKET;
867 
868 #if 0
869 		/* Notify a proper path MTU to applications. */
870 		mtu32 = (u_int32_t)mtu;
871 		bzero(&ip6cp, sizeof(ip6cp));
872 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
873 		pfctlinput2(PRC_MSGSIZE, sin6tosa(&ro_pmtu->ro_dst),
874 		    (void *)&ip6cp);
875 #endif
876 
877 		/*
878 		 * Change the next header field of the last header in the
879 		 * unfragmentable part.
880 		 */
881 		if (exthdrs.ip6e_rthdr) {
882 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
883 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
884 		} else if (exthdrs.ip6e_dest1) {
885 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
886 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
887 		} else if (exthdrs.ip6e_hbh) {
888 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
889 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
890 		} else {
891 			nextproto = ip6->ip6_nxt;
892 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
893 		}
894 
895 		m0 = m;
896 		error = ip6_fragment(m0, hlen, nextproto, mtu);
897 
898 		switch (error) {
899 		case 0:
900 			in6_ifstat_inc(ifp, ifs6_out_fragok);
901 			break;
902 		case EMSGSIZE:
903 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
904 			break;
905 		default:
906 			ip6stat.ip6s_odropped++;
907 			break;
908 		}
909 	}
910 
911 	/*
912 	 * Remove leading garbages.
913 	 */
914 	m = m0->m_nextpkt;
915 	m0->m_nextpkt = 0;
916 	m_freem(m0);
917 	for (m0 = m; m; m = m0) {
918 		m0 = m->m_nextpkt;
919 		m->m_nextpkt = 0;
920 		if (error == 0) {
921 			ip6stat.ip6s_ofragments++;
922 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
923 			error = nd6_output(ifp, m, dst, ro->ro_rt);
924 		} else
925 			m_freem(m);
926 	}
927 
928 	if (error == 0)
929 		ip6stat.ip6s_fragmented++;
930 
931 done:
932 	if (ro == &ip6route && ro->ro_rt) {
933 		rtfree(ro->ro_rt);
934 	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
935 		rtfree(ro_pmtu->ro_rt);
936 	}
937 
938 	return (error);
939 
940 freehdrs:
941 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
942 	m_freem(exthdrs.ip6e_dest1);
943 	m_freem(exthdrs.ip6e_rthdr);
944 	m_freem(exthdrs.ip6e_dest2);
945 	/* FALLTHROUGH */
946 bad:
947 	m_freem(m);
948 	goto done;
949 }
950 
951 int
952 ip6_fragment(struct mbuf *m0, int hlen, u_char nextproto, u_long mtu)
953 {
954 	struct mbuf	*m, **mnext, *m_frgpart;
955 	struct ip6_hdr	*mhip6;
956 	struct ip6_frag	*ip6f;
957 	u_int32_t	 id;
958 	int		 tlen, len, off;
959 	int		 error;
960 
961 	id = htonl(ip6_randomid());
962 
963 	mnext = &m0->m_nextpkt;
964 	*mnext = NULL;
965 
966 	tlen = m0->m_pkthdr.len;
967 	len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
968 	if (len < 8)
969 		return (EMSGSIZE);
970 
971 	/*
972 	 * Loop through length of segment after first fragment,
973 	 * make new header and copy data of each part and link onto
974 	 * chain.
975 	 */
976 	for (off = hlen; off < tlen; off += len) {
977 		struct mbuf *mlast;
978 
979 		if ((m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL)
980 			return (ENOBUFS);
981 		*mnext = m;
982 		mnext = &m->m_nextpkt;
983 		if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0)
984 			return (error);
985 		m->m_data += max_linkhdr;
986 		mhip6 = mtod(m, struct ip6_hdr *);
987 		*mhip6 = *mtod(m0, struct ip6_hdr *);
988 		m->m_len = sizeof(*mhip6);
989 		if ((error = ip6_insertfraghdr(m0, m, hlen, &ip6f)) != 0)
990 			return (error);
991 		ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
992 		if (off + len >= tlen)
993 			len = tlen - off;
994 		else
995 			ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
996 		mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
997 		    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
998 		if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL)
999 			return (ENOBUFS);
1000 		for (mlast = m; mlast->m_next; mlast = mlast->m_next)
1001 			;
1002 		mlast->m_next = m_frgpart;
1003 		m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1004 		ip6f->ip6f_reserved = 0;
1005 		ip6f->ip6f_ident = id;
1006 		ip6f->ip6f_nxt = nextproto;
1007 	}
1008 
1009 	return (0);
1010 }
1011 
1012 int
1013 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1014 {
1015 	struct mbuf *m;
1016 
1017 	if (hlen > MCLBYTES)
1018 		return (ENOBUFS); /* XXX */
1019 
1020 	MGET(m, M_DONTWAIT, MT_DATA);
1021 	if (!m)
1022 		return (ENOBUFS);
1023 
1024 	if (hlen > MLEN) {
1025 		MCLGET(m, M_DONTWAIT);
1026 		if ((m->m_flags & M_EXT) == 0) {
1027 			m_free(m);
1028 			return (ENOBUFS);
1029 		}
1030 	}
1031 	m->m_len = hlen;
1032 	if (hdr)
1033 		bcopy(hdr, mtod(m, caddr_t), hlen);
1034 
1035 	*mp = m;
1036 	return (0);
1037 }
1038 
1039 /*
1040  * Insert jumbo payload option.
1041  */
1042 int
1043 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1044 {
1045 	struct mbuf *mopt;
1046 	u_int8_t *optbuf;
1047 	u_int32_t v;
1048 
1049 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1050 
1051 	/*
1052 	 * If there is no hop-by-hop options header, allocate new one.
1053 	 * If there is one but it doesn't have enough space to store the
1054 	 * jumbo payload option, allocate a cluster to store the whole options.
1055 	 * Otherwise, use it to store the options.
1056 	 */
1057 	if (exthdrs->ip6e_hbh == 0) {
1058 		MGET(mopt, M_DONTWAIT, MT_DATA);
1059 		if (mopt == NULL)
1060 			return (ENOBUFS);
1061 		mopt->m_len = JUMBOOPTLEN;
1062 		optbuf = mtod(mopt, u_int8_t *);
1063 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1064 		exthdrs->ip6e_hbh = mopt;
1065 	} else {
1066 		struct ip6_hbh *hbh;
1067 
1068 		mopt = exthdrs->ip6e_hbh;
1069 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1070 			/*
1071 			 * XXX assumption:
1072 			 * - exthdrs->ip6e_hbh is not referenced from places
1073 			 *   other than exthdrs.
1074 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1075 			 */
1076 			int oldoptlen = mopt->m_len;
1077 			struct mbuf *n;
1078 
1079 			/*
1080 			 * XXX: give up if the whole (new) hbh header does
1081 			 * not fit even in an mbuf cluster.
1082 			 */
1083 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1084 				return (ENOBUFS);
1085 
1086 			/*
1087 			 * As a consequence, we must always prepare a cluster
1088 			 * at this point.
1089 			 */
1090 			MGET(n, M_DONTWAIT, MT_DATA);
1091 			if (n) {
1092 				MCLGET(n, M_DONTWAIT);
1093 				if ((n->m_flags & M_EXT) == 0) {
1094 					m_freem(n);
1095 					n = NULL;
1096 				}
1097 			}
1098 			if (!n)
1099 				return (ENOBUFS);
1100 			n->m_len = oldoptlen + JUMBOOPTLEN;
1101 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1102 			      oldoptlen);
1103 			optbuf = mtod(n, u_int8_t *) + oldoptlen;
1104 			m_freem(mopt);
1105 			mopt = exthdrs->ip6e_hbh = n;
1106 		} else {
1107 			optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1108 			mopt->m_len += JUMBOOPTLEN;
1109 		}
1110 		optbuf[0] = IP6OPT_PADN;
1111 		optbuf[1] = 0;
1112 
1113 		/*
1114 		 * Adjust the header length according to the pad and
1115 		 * the jumbo payload option.
1116 		 */
1117 		hbh = mtod(mopt, struct ip6_hbh *);
1118 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1119 	}
1120 
1121 	/* fill in the option. */
1122 	optbuf[2] = IP6OPT_JUMBO;
1123 	optbuf[3] = 4;
1124 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1125 	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1126 
1127 	/* finally, adjust the packet header length */
1128 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1129 
1130 	return (0);
1131 #undef JUMBOOPTLEN
1132 }
1133 
1134 /*
1135  * Insert fragment header and copy unfragmentable header portions.
1136  */
1137 int
1138 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1139     struct ip6_frag **frghdrp)
1140 {
1141 	struct mbuf *n, *mlast;
1142 
1143 	if (hlen > sizeof(struct ip6_hdr)) {
1144 		n = m_copym(m0, sizeof(struct ip6_hdr),
1145 		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1146 		if (n == 0)
1147 			return (ENOBUFS);
1148 		m->m_next = n;
1149 	} else
1150 		n = m;
1151 
1152 	/* Search for the last mbuf of unfragmentable part. */
1153 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1154 		;
1155 
1156 	if ((mlast->m_flags & M_EXT) == 0 &&
1157 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1158 		/* use the trailing space of the last mbuf for the fragment hdr */
1159 		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1160 		    mlast->m_len);
1161 		mlast->m_len += sizeof(struct ip6_frag);
1162 		m->m_pkthdr.len += sizeof(struct ip6_frag);
1163 	} else {
1164 		/* allocate a new mbuf for the fragment header */
1165 		struct mbuf *mfrg;
1166 
1167 		MGET(mfrg, M_DONTWAIT, MT_DATA);
1168 		if (mfrg == NULL)
1169 			return (ENOBUFS);
1170 		mfrg->m_len = sizeof(struct ip6_frag);
1171 		*frghdrp = mtod(mfrg, struct ip6_frag *);
1172 		mlast->m_next = mfrg;
1173 	}
1174 
1175 	return (0);
1176 }
1177 
1178 int
1179 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1180     struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, int *alwaysfragp)
1181 {
1182 	u_int32_t mtu = 0;
1183 	int alwaysfrag = 0;
1184 	int error = 0;
1185 
1186 	if (ro_pmtu != ro) {
1187 		/* The first hop and the final destination may differ. */
1188 		struct sockaddr_in6 *sa6_dst = &ro_pmtu->ro_dst;
1189 
1190 		if (ro_pmtu->ro_rt &&
1191 		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1192 		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1193 			rtfree(ro_pmtu->ro_rt);
1194 			ro_pmtu->ro_rt = NULL;
1195 		}
1196 		if (ro_pmtu->ro_rt == 0) {
1197 			bzero(ro_pmtu, sizeof(*ro_pmtu));
1198 			ro_pmtu->ro_tableid = ifp->if_rdomain;
1199 			sa6_dst->sin6_family = AF_INET6;
1200 			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1201 			sa6_dst->sin6_addr = *dst;
1202 
1203 			ro_pmtu->ro_rt = rtalloc(sin6tosa(&ro_pmtu->ro_dst),
1204 			    RT_REPORT|RT_RESOLVE, ro_pmtu->ro_tableid);
1205 		}
1206 	}
1207 	if (ro_pmtu->ro_rt) {
1208 		u_int32_t ifmtu;
1209 
1210 		if (ifp == NULL)
1211 			ifp = ro_pmtu->ro_rt->rt_ifp;
1212 		ifmtu = IN6_LINKMTU(ifp);
1213 		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1214 		if (mtu == 0)
1215 			mtu = ifmtu;
1216 		else if (mtu < IPV6_MMTU) {
1217 			/*
1218 			 * RFC2460 section 5, last paragraph:
1219 			 * if we record ICMPv6 too big message with
1220 			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1221 			 * or smaller, with fragment header attached.
1222 			 * (fragment header is needed regardless from the
1223 			 * packet size, for translators to identify packets)
1224 			 */
1225 			alwaysfrag = 1;
1226 			mtu = IPV6_MMTU;
1227 		} else if (mtu > ifmtu) {
1228 			/*
1229 			 * The MTU on the route is larger than the MTU on
1230 			 * the interface!  This shouldn't happen, unless the
1231 			 * MTU of the interface has been changed after the
1232 			 * interface was brought up.  Change the MTU in the
1233 			 * route to match the interface MTU (as long as the
1234 			 * field isn't locked).
1235 			 */
1236 			mtu = ifmtu;
1237 			if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
1238 				ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1239 		}
1240 	} else if (ifp) {
1241 		mtu = IN6_LINKMTU(ifp);
1242 	} else
1243 		error = EHOSTUNREACH; /* XXX */
1244 
1245 	*mtup = mtu;
1246 	if (alwaysfragp)
1247 		*alwaysfragp = alwaysfrag;
1248 	return (error);
1249 }
1250 
1251 /*
1252  * IP6 socket option processing.
1253  */
1254 int
1255 ip6_ctloutput(int op, struct socket *so, int level, int optname,
1256     struct mbuf **mp)
1257 {
1258 	int privileged, optdatalen, uproto;
1259 	void *optdata;
1260 	struct inpcb *inp = sotoinpcb(so);
1261 	struct mbuf *m = *mp;
1262 	int error, optval;
1263 	struct proc *p = curproc; /* For IPSec and rdomain */
1264 	u_int rtid = 0;
1265 
1266 	error = optval = 0;
1267 
1268 	privileged = (inp->inp_socket->so_state & SS_PRIV);
1269 	uproto = (int)so->so_proto->pr_protocol;
1270 
1271 	if (level == IPPROTO_IPV6) {
1272 		switch (op) {
1273 		case PRCO_SETOPT:
1274 			switch (optname) {
1275 			case IPV6_2292PKTOPTIONS:
1276 				error = ip6_pcbopts(&inp->inp_outputopts6,
1277 				    m, so);
1278 				break;
1279 
1280 			/*
1281 			 * Use of some Hop-by-Hop options or some
1282 			 * Destination options, might require special
1283 			 * privilege.  That is, normal applications
1284 			 * (without special privilege) might be forbidden
1285 			 * from setting certain options in outgoing packets,
1286 			 * and might never see certain options in received
1287 			 * packets. [RFC 2292 Section 6]
1288 			 * KAME specific note:
1289 			 *  KAME prevents non-privileged users from sending or
1290 			 *  receiving ANY hbh/dst options in order to avoid
1291 			 *  overhead of parsing options in the kernel.
1292 			 */
1293 			case IPV6_RECVHOPOPTS:
1294 			case IPV6_RECVDSTOPTS:
1295 			case IPV6_RECVRTHDRDSTOPTS:
1296 				if (!privileged) {
1297 					error = EPERM;
1298 					break;
1299 				}
1300 				/* FALLTHROUGH */
1301 			case IPV6_UNICAST_HOPS:
1302 			case IPV6_HOPLIMIT:
1303 
1304 			case IPV6_RECVPKTINFO:
1305 			case IPV6_RECVHOPLIMIT:
1306 			case IPV6_RECVRTHDR:
1307 			case IPV6_RECVPATHMTU:
1308 			case IPV6_RECVTCLASS:
1309 			case IPV6_V6ONLY:
1310 			case IPV6_AUTOFLOWLABEL:
1311 			case IPV6_RECVDSTPORT:
1312 				if (m == NULL || m->m_len != sizeof(int)) {
1313 					error = EINVAL;
1314 					break;
1315 				}
1316 				optval = *mtod(m, int *);
1317 				switch (optname) {
1318 
1319 				case IPV6_UNICAST_HOPS:
1320 					if (optval < -1 || optval >= 256)
1321 						error = EINVAL;
1322 					else {
1323 						/* -1 = kernel default */
1324 						inp->inp_hops = optval;
1325 					}
1326 					break;
1327 #define OPTSET(bit) \
1328 do { \
1329 	if (optval) \
1330 		inp->inp_flags |= (bit); \
1331 	else \
1332 		inp->inp_flags &= ~(bit); \
1333 } while (/*CONSTCOND*/ 0)
1334 #define OPTSET2292(bit) \
1335 do { \
1336 	inp->inp_flags |= IN6P_RFC2292; \
1337 	if (optval) \
1338 		inp->inp_flags |= (bit); \
1339 	else \
1340 		inp->inp_flags &= ~(bit); \
1341 } while (/*CONSTCOND*/ 0)
1342 #define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0)
1343 
1344 				case IPV6_RECVPKTINFO:
1345 					/* cannot mix with RFC2292 */
1346 					if (OPTBIT(IN6P_RFC2292)) {
1347 						error = EINVAL;
1348 						break;
1349 					}
1350 					OPTSET(IN6P_PKTINFO);
1351 					break;
1352 
1353 				case IPV6_HOPLIMIT:
1354 				{
1355 					struct ip6_pktopts **optp;
1356 
1357 					/* cannot mix with RFC2292 */
1358 					if (OPTBIT(IN6P_RFC2292)) {
1359 						error = EINVAL;
1360 						break;
1361 					}
1362 					optp = &inp->inp_outputopts6;
1363 					error = ip6_pcbopt(IPV6_HOPLIMIT,
1364 							   (u_char *)&optval,
1365 							   sizeof(optval),
1366 							   optp,
1367 							   privileged, uproto);
1368 					break;
1369 				}
1370 
1371 				case IPV6_RECVHOPLIMIT:
1372 					/* cannot mix with RFC2292 */
1373 					if (OPTBIT(IN6P_RFC2292)) {
1374 						error = EINVAL;
1375 						break;
1376 					}
1377 					OPTSET(IN6P_HOPLIMIT);
1378 					break;
1379 
1380 				case IPV6_RECVHOPOPTS:
1381 					/* cannot mix with RFC2292 */
1382 					if (OPTBIT(IN6P_RFC2292)) {
1383 						error = EINVAL;
1384 						break;
1385 					}
1386 					OPTSET(IN6P_HOPOPTS);
1387 					break;
1388 
1389 				case IPV6_RECVDSTOPTS:
1390 					/* cannot mix with RFC2292 */
1391 					if (OPTBIT(IN6P_RFC2292)) {
1392 						error = EINVAL;
1393 						break;
1394 					}
1395 					OPTSET(IN6P_DSTOPTS);
1396 					break;
1397 
1398 				case IPV6_RECVRTHDRDSTOPTS:
1399 					/* cannot mix with RFC2292 */
1400 					if (OPTBIT(IN6P_RFC2292)) {
1401 						error = EINVAL;
1402 						break;
1403 					}
1404 					OPTSET(IN6P_RTHDRDSTOPTS);
1405 					break;
1406 
1407 				case IPV6_RECVRTHDR:
1408 					/* cannot mix with RFC2292 */
1409 					if (OPTBIT(IN6P_RFC2292)) {
1410 						error = EINVAL;
1411 						break;
1412 					}
1413 					OPTSET(IN6P_RTHDR);
1414 					break;
1415 
1416 				case IPV6_RECVPATHMTU:
1417 					/*
1418 					 * We ignore this option for TCP
1419 					 * sockets.
1420 					 * (RFC3542 leaves this case
1421 					 * unspecified.)
1422 					 */
1423 					if (uproto != IPPROTO_TCP)
1424 						OPTSET(IN6P_MTU);
1425 					break;
1426 
1427 				case IPV6_V6ONLY:
1428 					/*
1429 					 * make setsockopt(IPV6_V6ONLY)
1430 					 * available only prior to bind(2).
1431 					 * see ipng mailing list, Jun 22 2001.
1432 					 */
1433 					if (inp->inp_lport ||
1434 					    !IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
1435 						error = EINVAL;
1436 						break;
1437 					}
1438 					if ((ip6_v6only && optval) ||
1439 					    (!ip6_v6only && !optval))
1440 						error = 0;
1441 					else
1442 						error = EINVAL;
1443 					break;
1444 				case IPV6_RECVTCLASS:
1445 					/* cannot mix with RFC2292 XXX */
1446 					if (OPTBIT(IN6P_RFC2292)) {
1447 						error = EINVAL;
1448 						break;
1449 					}
1450 					OPTSET(IN6P_TCLASS);
1451 					break;
1452 				case IPV6_AUTOFLOWLABEL:
1453 					OPTSET(IN6P_AUTOFLOWLABEL);
1454 					break;
1455 
1456 				case IPV6_RECVDSTPORT:
1457 					OPTSET(IN6P_RECVDSTPORT);
1458 					break;
1459 				}
1460 				break;
1461 
1462 			case IPV6_TCLASS:
1463 			case IPV6_DONTFRAG:
1464 			case IPV6_USE_MIN_MTU:
1465 				if (m == NULL || m->m_len != sizeof(optval)) {
1466 					error = EINVAL;
1467 					break;
1468 				}
1469 				optval = *mtod(m, int *);
1470 				{
1471 					struct ip6_pktopts **optp;
1472 					optp = &inp->inp_outputopts6;
1473 					error = ip6_pcbopt(optname,
1474 							   (u_char *)&optval,
1475 							   sizeof(optval),
1476 							   optp,
1477 							   privileged, uproto);
1478 					break;
1479 				}
1480 
1481 			case IPV6_2292PKTINFO:
1482 			case IPV6_2292HOPLIMIT:
1483 			case IPV6_2292HOPOPTS:
1484 			case IPV6_2292DSTOPTS:
1485 			case IPV6_2292RTHDR:
1486 				/* RFC 2292 */
1487 				if (m == NULL || m->m_len != sizeof(int)) {
1488 					error = EINVAL;
1489 					break;
1490 				}
1491 				optval = *mtod(m, int *);
1492 				switch (optname) {
1493 				case IPV6_2292PKTINFO:
1494 					OPTSET2292(IN6P_PKTINFO);
1495 					break;
1496 				case IPV6_2292HOPLIMIT:
1497 					OPTSET2292(IN6P_HOPLIMIT);
1498 					break;
1499 				case IPV6_2292HOPOPTS:
1500 					/*
1501 					 * Check super-user privilege.
1502 					 * See comments for IPV6_RECVHOPOPTS.
1503 					 */
1504 					if (!privileged)
1505 						return (EPERM);
1506 					OPTSET2292(IN6P_HOPOPTS);
1507 					break;
1508 				case IPV6_2292DSTOPTS:
1509 					if (!privileged)
1510 						return (EPERM);
1511 					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1512 					break;
1513 				case IPV6_2292RTHDR:
1514 					OPTSET2292(IN6P_RTHDR);
1515 					break;
1516 				}
1517 				break;
1518 			case IPV6_PKTINFO:
1519 			case IPV6_HOPOPTS:
1520 			case IPV6_RTHDR:
1521 			case IPV6_DSTOPTS:
1522 			case IPV6_RTHDRDSTOPTS:
1523 			case IPV6_NEXTHOP:
1524 			{
1525 				/* new advanced API (RFC3542) */
1526 				u_char *optbuf;
1527 				int optbuflen;
1528 				struct ip6_pktopts **optp;
1529 
1530 				/* cannot mix with RFC2292 */
1531 				if (OPTBIT(IN6P_RFC2292)) {
1532 					error = EINVAL;
1533 					break;
1534 				}
1535 
1536 				if (m && m->m_next) {
1537 					error = EINVAL;	/* XXX */
1538 					break;
1539 				}
1540 				if (m) {
1541 					optbuf = mtod(m, u_char *);
1542 					optbuflen = m->m_len;
1543 				} else {
1544 					optbuf = NULL;
1545 					optbuflen = 0;
1546 				}
1547 				optp = &inp->inp_outputopts6;
1548 				error = ip6_pcbopt(optname,
1549 						   optbuf, optbuflen,
1550 						   optp, privileged, uproto);
1551 				break;
1552 			}
1553 #undef OPTSET
1554 
1555 			case IPV6_MULTICAST_IF:
1556 			case IPV6_MULTICAST_HOPS:
1557 			case IPV6_MULTICAST_LOOP:
1558 			case IPV6_JOIN_GROUP:
1559 			case IPV6_LEAVE_GROUP:
1560 				error =	ip6_setmoptions(optname,
1561 							&inp->inp_moptions6,
1562 							m);
1563 				break;
1564 
1565 			case IPV6_PORTRANGE:
1566 				if (m == NULL || m->m_len != sizeof(int)) {
1567 					error = EINVAL;
1568 					break;
1569 				}
1570 				optval = *mtod(m, int *);
1571 
1572 				switch (optval) {
1573 				case IPV6_PORTRANGE_DEFAULT:
1574 					inp->inp_flags &= ~(IN6P_LOWPORT);
1575 					inp->inp_flags &= ~(IN6P_HIGHPORT);
1576 					break;
1577 
1578 				case IPV6_PORTRANGE_HIGH:
1579 					inp->inp_flags &= ~(IN6P_LOWPORT);
1580 					inp->inp_flags |= IN6P_HIGHPORT;
1581 					break;
1582 
1583 				case IPV6_PORTRANGE_LOW:
1584 					inp->inp_flags &= ~(IN6P_HIGHPORT);
1585 					inp->inp_flags |= IN6P_LOWPORT;
1586 					break;
1587 
1588 				default:
1589 					error = EINVAL;
1590 					break;
1591 				}
1592 				break;
1593 
1594 			case IPSEC6_OUTSA:
1595 				error = EINVAL;
1596 				break;
1597 
1598 			case IPV6_AUTH_LEVEL:
1599 			case IPV6_ESP_TRANS_LEVEL:
1600 			case IPV6_ESP_NETWORK_LEVEL:
1601 			case IPV6_IPCOMP_LEVEL:
1602 #ifndef IPSEC
1603 				error = EINVAL;
1604 #else
1605 				if (m == NULL || m->m_len != sizeof(int)) {
1606 					error = EINVAL;
1607 					break;
1608 				}
1609 				optval = *mtod(m, int *);
1610 
1611 				if (optval < IPSEC_LEVEL_BYPASS ||
1612 				    optval > IPSEC_LEVEL_UNIQUE) {
1613 					error = EINVAL;
1614 					break;
1615 				}
1616 
1617 				switch (optname) {
1618 				case IPV6_AUTH_LEVEL:
1619 				        if (optval < IPSEC_AUTH_LEVEL_DEFAULT &&
1620 					    suser(p, 0)) {
1621 						error = EACCES;
1622 						break;
1623 					}
1624 					inp->inp_seclevel[SL_AUTH] = optval;
1625 					break;
1626 
1627 				case IPV6_ESP_TRANS_LEVEL:
1628 				        if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT &&
1629 					    suser(p, 0)) {
1630 						error = EACCES;
1631 						break;
1632 					}
1633 					inp->inp_seclevel[SL_ESP_TRANS] = optval;
1634 					break;
1635 
1636 				case IPV6_ESP_NETWORK_LEVEL:
1637 				        if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT &&
1638 					    suser(p, 0)) {
1639 						error = EACCES;
1640 						break;
1641 					}
1642 					inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1643 					break;
1644 
1645 				case IPV6_IPCOMP_LEVEL:
1646 				        if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT &&
1647 					    suser(p, 0)) {
1648 						error = EACCES;
1649 						break;
1650 					}
1651 					inp->inp_seclevel[SL_IPCOMP] = optval;
1652 					break;
1653 				}
1654 #endif
1655 				break;
1656 			case SO_RTABLE:
1657 				if (m == NULL || m->m_len < sizeof(u_int)) {
1658 					error = EINVAL;
1659 					break;
1660 				}
1661 				rtid = *mtod(m, u_int *);
1662 				if (inp->inp_rtableid == rtid)
1663 					break;
1664 				/* needs privileges to switch when already set */
1665 				if (p->p_p->ps_rtableid != rtid &&
1666 				    p->p_p->ps_rtableid != 0 &&
1667 				    (error = suser(p, 0)) != 0)
1668 					break;
1669 				/* table must exist */
1670 				if (!rtable_exists(rtid)) {
1671 					error = EINVAL;
1672 					break;
1673 				}
1674 				inp->inp_rtableid = rtid;
1675 				break;
1676 			case IPV6_PIPEX:
1677 				if (m != NULL && m->m_len == sizeof(int))
1678 					inp->inp_pipex = *mtod(m, int *);
1679 				else
1680 					error = EINVAL;
1681 				break;
1682 
1683 			default:
1684 				error = ENOPROTOOPT;
1685 				break;
1686 			}
1687 			if (m)
1688 				(void)m_free(m);
1689 			break;
1690 
1691 		case PRCO_GETOPT:
1692 			switch (optname) {
1693 
1694 			case IPV6_2292PKTOPTIONS:
1695 				/*
1696 				 * RFC3542 (effectively) deprecated the
1697 				 * semantics of the 2292-style pktoptions.
1698 				 * Since it was not reliable in nature (i.e.,
1699 				 * applications had to expect the lack of some
1700 				 * information after all), it would make sense
1701 				 * to simplify this part by always returning
1702 				 * empty data.
1703 				 */
1704 				*mp = m_get(M_WAIT, MT_SOOPTS);
1705 				(*mp)->m_len = 0;
1706 				break;
1707 
1708 			case IPV6_RECVHOPOPTS:
1709 			case IPV6_RECVDSTOPTS:
1710 			case IPV6_RECVRTHDRDSTOPTS:
1711 			case IPV6_UNICAST_HOPS:
1712 			case IPV6_RECVPKTINFO:
1713 			case IPV6_RECVHOPLIMIT:
1714 			case IPV6_RECVRTHDR:
1715 			case IPV6_RECVPATHMTU:
1716 
1717 			case IPV6_V6ONLY:
1718 			case IPV6_PORTRANGE:
1719 			case IPV6_RECVTCLASS:
1720 			case IPV6_AUTOFLOWLABEL:
1721 			case IPV6_RECVDSTPORT:
1722 				switch (optname) {
1723 
1724 				case IPV6_RECVHOPOPTS:
1725 					optval = OPTBIT(IN6P_HOPOPTS);
1726 					break;
1727 
1728 				case IPV6_RECVDSTOPTS:
1729 					optval = OPTBIT(IN6P_DSTOPTS);
1730 					break;
1731 
1732 				case IPV6_RECVRTHDRDSTOPTS:
1733 					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1734 					break;
1735 
1736 				case IPV6_UNICAST_HOPS:
1737 					optval = inp->inp_hops;
1738 					break;
1739 
1740 				case IPV6_RECVPKTINFO:
1741 					optval = OPTBIT(IN6P_PKTINFO);
1742 					break;
1743 
1744 				case IPV6_RECVHOPLIMIT:
1745 					optval = OPTBIT(IN6P_HOPLIMIT);
1746 					break;
1747 
1748 				case IPV6_RECVRTHDR:
1749 					optval = OPTBIT(IN6P_RTHDR);
1750 					break;
1751 
1752 				case IPV6_RECVPATHMTU:
1753 					optval = OPTBIT(IN6P_MTU);
1754 					break;
1755 
1756 				case IPV6_V6ONLY:
1757 					optval = (ip6_v6only != 0); /* XXX */
1758 					break;
1759 
1760 				case IPV6_PORTRANGE:
1761 				    {
1762 					int flags;
1763 					flags = inp->inp_flags;
1764 					if (flags & IN6P_HIGHPORT)
1765 						optval = IPV6_PORTRANGE_HIGH;
1766 					else if (flags & IN6P_LOWPORT)
1767 						optval = IPV6_PORTRANGE_LOW;
1768 					else
1769 						optval = 0;
1770 					break;
1771 				    }
1772 				case IPV6_RECVTCLASS:
1773 					optval = OPTBIT(IN6P_TCLASS);
1774 					break;
1775 
1776 				case IPV6_AUTOFLOWLABEL:
1777 					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1778 					break;
1779 
1780 				case IPV6_RECVDSTPORT:
1781 					optval = OPTBIT(IN6P_RECVDSTPORT);
1782 					break;
1783 				}
1784 				if (error)
1785 					break;
1786 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1787 				m->m_len = sizeof(int);
1788 				*mtod(m, int *) = optval;
1789 				break;
1790 
1791 			case IPV6_PATHMTU:
1792 			{
1793 				u_long pmtu = 0;
1794 				struct ip6_mtuinfo mtuinfo;
1795 				struct route_in6 *ro = (struct route_in6 *)&inp->inp_route6;
1796 
1797 				if (!(so->so_state & SS_ISCONNECTED))
1798 					return (ENOTCONN);
1799 				/*
1800 				 * XXX: we dot not consider the case of source
1801 				 * routing, or optional information to specify
1802 				 * the outgoing interface.
1803 				 */
1804 				error = ip6_getpmtu(ro, NULL, NULL,
1805 				    &inp->inp_faddr6, &pmtu, NULL);
1806 				if (error)
1807 					break;
1808 				if (pmtu > IPV6_MAXPACKET)
1809 					pmtu = IPV6_MAXPACKET;
1810 
1811 				bzero(&mtuinfo, sizeof(mtuinfo));
1812 				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1813 				optdata = (void *)&mtuinfo;
1814 				optdatalen = sizeof(mtuinfo);
1815 				if (optdatalen > MCLBYTES)
1816 					return (EMSGSIZE); /* XXX */
1817 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1818 				if (optdatalen > MLEN)
1819 					MCLGET(m, M_WAIT);
1820 				m->m_len = optdatalen;
1821 				bcopy(optdata, mtod(m, void *), optdatalen);
1822 				break;
1823 			}
1824 
1825 			case IPV6_2292PKTINFO:
1826 			case IPV6_2292HOPLIMIT:
1827 			case IPV6_2292HOPOPTS:
1828 			case IPV6_2292RTHDR:
1829 			case IPV6_2292DSTOPTS:
1830 				switch (optname) {
1831 				case IPV6_2292PKTINFO:
1832 					optval = OPTBIT(IN6P_PKTINFO);
1833 					break;
1834 				case IPV6_2292HOPLIMIT:
1835 					optval = OPTBIT(IN6P_HOPLIMIT);
1836 					break;
1837 				case IPV6_2292HOPOPTS:
1838 					optval = OPTBIT(IN6P_HOPOPTS);
1839 					break;
1840 				case IPV6_2292RTHDR:
1841 					optval = OPTBIT(IN6P_RTHDR);
1842 					break;
1843 				case IPV6_2292DSTOPTS:
1844 					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1845 					break;
1846 				}
1847 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1848 				m->m_len = sizeof(int);
1849 				*mtod(m, int *) = optval;
1850 				break;
1851 			case IPV6_PKTINFO:
1852 			case IPV6_HOPOPTS:
1853 			case IPV6_RTHDR:
1854 			case IPV6_DSTOPTS:
1855 			case IPV6_RTHDRDSTOPTS:
1856 			case IPV6_NEXTHOP:
1857 			case IPV6_TCLASS:
1858 			case IPV6_DONTFRAG:
1859 			case IPV6_USE_MIN_MTU:
1860 				error = ip6_getpcbopt(inp->inp_outputopts6,
1861 				    optname, mp);
1862 				break;
1863 
1864 			case IPV6_MULTICAST_IF:
1865 			case IPV6_MULTICAST_HOPS:
1866 			case IPV6_MULTICAST_LOOP:
1867 			case IPV6_JOIN_GROUP:
1868 			case IPV6_LEAVE_GROUP:
1869 				error = ip6_getmoptions(optname,
1870 				    inp->inp_moptions6, mp);
1871 				break;
1872 
1873 			case IPSEC6_OUTSA:
1874 				error = EINVAL;
1875 				break;
1876 
1877 			case IPV6_AUTH_LEVEL:
1878 			case IPV6_ESP_TRANS_LEVEL:
1879 			case IPV6_ESP_NETWORK_LEVEL:
1880 			case IPV6_IPCOMP_LEVEL:
1881 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1882 #ifndef IPSEC
1883 				m->m_len = sizeof(int);
1884 				*mtod(m, int *) = IPSEC_LEVEL_NONE;
1885 #else
1886 				m->m_len = sizeof(int);
1887 				switch (optname) {
1888 				case IPV6_AUTH_LEVEL:
1889 					optval = inp->inp_seclevel[SL_AUTH];
1890 					break;
1891 
1892 				case IPV6_ESP_TRANS_LEVEL:
1893 					optval =
1894 					    inp->inp_seclevel[SL_ESP_TRANS];
1895 					break;
1896 
1897 				case IPV6_ESP_NETWORK_LEVEL:
1898 					optval =
1899 					    inp->inp_seclevel[SL_ESP_NETWORK];
1900 					break;
1901 
1902 				case IPV6_IPCOMP_LEVEL:
1903 					optval = inp->inp_seclevel[SL_IPCOMP];
1904 					break;
1905 				}
1906 				*mtod(m, int *) = optval;
1907 #endif
1908 				break;
1909 			case SO_RTABLE:
1910 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1911 				m->m_len = sizeof(u_int);
1912 				*mtod(m, u_int *) = optval;
1913 				break;
1914 			case IPV6_PIPEX:
1915 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1916 				m->m_len = sizeof(int);
1917 				*mtod(m, int *) = optval;
1918 				break;
1919 
1920 			default:
1921 				error = ENOPROTOOPT;
1922 				break;
1923 			}
1924 			break;
1925 		}
1926 	} else {
1927 		error = EINVAL;
1928 		if (op == PRCO_SETOPT && *mp)
1929 			(void)m_free(*mp);
1930 	}
1931 	return (error);
1932 }
1933 
1934 int
1935 ip6_raw_ctloutput(int op, struct socket *so, int level, int optname,
1936     struct mbuf **mp)
1937 {
1938 	int error = 0, optval;
1939 	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
1940 	struct inpcb *inp = sotoinpcb(so);
1941 	struct mbuf *m = *mp;
1942 
1943 	if (level != IPPROTO_IPV6) {
1944 		if (op == PRCO_SETOPT && *mp)
1945 			(void)m_free(*mp);
1946 		return (EINVAL);
1947 	}
1948 
1949 	switch (optname) {
1950 	case IPV6_CHECKSUM:
1951 		/*
1952 		 * For ICMPv6 sockets, no modification allowed for checksum
1953 		 * offset, permit "no change" values to help existing apps.
1954 		 *
1955 		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
1956 		 * for an ICMPv6 socket will fail."
1957 		 * The current behavior does not meet RFC3542.
1958 		 */
1959 		switch (op) {
1960 		case PRCO_SETOPT:
1961 			if (m == NULL || m->m_len != sizeof(int)) {
1962 				error = EINVAL;
1963 				break;
1964 			}
1965 			optval = *mtod(m, int *);
1966 			if ((optval % 2) != 0) {
1967 				/* the API assumes even offset values */
1968 				error = EINVAL;
1969 			} else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
1970 				if (optval != icmp6off)
1971 					error = EINVAL;
1972 			} else
1973 				inp->inp_cksum6 = optval;
1974 			break;
1975 
1976 		case PRCO_GETOPT:
1977 			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
1978 				optval = icmp6off;
1979 			else
1980 				optval = inp->inp_cksum6;
1981 
1982 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1983 			m->m_len = sizeof(int);
1984 			*mtod(m, int *) = optval;
1985 			break;
1986 
1987 		default:
1988 			error = EINVAL;
1989 			break;
1990 		}
1991 		break;
1992 
1993 	default:
1994 		error = ENOPROTOOPT;
1995 		break;
1996 	}
1997 
1998 	if (op == PRCO_SETOPT && m)
1999 		(void)m_free(m);
2000 
2001 	return (error);
2002 }
2003 
2004 /*
2005  * Set up IP6 options in pcb for insertion in output packets.
2006  * Store in mbuf with pointer in pcbopt, adding pseudo-option
2007  * with destination address if source routed.
2008  */
2009 int
2010 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so)
2011 {
2012 	struct ip6_pktopts *opt = *pktopt;
2013 	int error = 0;
2014 	struct proc *p = curproc;	/* XXX */
2015 	int priv = 0;
2016 
2017 	/* turn off any old options. */
2018 	if (opt)
2019 		ip6_clearpktopts(opt, -1);
2020 	else
2021 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2022 	*pktopt = 0;
2023 
2024 	if (!m || m->m_len == 0) {
2025 		/*
2026 		 * Only turning off any previous options, regardless of
2027 		 * whether the opt is just created or given.
2028 		 */
2029 		free(opt, M_IP6OPT, 0);
2030 		return (0);
2031 	}
2032 
2033 	/*  set options specified by user. */
2034 	if (p && !suser(p, 0))
2035 		priv = 1;
2036 	if ((error = ip6_setpktopts(m, opt, NULL, priv,
2037 	    so->so_proto->pr_protocol)) != 0) {
2038 		ip6_clearpktopts(opt, -1);	/* XXX discard all options */
2039 		free(opt, M_IP6OPT, 0);
2040 		return (error);
2041 	}
2042 	*pktopt = opt;
2043 	return (0);
2044 }
2045 
2046 /*
2047  * initialize ip6_pktopts.  beware that there are non-zero default values in
2048  * the struct.
2049  */
2050 void
2051 ip6_initpktopts(struct ip6_pktopts *opt)
2052 {
2053 
2054 	bzero(opt, sizeof(*opt));
2055 	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2056 	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2057 	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2058 }
2059 
2060 int
2061 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2062     int priv, int uproto)
2063 {
2064 	struct ip6_pktopts *opt;
2065 
2066 	if (*pktopt == NULL) {
2067 		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2068 		    M_WAITOK);
2069 		ip6_initpktopts(*pktopt);
2070 	}
2071 	opt = *pktopt;
2072 
2073 	return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2074 }
2075 
2076 int
2077 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct mbuf **mp)
2078 {
2079 	void *optdata = NULL;
2080 	int optdatalen = 0;
2081 	struct ip6_ext *ip6e;
2082 	int error = 0;
2083 	struct in6_pktinfo null_pktinfo;
2084 	int deftclass = 0, on;
2085 	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2086 	struct mbuf *m;
2087 
2088 	switch (optname) {
2089 	case IPV6_PKTINFO:
2090 		if (pktopt && pktopt->ip6po_pktinfo)
2091 			optdata = (void *)pktopt->ip6po_pktinfo;
2092 		else {
2093 			/* XXX: we don't have to do this every time... */
2094 			bzero(&null_pktinfo, sizeof(null_pktinfo));
2095 			optdata = (void *)&null_pktinfo;
2096 		}
2097 		optdatalen = sizeof(struct in6_pktinfo);
2098 		break;
2099 	case IPV6_TCLASS:
2100 		if (pktopt && pktopt->ip6po_tclass >= 0)
2101 			optdata = (void *)&pktopt->ip6po_tclass;
2102 		else
2103 			optdata = (void *)&deftclass;
2104 		optdatalen = sizeof(int);
2105 		break;
2106 	case IPV6_HOPOPTS:
2107 		if (pktopt && pktopt->ip6po_hbh) {
2108 			optdata = (void *)pktopt->ip6po_hbh;
2109 			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2110 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2111 		}
2112 		break;
2113 	case IPV6_RTHDR:
2114 		if (pktopt && pktopt->ip6po_rthdr) {
2115 			optdata = (void *)pktopt->ip6po_rthdr;
2116 			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2117 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2118 		}
2119 		break;
2120 	case IPV6_RTHDRDSTOPTS:
2121 		if (pktopt && pktopt->ip6po_dest1) {
2122 			optdata = (void *)pktopt->ip6po_dest1;
2123 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2124 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2125 		}
2126 		break;
2127 	case IPV6_DSTOPTS:
2128 		if (pktopt && pktopt->ip6po_dest2) {
2129 			optdata = (void *)pktopt->ip6po_dest2;
2130 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2131 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2132 		}
2133 		break;
2134 	case IPV6_NEXTHOP:
2135 		if (pktopt && pktopt->ip6po_nexthop) {
2136 			optdata = (void *)pktopt->ip6po_nexthop;
2137 			optdatalen = pktopt->ip6po_nexthop->sa_len;
2138 		}
2139 		break;
2140 	case IPV6_USE_MIN_MTU:
2141 		if (pktopt)
2142 			optdata = (void *)&pktopt->ip6po_minmtu;
2143 		else
2144 			optdata = (void *)&defminmtu;
2145 		optdatalen = sizeof(int);
2146 		break;
2147 	case IPV6_DONTFRAG:
2148 		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2149 			on = 1;
2150 		else
2151 			on = 0;
2152 		optdata = (void *)&on;
2153 		optdatalen = sizeof(on);
2154 		break;
2155 	default:		/* should not happen */
2156 #ifdef DIAGNOSTIC
2157 		panic("ip6_getpcbopt: unexpected option");
2158 #endif
2159 		return (ENOPROTOOPT);
2160 	}
2161 
2162 	if (optdatalen > MCLBYTES)
2163 		return (EMSGSIZE); /* XXX */
2164 	*mp = m = m_get(M_WAIT, MT_SOOPTS);
2165 	if (optdatalen > MLEN)
2166 		MCLGET(m, M_WAIT);
2167 	m->m_len = optdatalen;
2168 	if (optdatalen)
2169 		bcopy(optdata, mtod(m, void *), optdatalen);
2170 
2171 	return (error);
2172 }
2173 
2174 void
2175 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2176 {
2177 	if (optname == -1 || optname == IPV6_PKTINFO) {
2178 		if (pktopt->ip6po_pktinfo)
2179 			free(pktopt->ip6po_pktinfo, M_IP6OPT, 0);
2180 		pktopt->ip6po_pktinfo = NULL;
2181 	}
2182 	if (optname == -1 || optname == IPV6_HOPLIMIT)
2183 		pktopt->ip6po_hlim = -1;
2184 	if (optname == -1 || optname == IPV6_TCLASS)
2185 		pktopt->ip6po_tclass = -1;
2186 	if (optname == -1 || optname == IPV6_NEXTHOP) {
2187 		if (pktopt->ip6po_nextroute.ro_rt) {
2188 			rtfree(pktopt->ip6po_nextroute.ro_rt);
2189 			pktopt->ip6po_nextroute.ro_rt = NULL;
2190 		}
2191 		if (pktopt->ip6po_nexthop)
2192 			free(pktopt->ip6po_nexthop, M_IP6OPT, 0);
2193 		pktopt->ip6po_nexthop = NULL;
2194 	}
2195 	if (optname == -1 || optname == IPV6_HOPOPTS) {
2196 		if (pktopt->ip6po_hbh)
2197 			free(pktopt->ip6po_hbh, M_IP6OPT, 0);
2198 		pktopt->ip6po_hbh = NULL;
2199 	}
2200 	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2201 		if (pktopt->ip6po_dest1)
2202 			free(pktopt->ip6po_dest1, M_IP6OPT, 0);
2203 		pktopt->ip6po_dest1 = NULL;
2204 	}
2205 	if (optname == -1 || optname == IPV6_RTHDR) {
2206 		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2207 			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT, 0);
2208 		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2209 		if (pktopt->ip6po_route.ro_rt) {
2210 			rtfree(pktopt->ip6po_route.ro_rt);
2211 			pktopt->ip6po_route.ro_rt = NULL;
2212 		}
2213 	}
2214 	if (optname == -1 || optname == IPV6_DSTOPTS) {
2215 		if (pktopt->ip6po_dest2)
2216 			free(pktopt->ip6po_dest2, M_IP6OPT, 0);
2217 		pktopt->ip6po_dest2 = NULL;
2218 	}
2219 }
2220 
2221 #define PKTOPT_EXTHDRCPY(type) \
2222 do {\
2223 	if (src->type) {\
2224 		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2225 		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2226 		if (dst->type == NULL && canwait == M_NOWAIT)\
2227 			goto bad;\
2228 		bcopy(src->type, dst->type, hlen);\
2229 	}\
2230 } while (/*CONSTCOND*/ 0)
2231 
2232 int
2233 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2234 {
2235 	dst->ip6po_hlim = src->ip6po_hlim;
2236 	dst->ip6po_tclass = src->ip6po_tclass;
2237 	dst->ip6po_flags = src->ip6po_flags;
2238 	if (src->ip6po_pktinfo) {
2239 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2240 		    M_IP6OPT, canwait);
2241 		if (dst->ip6po_pktinfo == NULL)
2242 			goto bad;
2243 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2244 	}
2245 	if (src->ip6po_nexthop) {
2246 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2247 		    M_IP6OPT, canwait);
2248 		if (dst->ip6po_nexthop == NULL)
2249 			goto bad;
2250 		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2251 		    src->ip6po_nexthop->sa_len);
2252 	}
2253 	PKTOPT_EXTHDRCPY(ip6po_hbh);
2254 	PKTOPT_EXTHDRCPY(ip6po_dest1);
2255 	PKTOPT_EXTHDRCPY(ip6po_dest2);
2256 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2257 	return (0);
2258 
2259   bad:
2260 	ip6_clearpktopts(dst, -1);
2261 	return (ENOBUFS);
2262 }
2263 #undef PKTOPT_EXTHDRCPY
2264 
2265 void
2266 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2267 {
2268 	if (pktopt == NULL)
2269 		return;
2270 
2271 	ip6_clearpktopts(pktopt, -1);
2272 
2273 	free(pktopt, M_IP6OPT, 0);
2274 }
2275 
2276 /*
2277  * Set the IP6 multicast options in response to user setsockopt().
2278  */
2279 int
2280 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
2281 {
2282 	int error = 0;
2283 	u_int loop, ifindex;
2284 	struct ipv6_mreq *mreq;
2285 	struct ifnet *ifp;
2286 	struct ip6_moptions *im6o = *im6op;
2287 	struct route_in6 ro;
2288 	struct sockaddr_in6 *dst;
2289 	struct in6_multi_mship *imm;
2290 	struct proc *p = curproc;	/* XXX */
2291 
2292 	if (im6o == NULL) {
2293 		/*
2294 		 * No multicast option buffer attached to the pcb;
2295 		 * allocate one and initialize to default values.
2296 		 */
2297 		im6o = (struct ip6_moptions *)
2298 			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2299 
2300 		if (im6o == NULL)
2301 			return (ENOBUFS);
2302 		*im6op = im6o;
2303 		im6o->im6o_ifidx = 0;
2304 		im6o->im6o_hlim = ip6_defmcasthlim;
2305 		im6o->im6o_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2306 		LIST_INIT(&im6o->im6o_memberships);
2307 	}
2308 
2309 	switch (optname) {
2310 
2311 	case IPV6_MULTICAST_IF:
2312 		/*
2313 		 * Select the interface for outgoing multicast packets.
2314 		 */
2315 		if (m == NULL || m->m_len != sizeof(u_int)) {
2316 			error = EINVAL;
2317 			break;
2318 		}
2319 		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2320 		if (ifindex == 0)
2321 			ifp = NULL;
2322 		else {
2323 			ifp = if_get(ifindex);
2324 			if (ifp == NULL) {
2325 				error = ENXIO;	/* XXX EINVAL? */
2326 				break;
2327 			}
2328 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2329 				error = EADDRNOTAVAIL;
2330 				break;
2331 			}
2332 		}
2333 		im6o->im6o_ifidx = ifindex;
2334 		break;
2335 
2336 	case IPV6_MULTICAST_HOPS:
2337 	    {
2338 		/*
2339 		 * Set the IP6 hoplimit for outgoing multicast packets.
2340 		 */
2341 		int optval;
2342 		if (m == NULL || m->m_len != sizeof(int)) {
2343 			error = EINVAL;
2344 			break;
2345 		}
2346 		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2347 		if (optval < -1 || optval >= 256)
2348 			error = EINVAL;
2349 		else if (optval == -1)
2350 			im6o->im6o_hlim = ip6_defmcasthlim;
2351 		else
2352 			im6o->im6o_hlim = optval;
2353 		break;
2354 	    }
2355 
2356 	case IPV6_MULTICAST_LOOP:
2357 		/*
2358 		 * Set the loopback flag for outgoing multicast packets.
2359 		 * Must be zero or one.
2360 		 */
2361 		if (m == NULL || m->m_len != sizeof(u_int)) {
2362 			error = EINVAL;
2363 			break;
2364 		}
2365 		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2366 		if (loop > 1) {
2367 			error = EINVAL;
2368 			break;
2369 		}
2370 		im6o->im6o_loop = loop;
2371 		break;
2372 
2373 	case IPV6_JOIN_GROUP:
2374 		/*
2375 		 * Add a multicast group membership.
2376 		 * Group must be a valid IP6 multicast address.
2377 		 */
2378 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2379 			error = EINVAL;
2380 			break;
2381 		}
2382 		mreq = mtod(m, struct ipv6_mreq *);
2383 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2384 			/*
2385 			 * We use the unspecified address to specify to accept
2386 			 * all multicast addresses. Only super user is allowed
2387 			 * to do this.
2388 			 */
2389 			if (suser(p, 0))
2390 			{
2391 				error = EACCES;
2392 				break;
2393 			}
2394 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2395 			error = EINVAL;
2396 			break;
2397 		}
2398 
2399 		/*
2400 		 * If no interface was explicitly specified, choose an
2401 		 * appropriate one according to the given multicast address.
2402 		 */
2403 		if (mreq->ipv6mr_interface == 0) {
2404 			/*
2405 			 * Look up the routing table for the
2406 			 * address, and choose the outgoing interface.
2407 			 *   XXX: is it a good approach?
2408 			 */
2409 			bzero(&ro, sizeof(ro));
2410 			ro.ro_tableid = m->m_pkthdr.ph_rtableid;
2411 			dst = &ro.ro_dst;
2412 			dst->sin6_len = sizeof(struct sockaddr_in6);
2413 			dst->sin6_family = AF_INET6;
2414 			dst->sin6_addr = mreq->ipv6mr_multiaddr;
2415 			ro.ro_rt = rtalloc(sin6tosa(&ro.ro_dst),
2416 			    RT_REPORT|RT_RESOLVE, ro.ro_tableid);
2417 			if (ro.ro_rt == NULL) {
2418 				error = EADDRNOTAVAIL;
2419 				break;
2420 			}
2421 			ifp = ro.ro_rt->rt_ifp;
2422 			rtfree(ro.ro_rt);
2423 		} else {
2424 			/*
2425 			 * If the interface is specified, validate it.
2426 			 */
2427 			ifp = if_get(mreq->ipv6mr_interface);
2428 			if (ifp == NULL) {
2429 				error = ENXIO;	/* XXX EINVAL? */
2430 				break;
2431 			}
2432 		}
2433 
2434 		/*
2435 		 * See if we found an interface, and confirm that it
2436 		 * supports multicast
2437 		 */
2438 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2439 			error = EADDRNOTAVAIL;
2440 			break;
2441 		}
2442 		/*
2443 		 * Put interface index into the multicast address,
2444 		 * if the address has link/interface-local scope.
2445 		 */
2446 		if (IN6_IS_SCOPE_EMBED(&mreq->ipv6mr_multiaddr)) {
2447 			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2448 			    htons(ifp->if_index);
2449 		}
2450 		/*
2451 		 * See if the membership already exists.
2452 		 */
2453 		LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain)
2454 			if (imm->i6mm_maddr->in6m_ifidx == ifp->if_index &&
2455 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2456 			    &mreq->ipv6mr_multiaddr))
2457 				break;
2458 		if (imm != NULL) {
2459 			error = EADDRINUSE;
2460 			break;
2461 		}
2462 		/*
2463 		 * Everything looks good; add a new record to the multicast
2464 		 * address list for the given interface.
2465 		 */
2466 		imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error);
2467 		if (!imm)
2468 			break;
2469 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2470 		break;
2471 
2472 	case IPV6_LEAVE_GROUP:
2473 		/*
2474 		 * Drop a multicast group membership.
2475 		 * Group must be a valid IP6 multicast address.
2476 		 */
2477 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2478 			error = EINVAL;
2479 			break;
2480 		}
2481 		mreq = mtod(m, struct ipv6_mreq *);
2482 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2483 			if (suser(p, 0))
2484 			{
2485 				error = EACCES;
2486 				break;
2487 			}
2488 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2489 			error = EINVAL;
2490 			break;
2491 		}
2492 		/*
2493 		 * If an interface address was specified, get a pointer
2494 		 * to its ifnet structure.
2495 		 */
2496 		if (mreq->ipv6mr_interface == 0)
2497 			ifp = NULL;
2498 		else {
2499 			ifp = if_get(mreq->ipv6mr_interface);
2500 			if (ifp == NULL) {
2501 				error = ENXIO;	/* XXX EINVAL? */
2502 				break;
2503 			}
2504 		}
2505 
2506 		/*
2507 		 * Put interface index into the multicast address,
2508 		 * if the address has link-local scope.
2509 		 */
2510 		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2511 			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2512 			    htons(mreq->ipv6mr_interface);
2513 		}
2514 		/*
2515 		 * Find the membership in the membership list.
2516 		 */
2517 		LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2518 			if ((ifp == NULL ||
2519 			    imm->i6mm_maddr->in6m_ifidx == ifp->if_index) &&
2520 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2521 			    &mreq->ipv6mr_multiaddr))
2522 				break;
2523 		}
2524 		if (imm == NULL) {
2525 			/* Unable to resolve interface */
2526 			error = EADDRNOTAVAIL;
2527 			break;
2528 		}
2529 		/*
2530 		 * Give up the multicast address record to which the
2531 		 * membership points.
2532 		 */
2533 		LIST_REMOVE(imm, i6mm_chain);
2534 		in6_leavegroup(imm);
2535 		break;
2536 
2537 	default:
2538 		error = EOPNOTSUPP;
2539 		break;
2540 	}
2541 
2542 	/*
2543 	 * If all options have default values, no need to keep the option
2544 	 * structure.
2545 	 */
2546 	if (im6o->im6o_ifidx == 0 &&
2547 	    im6o->im6o_hlim == ip6_defmcasthlim &&
2548 	    im6o->im6o_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2549 	    LIST_EMPTY(&im6o->im6o_memberships)) {
2550 		free(*im6op, M_IPMOPTS, 0);
2551 		*im6op = NULL;
2552 	}
2553 
2554 	return (error);
2555 }
2556 
2557 /*
2558  * Return the IP6 multicast options in response to user getsockopt().
2559  */
2560 int
2561 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2562 {
2563 	u_int *hlim, *loop, *ifindex;
2564 
2565 	*mp = m_get(M_WAIT, MT_SOOPTS);
2566 
2567 	switch (optname) {
2568 
2569 	case IPV6_MULTICAST_IF:
2570 		ifindex = mtod(*mp, u_int *);
2571 		(*mp)->m_len = sizeof(u_int);
2572 		if (im6o == NULL || im6o->im6o_ifidx == 0)
2573 			*ifindex = 0;
2574 		else
2575 			*ifindex = im6o->im6o_ifidx;
2576 		return (0);
2577 
2578 	case IPV6_MULTICAST_HOPS:
2579 		hlim = mtod(*mp, u_int *);
2580 		(*mp)->m_len = sizeof(u_int);
2581 		if (im6o == NULL)
2582 			*hlim = ip6_defmcasthlim;
2583 		else
2584 			*hlim = im6o->im6o_hlim;
2585 		return (0);
2586 
2587 	case IPV6_MULTICAST_LOOP:
2588 		loop = mtod(*mp, u_int *);
2589 		(*mp)->m_len = sizeof(u_int);
2590 		if (im6o == NULL)
2591 			*loop = ip6_defmcasthlim;
2592 		else
2593 			*loop = im6o->im6o_loop;
2594 		return (0);
2595 
2596 	default:
2597 		return (EOPNOTSUPP);
2598 	}
2599 }
2600 
2601 /*
2602  * Discard the IP6 multicast options.
2603  */
2604 void
2605 ip6_freemoptions(struct ip6_moptions *im6o)
2606 {
2607 	struct in6_multi_mship *imm;
2608 
2609 	if (im6o == NULL)
2610 		return;
2611 
2612 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2613 		imm = LIST_FIRST(&im6o->im6o_memberships);
2614 		LIST_REMOVE(imm, i6mm_chain);
2615 		in6_leavegroup(imm);
2616 	}
2617 	free(im6o, M_IPMOPTS, 0);
2618 }
2619 
2620 /*
2621  * Set IPv6 outgoing packet options based on advanced API.
2622  */
2623 int
2624 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2625     struct ip6_pktopts *stickyopt, int priv, int uproto)
2626 {
2627 	u_int clen;
2628 	struct cmsghdr *cm = 0;
2629 	caddr_t cmsgs;
2630 	int error;
2631 
2632 	if (control == NULL || opt == NULL)
2633 		return (EINVAL);
2634 
2635 	ip6_initpktopts(opt);
2636 	if (stickyopt) {
2637 		int error;
2638 
2639 		/*
2640 		 * If stickyopt is provided, make a local copy of the options
2641 		 * for this particular packet, then override them by ancillary
2642 		 * objects.
2643 		 * XXX: copypktopts() does not copy the cached route to a next
2644 		 * hop (if any).  This is not very good in terms of efficiency,
2645 		 * but we can allow this since this option should be rarely
2646 		 * used.
2647 		 */
2648 		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2649 			return (error);
2650 	}
2651 
2652 	/*
2653 	 * XXX: Currently, we assume all the optional information is stored
2654 	 * in a single mbuf.
2655 	 */
2656 	if (control->m_next)
2657 		return (EINVAL);
2658 
2659 	clen = control->m_len;
2660 	cmsgs = mtod(control, caddr_t);
2661 	do {
2662 		if (clen < CMSG_LEN(0))
2663 			return (EINVAL);
2664 		cm = (struct cmsghdr *)cmsgs;
2665 		if (cm->cmsg_len < CMSG_LEN(0) || cm->cmsg_len > clen ||
2666 		    CMSG_ALIGN(cm->cmsg_len) > clen)
2667 			return (EINVAL);
2668 		if (cm->cmsg_level == IPPROTO_IPV6) {
2669 			error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2670 			    cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
2671 			if (error)
2672 				return (error);
2673 		}
2674 
2675 		clen -= CMSG_ALIGN(cm->cmsg_len);
2676 		cmsgs += CMSG_ALIGN(cm->cmsg_len);
2677 	} while (clen);
2678 
2679 	return (0);
2680 }
2681 
2682 /*
2683  * Set a particular packet option, as a sticky option or an ancillary data
2684  * item.  "len" can be 0 only when it's a sticky option.
2685  * We have 4 cases of combination of "sticky" and "cmsg":
2686  * "sticky=0, cmsg=0": impossible
2687  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2688  * "sticky=1, cmsg=0": RFC3542 socket option
2689  * "sticky=1, cmsg=1": RFC2292 socket option
2690  */
2691 int
2692 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2693     int priv, int sticky, int cmsg, int uproto)
2694 {
2695 	int minmtupolicy;
2696 
2697 	if (!sticky && !cmsg) {
2698 #ifdef DIAGNOSTIC
2699 		printf("ip6_setpktopt: impossible case\n");
2700 #endif
2701 		return (EINVAL);
2702 	}
2703 
2704 	/*
2705 	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2706 	 * not be specified in the context of RFC3542.  Conversely,
2707 	 * RFC3542 types should not be specified in the context of RFC2292.
2708 	 */
2709 	if (!cmsg) {
2710 		switch (optname) {
2711 		case IPV6_2292PKTINFO:
2712 		case IPV6_2292HOPLIMIT:
2713 		case IPV6_2292NEXTHOP:
2714 		case IPV6_2292HOPOPTS:
2715 		case IPV6_2292DSTOPTS:
2716 		case IPV6_2292RTHDR:
2717 		case IPV6_2292PKTOPTIONS:
2718 			return (ENOPROTOOPT);
2719 		}
2720 	}
2721 	if (sticky && cmsg) {
2722 		switch (optname) {
2723 		case IPV6_PKTINFO:
2724 		case IPV6_HOPLIMIT:
2725 		case IPV6_NEXTHOP:
2726 		case IPV6_HOPOPTS:
2727 		case IPV6_DSTOPTS:
2728 		case IPV6_RTHDRDSTOPTS:
2729 		case IPV6_RTHDR:
2730 		case IPV6_USE_MIN_MTU:
2731 		case IPV6_DONTFRAG:
2732 		case IPV6_TCLASS:
2733 			return (ENOPROTOOPT);
2734 		}
2735 	}
2736 
2737 	switch (optname) {
2738 	case IPV6_2292PKTINFO:
2739 	case IPV6_PKTINFO:
2740 	{
2741 		struct ifnet *ifp = NULL;
2742 		struct in6_pktinfo *pktinfo;
2743 
2744 		if (len != sizeof(struct in6_pktinfo))
2745 			return (EINVAL);
2746 
2747 		pktinfo = (struct in6_pktinfo *)buf;
2748 
2749 		/*
2750 		 * An application can clear any sticky IPV6_PKTINFO option by
2751 		 * doing a "regular" setsockopt with ipi6_addr being
2752 		 * in6addr_any and ipi6_ifindex being zero.
2753 		 * [RFC 3542, Section 6]
2754 		 */
2755 		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2756 		    pktinfo->ipi6_ifindex == 0 &&
2757 		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2758 			ip6_clearpktopts(opt, optname);
2759 			break;
2760 		}
2761 
2762 		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2763 		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2764 			return (EINVAL);
2765 		}
2766 
2767 		if (pktinfo->ipi6_ifindex) {
2768 			ifp = if_get(pktinfo->ipi6_ifindex);
2769 			if (ifp == NULL)
2770 				return (ENXIO);
2771 		}
2772 
2773 		/*
2774 		 * We store the address anyway, and let in6_selectsrc()
2775 		 * validate the specified address.  This is because ipi6_addr
2776 		 * may not have enough information about its scope zone, and
2777 		 * we may need additional information (such as outgoing
2778 		 * interface or the scope zone of a destination address) to
2779 		 * disambiguate the scope.
2780 		 * XXX: the delay of the validation may confuse the
2781 		 * application when it is used as a sticky option.
2782 		 */
2783 		if (opt->ip6po_pktinfo == NULL) {
2784 			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2785 			    M_IP6OPT, M_NOWAIT);
2786 			if (opt->ip6po_pktinfo == NULL)
2787 				return (ENOBUFS);
2788 		}
2789 		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2790 		break;
2791 	}
2792 
2793 	case IPV6_2292HOPLIMIT:
2794 	case IPV6_HOPLIMIT:
2795 	{
2796 		int *hlimp;
2797 
2798 		/*
2799 		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2800 		 * to simplify the ordering among hoplimit options.
2801 		 */
2802 		if (optname == IPV6_HOPLIMIT && sticky)
2803 			return (ENOPROTOOPT);
2804 
2805 		if (len != sizeof(int))
2806 			return (EINVAL);
2807 		hlimp = (int *)buf;
2808 		if (*hlimp < -1 || *hlimp > 255)
2809 			return (EINVAL);
2810 
2811 		opt->ip6po_hlim = *hlimp;
2812 		break;
2813 	}
2814 
2815 	case IPV6_TCLASS:
2816 	{
2817 		int tclass;
2818 
2819 		if (len != sizeof(int))
2820 			return (EINVAL);
2821 		tclass = *(int *)buf;
2822 		if (tclass < -1 || tclass > 255)
2823 			return (EINVAL);
2824 
2825 		opt->ip6po_tclass = tclass;
2826 		break;
2827 	}
2828 
2829 	case IPV6_2292NEXTHOP:
2830 	case IPV6_NEXTHOP:
2831 		if (!priv)
2832 			return (EPERM);
2833 
2834 		if (len == 0) {	/* just remove the option */
2835 			ip6_clearpktopts(opt, IPV6_NEXTHOP);
2836 			break;
2837 		}
2838 
2839 		/* check if cmsg_len is large enough for sa_len */
2840 		if (len < sizeof(struct sockaddr) || len < *buf)
2841 			return (EINVAL);
2842 
2843 		switch (((struct sockaddr *)buf)->sa_family) {
2844 		case AF_INET6:
2845 		{
2846 			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2847 
2848 			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2849 				return (EINVAL);
2850 
2851 			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2852 			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2853 				return (EINVAL);
2854 			}
2855 			if (IN6_IS_SCOPE_EMBED(&sa6->sin6_addr)) {
2856 				if (if_get(sa6->sin6_scope_id) == NULL)
2857 					return (EINVAL);
2858 				sa6->sin6_addr.s6_addr16[1] =
2859 				    htonl(sa6->sin6_scope_id);
2860 			} else if (sa6->sin6_scope_id)
2861 				return (EINVAL);
2862 			break;
2863 		}
2864 		case AF_LINK:	/* eventually be supported? */
2865 		default:
2866 			return (EAFNOSUPPORT);
2867 		}
2868 
2869 		/* turn off the previous option, then set the new option. */
2870 		ip6_clearpktopts(opt, IPV6_NEXTHOP);
2871 		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2872 		if (opt->ip6po_nexthop == NULL)
2873 			return (ENOBUFS);
2874 		bcopy(buf, opt->ip6po_nexthop, *buf);
2875 		break;
2876 
2877 	case IPV6_2292HOPOPTS:
2878 	case IPV6_HOPOPTS:
2879 	{
2880 		struct ip6_hbh *hbh;
2881 		int hbhlen;
2882 
2883 		/*
2884 		 * XXX: We don't allow a non-privileged user to set ANY HbH
2885 		 * options, since per-option restriction has too much
2886 		 * overhead.
2887 		 */
2888 		if (!priv)
2889 			return (EPERM);
2890 
2891 		if (len == 0) {
2892 			ip6_clearpktopts(opt, IPV6_HOPOPTS);
2893 			break;	/* just remove the option */
2894 		}
2895 
2896 		/* message length validation */
2897 		if (len < sizeof(struct ip6_hbh))
2898 			return (EINVAL);
2899 		hbh = (struct ip6_hbh *)buf;
2900 		hbhlen = (hbh->ip6h_len + 1) << 3;
2901 		if (len != hbhlen)
2902 			return (EINVAL);
2903 
2904 		/* turn off the previous option, then set the new option. */
2905 		ip6_clearpktopts(opt, IPV6_HOPOPTS);
2906 		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2907 		if (opt->ip6po_hbh == NULL)
2908 			return (ENOBUFS);
2909 		bcopy(hbh, opt->ip6po_hbh, hbhlen);
2910 
2911 		break;
2912 	}
2913 
2914 	case IPV6_2292DSTOPTS:
2915 	case IPV6_DSTOPTS:
2916 	case IPV6_RTHDRDSTOPTS:
2917 	{
2918 		struct ip6_dest *dest, **newdest = NULL;
2919 		int destlen;
2920 
2921 		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
2922 			return (EPERM);
2923 
2924 		if (len == 0) {
2925 			ip6_clearpktopts(opt, optname);
2926 			break;	/* just remove the option */
2927 		}
2928 
2929 		/* message length validation */
2930 		if (len < sizeof(struct ip6_dest))
2931 			return (EINVAL);
2932 		dest = (struct ip6_dest *)buf;
2933 		destlen = (dest->ip6d_len + 1) << 3;
2934 		if (len != destlen)
2935 			return (EINVAL);
2936 		/*
2937 		 * Determine the position that the destination options header
2938 		 * should be inserted; before or after the routing header.
2939 		 */
2940 		switch (optname) {
2941 		case IPV6_2292DSTOPTS:
2942 			/*
2943 			 * The old advanced API is ambiguous on this point.
2944 			 * Our approach is to determine the position based
2945 			 * according to the existence of a routing header.
2946 			 * Note, however, that this depends on the order of the
2947 			 * extension headers in the ancillary data; the 1st
2948 			 * part of the destination options header must appear
2949 			 * before the routing header in the ancillary data,
2950 			 * too.
2951 			 * RFC3542 solved the ambiguity by introducing
2952 			 * separate ancillary data or option types.
2953 			 */
2954 			if (opt->ip6po_rthdr == NULL)
2955 				newdest = &opt->ip6po_dest1;
2956 			else
2957 				newdest = &opt->ip6po_dest2;
2958 			break;
2959 		case IPV6_RTHDRDSTOPTS:
2960 			newdest = &opt->ip6po_dest1;
2961 			break;
2962 		case IPV6_DSTOPTS:
2963 			newdest = &opt->ip6po_dest2;
2964 			break;
2965 		}
2966 
2967 		/* turn off the previous option, then set the new option. */
2968 		ip6_clearpktopts(opt, optname);
2969 		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
2970 		if (*newdest == NULL)
2971 			return (ENOBUFS);
2972 		bcopy(dest, *newdest, destlen);
2973 
2974 		break;
2975 	}
2976 
2977 	case IPV6_2292RTHDR:
2978 	case IPV6_RTHDR:
2979 	{
2980 		struct ip6_rthdr *rth;
2981 		int rthlen;
2982 
2983 		if (len == 0) {
2984 			ip6_clearpktopts(opt, IPV6_RTHDR);
2985 			break;	/* just remove the option */
2986 		}
2987 
2988 		/* message length validation */
2989 		if (len < sizeof(struct ip6_rthdr))
2990 			return (EINVAL);
2991 		rth = (struct ip6_rthdr *)buf;
2992 		rthlen = (rth->ip6r_len + 1) << 3;
2993 		if (len != rthlen)
2994 			return (EINVAL);
2995 
2996 		switch (rth->ip6r_type) {
2997 		case IPV6_RTHDR_TYPE_0:
2998 			if (rth->ip6r_len == 0)	/* must contain one addr */
2999 				return (EINVAL);
3000 			if (rth->ip6r_len % 2) /* length must be even */
3001 				return (EINVAL);
3002 			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3003 				return (EINVAL);
3004 			break;
3005 		default:
3006 			return (EINVAL);	/* not supported */
3007 		}
3008 		/* turn off the previous option */
3009 		ip6_clearpktopts(opt, IPV6_RTHDR);
3010 		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3011 		if (opt->ip6po_rthdr == NULL)
3012 			return (ENOBUFS);
3013 		bcopy(rth, opt->ip6po_rthdr, rthlen);
3014 		break;
3015 	}
3016 
3017 	case IPV6_USE_MIN_MTU:
3018 		if (len != sizeof(int))
3019 			return (EINVAL);
3020 		minmtupolicy = *(int *)buf;
3021 		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3022 		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3023 		    minmtupolicy != IP6PO_MINMTU_ALL) {
3024 			return (EINVAL);
3025 		}
3026 		opt->ip6po_minmtu = minmtupolicy;
3027 		break;
3028 
3029 	case IPV6_DONTFRAG:
3030 		if (len != sizeof(int))
3031 			return (EINVAL);
3032 
3033 		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3034 			/*
3035 			 * we ignore this option for TCP sockets.
3036 			 * (RFC3542 leaves this case unspecified.)
3037 			 */
3038 			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3039 		} else
3040 			opt->ip6po_flags |= IP6PO_DONTFRAG;
3041 		break;
3042 
3043 	default:
3044 		return (ENOPROTOOPT);
3045 	} /* end of switch */
3046 
3047 	return (0);
3048 }
3049 
3050 /*
3051  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3052  * packet to the input queue of a specified interface.  Note that this
3053  * calls the output routine of the loopback "driver", but with an interface
3054  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3055  */
3056 void
3057 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3058 {
3059 	struct mbuf *copym;
3060 	struct ip6_hdr *ip6;
3061 
3062 	/*
3063 	 * Duplicate the packet.
3064 	 */
3065 	copym = m_copy(m, 0, M_COPYALL);
3066 	if (copym == NULL)
3067 		return;
3068 
3069 	/*
3070 	 * Make sure to deep-copy IPv6 header portion in case the data
3071 	 * is in an mbuf cluster, so that we can safely override the IPv6
3072 	 * header portion later.
3073 	 */
3074 	if ((copym->m_flags & M_EXT) != 0 ||
3075 	    copym->m_len < sizeof(struct ip6_hdr)) {
3076 		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3077 		if (copym == NULL)
3078 			return;
3079 	}
3080 
3081 #ifdef DIAGNOSTIC
3082 	if (copym->m_len < sizeof(*ip6)) {
3083 		m_freem(copym);
3084 		return;
3085 	}
3086 #endif
3087 
3088 	ip6 = mtod(copym, struct ip6_hdr *);
3089 	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
3090 		ip6->ip6_src.s6_addr16[1] = 0;
3091 	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
3092 		ip6->ip6_dst.s6_addr16[1] = 0;
3093 
3094 	(void)looutput(ifp, copym, sin6tosa(dst), NULL);
3095 }
3096 
3097 /*
3098  * Chop IPv6 header off from the payload.
3099  */
3100 int
3101 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3102 {
3103 	struct mbuf *mh;
3104 	struct ip6_hdr *ip6;
3105 
3106 	ip6 = mtod(m, struct ip6_hdr *);
3107 	if (m->m_len > sizeof(*ip6)) {
3108 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3109 		if (mh == NULL) {
3110 			m_freem(m);
3111 			return ENOBUFS;
3112 		}
3113 		M_MOVE_PKTHDR(mh, m);
3114 		MH_ALIGN(mh, sizeof(*ip6));
3115 		m->m_len -= sizeof(*ip6);
3116 		m->m_data += sizeof(*ip6);
3117 		mh->m_next = m;
3118 		m = mh;
3119 		m->m_len = sizeof(*ip6);
3120 		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3121 	}
3122 	exthdrs->ip6e_ip6 = m;
3123 	return 0;
3124 }
3125 
3126 u_int32_t
3127 ip6_randomid(void)
3128 {
3129 	return idgen32(&ip6_id_ctx);
3130 }
3131 
3132 void
3133 ip6_randomid_init(void)
3134 {
3135 	idgen32_init(&ip6_id_ctx);
3136 }
3137 
3138 /*
3139  *	Compute significant parts of the IPv6 checksum pseudo-header
3140  *	for use in a delayed TCP/UDP checksum calculation.
3141  */
3142 static __inline u_int16_t __attribute__((__unused__))
3143 in6_cksum_phdr(const struct in6_addr *src, const struct in6_addr *dst,
3144     u_int32_t len, u_int32_t nxt)
3145 {
3146 	u_int32_t sum = 0;
3147 	const u_int16_t *w;
3148 
3149 	w = (const u_int16_t *) src;
3150 	sum += w[0];
3151 	if (!IN6_IS_SCOPE_EMBED(src))
3152 		sum += w[1];
3153 	sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
3154 	sum += w[6]; sum += w[7];
3155 
3156 	w = (const u_int16_t *) dst;
3157 	sum += w[0];
3158 	if (!IN6_IS_SCOPE_EMBED(dst))
3159 		sum += w[1];
3160 	sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
3161 	sum += w[6]; sum += w[7];
3162 
3163 	sum += (u_int16_t)(len >> 16) + (u_int16_t)(len /*& 0xffff*/);
3164 
3165 	sum += (u_int16_t)(nxt >> 16) + (u_int16_t)(nxt /*& 0xffff*/);
3166 
3167 	sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/);
3168 
3169 	if (sum > 0xffff)
3170 		sum -= 0xffff;
3171 
3172 	return (sum);
3173 }
3174 
3175 /*
3176  * Process a delayed payload checksum calculation.
3177  */
3178 void
3179 in6_delayed_cksum(struct mbuf *m, u_int8_t nxt)
3180 {
3181 	int nxtp, offset;
3182 	u_int16_t csum;
3183 
3184 	offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxtp);
3185 	if (offset <= 0 || nxtp != nxt)
3186 		/* If the desired next protocol isn't found, punt. */
3187 		return;
3188 	csum = (u_int16_t)(in6_cksum(m, 0, offset, m->m_pkthdr.len - offset));
3189 
3190 	switch (nxt) {
3191 	case IPPROTO_TCP:
3192 		offset += offsetof(struct tcphdr, th_sum);
3193 		break;
3194 
3195 	case IPPROTO_UDP:
3196 		offset += offsetof(struct udphdr, uh_sum);
3197 		if (csum == 0)
3198 			csum = 0xffff;
3199 		break;
3200 
3201 	case IPPROTO_ICMPV6:
3202 		offset += offsetof(struct icmp6_hdr, icmp6_cksum);
3203 		break;
3204 	}
3205 
3206 	if ((offset + sizeof(u_int16_t)) > m->m_len)
3207 		m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT);
3208 	else
3209 		*(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
3210 }
3211 
3212 void
3213 in6_proto_cksum_out(struct mbuf *m, struct ifnet *ifp)
3214 {
3215 	/* some hw and in6_delayed_cksum need the pseudo header cksum */
3216 	if (m->m_pkthdr.csum_flags &
3217 	    (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) {
3218 		struct ip6_hdr *ip6;
3219 		int nxt, offset;
3220 		u_int16_t csum;
3221 
3222 		ip6 = mtod(m, struct ip6_hdr *);
3223 		offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
3224 		csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst,
3225 		    htonl(m->m_pkthdr.len - offset), htonl(nxt));
3226 		if (nxt == IPPROTO_TCP)
3227 			offset += offsetof(struct tcphdr, th_sum);
3228 		else if (nxt == IPPROTO_UDP)
3229 			offset += offsetof(struct udphdr, uh_sum);
3230 		else if (nxt == IPPROTO_ICMPV6)
3231 			offset += offsetof(struct icmp6_hdr, icmp6_cksum);
3232 		if ((offset + sizeof(u_int16_t)) > m->m_len)
3233 			m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT);
3234 		else
3235 			*(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
3236 	}
3237 
3238 	if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
3239 		if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv6) ||
3240 		    ifp->if_bridgeport != NULL) {
3241 			tcpstat.tcps_outswcsum++;
3242 			in6_delayed_cksum(m, IPPROTO_TCP);
3243 			m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
3244 		}
3245 	} else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
3246 		if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv6) ||
3247 		    ifp->if_bridgeport != NULL) {
3248 			udpstat.udps_outswcsum++;
3249 			in6_delayed_cksum(m, IPPROTO_UDP);
3250 			m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */
3251 		}
3252 	} else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) {
3253 		in6_delayed_cksum(m, IPPROTO_ICMPV6);
3254 		m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */
3255 	}
3256 }
3257