1 /* $OpenBSD: ip6_output.c,v 1.291 2024/04/17 20:48:51 bluhm Exp $ */ 2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1990, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
 *
 *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
 */

#include "pf.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_enc.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>

#include <netinet/ip_var.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/udp_var.h>

#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>

#include <crypto/idgen.h>

#if NPF > 0
#include <net/pfvar.h>
#endif

#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#include <netinet/ip_ah.h>
#include <netinet/ip_esp.h>

/*
 * Debug printf for the IPsec paths: prints "<function>: <message>\n" when
 * the encdebug variable is non-zero.  Expands to an empty statement when
 * ENCDEBUG is not defined.
 */
#ifdef ENCDEBUG
#define DPRINTF(fmt, args...)						\
	do {								\
		if (encdebug)						\
			printf("%s: " fmt "\n", __func__, ## args);	\
	} while (0)
#else
#define DPRINTF(fmt, args...)						\
	do { } while (0)
#endif
#endif /* IPSEC */

/*
 * Scratch record of the pieces of an outgoing packet while its header
 * chain is being assembled.  Each extension header is kept in an mbuf of
 * its own; a NULL member means that header is absent.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* mbuf carrying the IPv6 header */
	struct mbuf *ip6e_hbh;		/* hop-by-hop options header */
	struct mbuf *ip6e_dest1;	/* destination options, 1st part */
	struct mbuf *ip6e_rthdr;	/* routing header */
	struct mbuf *ip6e_dest2;	/* destination options, 2nd part */
};

int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int, int);
int ip6_getpcbopt(struct ip6_pktopts *, int, struct mbuf *);
int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int, int, int);
int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *, unsigned int);
int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf *);
int ip6_copyexthdr(struct mbuf **, caddr_t, int);
int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
	struct ip6_frag **);
int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
int ip6_getpmtu(struct rtentry *, struct ifnet *, u_long *);
int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *);
static __inline u_int16_t __attribute__((__unused__))
    in6_cksum_phdr(const struct in6_addr *, const struct in6_addr *,
    u_int32_t, u_int32_t);
void in6_delayed_cksum(struct mbuf *, u_int8_t);

int ip6_output_ipsec_pmtu_update(struct tdb *, struct route *,
    struct in6_addr *, int, int, int);

/* Context for non-repeating IDs */
struct idgen32_ctx ip6_id_ctx;

/*
 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
 * header (with pri, len, nxt, hlim, src, dst).
 * This function may modify ver and hlim only.
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 *
 * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int.
 * We use u_long to hold the largest one, which is rt_mtu.
 */
/*
 * Reader's map of ip6_output():
 *   1. copy the extension headers requested in "opt" into private mbufs;
 *   2. (IPSEC) look up a tdb for the packet;
 *   3. compute lengths, split the IPv6 header off the payload if needed,
 *      insert a jumbo payload option if the payload is too large for the
 *      16-bit length field, and chain the extension headers in;
 *   4. rewrite the destination for a type 0 routing header, validate the
 *      source address, fill in traffic class and hop limit;
 *   5. hand the packet to IPsec if a tdb was found, otherwise pick the
 *      outgoing interface / route;
 *   6. multicast loopback and forwarding, scope-id handling, path MTU,
 *      hop-by-hop processing, pf;
 *   7. send as is, or fragment and send.
 *
 * Exit labels: "freehdrs" releases extension-header mbufs that have not
 * been linked into the packet and falls through to "bad", which frees m
 * and falls through to "done", which drops the route (only when the
 * local "iproute" was used), the interface reference and the tdb
 * reference, and returns "error".
 */
int
ip6_output(struct mbuf *m, struct ip6_pktopts *opt, struct route *ro,
    int flags, struct ip6_moptions *im6o, const struct ipsec_level *seclevel)
{
	struct ip6_hdr *ip6;
	struct ifnet *ifp = NULL;
	struct mbuf_list ml;
	int hlen, tlen;
	struct route iproute;
	struct rtentry *rt = NULL;
	struct sockaddr_in6 *dst;
	int error = 0;
	u_long mtu;
	int dontfrag;
	/* only assigned and read back when the outgoing ifp is a loopback */
	u_int16_t src_scope, dst_scope;
	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
	struct ip6_exthdrs exthdrs;
	struct in6_addr finaldst;
	struct route *ro_pmtu = NULL;
	int hdrsplit = 0;
	/* never assigned a non-zero value within this function */
	u_int8_t sproto = 0;
	u_char nextproto;
#ifdef IPSEC
	struct tdb *tdb = NULL;
#endif /* IPSEC */

	ip6 = mtod(m, struct ip6_hdr *);
	/* remember the real destination; a routing header may replace it */
	finaldst = ip6->ip6_dst;

/*
 * Copy the extension header at "hp" (if any) into a new mbuf stored in
 * "*mp".  The byte count is derived from the header's own length field:
 * (ip6e_len + 1) * 8.  Jumps to freehdrs on allocation failure.
 */
#define MAKE_EXTHDR(hp, mp)						\
    do {								\
	if (hp) {							\
		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
		    ((eh)->ip6e_len + 1) << 3);				\
		if (error)						\
			goto freehdrs;					\
	}								\
    } while (0)

	bzero(&exthdrs, sizeof(exthdrs));

	if (opt) {
		/* Hop-by-Hop options header */
		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
		/* Destination options header(1st part) */
		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
		/* Routing header */
		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
		/* Destination options header(2nd part) */
		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
	}

#ifdef IPSEC
	if (ipsec_in_use || seclevel != NULL) {
		error = ip6_output_ipsec_lookup(m, seclevel, &tdb);
		if (error) {
			/*
			 * -EINVAL is used to indicate that the packet should
			 * be silently dropped, typically because we've asked
			 * key management for an SA.
			 */
			if (error == -EINVAL) /* Should silently drop packet */
				error = 0;

			goto freehdrs;
		}
	}
#endif /* IPSEC */

	/*
	 * Calculate the total length of the extension header chain.
	 * Keep the length of the unfragmentable part for fragmentation.
	 */
	optlen = 0;
	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
	/* dest2 is deliberately excluded: it belongs to the fragmentable part */
	unfragpartlen = optlen + sizeof(struct ip6_hdr);
	/* NOTE: we don't add AH/ESP length here. do that later. */
	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;

	/*
	 * If we need IPsec, or there is at least one extension header,
	 * separate IP6 header from the payload.
	 */
	if ((sproto || optlen) && !hdrsplit) {
		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
			/* m is cleared so the "bad" label does not free it */
			m = NULL;
			goto freehdrs;
		}
		m = exthdrs.ip6e_ip6;
		hdrsplit++;
	}

	/* adjust pointer */
	ip6 = mtod(m, struct ip6_hdr *);

	/* adjust mbuf packet header length */
	m->m_pkthdr.len += optlen;
	plen = m->m_pkthdr.len - sizeof(*ip6);

	/* If this is a jumbo payload, insert a jumbo payload option. */
	if (plen > IPV6_MAXPACKET) {
		if (!hdrsplit) {
			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
				m = NULL;
				goto freehdrs;
			}
			m = exthdrs.ip6e_ip6;
			hdrsplit++;
		}
		/* adjust pointer */
		ip6 = mtod(m, struct ip6_hdr *);
		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
			goto freehdrs;
		/* a zero payload length marks the packet as a jumbogram */
		ip6->ip6_plen = 0;
	} else
		ip6->ip6_plen = htons(plen);

	/*
	 * Concatenate headers and fill in next header fields.
	 * Here we have, on "m"
	 *	IPv6 payload
	 * and we insert headers accordingly.  Finally, we should be getting:
	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
	 *
	 * during the header composing process, "m" points to IPv6 header.
	 * "mprev" points to an extension header prior to esp.
	 */
	{
		u_char *nexthdrp = &ip6->ip6_nxt;
		struct mbuf *mprev = m;

		/*
		 * we treat dest2 specially.  this makes IPsec processing
		 * much easier.  the goal here is to make mprev point the
		 * mbuf prior to dest2.
		 *
		 * result: IPv6 dest2 payload
		 * m and mprev will point to IPv6 header.
		 */
		if (exthdrs.ip6e_dest2) {
			if (!hdrsplit)
				panic("%s: assumption failed: hdr not split",
				    __func__);
			exthdrs.ip6e_dest2->m_next = m->m_next;
			m->m_next = exthdrs.ip6e_dest2;
			/* first byte of an extension header is its next-header */
			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
			ip6->ip6_nxt = IPPROTO_DSTOPTS;
		}

/*
 * Link extension-header mbuf "m" (if present) right after "mp".
 * "p" points at the next-header byte of the preceding header: its old
 * value is moved into the new header's first byte, it is overwritten
 * with protocol number "i", and "p"/"mp" are advanced to the new header.
 */
#define MAKE_CHAIN(m, mp, p, i)\
    do {\
	if (m) {\
		if (!hdrsplit) \
			panic("assumption failed: hdr not split"); \
		*mtod((m), u_char *) = *(p);\
		*(p) = (i);\
		p = mtod((m), u_char *);\
		(m)->m_next = (mp)->m_next;\
		(mp)->m_next = (m);\
		(mp) = (m);\
	}\
    } while (0)
		/*
		 * result: IPv6 hbh dest1 rthdr dest2 payload
		 * m will point to IPv6 header.  mprev will point to the
		 * extension header prior to dest2 (rthdr in the above case).
		 */
		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
		    IPPROTO_DSTOPTS);
		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
		    IPPROTO_ROUTING);
	}

	/*
	 * If there is a routing header, replace the destination address field
	 * with the first hop of the routing header.
	 */
	if (exthdrs.ip6e_rthdr) {
		struct ip6_rthdr *rh;
		struct ip6_rthdr0 *rh0;
		struct in6_addr *addr;

		rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
		    struct ip6_rthdr *));
		switch (rh->ip6r_type) {
		case IPV6_RTHDR_TYPE_0:
			rh0 = (struct ip6_rthdr0 *)rh;
			/* the address list follows the fixed rthdr0 part */
			addr = (struct in6_addr *)(rh0 + 1);
			ip6->ip6_dst = addr[0];
			/*
			 * Shift the remaining hops up by one slot and put
			 * the original destination in the last slot.
			 * NOTE(review): relies on ip6r0_segleft >= 1;
			 * presumably validated when the option was set --
			 * confirm against the setter.
			 */
			bcopy(&addr[1], &addr[0],
			    sizeof(struct in6_addr) * (rh0->ip6r0_segleft - 1));
			addr[rh0->ip6r0_segleft - 1] = finaldst;
			break;
		default:	/* is it possible? */
			error = EINVAL;
			goto bad;
		}
	}

	/* Source address validation */
	if (!(flags & IPV6_UNSPECSRC) &&
	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
		/*
		 * XXX: we can probably assume validation in the caller, but
		 * we explicitly check the address here for safety.
		 */
		error = EOPNOTSUPP;
		ip6stat_inc(ip6s_badscope);
		goto bad;
	}
	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
		error = EOPNOTSUPP;
		ip6stat_inc(ip6s_badscope);
		goto bad;
	}

	ip6stat_inc(ip6s_localout);

	/*
	 * Route packet.
	 */
#if NPF > 0
reroute:
#endif

	/* initialize cached route */
	if (ro == NULL) {
		ro = &iproute;
		ro->ro_rt = NULL;
	}
	/* path MTU is looked up on the caller's route, not the rthdr route */
	ro_pmtu = ro;
	if (opt && opt->ip6po_rthdr)
		ro = &opt->ip6po_route;
	dst = &ro->ro_dstsin6;

	/*
	 * if specified, try to fill in the traffic class field.
	 * do not override if a non-zero value is already set.
	 * we check the diffserv field and the ecn field separately.
	 */
	if (opt && opt->ip6po_tclass >= 0) {
		int mask = 0;

		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
			mask |= 0xfc;
		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
			mask |= 0x03;
		if (mask != 0)
			ip6->ip6_flow |=
			    htonl((opt->ip6po_tclass & mask) << 20);
	}

	/* fill in or override the hop limit field, if necessary. */
	if (opt && opt->ip6po_hlim != -1)
		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
		if (im6o != NULL)
			ip6->ip6_hlim = im6o->im6o_hlim;
		else
			ip6->ip6_hlim = ip6_defmcasthlim;
	}

#ifdef IPSEC
	if (tdb != NULL) {
		/*
		 * XXX what should we do if ip6_hlim == 0 and the
		 * packet gets tunneled?
		 */
		/*
		 * if we are source-routing, do not attempt to tunnel the
		 * packet just because ip6_dst is different from what tdb has.
		 * XXX
		 */
		error = ip6_output_ipsec_send(tdb, m, ro,
		    exthdrs.ip6e_rthdr ? 1 : 0, 0);
		/* m is not freed here; this path goes straight to "done" */
		goto done;
	}
#endif /* IPSEC */

	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
		struct in6_pktinfo *pi = NULL;

		/*
		 * If the caller specify the outgoing interface
		 * explicitly, use it.
		 */
		if (opt != NULL && (pi = opt->ip6po_pktinfo) != NULL)
			ifp = if_get(pi->ipi6_ifindex);

		if (ifp == NULL && im6o != NULL)
			ifp = if_get(im6o->im6o_ifidx);
	}

	if (ifp == NULL) {
		/* no interface chosen above: derive it from a route lookup */
		rt = in6_selectroute(&ip6->ip6_dst, opt, ro,
		    m->m_pkthdr.ph_rtableid);
		if (rt == NULL) {
			ip6stat_inc(ip6s_noroute);
			error = EHOSTUNREACH;
			goto bad;
		}
		if (ISSET(rt->rt_flags, RTF_LOCAL))
			ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
		else
			ifp = if_get(rt->rt_ifidx);
		/*
		 * We aren't using rtisvalid() here because the UP/DOWN state
		 * machine is broken with some Ethernet drivers like em(4).
		 * As a result we might try to use an invalid cached route
		 * entry while an interface is being detached.
		 */
		if (ifp == NULL) {
			ip6stat_inc(ip6s_noroute);
			error = EHOSTUNREACH;
			goto bad;
		}
	} else {
		route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid);
	}

	/* unicast via a gateway: the link-level next hop is the gateway */
	if (rt && (rt->rt_flags & RTF_GATEWAY) &&
	    !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
		dst = satosin6(rt->rt_gateway);

	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
		/* Unicast */

		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
	} else {
		/* Multicast */

		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;

		/*
		 * Confirm that the outgoing interface supports multicast.
		 */
		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
			ip6stat_inc(ip6s_noroute);
			error = ENETUNREACH;
			goto bad;
		}

		if ((im6o == NULL || im6o->im6o_loop) &&
		    in6_hasmulti(&ip6->ip6_dst, ifp)) {
			/*
			 * If we belong to the destination multicast group
			 * on the outgoing interface, and the caller did not
			 * forbid loopback, loop back a copy.
			 * Can't defer TCP/UDP checksumming, do the
			 * computation now.
			 */
			in6_proto_cksum_out(m, NULL);
			ip6_mloopback(ifp, m, dst);
		}
#ifdef MROUTING
		else {
			/*
			 * If we are acting as a multicast router, perform
			 * multicast forwarding as if the packet had just
			 * arrived on the interface to which we are about
			 * to send.  The multicast forwarding function
			 * recursively calls this function, using the
			 * IPV6_FORWARDING flag to prevent infinite recursion.
			 *
			 * Multicasts that are looped back by ip6_mloopback(),
			 * above, will be forwarded by the ip6_input() routine,
			 * if necessary.
			 */
			if (ip6_mforwarding && ip6_mrouter[ifp->if_rdomain] &&
			    (flags & IPV6_FORWARDING) == 0) {
				if (ip6_mforward(ip6, ifp, m) != 0) {
					m_freem(m);
					goto done;
				}
			}
		}
#endif
		/*
		 * Multicasts with a hoplimit of zero may be looped back,
		 * above, but must not be transmitted on a network.
		 * Also, multicasts addressed to the loopback interface
		 * are not sent -- the above call to ip6_mloopback() will
		 * loop back a copy if this host actually belongs to the
		 * destination group on the loopback interface.
		 */
		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
			m_freem(m);
			goto done;
		}
	}

	/*
	 * If this packet is going through a loopback interface we won't
	 * be able to restore its scope ID using the interface index.
	 */
	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
		/* save the embedded scope before clearing it */
		if (ifp->if_flags & IFF_LOOPBACK)
			src_scope = ip6->ip6_src.s6_addr16[1];
		ip6->ip6_src.s6_addr16[1] = 0;
	}
	if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
		if (ifp->if_flags & IFF_LOOPBACK)
			dst_scope = ip6->ip6_dst.s6_addr16[1];
		ip6->ip6_dst.s6_addr16[1] = 0;
	}

	/* Determine path MTU. */
	if ((error = ip6_getpmtu(ro_pmtu->ro_rt, ifp, &mtu)) != 0)
		goto bad;

	/*
	 * The caller of this function may specify to use the minimum MTU
	 * in some cases.
	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
	 * setting.  The logic is a bit complicated; by default, unicast
	 * packets will follow path MTU while multicast packets will be sent at
	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
	 * including unicast ones will be sent at the minimum MTU.  Multicast
	 * packets will always be sent at the minimum MTU unless
	 * IP6PO_MINMTU_DISABLE is explicitly specified.
	 * See RFC 3542 for more details.
	 */
	if (mtu > IPV6_MMTU) {
		if ((flags & IPV6_MINMTU))
			mtu = IPV6_MMTU;
		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
			mtu = IPV6_MMTU;
		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL ||
		    opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
			mtu = IPV6_MMTU;
		}
	}

	/*
	 * If the outgoing packet contains a hop-by-hop options header,
	 * it must be examined and processed even by the source node.
	 * (RFC 2460, section 4.)
	 */
	if (exthdrs.ip6e_hbh) {
		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
		u_int32_t rtalert;	/* returned value is ignored */
		/* shadows the function-level plen on purpose */
		u_int32_t plen = 0; /* no more than 1 jumbo payload option! */

		/* ph_ifidx is set only for the duration of the call below */
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		if (ip6_process_hopopts(&m, (u_int8_t *)(hbh + 1),
		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
		    &rtalert, &plen) < 0) {
			/* m was already freed at this point */
			error = EINVAL;/* better error? */
			goto done;
		}
		m->m_pkthdr.ph_ifidx = 0;
	}

#if NPF > 0
	if (pf_test(AF_INET6, PF_OUT, ifp, &m) != PF_PASS) {
		error = EACCES;
		m_freem(m);
		goto done;
	}
	if (m == NULL)
		goto done;
	/* pf_test() takes &m, so the header pointer must be refreshed */
	ip6 = mtod(m, struct ip6_hdr *);
	if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) ==
	    (PF_TAG_REROUTE | PF_TAG_GENERATED)) {
		/* already rerun the route lookup, go on */
		m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE);
	} else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) {
		/* tag as generated to skip over pf_test on rerun */
		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
		finaldst = ip6->ip6_dst;
		/* only the local route is ours to release */
		if (ro == &iproute)
			rtfree(ro->ro_rt);
		/* NULL makes the reroute pass start from the local iproute */
		ro = NULL;
		if_put(ifp); /* drop reference since destination changed */
		ifp = NULL;
		goto reroute;
	}
#endif

	/*
	 * If the packet is not going on the wire it can be destined
	 * to any local address.  In this case do not clear its scopes
	 * to let ip6_input() find a matching local route.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
			ip6->ip6_src.s6_addr16[1] = src_scope;
		if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
			ip6->ip6_dst.s6_addr16[1] = dst_scope;
	}

	/*
	 * Send the packet to the outgoing interface.
	 * If necessary, do IPv6 fragmentation before sending.
	 *
	 * the logic here is rather complex:
	 * 1: normal case (dontfrag == 0)
	 * 1-a:	send as is if tlen <= path mtu
	 * 1-b:	fragment if tlen > path mtu
	 *
	 * 2: if user asks us not to fragment (dontfrag == 1)
	 * 2-a:	send as is if tlen <= interface mtu
	 * 2-b:	error if tlen > interface mtu
	 */
	/* for TSO packets the size that matters is the segment size */
	tlen = ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ?
	    m->m_pkthdr.ph_mss : m->m_pkthdr.len;

	if (ISSET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT)) {
		CLR(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
		dontfrag = 1;
	} else if (opt && ISSET(opt->ip6po_flags, IP6PO_DONTFRAG))
		dontfrag = 1;
	else
		dontfrag = 0;

	if (dontfrag && tlen > ifp->if_mtu) {	/* case 2-b */
#ifdef IPSEC
		if (ip_mtudisc)
			ipsec_adjust_mtu(m, mtu);
#endif
		error = EMSGSIZE;
		goto bad;
	}

	/*
	 * transmit packet without fragmentation
	 */
	if (dontfrag || tlen <= mtu) {	/* case 1-a and 2-a */
		error = if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt,
		    ifp->if_mtu);
		if (error || m == NULL)
			goto done;
		goto bad;	/* should not happen */
	}

	/*
	 * try to fragment the packet.  case 1-b
	 */
	if (mtu < IPV6_MMTU) {
		/* path MTU cannot be less than IPV6_MMTU */
		error = EMSGSIZE;
		goto bad;
	} else if (ip6->ip6_plen == 0) {
		/* jumbo payload cannot be fragmented */
		error = EMSGSIZE;
		goto bad;
	}

	/*
	 * Too large for the destination or interface;
	 * fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	hlen = unfragpartlen;
	if (mtu > IPV6_MAXPACKET)
		mtu = IPV6_MAXPACKET;

	/*
	 * If we are doing fragmentation, we can't defer TCP/UDP
	 * checksumming; compute the checksum and clear the flag.
	 */
	in6_proto_cksum_out(m, NULL);

	/*
	 * Change the next header field of the last header in the
	 * unfragmentable part.
	 */
	if (exthdrs.ip6e_rthdr) {
		nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
		*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
	} else if (exthdrs.ip6e_dest1) {
		nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
		*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
	} else if (exthdrs.ip6e_hbh) {
		nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
		*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
	} else {
		nextproto = ip6->ip6_nxt;
		ip6->ip6_nxt = IPPROTO_FRAGMENT;
	}

	/* ip6_fragment() frees m on both success and failure */
	if ((error = ip6_fragment(m, &ml, hlen, nextproto, mtu)) ||
	    (error = if_output_ml(ifp, &ml, sin6tosa(dst), ro->ro_rt)))
		goto done;
	ip6stat_inc(ip6s_fragmented);
	goto done;

 freehdrs:
	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
	m_freem(exthdrs.ip6e_dest1);
	m_freem(exthdrs.ip6e_rthdr);
	m_freem(exthdrs.ip6e_dest2);
 bad:
	m_freem(m);
 done:
	/* release the route only if it lives in our local iproute */
	if (ro == &iproute)
		rtfree(ro->ro_rt);
	else if (ro_pmtu == &iproute)
		rtfree(ro_pmtu->ro_rt);
	if_put(ifp);
#ifdef IPSEC
	tdb_unref(tdb);
#endif /* IPSEC */
	return (error);
}

/*
 * Split packet m0 into fragments and queue them on ml.
 *
 * m0		packet to fragment; it is freed on success and on failure.
 * ml		list that receives the fragments; (re)initialised here and
 *		purged on failure.
 * hlen		length of the unfragmentable part (IPv6 header plus the
 *		extension headers copied into every fragment).
 * nextproto	value stored in each fragment header's next-header field.
 * mtu		upper bound for the size of each fragment.
 *
 * Returns 0 on success, EMSGSIZE if fewer than 8 payload bytes fit in a
 * fragment, ENOBUFS on mbuf allocation failure, or the error returned by
 * m_dup_pkthdr() / ip6_insertfraghdr().
 */
int
ip6_fragment(struct mbuf *m0, struct mbuf_list *ml, int hlen, u_char nextproto,
    u_long mtu)
{
	struct ip6_hdr *ip6;
	u_int32_t id;
	int tlen, len, off;
	int error;

	ml_init(ml);

	ip6 = mtod(m0, struct ip6_hdr *);
	tlen = m0->m_pkthdr.len;
	/* payload bytes per fragment, rounded down to a multiple of 8 */
	len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
	if (len < 8) {
		error = EMSGSIZE;
		goto bad;
	}
	/* one identification value shared by all fragments of this packet */
	id = htonl(ip6_randomid());

	/*
	 * Loop through length of payload,
	 * make new header and copy data of each part and link onto chain.
	 */
	for (off = hlen; off < tlen; off += len) {
		struct mbuf *m;
		struct mbuf *mlast;
		struct ip6_hdr *mhip6;
		struct ip6_frag *ip6f;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			error = ENOBUFS;
			goto bad;
		}
		/* queued first so that ml_purge() frees it on later errors */
		ml_enqueue(ml, m);
		if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0)
			goto bad;
		/* leave room in front for a link-layer header */
		m->m_data += max_linkhdr;
		mhip6 = mtod(m, struct ip6_hdr *);
		*mhip6 = *ip6;
		m->m_len = sizeof(struct ip6_hdr);

		if ((error = ip6_insertfraghdr(m0, m, hlen, &ip6f)) != 0)
			goto bad;
		ip6f->ip6f_offlg = htons((off - hlen) & ~7);
		if (off + len >= tlen)
			/* last fragment: shorten len; this also ends the loop */
			len = tlen - off;
		else
			ip6f->ip6f_offlg |= IP6F_MORE_FRAG;

		m->m_pkthdr.len = hlen + sizeof(struct ip6_frag) + len;
		mhip6->ip6_plen = htons(m->m_pkthdr.len -
		    sizeof(struct ip6_hdr));
		/* append this fragment's slice of the payload at the tail */
		for (mlast = m; mlast->m_next; mlast = mlast->m_next)
			;
		mlast->m_next = m_copym(m0, off, len, M_DONTWAIT);
		if (mlast->m_next == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		ip6f->ip6f_reserved = 0;
		ip6f->ip6f_ident = id;
		ip6f->ip6f_nxt = nextproto;
	}

	ip6stat_add(ip6s_ofragments, ml_len(ml));
	m_freem(m0);
	return (0);

bad:
	ip6stat_inc(ip6s_odropped);
	ml_purge(ml);
	m_freem(m0);
	return (error);
}

/*
 * Copy hlen bytes of the extension header at hdr into a newly allocated
 * mbuf and return it through *mp.  A cluster is attached when hlen does
 * not fit in a plain mbuf (hlen > MLEN).  If hdr is NULL the mbuf is
 * returned with m_len set but its data left unwritten.
 *
 * Returns 0 on success; ENOBUFS if hlen exceeds MCLBYTES or an
 * allocation fails, in which case *mp is not modified.
 */
int
ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
{
	struct mbuf *m;

	if (hlen > MCLBYTES)
		return (ENOBUFS); /* XXX */

	MGET(m, M_DONTWAIT, MT_DATA);
	if (!m)
		return (ENOBUFS);

	if (hlen > MLEN) {
		MCLGET(m, M_DONTWAIT);
		/* M_EXT not set means the cluster could not be attached */
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (ENOBUFS);
		}
	}
	m->m_len = hlen;
	if (hdr)
		memcpy(mtod(m, caddr_t), hdr, hlen);

	*mp = m;
	return (0);
}

/*
 * Insert jumbo payload option.
881 */ 882 int 883 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) 884 { 885 struct mbuf *mopt; 886 u_int8_t *optbuf; 887 u_int32_t v; 888 889 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ 890 891 /* 892 * If there is no hop-by-hop options header, allocate new one. 893 * If there is one but it doesn't have enough space to store the 894 * jumbo payload option, allocate a cluster to store the whole options. 895 * Otherwise, use it to store the options. 896 */ 897 if (exthdrs->ip6e_hbh == 0) { 898 MGET(mopt, M_DONTWAIT, MT_DATA); 899 if (mopt == NULL) 900 return (ENOBUFS); 901 mopt->m_len = JUMBOOPTLEN; 902 optbuf = mtod(mopt, u_int8_t *); 903 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ 904 exthdrs->ip6e_hbh = mopt; 905 } else { 906 struct ip6_hbh *hbh; 907 908 mopt = exthdrs->ip6e_hbh; 909 if (m_trailingspace(mopt) < JUMBOOPTLEN) { 910 /* 911 * XXX assumption: 912 * - exthdrs->ip6e_hbh is not referenced from places 913 * other than exthdrs. 914 * - exthdrs->ip6e_hbh is not an mbuf chain. 915 */ 916 int oldoptlen = mopt->m_len; 917 struct mbuf *n; 918 919 /* 920 * XXX: give up if the whole (new) hbh header does 921 * not fit even in an mbuf cluster. 922 */ 923 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) 924 return (ENOBUFS); 925 926 /* 927 * As a consequence, we must always prepare a cluster 928 * at this point. 
929 */ 930 MGET(n, M_DONTWAIT, MT_DATA); 931 if (n) { 932 MCLGET(n, M_DONTWAIT); 933 if ((n->m_flags & M_EXT) == 0) { 934 m_freem(n); 935 n = NULL; 936 } 937 } 938 if (!n) 939 return (ENOBUFS); 940 n->m_len = oldoptlen + JUMBOOPTLEN; 941 memcpy(mtod(n, caddr_t), mtod(mopt, caddr_t), 942 oldoptlen); 943 optbuf = mtod(n, u_int8_t *) + oldoptlen; 944 m_freem(mopt); 945 mopt = exthdrs->ip6e_hbh = n; 946 } else { 947 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len; 948 mopt->m_len += JUMBOOPTLEN; 949 } 950 optbuf[0] = IP6OPT_PADN; 951 optbuf[1] = 0; 952 953 /* 954 * Adjust the header length according to the pad and 955 * the jumbo payload option. 956 */ 957 hbh = mtod(mopt, struct ip6_hbh *); 958 hbh->ip6h_len += (JUMBOOPTLEN >> 3); 959 } 960 961 /* fill in the option. */ 962 optbuf[2] = IP6OPT_JUMBO; 963 optbuf[3] = 4; 964 v = (u_int32_t)htonl(plen + JUMBOOPTLEN); 965 memcpy(&optbuf[4], &v, sizeof(u_int32_t)); 966 967 /* finally, adjust the packet header length */ 968 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; 969 970 return (0); 971 #undef JUMBOOPTLEN 972 } 973 974 /* 975 * Insert fragment header and copy unfragmentable header portions. 976 */ 977 int 978 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, 979 struct ip6_frag **frghdrp) 980 { 981 struct mbuf *n, *mlast; 982 983 if (hlen > sizeof(struct ip6_hdr)) { 984 n = m_copym(m0, sizeof(struct ip6_hdr), 985 hlen - sizeof(struct ip6_hdr), M_DONTWAIT); 986 if (n == NULL) 987 return (ENOBUFS); 988 m->m_next = n; 989 } else 990 n = m; 991 992 /* Search for the last mbuf of unfragmentable part. 
*/ 993 for (mlast = n; mlast->m_next; mlast = mlast->m_next) 994 ; 995 996 if ((mlast->m_flags & M_EXT) == 0 && 997 m_trailingspace(mlast) >= sizeof(struct ip6_frag)) { 998 /* use the trailing space of the last mbuf for fragment hdr */ 999 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + 1000 mlast->m_len); 1001 mlast->m_len += sizeof(struct ip6_frag); 1002 m->m_pkthdr.len += sizeof(struct ip6_frag); 1003 } else { 1004 /* allocate a new mbuf for the fragment header */ 1005 struct mbuf *mfrg; 1006 1007 MGET(mfrg, M_DONTWAIT, MT_DATA); 1008 if (mfrg == NULL) 1009 return (ENOBUFS); 1010 mfrg->m_len = sizeof(struct ip6_frag); 1011 *frghdrp = mtod(mfrg, struct ip6_frag *); 1012 mlast->m_next = mfrg; 1013 } 1014 1015 return (0); 1016 } 1017 1018 int 1019 ip6_getpmtu(struct rtentry *rt, struct ifnet *ifp, u_long *mtup) 1020 { 1021 u_int32_t mtu = 0; 1022 int error = 0; 1023 1024 if (rt != NULL) { 1025 mtu = rt->rt_mtu; 1026 if (mtu == 0) 1027 mtu = ifp->if_mtu; 1028 else if (mtu < IPV6_MMTU) { 1029 /* RFC8021 IPv6 Atomic Fragments Considered Harmful */ 1030 mtu = IPV6_MMTU; 1031 } else if (mtu > ifp->if_mtu) { 1032 /* 1033 * The MTU on the route is larger than the MTU on 1034 * the interface! This shouldn't happen, unless the 1035 * MTU of the interface has been changed after the 1036 * interface was brought up. Change the MTU in the 1037 * route to match the interface MTU (as long as the 1038 * field isn't locked). 1039 */ 1040 mtu = ifp->if_mtu; 1041 if (!(rt->rt_locks & RTV_MTU)) 1042 rt->rt_mtu = mtu; 1043 } 1044 } else { 1045 mtu = ifp->if_mtu; 1046 } 1047 1048 *mtup = mtu; 1049 return (error); 1050 } 1051 1052 /* 1053 * IP6 socket option processing. 
1054 */ 1055 int 1056 ip6_ctloutput(int op, struct socket *so, int level, int optname, 1057 struct mbuf *m) 1058 { 1059 int privileged, optdatalen, uproto; 1060 void *optdata; 1061 struct inpcb *inp = sotoinpcb(so); 1062 int error, optval; 1063 struct proc *p = curproc; /* For IPsec and rdomain */ 1064 u_int rtableid, rtid = 0; 1065 1066 error = optval = 0; 1067 1068 privileged = (inp->inp_socket->so_state & SS_PRIV); 1069 uproto = (int)so->so_proto->pr_protocol; 1070 1071 if (level != IPPROTO_IPV6) 1072 return (EINVAL); 1073 1074 rtableid = p->p_p->ps_rtableid; 1075 1076 switch (op) { 1077 case PRCO_SETOPT: 1078 switch (optname) { 1079 /* 1080 * Use of some Hop-by-Hop options or some 1081 * Destination options, might require special 1082 * privilege. That is, normal applications 1083 * (without special privilege) might be forbidden 1084 * from setting certain options in outgoing packets, 1085 * and might never see certain options in received 1086 * packets. [RFC 2292 Section 6] 1087 * KAME specific note: 1088 * KAME prevents non-privileged users from sending or 1089 * receiving ANY hbh/dst options in order to avoid 1090 * overhead of parsing options in the kernel. 
1091 */ 1092 case IPV6_RECVHOPOPTS: 1093 case IPV6_RECVDSTOPTS: 1094 if (!privileged) { 1095 error = EPERM; 1096 break; 1097 } 1098 /* FALLTHROUGH */ 1099 case IPV6_UNICAST_HOPS: 1100 case IPV6_MINHOPCOUNT: 1101 case IPV6_HOPLIMIT: 1102 1103 case IPV6_RECVPKTINFO: 1104 case IPV6_RECVHOPLIMIT: 1105 case IPV6_RECVRTHDR: 1106 case IPV6_RECVPATHMTU: 1107 case IPV6_RECVTCLASS: 1108 case IPV6_V6ONLY: 1109 case IPV6_AUTOFLOWLABEL: 1110 case IPV6_RECVDSTPORT: 1111 if (m == NULL || m->m_len != sizeof(int)) { 1112 error = EINVAL; 1113 break; 1114 } 1115 optval = *mtod(m, int *); 1116 switch (optname) { 1117 1118 case IPV6_UNICAST_HOPS: 1119 if (optval < -1 || optval >= 256) 1120 error = EINVAL; 1121 else { 1122 /* -1 = kernel default */ 1123 inp->inp_hops = optval; 1124 } 1125 break; 1126 1127 case IPV6_MINHOPCOUNT: 1128 if (optval < 0 || optval > 255) 1129 error = EINVAL; 1130 else 1131 inp->inp_ip6_minhlim = optval; 1132 break; 1133 1134 #define OPTSET(bit) \ 1135 do { \ 1136 if (optval) \ 1137 inp->inp_flags |= (bit); \ 1138 else \ 1139 inp->inp_flags &= ~(bit); \ 1140 } while (/*CONSTCOND*/ 0) 1141 #define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0) 1142 1143 case IPV6_RECVPKTINFO: 1144 OPTSET(IN6P_PKTINFO); 1145 break; 1146 1147 case IPV6_HOPLIMIT: 1148 { 1149 struct ip6_pktopts **optp; 1150 1151 optp = &inp->inp_outputopts6; 1152 error = ip6_pcbopt(IPV6_HOPLIMIT, 1153 (u_char *)&optval, sizeof(optval), optp, 1154 privileged, uproto); 1155 break; 1156 } 1157 1158 case IPV6_RECVHOPLIMIT: 1159 OPTSET(IN6P_HOPLIMIT); 1160 break; 1161 1162 case IPV6_RECVHOPOPTS: 1163 OPTSET(IN6P_HOPOPTS); 1164 break; 1165 1166 case IPV6_RECVDSTOPTS: 1167 OPTSET(IN6P_DSTOPTS); 1168 break; 1169 1170 case IPV6_RECVRTHDR: 1171 OPTSET(IN6P_RTHDR); 1172 break; 1173 1174 case IPV6_RECVPATHMTU: 1175 /* 1176 * We ignore this option for TCP 1177 * sockets. 1178 * (RFC3542 leaves this case 1179 * unspecified.) 
1180 */ 1181 if (uproto != IPPROTO_TCP) 1182 OPTSET(IN6P_MTU); 1183 break; 1184 1185 case IPV6_V6ONLY: 1186 /* 1187 * make setsockopt(IPV6_V6ONLY) 1188 * available only prior to bind(2). 1189 * see ipng mailing list, Jun 22 2001. 1190 */ 1191 if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED( 1192 &inp->inp_laddr6)) { 1193 error = EINVAL; 1194 break; 1195 } 1196 /* No support for IPv4-mapped addresses. */ 1197 if (!optval) 1198 error = EINVAL; 1199 else 1200 error = 0; 1201 break; 1202 case IPV6_RECVTCLASS: 1203 OPTSET(IN6P_TCLASS); 1204 break; 1205 case IPV6_AUTOFLOWLABEL: 1206 OPTSET(IN6P_AUTOFLOWLABEL); 1207 break; 1208 1209 case IPV6_RECVDSTPORT: 1210 OPTSET(IN6P_RECVDSTPORT); 1211 break; 1212 } 1213 break; 1214 1215 case IPV6_TCLASS: 1216 case IPV6_DONTFRAG: 1217 case IPV6_USE_MIN_MTU: 1218 if (m == NULL || m->m_len != sizeof(optval)) { 1219 error = EINVAL; 1220 break; 1221 } 1222 optval = *mtod(m, int *); 1223 { 1224 struct ip6_pktopts **optp; 1225 optp = &inp->inp_outputopts6; 1226 error = ip6_pcbopt(optname, (u_char *)&optval, 1227 sizeof(optval), optp, privileged, uproto); 1228 break; 1229 } 1230 1231 case IPV6_PKTINFO: 1232 case IPV6_HOPOPTS: 1233 case IPV6_RTHDR: 1234 case IPV6_DSTOPTS: 1235 case IPV6_RTHDRDSTOPTS: 1236 { 1237 /* new advanced API (RFC3542) */ 1238 u_char *optbuf; 1239 int optbuflen; 1240 struct ip6_pktopts **optp; 1241 1242 if (m && m->m_next) { 1243 error = EINVAL; /* XXX */ 1244 break; 1245 } 1246 if (m) { 1247 optbuf = mtod(m, u_char *); 1248 optbuflen = m->m_len; 1249 } else { 1250 optbuf = NULL; 1251 optbuflen = 0; 1252 } 1253 optp = &inp->inp_outputopts6; 1254 error = ip6_pcbopt(optname, optbuf, optbuflen, optp, 1255 privileged, uproto); 1256 break; 1257 } 1258 #undef OPTSET 1259 1260 case IPV6_MULTICAST_IF: 1261 case IPV6_MULTICAST_HOPS: 1262 case IPV6_MULTICAST_LOOP: 1263 case IPV6_JOIN_GROUP: 1264 case IPV6_LEAVE_GROUP: 1265 error = ip6_setmoptions(optname, 1266 &inp->inp_moptions6, 1267 m, inp->inp_rtableid); 1268 break; 1269 1270 
case IPV6_PORTRANGE: 1271 if (m == NULL || m->m_len != sizeof(int)) { 1272 error = EINVAL; 1273 break; 1274 } 1275 optval = *mtod(m, int *); 1276 1277 switch (optval) { 1278 case IPV6_PORTRANGE_DEFAULT: 1279 inp->inp_flags &= ~(IN6P_LOWPORT); 1280 inp->inp_flags &= ~(IN6P_HIGHPORT); 1281 break; 1282 1283 case IPV6_PORTRANGE_HIGH: 1284 inp->inp_flags &= ~(IN6P_LOWPORT); 1285 inp->inp_flags |= IN6P_HIGHPORT; 1286 break; 1287 1288 case IPV6_PORTRANGE_LOW: 1289 inp->inp_flags &= ~(IN6P_HIGHPORT); 1290 inp->inp_flags |= IN6P_LOWPORT; 1291 break; 1292 1293 default: 1294 error = EINVAL; 1295 break; 1296 } 1297 break; 1298 1299 case IPSEC6_OUTSA: 1300 error = EINVAL; 1301 break; 1302 1303 case IPV6_AUTH_LEVEL: 1304 case IPV6_ESP_TRANS_LEVEL: 1305 case IPV6_ESP_NETWORK_LEVEL: 1306 case IPV6_IPCOMP_LEVEL: 1307 #ifndef IPSEC 1308 error = EINVAL; 1309 #else 1310 if (m == NULL || m->m_len != sizeof(int)) { 1311 error = EINVAL; 1312 break; 1313 } 1314 optval = *mtod(m, int *); 1315 1316 if (optval < IPSEC_LEVEL_BYPASS || 1317 optval > IPSEC_LEVEL_UNIQUE) { 1318 error = EINVAL; 1319 break; 1320 } 1321 1322 switch (optname) { 1323 case IPV6_AUTH_LEVEL: 1324 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1325 suser(p)) { 1326 error = EACCES; 1327 break; 1328 } 1329 inp->inp_seclevel.sl_auth = optval; 1330 break; 1331 1332 case IPV6_ESP_TRANS_LEVEL: 1333 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1334 suser(p)) { 1335 error = EACCES; 1336 break; 1337 } 1338 inp->inp_seclevel.sl_esp_trans = optval; 1339 break; 1340 1341 case IPV6_ESP_NETWORK_LEVEL: 1342 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1343 suser(p)) { 1344 error = EACCES; 1345 break; 1346 } 1347 inp->inp_seclevel.sl_esp_network = optval; 1348 break; 1349 1350 case IPV6_IPCOMP_LEVEL: 1351 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1352 suser(p)) { 1353 error = EACCES; 1354 break; 1355 } 1356 inp->inp_seclevel.sl_ipcomp = optval; 1357 break; 1358 } 1359 #endif 1360 break; 1361 case SO_RTABLE: 1362 if (m == NULL || m->m_len 
< sizeof(u_int)) { 1363 error = EINVAL; 1364 break; 1365 } 1366 rtid = *mtod(m, u_int *); 1367 if (inp->inp_rtableid == rtid) 1368 break; 1369 /* needs privileges to switch when already set */ 1370 if (rtableid != rtid && rtableid != 0 && 1371 (error = suser(p)) != 0) 1372 break; 1373 error = in_pcbset_rtableid(inp, rtid); 1374 break; 1375 case IPV6_PIPEX: 1376 if (m != NULL && m->m_len == sizeof(int)) 1377 inp->inp_pipex = *mtod(m, int *); 1378 else 1379 error = EINVAL; 1380 break; 1381 1382 default: 1383 error = ENOPROTOOPT; 1384 break; 1385 } 1386 break; 1387 1388 case PRCO_GETOPT: 1389 switch (optname) { 1390 1391 case IPV6_RECVHOPOPTS: 1392 case IPV6_RECVDSTOPTS: 1393 case IPV6_UNICAST_HOPS: 1394 case IPV6_MINHOPCOUNT: 1395 case IPV6_RECVPKTINFO: 1396 case IPV6_RECVHOPLIMIT: 1397 case IPV6_RECVRTHDR: 1398 case IPV6_RECVPATHMTU: 1399 1400 case IPV6_V6ONLY: 1401 case IPV6_PORTRANGE: 1402 case IPV6_RECVTCLASS: 1403 case IPV6_AUTOFLOWLABEL: 1404 case IPV6_RECVDSTPORT: 1405 switch (optname) { 1406 1407 case IPV6_RECVHOPOPTS: 1408 optval = OPTBIT(IN6P_HOPOPTS); 1409 break; 1410 1411 case IPV6_RECVDSTOPTS: 1412 optval = OPTBIT(IN6P_DSTOPTS); 1413 break; 1414 1415 case IPV6_UNICAST_HOPS: 1416 optval = inp->inp_hops; 1417 break; 1418 1419 case IPV6_MINHOPCOUNT: 1420 optval = inp->inp_ip6_minhlim; 1421 break; 1422 1423 case IPV6_RECVPKTINFO: 1424 optval = OPTBIT(IN6P_PKTINFO); 1425 break; 1426 1427 case IPV6_RECVHOPLIMIT: 1428 optval = OPTBIT(IN6P_HOPLIMIT); 1429 break; 1430 1431 case IPV6_RECVRTHDR: 1432 optval = OPTBIT(IN6P_RTHDR); 1433 break; 1434 1435 case IPV6_RECVPATHMTU: 1436 optval = OPTBIT(IN6P_MTU); 1437 break; 1438 1439 case IPV6_V6ONLY: 1440 optval = 1; 1441 break; 1442 1443 case IPV6_PORTRANGE: 1444 { 1445 int flags; 1446 flags = inp->inp_flags; 1447 if (flags & IN6P_HIGHPORT) 1448 optval = IPV6_PORTRANGE_HIGH; 1449 else if (flags & IN6P_LOWPORT) 1450 optval = IPV6_PORTRANGE_LOW; 1451 else 1452 optval = 0; 1453 break; 1454 } 1455 case IPV6_RECVTCLASS: 1456 
optval = OPTBIT(IN6P_TCLASS); 1457 break; 1458 1459 case IPV6_AUTOFLOWLABEL: 1460 optval = OPTBIT(IN6P_AUTOFLOWLABEL); 1461 break; 1462 1463 case IPV6_RECVDSTPORT: 1464 optval = OPTBIT(IN6P_RECVDSTPORT); 1465 break; 1466 } 1467 if (error) 1468 break; 1469 m->m_len = sizeof(int); 1470 *mtod(m, int *) = optval; 1471 break; 1472 1473 case IPV6_PATHMTU: 1474 { 1475 u_long pmtu = 0; 1476 struct ip6_mtuinfo mtuinfo; 1477 struct ifnet *ifp; 1478 struct rtentry *rt; 1479 1480 if (!(so->so_state & SS_ISCONNECTED)) 1481 return (ENOTCONN); 1482 1483 rt = in6_pcbrtentry(inp); 1484 if (!rtisvalid(rt)) 1485 return (EHOSTUNREACH); 1486 1487 ifp = if_get(rt->rt_ifidx); 1488 if (ifp == NULL) 1489 return (EHOSTUNREACH); 1490 /* 1491 * XXX: we dot not consider the case of source 1492 * routing, or optional information to specify 1493 * the outgoing interface. 1494 */ 1495 error = ip6_getpmtu(rt, ifp, &pmtu); 1496 if_put(ifp); 1497 if (error) 1498 break; 1499 if (pmtu > IPV6_MAXPACKET) 1500 pmtu = IPV6_MAXPACKET; 1501 1502 bzero(&mtuinfo, sizeof(mtuinfo)); 1503 mtuinfo.ip6m_mtu = (u_int32_t)pmtu; 1504 optdata = (void *)&mtuinfo; 1505 optdatalen = sizeof(mtuinfo); 1506 if (optdatalen > MCLBYTES) 1507 return (EMSGSIZE); /* XXX */ 1508 if (optdatalen > MLEN) 1509 MCLGET(m, M_WAIT); 1510 m->m_len = optdatalen; 1511 bcopy(optdata, mtod(m, void *), optdatalen); 1512 break; 1513 } 1514 1515 case IPV6_PKTINFO: 1516 case IPV6_HOPOPTS: 1517 case IPV6_RTHDR: 1518 case IPV6_DSTOPTS: 1519 case IPV6_RTHDRDSTOPTS: 1520 case IPV6_TCLASS: 1521 case IPV6_DONTFRAG: 1522 case IPV6_USE_MIN_MTU: 1523 error = ip6_getpcbopt(inp->inp_outputopts6, 1524 optname, m); 1525 break; 1526 1527 case IPV6_MULTICAST_IF: 1528 case IPV6_MULTICAST_HOPS: 1529 case IPV6_MULTICAST_LOOP: 1530 case IPV6_JOIN_GROUP: 1531 case IPV6_LEAVE_GROUP: 1532 error = ip6_getmoptions(optname, 1533 inp->inp_moptions6, m); 1534 break; 1535 1536 case IPSEC6_OUTSA: 1537 error = EINVAL; 1538 break; 1539 1540 case IPV6_AUTH_LEVEL: 1541 case 
IPV6_ESP_TRANS_LEVEL: 1542 case IPV6_ESP_NETWORK_LEVEL: 1543 case IPV6_IPCOMP_LEVEL: 1544 #ifndef IPSEC 1545 m->m_len = sizeof(int); 1546 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1547 #else 1548 m->m_len = sizeof(int); 1549 switch (optname) { 1550 case IPV6_AUTH_LEVEL: 1551 optval = inp->inp_seclevel.sl_auth; 1552 break; 1553 1554 case IPV6_ESP_TRANS_LEVEL: 1555 optval = 1556 inp->inp_seclevel.sl_esp_trans; 1557 break; 1558 1559 case IPV6_ESP_NETWORK_LEVEL: 1560 optval = 1561 inp->inp_seclevel.sl_esp_network; 1562 break; 1563 1564 case IPV6_IPCOMP_LEVEL: 1565 optval = inp->inp_seclevel.sl_ipcomp; 1566 break; 1567 } 1568 *mtod(m, int *) = optval; 1569 #endif 1570 break; 1571 case SO_RTABLE: 1572 m->m_len = sizeof(u_int); 1573 *mtod(m, u_int *) = inp->inp_rtableid; 1574 break; 1575 case IPV6_PIPEX: 1576 m->m_len = sizeof(int); 1577 *mtod(m, int *) = inp->inp_pipex; 1578 break; 1579 1580 default: 1581 error = ENOPROTOOPT; 1582 break; 1583 } 1584 break; 1585 } 1586 return (error); 1587 } 1588 1589 int 1590 ip6_raw_ctloutput(int op, struct socket *so, int level, int optname, 1591 struct mbuf *m) 1592 { 1593 int error = 0, optval; 1594 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); 1595 struct inpcb *inp = sotoinpcb(so); 1596 1597 if (level != IPPROTO_IPV6) 1598 return (EINVAL); 1599 1600 switch (optname) { 1601 case IPV6_CHECKSUM: 1602 /* 1603 * For ICMPv6 sockets, no modification allowed for checksum 1604 * offset, permit "no change" values to help existing apps. 1605 * 1606 * RFC3542 says: "An attempt to set IPV6_CHECKSUM 1607 * for an ICMPv6 socket will fail." 1608 * The current behavior does not meet RFC3542. 1609 */ 1610 switch (op) { 1611 case PRCO_SETOPT: 1612 if (m == NULL || m->m_len != sizeof(int)) { 1613 error = EINVAL; 1614 break; 1615 } 1616 optval = *mtod(m, int *); 1617 if (optval < -1 || 1618 (optval > 0 && (optval % 2) != 0)) { 1619 /* 1620 * The API assumes non-negative even offset 1621 * values or -1 as a special value. 
1622 */ 1623 error = EINVAL; 1624 } else if (so->so_proto->pr_protocol == 1625 IPPROTO_ICMPV6) { 1626 if (optval != icmp6off) 1627 error = EINVAL; 1628 } else 1629 inp->inp_cksum6 = optval; 1630 break; 1631 1632 case PRCO_GETOPT: 1633 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) 1634 optval = icmp6off; 1635 else 1636 optval = inp->inp_cksum6; 1637 1638 m->m_len = sizeof(int); 1639 *mtod(m, int *) = optval; 1640 break; 1641 1642 default: 1643 error = EINVAL; 1644 break; 1645 } 1646 break; 1647 1648 default: 1649 error = ENOPROTOOPT; 1650 break; 1651 } 1652 1653 return (error); 1654 } 1655 1656 /* 1657 * initialize ip6_pktopts. beware that there are non-zero default values in 1658 * the struct. 1659 */ 1660 void 1661 ip6_initpktopts(struct ip6_pktopts *opt) 1662 { 1663 bzero(opt, sizeof(*opt)); 1664 opt->ip6po_hlim = -1; /* -1 means default hop limit */ 1665 opt->ip6po_tclass = -1; /* -1 means default traffic class */ 1666 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; 1667 } 1668 1669 int 1670 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, 1671 int priv, int uproto) 1672 { 1673 struct ip6_pktopts *opt; 1674 1675 if (*pktopt == NULL) { 1676 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, 1677 M_WAITOK); 1678 ip6_initpktopts(*pktopt); 1679 } 1680 opt = *pktopt; 1681 1682 return (ip6_setpktopt(optname, buf, len, opt, priv, 1, uproto)); 1683 } 1684 1685 int 1686 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct mbuf *m) 1687 { 1688 void *optdata = NULL; 1689 int optdatalen = 0; 1690 struct ip6_ext *ip6e; 1691 int error = 0; 1692 struct in6_pktinfo null_pktinfo; 1693 int deftclass = 0, on; 1694 int defminmtu = IP6PO_MINMTU_MCASTONLY; 1695 1696 switch (optname) { 1697 case IPV6_PKTINFO: 1698 if (pktopt && pktopt->ip6po_pktinfo) 1699 optdata = (void *)pktopt->ip6po_pktinfo; 1700 else { 1701 /* XXX: we don't have to do this every time... 
*/ 1702 bzero(&null_pktinfo, sizeof(null_pktinfo)); 1703 optdata = (void *)&null_pktinfo; 1704 } 1705 optdatalen = sizeof(struct in6_pktinfo); 1706 break; 1707 case IPV6_TCLASS: 1708 if (pktopt && pktopt->ip6po_tclass >= 0) 1709 optdata = (void *)&pktopt->ip6po_tclass; 1710 else 1711 optdata = (void *)&deftclass; 1712 optdatalen = sizeof(int); 1713 break; 1714 case IPV6_HOPOPTS: 1715 if (pktopt && pktopt->ip6po_hbh) { 1716 optdata = (void *)pktopt->ip6po_hbh; 1717 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; 1718 optdatalen = (ip6e->ip6e_len + 1) << 3; 1719 } 1720 break; 1721 case IPV6_RTHDR: 1722 if (pktopt && pktopt->ip6po_rthdr) { 1723 optdata = (void *)pktopt->ip6po_rthdr; 1724 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; 1725 optdatalen = (ip6e->ip6e_len + 1) << 3; 1726 } 1727 break; 1728 case IPV6_RTHDRDSTOPTS: 1729 if (pktopt && pktopt->ip6po_dest1) { 1730 optdata = (void *)pktopt->ip6po_dest1; 1731 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; 1732 optdatalen = (ip6e->ip6e_len + 1) << 3; 1733 } 1734 break; 1735 case IPV6_DSTOPTS: 1736 if (pktopt && pktopt->ip6po_dest2) { 1737 optdata = (void *)pktopt->ip6po_dest2; 1738 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; 1739 optdatalen = (ip6e->ip6e_len + 1) << 3; 1740 } 1741 break; 1742 case IPV6_USE_MIN_MTU: 1743 if (pktopt) 1744 optdata = (void *)&pktopt->ip6po_minmtu; 1745 else 1746 optdata = (void *)&defminmtu; 1747 optdatalen = sizeof(int); 1748 break; 1749 case IPV6_DONTFRAG: 1750 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) 1751 on = 1; 1752 else 1753 on = 0; 1754 optdata = (void *)&on; 1755 optdatalen = sizeof(on); 1756 break; 1757 default: /* should not happen */ 1758 #ifdef DIAGNOSTIC 1759 panic("%s: unexpected option", __func__); 1760 #endif 1761 return (ENOPROTOOPT); 1762 } 1763 1764 if (optdatalen > MCLBYTES) 1765 return (EMSGSIZE); /* XXX */ 1766 if (optdatalen > MLEN) 1767 MCLGET(m, M_WAIT); 1768 m->m_len = optdatalen; 1769 if (optdatalen) 1770 bcopy(optdata, mtod(m, void *), 
optdatalen); 1771 1772 return (error); 1773 } 1774 1775 void 1776 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) 1777 { 1778 if (optname == -1 || optname == IPV6_PKTINFO) { 1779 if (pktopt->ip6po_pktinfo) 1780 free(pktopt->ip6po_pktinfo, M_IP6OPT, 0); 1781 pktopt->ip6po_pktinfo = NULL; 1782 } 1783 if (optname == -1 || optname == IPV6_HOPLIMIT) 1784 pktopt->ip6po_hlim = -1; 1785 if (optname == -1 || optname == IPV6_TCLASS) 1786 pktopt->ip6po_tclass = -1; 1787 if (optname == -1 || optname == IPV6_HOPOPTS) { 1788 if (pktopt->ip6po_hbh) 1789 free(pktopt->ip6po_hbh, M_IP6OPT, 0); 1790 pktopt->ip6po_hbh = NULL; 1791 } 1792 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { 1793 if (pktopt->ip6po_dest1) 1794 free(pktopt->ip6po_dest1, M_IP6OPT, 0); 1795 pktopt->ip6po_dest1 = NULL; 1796 } 1797 if (optname == -1 || optname == IPV6_RTHDR) { 1798 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) 1799 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT, 0); 1800 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; 1801 if (pktopt->ip6po_route.ro_rt) { 1802 rtfree(pktopt->ip6po_route.ro_rt); 1803 pktopt->ip6po_route.ro_rt = NULL; 1804 } 1805 } 1806 if (optname == -1 || optname == IPV6_DSTOPTS) { 1807 if (pktopt->ip6po_dest2) 1808 free(pktopt->ip6po_dest2, M_IP6OPT, 0); 1809 pktopt->ip6po_dest2 = NULL; 1810 } 1811 } 1812 1813 #define PKTOPT_EXTHDRCPY(type) \ 1814 do {\ 1815 if (src->type) {\ 1816 size_t hlen;\ 1817 hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ 1818 dst->type = malloc(hlen, M_IP6OPT, M_NOWAIT);\ 1819 if (dst->type == NULL)\ 1820 goto bad;\ 1821 memcpy(dst->type, src->type, hlen);\ 1822 }\ 1823 } while (/*CONSTCOND*/ 0) 1824 1825 int 1826 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src) 1827 { 1828 dst->ip6po_hlim = src->ip6po_hlim; 1829 dst->ip6po_tclass = src->ip6po_tclass; 1830 dst->ip6po_flags = src->ip6po_flags; 1831 if (src->ip6po_pktinfo) { 1832 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), 1833 M_IP6OPT, M_NOWAIT); 1834 if 
(dst->ip6po_pktinfo == NULL) 1835 goto bad; 1836 *dst->ip6po_pktinfo = *src->ip6po_pktinfo; 1837 } 1838 PKTOPT_EXTHDRCPY(ip6po_hbh); 1839 PKTOPT_EXTHDRCPY(ip6po_dest1); 1840 PKTOPT_EXTHDRCPY(ip6po_dest2); 1841 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ 1842 return (0); 1843 1844 bad: 1845 ip6_clearpktopts(dst, -1); 1846 return (ENOBUFS); 1847 } 1848 #undef PKTOPT_EXTHDRCPY 1849 1850 void 1851 ip6_freepcbopts(struct ip6_pktopts *pktopt) 1852 { 1853 if (pktopt == NULL) 1854 return; 1855 1856 ip6_clearpktopts(pktopt, -1); 1857 1858 free(pktopt, M_IP6OPT, 0); 1859 } 1860 1861 /* 1862 * Set the IP6 multicast options in response to user setsockopt(). 1863 */ 1864 int 1865 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m, 1866 unsigned int rtableid) 1867 { 1868 int error = 0; 1869 u_int loop, ifindex; 1870 struct ipv6_mreq *mreq; 1871 struct ifnet *ifp; 1872 struct ip6_moptions *im6o = *im6op; 1873 struct in6_multi_mship *imm; 1874 struct proc *p = curproc; /* XXX */ 1875 1876 if (im6o == NULL) { 1877 /* 1878 * No multicast option buffer attached to the pcb; 1879 * allocate one and initialize to default values. 1880 */ 1881 im6o = malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK); 1882 if (im6o == NULL) 1883 return (ENOBUFS); 1884 *im6op = im6o; 1885 im6o->im6o_ifidx = 0; 1886 im6o->im6o_hlim = ip6_defmcasthlim; 1887 im6o->im6o_loop = IPV6_DEFAULT_MULTICAST_LOOP; 1888 LIST_INIT(&im6o->im6o_memberships); 1889 } 1890 1891 switch (optname) { 1892 1893 case IPV6_MULTICAST_IF: 1894 /* 1895 * Select the interface for outgoing multicast packets. 1896 */ 1897 if (m == NULL || m->m_len != sizeof(u_int)) { 1898 error = EINVAL; 1899 break; 1900 } 1901 memcpy(&ifindex, mtod(m, u_int *), sizeof(ifindex)); 1902 if (ifindex != 0) { 1903 ifp = if_get(ifindex); 1904 if (ifp == NULL) { 1905 error = ENXIO; /* XXX EINVAL? 
*/ 1906 break; 1907 } 1908 if (ifp->if_rdomain != rtable_l2(rtableid) || 1909 (ifp->if_flags & IFF_MULTICAST) == 0) { 1910 error = EADDRNOTAVAIL; 1911 if_put(ifp); 1912 break; 1913 } 1914 if_put(ifp); 1915 } 1916 im6o->im6o_ifidx = ifindex; 1917 break; 1918 1919 case IPV6_MULTICAST_HOPS: 1920 { 1921 /* 1922 * Set the IP6 hoplimit for outgoing multicast packets. 1923 */ 1924 int optval; 1925 if (m == NULL || m->m_len != sizeof(int)) { 1926 error = EINVAL; 1927 break; 1928 } 1929 memcpy(&optval, mtod(m, u_int *), sizeof(optval)); 1930 if (optval < -1 || optval >= 256) 1931 error = EINVAL; 1932 else if (optval == -1) 1933 im6o->im6o_hlim = ip6_defmcasthlim; 1934 else 1935 im6o->im6o_hlim = optval; 1936 break; 1937 } 1938 1939 case IPV6_MULTICAST_LOOP: 1940 /* 1941 * Set the loopback flag for outgoing multicast packets. 1942 * Must be zero or one. 1943 */ 1944 if (m == NULL || m->m_len != sizeof(u_int)) { 1945 error = EINVAL; 1946 break; 1947 } 1948 memcpy(&loop, mtod(m, u_int *), sizeof(loop)); 1949 if (loop > 1) { 1950 error = EINVAL; 1951 break; 1952 } 1953 im6o->im6o_loop = loop; 1954 break; 1955 1956 case IPV6_JOIN_GROUP: 1957 /* 1958 * Add a multicast group membership. 1959 * Group must be a valid IP6 multicast address. 1960 */ 1961 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { 1962 error = EINVAL; 1963 break; 1964 } 1965 mreq = mtod(m, struct ipv6_mreq *); 1966 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { 1967 /* 1968 * We use the unspecified address to specify to accept 1969 * all multicast addresses. Only super user is allowed 1970 * to do this. 1971 */ 1972 if (suser(p)) 1973 { 1974 error = EACCES; 1975 break; 1976 } 1977 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { 1978 error = EINVAL; 1979 break; 1980 } 1981 1982 /* 1983 * If no interface was explicitly specified, choose an 1984 * appropriate one according to the given multicast address. 
1985 */ 1986 if (mreq->ipv6mr_interface == 0) { 1987 struct rtentry *rt; 1988 struct sockaddr_in6 dst; 1989 1990 memset(&dst, 0, sizeof(dst)); 1991 dst.sin6_len = sizeof(dst); 1992 dst.sin6_family = AF_INET6; 1993 dst.sin6_addr = mreq->ipv6mr_multiaddr; 1994 rt = rtalloc(sin6tosa(&dst), RT_RESOLVE, rtableid); 1995 if (rt == NULL) { 1996 error = EADDRNOTAVAIL; 1997 break; 1998 } 1999 ifp = if_get(rt->rt_ifidx); 2000 rtfree(rt); 2001 } else { 2002 /* 2003 * If the interface is specified, validate it. 2004 */ 2005 ifp = if_get(mreq->ipv6mr_interface); 2006 if (ifp == NULL) { 2007 error = ENXIO; /* XXX EINVAL? */ 2008 break; 2009 } 2010 } 2011 2012 /* 2013 * See if we found an interface, and confirm that it 2014 * supports multicast 2015 */ 2016 if (ifp == NULL || ifp->if_rdomain != rtable_l2(rtableid) || 2017 (ifp->if_flags & IFF_MULTICAST) == 0) { 2018 if_put(ifp); 2019 error = EADDRNOTAVAIL; 2020 break; 2021 } 2022 /* 2023 * Put interface index into the multicast address, 2024 * if the address has link/interface-local scope. 2025 */ 2026 if (IN6_IS_SCOPE_EMBED(&mreq->ipv6mr_multiaddr)) { 2027 mreq->ipv6mr_multiaddr.s6_addr16[1] = 2028 htons(ifp->if_index); 2029 } 2030 /* 2031 * See if the membership already exists. 2032 */ 2033 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) 2034 if (imm->i6mm_maddr->in6m_ifidx == ifp->if_index && 2035 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, 2036 &mreq->ipv6mr_multiaddr)) 2037 break; 2038 if (imm != NULL) { 2039 if_put(ifp); 2040 error = EADDRINUSE; 2041 break; 2042 } 2043 /* 2044 * Everything looks good; add a new record to the multicast 2045 * address list for the given interface. 2046 */ 2047 imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error); 2048 if_put(ifp); 2049 if (!imm) 2050 break; 2051 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 2052 break; 2053 2054 case IPV6_LEAVE_GROUP: 2055 /* 2056 * Drop a multicast group membership. 2057 * Group must be a valid IP6 multicast address. 
2058 */ 2059 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { 2060 error = EINVAL; 2061 break; 2062 } 2063 mreq = mtod(m, struct ipv6_mreq *); 2064 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { 2065 if (suser(p)) { 2066 error = EACCES; 2067 break; 2068 } 2069 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { 2070 error = EINVAL; 2071 break; 2072 } 2073 2074 /* 2075 * Put interface index into the multicast address, 2076 * if the address has link-local scope. 2077 */ 2078 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { 2079 mreq->ipv6mr_multiaddr.s6_addr16[1] = 2080 htons(mreq->ipv6mr_interface); 2081 } 2082 2083 /* 2084 * If an interface address was specified, get a pointer 2085 * to its ifnet structure. 2086 */ 2087 if (mreq->ipv6mr_interface == 0) 2088 ifp = NULL; 2089 else { 2090 ifp = if_get(mreq->ipv6mr_interface); 2091 if (ifp == NULL) { 2092 error = ENXIO; /* XXX EINVAL? */ 2093 break; 2094 } 2095 } 2096 2097 /* 2098 * Find the membership in the membership list. 2099 */ 2100 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) { 2101 if ((ifp == NULL || 2102 imm->i6mm_maddr->in6m_ifidx == ifp->if_index) && 2103 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, 2104 &mreq->ipv6mr_multiaddr)) 2105 break; 2106 } 2107 2108 if_put(ifp); 2109 2110 if (imm == NULL) { 2111 /* Unable to resolve interface */ 2112 error = EADDRNOTAVAIL; 2113 break; 2114 } 2115 /* 2116 * Give up the multicast address record to which the 2117 * membership points. 2118 */ 2119 LIST_REMOVE(imm, i6mm_chain); 2120 in6_leavegroup(imm); 2121 break; 2122 2123 default: 2124 error = EOPNOTSUPP; 2125 break; 2126 } 2127 2128 /* 2129 * If all options have default values, no need to keep the option 2130 * structure. 
2131 */ 2132 if (im6o->im6o_ifidx == 0 && 2133 im6o->im6o_hlim == ip6_defmcasthlim && 2134 im6o->im6o_loop == IPV6_DEFAULT_MULTICAST_LOOP && 2135 LIST_EMPTY(&im6o->im6o_memberships)) { 2136 free(*im6op, M_IPMOPTS, sizeof(**im6op)); 2137 *im6op = NULL; 2138 } 2139 2140 return (error); 2141 } 2142 2143 /* 2144 * Return the IP6 multicast options in response to user getsockopt(). 2145 */ 2146 int 2147 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf *m) 2148 { 2149 u_int *hlim, *loop, *ifindex; 2150 2151 switch (optname) { 2152 case IPV6_MULTICAST_IF: 2153 ifindex = mtod(m, u_int *); 2154 m->m_len = sizeof(u_int); 2155 if (im6o == NULL || im6o->im6o_ifidx == 0) 2156 *ifindex = 0; 2157 else 2158 *ifindex = im6o->im6o_ifidx; 2159 return (0); 2160 2161 case IPV6_MULTICAST_HOPS: 2162 hlim = mtod(m, u_int *); 2163 m->m_len = sizeof(u_int); 2164 if (im6o == NULL) 2165 *hlim = ip6_defmcasthlim; 2166 else 2167 *hlim = im6o->im6o_hlim; 2168 return (0); 2169 2170 case IPV6_MULTICAST_LOOP: 2171 loop = mtod(m, u_int *); 2172 m->m_len = sizeof(u_int); 2173 if (im6o == NULL) 2174 *loop = ip6_defmcasthlim; 2175 else 2176 *loop = im6o->im6o_loop; 2177 return (0); 2178 2179 default: 2180 return (EOPNOTSUPP); 2181 } 2182 } 2183 2184 /* 2185 * Discard the IP6 multicast options. 2186 */ 2187 void 2188 ip6_freemoptions(struct ip6_moptions *im6o) 2189 { 2190 struct in6_multi_mship *imm; 2191 2192 if (im6o == NULL) 2193 return; 2194 2195 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 2196 imm = LIST_FIRST(&im6o->im6o_memberships); 2197 LIST_REMOVE(imm, i6mm_chain); 2198 in6_leavegroup(imm); 2199 } 2200 free(im6o, M_IPMOPTS, sizeof(*im6o)); 2201 } 2202 2203 /* 2204 * Set IPv6 outgoing packet options based on advanced API. 
2205 */ 2206 int 2207 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, 2208 struct ip6_pktopts *stickyopt, int priv, int uproto) 2209 { 2210 u_int clen; 2211 struct cmsghdr *cm = 0; 2212 caddr_t cmsgs; 2213 int error; 2214 2215 if (control == NULL || opt == NULL) 2216 return (EINVAL); 2217 2218 ip6_initpktopts(opt); 2219 if (stickyopt) { 2220 int error; 2221 2222 /* 2223 * If stickyopt is provided, make a local copy of the options 2224 * for this particular packet, then override them by ancillary 2225 * objects. 2226 * XXX: copypktopts() does not copy the cached route to a next 2227 * hop (if any). This is not very good in terms of efficiency, 2228 * but we can allow this since this option should be rarely 2229 * used. 2230 */ 2231 if ((error = copypktopts(opt, stickyopt)) != 0) 2232 return (error); 2233 } 2234 2235 /* 2236 * XXX: Currently, we assume all the optional information is stored 2237 * in a single mbuf. 2238 */ 2239 if (control->m_next) 2240 return (EINVAL); 2241 2242 clen = control->m_len; 2243 cmsgs = mtod(control, caddr_t); 2244 do { 2245 if (clen < CMSG_LEN(0)) 2246 return (EINVAL); 2247 cm = (struct cmsghdr *)cmsgs; 2248 if (cm->cmsg_len < CMSG_LEN(0) || cm->cmsg_len > clen || 2249 CMSG_ALIGN(cm->cmsg_len) > clen) 2250 return (EINVAL); 2251 if (cm->cmsg_level == IPPROTO_IPV6) { 2252 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), 2253 cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, uproto); 2254 if (error) 2255 return (error); 2256 } 2257 2258 clen -= CMSG_ALIGN(cm->cmsg_len); 2259 cmsgs += CMSG_ALIGN(cm->cmsg_len); 2260 } while (clen); 2261 2262 return (0); 2263 } 2264 2265 /* 2266 * Set a particular packet option, as a sticky option or an ancillary data 2267 * item. "len" can be 0 only when it's a sticky option. 
2268 */ 2269 int 2270 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, 2271 int priv, int sticky, int uproto) 2272 { 2273 int minmtupolicy; 2274 2275 switch (optname) { 2276 case IPV6_PKTINFO: 2277 { 2278 struct ifnet *ifp = NULL; 2279 struct in6_pktinfo *pktinfo; 2280 2281 if (len != sizeof(struct in6_pktinfo)) 2282 return (EINVAL); 2283 2284 pktinfo = (struct in6_pktinfo *)buf; 2285 2286 /* 2287 * An application can clear any sticky IPV6_PKTINFO option by 2288 * doing a "regular" setsockopt with ipi6_addr being 2289 * in6addr_any and ipi6_ifindex being zero. 2290 * [RFC 3542, Section 6] 2291 */ 2292 if (opt->ip6po_pktinfo && 2293 pktinfo->ipi6_ifindex == 0 && 2294 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 2295 ip6_clearpktopts(opt, optname); 2296 break; 2297 } 2298 2299 if (uproto == IPPROTO_TCP && 2300 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 2301 return (EINVAL); 2302 } 2303 2304 if (pktinfo->ipi6_ifindex) { 2305 ifp = if_get(pktinfo->ipi6_ifindex); 2306 if (ifp == NULL) 2307 return (ENXIO); 2308 if_put(ifp); 2309 } 2310 2311 /* 2312 * We store the address anyway, and let in6_selectsrc() 2313 * validate the specified address. This is because ipi6_addr 2314 * may not have enough information about its scope zone, and 2315 * we may need additional information (such as outgoing 2316 * interface or the scope zone of a destination address) to 2317 * disambiguate the scope. 2318 * XXX: the delay of the validation may confuse the 2319 * application when it is used as a sticky option. 
2320 */ 2321 if (opt->ip6po_pktinfo == NULL) { 2322 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), 2323 M_IP6OPT, M_NOWAIT); 2324 if (opt->ip6po_pktinfo == NULL) 2325 return (ENOBUFS); 2326 } 2327 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); 2328 break; 2329 } 2330 2331 case IPV6_HOPLIMIT: 2332 { 2333 int *hlimp; 2334 2335 /* 2336 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT 2337 * to simplify the ordering among hoplimit options. 2338 */ 2339 if (sticky) 2340 return (ENOPROTOOPT); 2341 2342 if (len != sizeof(int)) 2343 return (EINVAL); 2344 hlimp = (int *)buf; 2345 if (*hlimp < -1 || *hlimp > 255) 2346 return (EINVAL); 2347 2348 opt->ip6po_hlim = *hlimp; 2349 break; 2350 } 2351 2352 case IPV6_TCLASS: 2353 { 2354 int tclass; 2355 2356 if (len != sizeof(int)) 2357 return (EINVAL); 2358 tclass = *(int *)buf; 2359 if (tclass < -1 || tclass > 255) 2360 return (EINVAL); 2361 2362 opt->ip6po_tclass = tclass; 2363 break; 2364 } 2365 case IPV6_HOPOPTS: 2366 { 2367 struct ip6_hbh *hbh; 2368 int hbhlen; 2369 2370 /* 2371 * XXX: We don't allow a non-privileged user to set ANY HbH 2372 * options, since per-option restriction has too much 2373 * overhead. 2374 */ 2375 if (!priv) 2376 return (EPERM); 2377 2378 if (len == 0) { 2379 ip6_clearpktopts(opt, IPV6_HOPOPTS); 2380 break; /* just remove the option */ 2381 } 2382 2383 /* message length validation */ 2384 if (len < sizeof(struct ip6_hbh)) 2385 return (EINVAL); 2386 hbh = (struct ip6_hbh *)buf; 2387 hbhlen = (hbh->ip6h_len + 1) << 3; 2388 if (len != hbhlen) 2389 return (EINVAL); 2390 2391 /* turn off the previous option, then set the new option. 
*/ 2392 ip6_clearpktopts(opt, IPV6_HOPOPTS); 2393 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); 2394 if (opt->ip6po_hbh == NULL) 2395 return (ENOBUFS); 2396 memcpy(opt->ip6po_hbh, hbh, hbhlen); 2397 2398 break; 2399 } 2400 2401 case IPV6_DSTOPTS: 2402 case IPV6_RTHDRDSTOPTS: 2403 { 2404 struct ip6_dest *dest, **newdest = NULL; 2405 int destlen; 2406 2407 if (!priv) /* XXX: see the comment for IPV6_HOPOPTS */ 2408 return (EPERM); 2409 2410 if (len == 0) { 2411 ip6_clearpktopts(opt, optname); 2412 break; /* just remove the option */ 2413 } 2414 2415 /* message length validation */ 2416 if (len < sizeof(struct ip6_dest)) 2417 return (EINVAL); 2418 dest = (struct ip6_dest *)buf; 2419 destlen = (dest->ip6d_len + 1) << 3; 2420 if (len != destlen) 2421 return (EINVAL); 2422 /* 2423 * Determine the position that the destination options header 2424 * should be inserted; before or after the routing header. 2425 */ 2426 switch (optname) { 2427 case IPV6_RTHDRDSTOPTS: 2428 newdest = &opt->ip6po_dest1; 2429 break; 2430 case IPV6_DSTOPTS: 2431 newdest = &opt->ip6po_dest2; 2432 break; 2433 } 2434 2435 /* turn off the previous option, then set the new option. 
*/ 2436 ip6_clearpktopts(opt, optname); 2437 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); 2438 if (*newdest == NULL) 2439 return (ENOBUFS); 2440 memcpy(*newdest, dest, destlen); 2441 2442 break; 2443 } 2444 2445 case IPV6_RTHDR: 2446 { 2447 struct ip6_rthdr *rth; 2448 int rthlen; 2449 2450 if (len == 0) { 2451 ip6_clearpktopts(opt, IPV6_RTHDR); 2452 break; /* just remove the option */ 2453 } 2454 2455 /* message length validation */ 2456 if (len < sizeof(struct ip6_rthdr)) 2457 return (EINVAL); 2458 rth = (struct ip6_rthdr *)buf; 2459 rthlen = (rth->ip6r_len + 1) << 3; 2460 if (len != rthlen) 2461 return (EINVAL); 2462 2463 switch (rth->ip6r_type) { 2464 case IPV6_RTHDR_TYPE_0: 2465 if (rth->ip6r_len == 0) /* must contain one addr */ 2466 return (EINVAL); 2467 if (rth->ip6r_len % 2) /* length must be even */ 2468 return (EINVAL); 2469 if (rth->ip6r_len / 2 != rth->ip6r_segleft) 2470 return (EINVAL); 2471 break; 2472 default: 2473 return (EINVAL); /* not supported */ 2474 } 2475 /* turn off the previous option */ 2476 ip6_clearpktopts(opt, IPV6_RTHDR); 2477 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); 2478 if (opt->ip6po_rthdr == NULL) 2479 return (ENOBUFS); 2480 memcpy(opt->ip6po_rthdr, rth, rthlen); 2481 break; 2482 } 2483 2484 case IPV6_USE_MIN_MTU: 2485 if (len != sizeof(int)) 2486 return (EINVAL); 2487 minmtupolicy = *(int *)buf; 2488 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && 2489 minmtupolicy != IP6PO_MINMTU_DISABLE && 2490 minmtupolicy != IP6PO_MINMTU_ALL) { 2491 return (EINVAL); 2492 } 2493 opt->ip6po_minmtu = minmtupolicy; 2494 break; 2495 2496 case IPV6_DONTFRAG: 2497 if (len != sizeof(int)) 2498 return (EINVAL); 2499 2500 if (uproto == IPPROTO_TCP || *(int *)buf == 0) { 2501 /* 2502 * we ignore this option for TCP sockets. 2503 * (RFC3542 leaves this case unspecified.) 
2504 */ 2505 opt->ip6po_flags &= ~IP6PO_DONTFRAG; 2506 } else 2507 opt->ip6po_flags |= IP6PO_DONTFRAG; 2508 break; 2509 2510 default: 2511 return (ENOPROTOOPT); 2512 } /* end of switch */ 2513 2514 return (0); 2515 } 2516 2517 /* 2518 * Routine called from ip6_output() to loop back a copy of an IP6 multicast 2519 * packet to the input queue of a specified interface. 2520 */ 2521 void 2522 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) 2523 { 2524 struct mbuf *copym; 2525 struct ip6_hdr *ip6; 2526 2527 /* 2528 * Duplicate the packet. 2529 */ 2530 copym = m_copym(m, 0, M_COPYALL, M_NOWAIT); 2531 if (copym == NULL) 2532 return; 2533 2534 /* 2535 * Make sure to deep-copy IPv6 header portion in case the data 2536 * is in an mbuf cluster, so that we can safely override the IPv6 2537 * header portion later. 2538 */ 2539 if ((copym->m_flags & M_EXT) != 0 || 2540 copym->m_len < sizeof(struct ip6_hdr)) { 2541 copym = m_pullup(copym, sizeof(struct ip6_hdr)); 2542 if (copym == NULL) 2543 return; 2544 } 2545 2546 #ifdef DIAGNOSTIC 2547 if (copym->m_len < sizeof(*ip6)) { 2548 m_freem(copym); 2549 return; 2550 } 2551 #endif 2552 2553 ip6 = mtod(copym, struct ip6_hdr *); 2554 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 2555 ip6->ip6_src.s6_addr16[1] = 0; 2556 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 2557 ip6->ip6_dst.s6_addr16[1] = 0; 2558 2559 if_input_local(ifp, copym, dst->sin6_family); 2560 } 2561 2562 /* 2563 * Chop IPv6 header off from the payload. 
2564 */ 2565 int 2566 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) 2567 { 2568 struct mbuf *mh; 2569 struct ip6_hdr *ip6; 2570 2571 ip6 = mtod(m, struct ip6_hdr *); 2572 if (m->m_len > sizeof(*ip6)) { 2573 MGET(mh, M_DONTWAIT, MT_HEADER); 2574 if (mh == NULL) { 2575 m_freem(m); 2576 return ENOBUFS; 2577 } 2578 M_MOVE_PKTHDR(mh, m); 2579 m_align(mh, sizeof(*ip6)); 2580 m->m_len -= sizeof(*ip6); 2581 m->m_data += sizeof(*ip6); 2582 mh->m_next = m; 2583 m = mh; 2584 m->m_len = sizeof(*ip6); 2585 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); 2586 } 2587 exthdrs->ip6e_ip6 = m; 2588 return 0; 2589 } 2590 2591 u_int32_t 2592 ip6_randomid(void) 2593 { 2594 return idgen32(&ip6_id_ctx); 2595 } 2596 2597 void 2598 ip6_randomid_init(void) 2599 { 2600 idgen32_init(&ip6_id_ctx); 2601 } 2602 2603 /* 2604 * Compute significant parts of the IPv6 checksum pseudo-header 2605 * for use in a delayed TCP/UDP checksum calculation. 2606 */ 2607 static __inline u_int16_t __attribute__((__unused__)) 2608 in6_cksum_phdr(const struct in6_addr *src, const struct in6_addr *dst, 2609 u_int32_t len, u_int32_t nxt) 2610 { 2611 u_int32_t sum = 0; 2612 const u_int16_t *w; 2613 2614 w = (const u_int16_t *) src; 2615 sum += w[0]; 2616 if (!IN6_IS_SCOPE_EMBED(src)) 2617 sum += w[1]; 2618 sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; 2619 sum += w[6]; sum += w[7]; 2620 2621 w = (const u_int16_t *) dst; 2622 sum += w[0]; 2623 if (!IN6_IS_SCOPE_EMBED(dst)) 2624 sum += w[1]; 2625 sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; 2626 sum += w[6]; sum += w[7]; 2627 2628 sum += (u_int16_t)(len >> 16) + (u_int16_t)(len /*& 0xffff*/); 2629 2630 sum += (u_int16_t)(nxt >> 16) + (u_int16_t)(nxt /*& 0xffff*/); 2631 2632 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 2633 2634 if (sum > 0xffff) 2635 sum -= 0xffff; 2636 2637 return (sum); 2638 } 2639 2640 /* 2641 * Process a delayed payload checksum calculation. 
2642 */ 2643 void 2644 in6_delayed_cksum(struct mbuf *m, u_int8_t nxt) 2645 { 2646 int nxtp, offset; 2647 u_int16_t csum; 2648 2649 offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxtp); 2650 if (offset <= 0 || nxtp != nxt) 2651 /* If the desired next protocol isn't found, punt. */ 2652 return; 2653 csum = (u_int16_t)(in6_cksum(m, 0, offset, m->m_pkthdr.len - offset)); 2654 2655 switch (nxt) { 2656 case IPPROTO_TCP: 2657 offset += offsetof(struct tcphdr, th_sum); 2658 break; 2659 2660 case IPPROTO_UDP: 2661 offset += offsetof(struct udphdr, uh_sum); 2662 if (csum == 0) 2663 csum = 0xffff; 2664 break; 2665 2666 case IPPROTO_ICMPV6: 2667 offset += offsetof(struct icmp6_hdr, icmp6_cksum); 2668 break; 2669 } 2670 2671 if ((offset + sizeof(u_int16_t)) > m->m_len) 2672 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 2673 else 2674 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 2675 } 2676 2677 void 2678 in6_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 2679 { 2680 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 2681 2682 /* some hw and in6_delayed_cksum need the pseudo header cksum */ 2683 if (m->m_pkthdr.csum_flags & 2684 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 2685 int nxt, offset; 2686 u_int16_t csum; 2687 2688 offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); 2689 if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && 2690 in_ifcap_cksum(m, ifp, IFCAP_TSOv6)) { 2691 csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, 2692 htonl(0), htonl(nxt)); 2693 } else { 2694 csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, 2695 htonl(m->m_pkthdr.len - offset), htonl(nxt)); 2696 } 2697 if (nxt == IPPROTO_TCP) 2698 offset += offsetof(struct tcphdr, th_sum); 2699 else if (nxt == IPPROTO_UDP) 2700 offset += offsetof(struct udphdr, uh_sum); 2701 else if (nxt == IPPROTO_ICMPV6) 2702 offset += offsetof(struct icmp6_hdr, icmp6_cksum); 2703 if ((offset + sizeof(u_int16_t)) > m->m_len) 2704 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 2705 else 2706 *(u_int16_t 
*)(mtod(m, caddr_t) + offset) = csum; 2707 } 2708 2709 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 2710 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv6) || 2711 ip6->ip6_nxt != IPPROTO_TCP || 2712 ifp->if_bridgeidx != 0) { 2713 tcpstat_inc(tcps_outswcsum); 2714 in6_delayed_cksum(m, IPPROTO_TCP); 2715 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 2716 } 2717 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 2718 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv6) || 2719 ip6->ip6_nxt != IPPROTO_UDP || 2720 ifp->if_bridgeidx != 0) { 2721 udpstat_inc(udps_outswcsum); 2722 in6_delayed_cksum(m, IPPROTO_UDP); 2723 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 2724 } 2725 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 2726 in6_delayed_cksum(m, IPPROTO_ICMPV6); 2727 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 2728 } 2729 } 2730 2731 #ifdef IPSEC 2732 int 2733 ip6_output_ipsec_lookup(struct mbuf *m, const struct ipsec_level *seclevel, 2734 struct tdb **tdbout) 2735 { 2736 struct tdb *tdb; 2737 struct m_tag *mtag; 2738 struct tdb_ident *tdbi; 2739 int error; 2740 2741 /* 2742 * Check if there was an outgoing SA bound to the flow 2743 * from a transport protocol. 2744 */ 2745 2746 /* Do we have any pending SAs to apply ? 
*/ 2747 error = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr), 2748 IPSP_DIRECTION_OUT, NULL, seclevel, &tdb, NULL); 2749 if (error || tdb == NULL) { 2750 *tdbout = NULL; 2751 return error; 2752 } 2753 /* Loop detection */ 2754 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 2755 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 2756 continue; 2757 tdbi = (struct tdb_ident *)(mtag + 1); 2758 if (tdbi->spi == tdb->tdb_spi && 2759 tdbi->proto == tdb->tdb_sproto && 2760 tdbi->rdomain == tdb->tdb_rdomain && 2761 !memcmp(&tdbi->dst, &tdb->tdb_dst, 2762 sizeof(union sockaddr_union))) { 2763 /* no IPsec needed */ 2764 tdb_unref(tdb); 2765 *tdbout = NULL; 2766 return 0; 2767 } 2768 } 2769 *tdbout = tdb; 2770 return 0; 2771 } 2772 2773 int 2774 ip6_output_ipsec_pmtu_update(struct tdb *tdb, struct route *ro, 2775 struct in6_addr *dst, int ifidx, int rtableid, int transportmode) 2776 { 2777 struct rtentry *rt = NULL; 2778 int rt_mtucloned = 0; 2779 2780 /* Find a host route to store the mtu in */ 2781 if (ro != NULL) 2782 rt = ro->ro_rt; 2783 /* but don't add a PMTU route for transport mode SAs */ 2784 if (transportmode) 2785 rt = NULL; 2786 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 2787 struct sockaddr_in6 sin6; 2788 int error; 2789 2790 memset(&sin6, 0, sizeof(sin6)); 2791 sin6.sin6_family = AF_INET6; 2792 sin6.sin6_len = sizeof(sin6); 2793 sin6.sin6_addr = *dst; 2794 sin6.sin6_scope_id = in6_addr2scopeid(ifidx, dst); 2795 error = in6_embedscope(dst, &sin6, NULL, NULL); 2796 if (error) { 2797 /* should be impossible */ 2798 return error; 2799 } 2800 rt = icmp6_mtudisc_clone(&sin6, rtableid, 1); 2801 rt_mtucloned = 1; 2802 } 2803 DPRINTF("spi %08x mtu %d rt %p cloned %d", 2804 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned); 2805 if (rt != NULL) { 2806 rt->rt_mtu = tdb->tdb_mtu; 2807 if (ro != NULL && ro->ro_rt != NULL) { 2808 rtfree(ro->ro_rt); 2809 ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, 2810 rtableid); 2811 } 2812 if 
(rt_mtucloned) 2813 rtfree(rt); 2814 } 2815 return 0; 2816 } 2817 2818 int 2819 ip6_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, 2820 int tunalready, int fwd) 2821 { 2822 struct mbuf_list ml; 2823 struct ifnet *encif = NULL; 2824 struct ip6_hdr *ip6; 2825 struct in6_addr dst; 2826 u_int len; 2827 int error, ifidx, rtableid, tso = 0; 2828 2829 #if NPF > 0 2830 /* 2831 * Packet filter 2832 */ 2833 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 2834 pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) { 2835 m_freem(m); 2836 return EACCES; 2837 } 2838 if (m == NULL) 2839 return 0; 2840 /* 2841 * PF_TAG_REROUTE handling or not... 2842 * Packet is entering IPsec so the routing is 2843 * already overruled by the IPsec policy. 2844 * Until now the change was not reconsidered. 2845 * What's the behaviour? 2846 */ 2847 #endif 2848 2849 /* Check if we can chop the TCP packet */ 2850 ip6 = mtod(m, struct ip6_hdr *); 2851 if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && 2852 m->m_pkthdr.ph_mss <= tdb->tdb_mtu) { 2853 tso = 1; 2854 len = m->m_pkthdr.ph_mss; 2855 } else 2856 len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); 2857 2858 /* Check if we are allowed to fragment */ 2859 dst = ip6->ip6_dst; 2860 ifidx = m->m_pkthdr.ph_ifidx; 2861 rtableid = m->m_pkthdr.ph_rtableid; 2862 if (ip_mtudisc && tdb->tdb_mtu && 2863 len > tdb->tdb_mtu && tdb->tdb_mtutimeout > gettime()) { 2864 int transportmode; 2865 2866 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET6) && 2867 (IN6_ARE_ADDR_EQUAL(&tdb->tdb_dst.sin6.sin6_addr, &dst)); 2868 error = ip6_output_ipsec_pmtu_update(tdb, ro, &dst, ifidx, 2869 rtableid, transportmode); 2870 if (error) { 2871 ipsecstat_inc(ipsec_odrops); 2872 tdbstat_inc(tdb, tdb_odrops); 2873 m_freem(m); 2874 return error; 2875 } 2876 ipsec_adjust_mtu(m, tdb->tdb_mtu); 2877 m_freem(m); 2878 return EMSGSIZE; 2879 } 2880 /* propagate don't fragment for v6-over-v6 */ 2881 if (ip_mtudisc) 2882 
SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 2883 2884 /* 2885 * Clear these -- they'll be set in the recursive invocation 2886 * as needed. 2887 */ 2888 m->m_flags &= ~(M_BCAST | M_MCAST); 2889 2890 if (tso) { 2891 error = tcp_chopper(m, &ml, encif, len); 2892 if (error) 2893 goto done; 2894 } else { 2895 CLR(m->m_pkthdr.csum_flags, M_TCP_TSO); 2896 in6_proto_cksum_out(m, encif); 2897 ml_init(&ml); 2898 ml_enqueue(&ml, m); 2899 } 2900 2901 KERNEL_LOCK(); 2902 while ((m = ml_dequeue(&ml)) != NULL) { 2903 /* Callee frees mbuf */ 2904 error = ipsp_process_packet(m, tdb, AF_INET6, tunalready); 2905 if (error) 2906 break; 2907 } 2908 KERNEL_UNLOCK(); 2909 done: 2910 if (error) { 2911 ml_purge(&ml); 2912 ipsecstat_inc(ipsec_odrops); 2913 tdbstat_inc(tdb, tdb_odrops); 2914 } 2915 if (!error && tso) 2916 tcpstat_inc(tcps_outswtso); 2917 if (ip_mtudisc && error == EMSGSIZE) 2918 ip6_output_ipsec_pmtu_update(tdb, ro, &dst, ifidx, rtableid, 0); 2919 return error; 2920 } 2921 #endif /* IPSEC */ 2922