1 /* $FreeBSD: src/sys/net/if_stf.c,v 1.1.2.11 2003/01/23 21:06:44 sam Exp $ */ 2 /* $DragonFly: src/sys/net/stf/if_stf.c,v 1.25 2008/10/27 02:56:30 sephe Exp $ */ 3 /* $KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $ */ 4 5 /* 6 * Copyright (C) 2000 WIDE Project. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the project nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * 6to4 interface, based on RFC3056. 36 * 37 * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. 38 * There is no address mapping defined from IPv6 multicast address to IPv4 39 * address. Therefore, we do not have IFF_MULTICAST on the interface. 40 * 41 * Due to the lack of address mapping for link-local addresses, we cannot 42 * throw packets toward link-local addresses (fe80::x). Also, we cannot throw 43 * packets to link-local multicast addresses (ff02::x). 44 * 45 * Here are interesting symptoms due to the lack of link-local address: 46 * 47 * Unicast routing exchange: 48 * - RIPng: Impossible. Uses link-local multicast packet toward ff02::9, 49 * and link-local addresses as nexthop. 50 * - OSPFv6: Impossible. OSPFv6 assumes that there's link-local address 51 * assigned to the link, and makes use of them. Also, HELLO packets use 52 * link-local multicast addresses (ff02::5 and ff02::6). 53 * - BGP4+: Maybe. You can only use global address as nexthop, and global 54 * address as TCP endpoint address. 55 * 56 * Multicast routing protocols: 57 * - PIM: Hello packet cannot be used to discover adjacent PIM routers. 58 * Adjacent PIM routers must be configured manually (is it really spec-wise 59 * correct thing to do?). 60 * 61 * ICMPv6: 62 * - Redirects cannot be used due to the lack of link-local address. 63 * 64 * stf interface does not have, and will not need, a link-local address. 65 * It seems to have no real benefit and does not help the above symptoms much. 66 * Even if we assign link-locals to interface, we cannot really 67 * use link-local unicast/multicast on top of 6to4 cloud (since there's no 68 * encapsulation defined for link-local address), and the above analysis does 69 * not change. RFC3056 does not mandate the assignment of link-local address 70 * either. 71 * 72 * 6to4 interface has security issues. Refer to 73 * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt 74 * for details. The code tries to filter out some of malicious packets. 75 * Note that there is no way to be 100% secure. 76 */ 77 78 #include "opt_inet.h" 79 #include "opt_inet6.h" 80 81 #include <sys/param.h> 82 #include <sys/systm.h> 83 #include <sys/socket.h> 84 #include <sys/sockio.h> 85 #include <sys/mbuf.h> 86 #include <sys/errno.h> 87 #include <sys/protosw.h> 88 #include <sys/kernel.h> 89 #include <machine/cpu.h> 90 91 #include <sys/malloc.h> 92 93 #include <net/if.h> 94 #include <net/route.h> 95 #include <net/netisr.h> 96 #include <net/if_types.h> 97 #include "if_stf.h" 98 99 #include <netinet/in.h> 100 #include <netinet/in_systm.h> 101 #include <netinet/ip.h> 102 #include <netinet/ip_var.h> 103 #include <netinet/in_var.h> 104 105 #include <netinet/ip6.h> 106 #include <netinet6/ip6_var.h> 107 #include <netinet6/in6_var.h> 108 #include <netinet/ip_ecn.h> 109 110 #include <netinet/ip_encap.h> 111 112 #include <machine/stdarg.h> 113 114 #include <net/net_osdep.h> 115 116 #include <net/bpf.h> 117 118 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) 119 #define GET_V4(x) ((struct in_addr *)(&(x)->s6_addr16[1])) 120 121 struct stf_softc { 122 struct ifnet sc_if; /* common area */ 123 union { 124 struct route __sc_ro4; 125 struct route_in6 __sc_ro6; /* just for safety */ 126 } __sc_ro46; 127 #define sc_ro __sc_ro46.__sc_ro4 128 const struct encaptab *encap_cookie; 129 }; 130 131 static struct stf_softc *stf; 132 133 static MALLOC_DEFINE(M_STF, "stf", "6to4 Tunnel Interface"); 134 static int ip_stf_ttl = 40; 135 136 extern struct domain inetdomain; 137 struct protosw in_stf_protosw = 138 { SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, 139 in_stf_input, rip_output, 0, rip_ctloutput, 140 NULL, NULL, 141 0, 0, 0, 0, 142 &rip_usrreqs 143 }; 144 145 static int stfmodevent (module_t, int, void *); 146 static int stf_encapcheck (const struct mbuf *, int, int, void *); 147 static struct in6_ifaddr *stf_getsrcifa6 (struct ifnet *); 148 static int stf_output (struct ifnet *, struct mbuf *, struct sockaddr *, 149 struct rtentry *); 150 static int stf_checkaddr4 (struct stf_softc *, struct in_addr *, 151 struct ifnet *); 152 static int stf_checkaddr6 (struct stf_softc *, struct in6_addr *, 153 struct ifnet *); 154 static void stf_rtrequest (int, struct rtentry *, struct rt_addrinfo *); 155 static int stf_ioctl (struct ifnet *, u_long, caddr_t, struct ucred *); 156 157 static int 158 stfmodevent(module_t mod, int type, void *data) 159 { 160 struct stf_softc *sc; 161 int err; 162 const struct encaptab *p; 163 164 switch (type) { 165 case MOD_LOAD: 166 stf = kmalloc(sizeof(struct stf_softc), M_STF, 167 M_WAITOK | M_ZERO); 168 sc = stf; 169 170 bzero(sc, sizeof(*sc)); 171 if_initname(&(sc->sc_if), "stf", 0); 172 173 p = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, 174 (void *)&in_stf_protosw, sc); 175 if (p == NULL) { 176 kprintf("%s: attach failed\n", if_name(&sc->sc_if)); 177 return (ENOMEM); 178 } 179 sc->encap_cookie = p; 180 181 sc->sc_if.if_mtu = IPV6_MMTU; 182 sc->sc_if.if_flags = 0; 183 sc->sc_if.if_ioctl = stf_ioctl; 184 sc->sc_if.if_output = stf_output; 185 sc->sc_if.if_type = IFT_STF; 186 #if 0 187 /* turn off ingress filter */ 188 sc->sc_if.if_flags |= IFF_LINK2; 189 #endif 190 sc->sc_if.if_snd.ifq_maxlen = IFQ_MAXLEN; 191 if_attach(&sc->sc_if, NULL); 192 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); 193 break; 194 case MOD_UNLOAD: 195 sc = stf; 196 bpfdetach(&sc->sc_if); 197 if_detach(&sc->sc_if); 198 err = encap_detach(sc->encap_cookie); 199 KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); 200 kfree(sc, M_STF); 201 break; 202 } 203 204 return (0); 205 } 206 207 static moduledata_t stf_mod = { 208 "if_stf", 209 stfmodevent, 210 0 211 }; 212 213 DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 214 215 static int 216 stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 217 { 218 struct ip ip; 219 struct in6_ifaddr *ia6; 220 struct stf_softc *sc; 221 struct in_addr a, b; 222 223 sc = (struct stf_softc *)arg; 224 if (sc == NULL) 225 return 0; 226 227 if ((sc->sc_if.if_flags & IFF_UP) == 0) 228 return 0; 229 230 /* IFF_LINK0 means "no decapsulation" */ 231 if ((sc->sc_if.if_flags & IFF_LINK0) != 0) 232 return 0; 233 234 if (proto != IPPROTO_IPV6) 235 return 0; 236 237 m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); 238 239 if (ip.ip_v != 4) 240 return 0; 241 242 ia6 = stf_getsrcifa6(&sc->sc_if); 243 if (ia6 == NULL) 244 return 0; 245 246 /* 247 * check if IPv4 dst matches the IPv4 address derived from the 248 * local 6to4 address. 249 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... 250 */ 251 if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, 252 sizeof(ip.ip_dst)) != 0) 253 return 0; 254 255 /* 256 * check if IPv4 src matches the IPv4 address derived from the 257 * local 6to4 address masked by prefixmask. 258 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 259 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 260 */ 261 bzero(&a, sizeof(a)); 262 a.s_addr = GET_V4(&ia6->ia_addr.sin6_addr)->s_addr; 263 a.s_addr &= GET_V4(&ia6->ia_prefixmask.sin6_addr)->s_addr; 264 b = ip.ip_src; 265 b.s_addr &= GET_V4(&ia6->ia_prefixmask.sin6_addr)->s_addr; 266 if (a.s_addr != b.s_addr) 267 return 0; 268 269 /* stf interface makes single side match only */ 270 return 32; 271 } 272 273 static struct in6_ifaddr * 274 stf_getsrcifa6(struct ifnet *ifp) 275 { 276 struct ifaddr_container *ifac; 277 struct sockaddr_in6 *sin6; 278 struct in_addr in; 279 280 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 281 struct ifaddr *ia = ifac->ifa; 282 struct in_ifaddr_container *iac; 283 284 if (ia->ifa_addr == NULL) 285 continue; 286 if (ia->ifa_addr->sa_family != AF_INET6) 287 continue; 288 sin6 = (struct sockaddr_in6 *)ia->ifa_addr; 289 if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) 290 continue; 291 292 bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); 293 LIST_FOREACH(iac, INADDR_HASH(in.s_addr), ia_hash) { 294 if (iac->ia->ia_addr.sin_addr.s_addr == in.s_addr) 295 break; 296 } 297 if (iac == NULL) 298 continue; 299 300 return (struct in6_ifaddr *)ia; 301 } 302 303 return NULL; 304 } 305 306 static int 307 stf_output_serialized(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 308 struct rtentry *rt) 309 { 310 struct stf_softc *sc; 311 struct sockaddr_in6 *dst6; 312 struct in_addr *in4; 313 struct sockaddr_in *dst4; 314 u_int8_t tos; 315 struct ip *ip; 316 struct ip6_hdr *ip6; 317 struct in6_ifaddr *ia6; 318 static const uint32_t af = AF_INET6; 319 320 sc = (struct stf_softc*)ifp; 321 dst6 = (struct sockaddr_in6 *)dst; 322 323 /* just in case */ 324 if ((ifp->if_flags & IFF_UP) == 0) { 325 m_freem(m); 326 return ENETDOWN; 327 } 328 329 /* 330 * If we don't have an ip4 address that match my inner ip6 address, 331 * we shouldn't generate output. Without this check, we'll end up 332 * using wrong IPv4 source. 333 */ 334 ia6 = stf_getsrcifa6(ifp); 335 if (ia6 == NULL) { 336 m_freem(m); 337 return ENETDOWN; 338 } 339 340 if (m->m_len < sizeof(*ip6)) { 341 m = m_pullup(m, sizeof(*ip6)); 342 if (!m) 343 return ENOBUFS; 344 } 345 ip6 = mtod(m, struct ip6_hdr *); 346 tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 347 348 /* 349 * Pickup the right outer dst addr from the list of candidates. 350 * ip6_dst has priority as it may be able to give us shorter IPv4 hops. 351 */ 352 if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst)) 353 in4 = GET_V4(&ip6->ip6_dst); 354 else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) 355 in4 = GET_V4(&dst6->sin6_addr); 356 else { 357 m_freem(m); 358 return ENETUNREACH; 359 } 360 361 if (ifp->if_bpf) 362 bpf_ptap(ifp->if_bpf, m, &af, sizeof(af)); 363 364 M_PREPEND(m, sizeof(struct ip), MB_DONTWAIT); 365 if (m && m->m_len < sizeof(struct ip)) 366 m = m_pullup(m, sizeof(struct ip)); 367 if (m == NULL) 368 return ENOBUFS; 369 ip = mtod(m, struct ip *); 370 371 bzero(ip, sizeof(*ip)); 372 373 bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr), 374 &ip->ip_src, sizeof(ip->ip_src)); 375 bcopy(in4, &ip->ip_dst, sizeof(ip->ip_dst)); 376 ip->ip_p = IPPROTO_IPV6; 377 ip->ip_ttl = ip_stf_ttl; 378 ip->ip_len = m->m_pkthdr.len; /*host order*/ 379 if (ifp->if_flags & IFF_LINK1) 380 ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos); 381 else 382 ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); 383 384 dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; 385 if (dst4->sin_family != AF_INET || 386 bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { 387 /* cache route doesn't match */ 388 dst4->sin_family = AF_INET; 389 dst4->sin_len = sizeof(struct sockaddr_in); 390 bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr)); 391 if (sc->sc_ro.ro_rt) { 392 RTFREE(sc->sc_ro.ro_rt); 393 sc->sc_ro.ro_rt = NULL; 394 } 395 } 396 397 if (sc->sc_ro.ro_rt == NULL) { 398 rtalloc(&sc->sc_ro); 399 if (sc->sc_ro.ro_rt == NULL) { 400 m_freem(m); 401 return ENETUNREACH; 402 } 403 } 404 405 return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); 406 } 407 408 static int 409 stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 410 struct rtentry *rt) 411 { 412 int error; 413 414 lwkt_serialize_enter(ifp->if_serializer); 415 error = stf_output_serialized(ifp, m, dst, rt); 416 lwkt_serialize_exit(ifp->if_serializer); 417 418 return error; 419 } 420 421 /* 422 * Parameters: 423 * inifp: incoming interface 424 */ 425 static int 426 stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) 427 { 428 struct in_ifaddr_container *iac; 429 430 /* 431 * reject packets with the following address: 432 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8 433 */ 434 if (IN_MULTICAST(ntohl(in->s_addr))) 435 return -1; 436 switch ((ntohl(in->s_addr) & 0xff000000) >> 24) { 437 case 0: case 127: case 255: 438 return -1; 439 } 440 441 /* 442 * reject packets with broadcast 443 */ 444 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 445 struct in_ifaddr *ia4 = iac->ia; 446 447 if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) 448 continue; 449 if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) 450 return -1; 451 } 452 453 /* 454 * perform ingress filter 455 */ 456 if (sc && (sc->sc_if.if_flags & IFF_LINK2) == 0 && inifp) { 457 struct sockaddr_in sin; 458 struct rtentry *rt; 459 460 bzero(&sin, sizeof(sin)); 461 sin.sin_family = AF_INET; 462 sin.sin_len = sizeof(struct sockaddr_in); 463 sin.sin_addr = *in; 464 rt = rtpurelookup((struct sockaddr *)&sin); 465 if (!rt || rt->rt_ifp != inifp) { 466 #if 0 467 log(LOG_WARNING, "%s: packet from 0x%x dropped " 468 "due to ingress filter\n", if_name(&sc->sc_if), 469 (u_int32_t)ntohl(sin.sin_addr.s_addr)); 470 #endif 471 if (rt) 472 rtfree(rt); 473 return -1; 474 } 475 rtfree(rt); 476 } 477 478 return 0; 479 } 480 481 /* 482 * Parameters: 483 * inifp: incoming interface 484 */ 485 static int 486 stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) 487 { 488 /* 489 * check 6to4 addresses 490 */ 491 if (IN6_IS_ADDR_6TO4(in6)) 492 return stf_checkaddr4(sc, GET_V4(in6), inifp); 493 494 /* 495 * reject anything that look suspicious. the test is implemented 496 * in ip6_input too, but we check here as well to 497 * (1) reject bad packets earlier, and 498 * (2) to be safe against future ip6_input change. 499 */ 500 if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6)) 501 return -1; 502 503 return 0; 504 } 505 506 void 507 in_stf_input(struct mbuf *m, ...) 508 { 509 struct stf_softc *sc; 510 struct ip *ip; 511 struct ip6_hdr *ip6; 512 u_int8_t otos, itos; 513 struct ifnet *ifp; 514 int off, proto; 515 static const uint32_t af = AF_INET6; 516 __va_list ap; 517 518 __va_start(ap, m); 519 off = __va_arg(ap, int); 520 proto = __va_arg(ap, int); 521 __va_end(ap); 522 523 if (proto != IPPROTO_IPV6) { 524 m_freem(m); 525 return; 526 } 527 528 ip = mtod(m, struct ip *); 529 530 sc = (struct stf_softc *)encap_getarg(m); 531 532 if (sc == NULL || (sc->sc_if.if_flags & IFF_UP) == 0) { 533 m_freem(m); 534 return; 535 } 536 537 ifp = &sc->sc_if; 538 539 /* 540 * perform sanity check against outer src/dst. 541 * for source, perform ingress filter as well. 542 */ 543 if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 || 544 stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) { 545 m_freem(m); 546 return; 547 } 548 549 otos = ip->ip_tos; 550 m_adj(m, off); 551 552 if (m->m_len < sizeof(*ip6)) { 553 m = m_pullup(m, sizeof(*ip6)); 554 if (!m) 555 return; 556 } 557 ip6 = mtod(m, struct ip6_hdr *); 558 559 /* 560 * perform sanity check against inner src/dst. 561 * for source, perform ingress filter as well. 562 */ 563 if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 || 564 stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) { 565 m_freem(m); 566 return; 567 } 568 569 itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 570 if ((ifp->if_flags & IFF_LINK1) != 0) 571 ip_ecn_egress(ECN_ALLOWED, &otos, &itos); 572 else 573 ip_ecn_egress(ECN_NOCARE, &otos, &itos); 574 ip6->ip6_flow &= ~htonl(0xff << 20); 575 ip6->ip6_flow |= htonl((u_int32_t)itos << 20); 576 577 m->m_pkthdr.rcvif = ifp; 578 579 if (ifp->if_bpf) 580 bpf_ptap(ifp->if_bpf, m, &af, sizeof(af)); 581 582 /* 583 * Put the packet to the network layer input queue according to the 584 * specified address family. 585 * See net/if_gif.c for possible issues with packet processing 586 * reorder due to extra queueing. 587 */ 588 ifp->if_ipackets++; 589 ifp->if_ibytes += m->m_pkthdr.len; 590 netisr_dispatch(NETISR_IPV6, m); 591 } 592 593 /* ARGSUSED */ 594 static void 595 stf_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) 596 { 597 598 if (rt) 599 rt->rt_rmx.rmx_mtu = IPV6_MMTU; 600 } 601 602 static int 603 stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 604 { 605 struct ifaddr *ifa; 606 struct ifreq *ifr; 607 struct sockaddr_in6 *sin6; 608 int error; 609 610 error = 0; 611 switch (cmd) { 612 case SIOCSIFADDR: 613 ifa = (struct ifaddr *)data; 614 if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { 615 error = EAFNOSUPPORT; 616 break; 617 } 618 sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; 619 if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { 620 ifa->ifa_rtrequest = stf_rtrequest; 621 ifp->if_flags |= IFF_UP; 622 } else 623 error = EINVAL; 624 break; 625 626 case SIOCADDMULTI: 627 case SIOCDELMULTI: 628 ifr = (struct ifreq *)data; 629 if (ifr && ifr->ifr_addr.sa_family == AF_INET6) 630 ; 631 else 632 error = EAFNOSUPPORT; 633 break; 634 635 default: 636 error = EINVAL; 637 break; 638 } 639 640 return error; 641 } 642