1 /* $FreeBSD: src/sys/net/if_stf.c,v 1.1.2.11 2003/01/23 21:06:44 sam Exp $ */ 2 /* $DragonFly: src/sys/net/stf/if_stf.c,v 1.21 2008/03/07 11:34:20 sephe Exp $ */ 3 /* $KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $ */ 4 5 /* 6 * Copyright (C) 2000 WIDE Project. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the project nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * 6to4 interface, based on RFC3056. 36 * 37 * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. 38 * There is no address mapping defined from IPv6 multicast address to IPv4 39 * address. Therefore, we do not have IFF_MULTICAST on the interface. 40 * 41 * Due to the lack of address mapping for link-local addresses, we cannot 42 * throw packets toward link-local addresses (fe80::x). Also, we cannot throw 43 * packets to link-local multicast addresses (ff02::x). 44 * 45 * Here are interesting symptoms due to the lack of link-local address: 46 * 47 * Unicast routing exchange: 48 * - RIPng: Impossible. Uses link-local multicast packet toward ff02::9, 49 * and link-local addresses as nexthop. 50 * - OSPFv6: Impossible. OSPFv6 assumes that there's link-local address 51 * assigned to the link, and makes use of them. Also, HELLO packets use 52 * link-local multicast addresses (ff02::5 and ff02::6). 53 * - BGP4+: Maybe. You can only use global address as nexthop, and global 54 * address as TCP endpoint address. 55 * 56 * Multicast routing protocols: 57 * - PIM: Hello packet cannot be used to discover adjacent PIM routers. 58 * Adjacent PIM routers must be configured manually (is it really spec-wise 59 * correct thing to do?). 60 * 61 * ICMPv6: 62 * - Redirects cannot be used due to the lack of link-local address. 63 * 64 * stf interface does not have, and will not need, a link-local address. 65 * It seems to have no real benefit and does not help the above symptoms much. 66 * Even if we assign link-locals to interface, we cannot really 67 * use link-local unicast/multicast on top of 6to4 cloud (since there's no 68 * encapsulation defined for link-local address), and the above analysis does 69 * not change. RFC3056 does not mandate the assignment of link-local address 70 * either. 71 * 72 * 6to4 interface has security issues. Refer to 73 * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt 74 * for details. The code tries to filter out some of malicious packets. 75 * Note that there is no way to be 100% secure. 76 */ 77 78 #include "opt_inet.h" 79 #include "opt_inet6.h" 80 81 #include <sys/param.h> 82 #include <sys/systm.h> 83 #include <sys/socket.h> 84 #include <sys/sockio.h> 85 #include <sys/mbuf.h> 86 #include <sys/errno.h> 87 #include <sys/protosw.h> 88 #include <sys/kernel.h> 89 #include <machine/cpu.h> 90 91 #include <sys/malloc.h> 92 93 #include <net/if.h> 94 #include <net/route.h> 95 #include <net/netisr.h> 96 #include <net/if_types.h> 97 #include "if_stf.h" 98 99 #include <netinet/in.h> 100 #include <netinet/in_systm.h> 101 #include <netinet/ip.h> 102 #include <netinet/ip_var.h> 103 #include <netinet/in_var.h> 104 105 #include <netinet/ip6.h> 106 #include <netinet6/ip6_var.h> 107 #include <netinet6/in6_var.h> 108 #include <netinet/ip_ecn.h> 109 110 #include <netinet/ip_encap.h> 111 112 #include <machine/stdarg.h> 113 114 #include <net/net_osdep.h> 115 116 #include <net/bpf.h> 117 118 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) 119 #define GET_V4(x) ((struct in_addr *)(&(x)->s6_addr16[1])) 120 121 struct stf_softc { 122 struct ifnet sc_if; /* common area */ 123 union { 124 struct route __sc_ro4; 125 struct route_in6 __sc_ro6; /* just for safety */ 126 } __sc_ro46; 127 #define sc_ro __sc_ro46.__sc_ro4 128 const struct encaptab *encap_cookie; 129 }; 130 131 static struct stf_softc *stf; 132 133 static MALLOC_DEFINE(M_STF, "stf", "6to4 Tunnel Interface"); 134 static int ip_stf_ttl = 40; 135 136 extern struct domain inetdomain; 137 struct protosw in_stf_protosw = 138 { SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, 139 in_stf_input, rip_output, 0, rip_ctloutput, 140 0, 141 0, 0, 0, 0, 142 &rip_usrreqs 143 }; 144 145 static int stfmodevent (module_t, int, void *); 146 static int stf_encapcheck (const struct mbuf *, int, int, void *); 147 static struct in6_ifaddr *stf_getsrcifa6 (struct ifnet *); 148 static int stf_output (struct ifnet *, struct mbuf *, struct sockaddr *, 149 struct rtentry *); 150 static int stf_checkaddr4 (struct stf_softc *, struct in_addr *, 151 struct ifnet *); 152 static int stf_checkaddr6 (struct stf_softc *, struct in6_addr *, 153 struct ifnet *); 154 static void stf_rtrequest (int, struct rtentry *, struct rt_addrinfo *); 155 static int stf_ioctl (struct ifnet *, u_long, caddr_t, struct ucred *); 156 157 static int 158 stfmodevent(module_t mod, int type, void *data) 159 { 160 struct stf_softc *sc; 161 int err; 162 const struct encaptab *p; 163 164 switch (type) { 165 case MOD_LOAD: 166 stf = kmalloc(sizeof(struct stf_softc), M_STF, 167 M_WAITOK | M_ZERO); 168 sc = stf; 169 170 bzero(sc, sizeof(*sc)); 171 if_initname(&(sc->sc_if), "stf", 0); 172 173 p = encap_attach_func(AF_INET, IPPROTO_IPV6, stf_encapcheck, 174 (void *)&in_stf_protosw, sc); 175 if (p == NULL) { 176 kprintf("%s: attach failed\n", if_name(&sc->sc_if)); 177 return (ENOMEM); 178 } 179 sc->encap_cookie = p; 180 181 sc->sc_if.if_mtu = IPV6_MMTU; 182 sc->sc_if.if_flags = 0; 183 sc->sc_if.if_ioctl = stf_ioctl; 184 sc->sc_if.if_output = stf_output; 185 sc->sc_if.if_type = IFT_STF; 186 #if 0 187 /* turn off ingress filter */ 188 sc->sc_if.if_flags |= IFF_LINK2; 189 #endif 190 sc->sc_if.if_snd.ifq_maxlen = IFQ_MAXLEN; 191 if_attach(&sc->sc_if, NULL); 192 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); 193 break; 194 case MOD_UNLOAD: 195 sc = stf; 196 bpfdetach(&sc->sc_if); 197 if_detach(&sc->sc_if); 198 err = encap_detach(sc->encap_cookie); 199 KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); 200 kfree(sc, M_STF); 201 break; 202 } 203 204 return (0); 205 } 206 207 static moduledata_t stf_mod = { 208 "if_stf", 209 stfmodevent, 210 0 211 }; 212 213 DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 214 215 static int 216 stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 217 { 218 struct ip ip; 219 struct in6_ifaddr *ia6; 220 struct stf_softc *sc; 221 struct in_addr a, b; 222 223 sc = (struct stf_softc *)arg; 224 if (sc == NULL) 225 return 0; 226 227 if ((sc->sc_if.if_flags & IFF_UP) == 0) 228 return 0; 229 230 /* IFF_LINK0 means "no decapsulation" */ 231 if ((sc->sc_if.if_flags & IFF_LINK0) != 0) 232 return 0; 233 234 if (proto != IPPROTO_IPV6) 235 return 0; 236 237 m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); 238 239 if (ip.ip_v != 4) 240 return 0; 241 242 ia6 = stf_getsrcifa6(&sc->sc_if); 243 if (ia6 == NULL) 244 return 0; 245 246 /* 247 * check if IPv4 dst matches the IPv4 address derived from the 248 * local 6to4 address. 249 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... 250 */ 251 if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, 252 sizeof(ip.ip_dst)) != 0) 253 return 0; 254 255 /* 256 * check if IPv4 src matches the IPv4 address derived from the 257 * local 6to4 address masked by prefixmask. 258 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 259 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 260 */ 261 bzero(&a, sizeof(a)); 262 a.s_addr = GET_V4(&ia6->ia_addr.sin6_addr)->s_addr; 263 a.s_addr &= GET_V4(&ia6->ia_prefixmask.sin6_addr)->s_addr; 264 b = ip.ip_src; 265 b.s_addr &= GET_V4(&ia6->ia_prefixmask.sin6_addr)->s_addr; 266 if (a.s_addr != b.s_addr) 267 return 0; 268 269 /* stf interface makes single side match only */ 270 return 32; 271 } 272 273 static struct in6_ifaddr * 274 stf_getsrcifa6(struct ifnet *ifp) 275 { 276 struct ifaddr_container *ifac; 277 struct in_ifaddr *ia4; 278 struct sockaddr_in6 *sin6; 279 struct in_addr in; 280 281 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 282 struct ifaddr *ia = ifac->ifa; 283 284 if (ia->ifa_addr == NULL) 285 continue; 286 if (ia->ifa_addr->sa_family != AF_INET6) 287 continue; 288 sin6 = (struct sockaddr_in6 *)ia->ifa_addr; 289 if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) 290 continue; 291 292 bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); 293 LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) 294 if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) 295 break; 296 if (ia4 == NULL) 297 continue; 298 299 return (struct in6_ifaddr *)ia; 300 } 301 302 return NULL; 303 } 304 305 static int 306 stf_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 307 struct rtentry *rt) 308 { 309 struct stf_softc *sc; 310 struct sockaddr_in6 *dst6; 311 struct in_addr *in4; 312 struct sockaddr_in *dst4; 313 u_int8_t tos; 314 struct ip *ip; 315 struct ip6_hdr *ip6; 316 struct in6_ifaddr *ia6; 317 static const uint32_t af = AF_INET6; 318 319 sc = (struct stf_softc*)ifp; 320 dst6 = (struct sockaddr_in6 *)dst; 321 322 /* just in case */ 323 if ((ifp->if_flags & IFF_UP) == 0) { 324 m_freem(m); 325 return ENETDOWN; 326 } 327 328 /* 329 * If we don't have an ip4 address that match my inner ip6 address, 330 * we shouldn't generate output. Without this check, we'll end up 331 * using wrong IPv4 source. 332 */ 333 ia6 = stf_getsrcifa6(ifp); 334 if (ia6 == NULL) { 335 m_freem(m); 336 return ENETDOWN; 337 } 338 339 if (m->m_len < sizeof(*ip6)) { 340 m = m_pullup(m, sizeof(*ip6)); 341 if (!m) 342 return ENOBUFS; 343 } 344 ip6 = mtod(m, struct ip6_hdr *); 345 tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 346 347 /* 348 * Pickup the right outer dst addr from the list of candidates. 349 * ip6_dst has priority as it may be able to give us shorter IPv4 hops. 350 */ 351 if (IN6_IS_ADDR_6TO4(&ip6->ip6_dst)) 352 in4 = GET_V4(&ip6->ip6_dst); 353 else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) 354 in4 = GET_V4(&dst6->sin6_addr); 355 else { 356 m_freem(m); 357 return ENETUNREACH; 358 } 359 360 if (ifp->if_bpf) 361 bpf_ptap(ifp->if_bpf, m, &af, sizeof(af)); 362 363 M_PREPEND(m, sizeof(struct ip), MB_DONTWAIT); 364 if (m && m->m_len < sizeof(struct ip)) 365 m = m_pullup(m, sizeof(struct ip)); 366 if (m == NULL) 367 return ENOBUFS; 368 ip = mtod(m, struct ip *); 369 370 bzero(ip, sizeof(*ip)); 371 372 bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr), 373 &ip->ip_src, sizeof(ip->ip_src)); 374 bcopy(in4, &ip->ip_dst, sizeof(ip->ip_dst)); 375 ip->ip_p = IPPROTO_IPV6; 376 ip->ip_ttl = ip_stf_ttl; 377 ip->ip_len = m->m_pkthdr.len; /*host order*/ 378 if (ifp->if_flags & IFF_LINK1) 379 ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos); 380 else 381 ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); 382 383 dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst; 384 if (dst4->sin_family != AF_INET || 385 bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { 386 /* cache route doesn't match */ 387 dst4->sin_family = AF_INET; 388 dst4->sin_len = sizeof(struct sockaddr_in); 389 bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr)); 390 if (sc->sc_ro.ro_rt) { 391 RTFREE(sc->sc_ro.ro_rt); 392 sc->sc_ro.ro_rt = NULL; 393 } 394 } 395 396 if (sc->sc_ro.ro_rt == NULL) { 397 rtalloc(&sc->sc_ro); 398 if (sc->sc_ro.ro_rt == NULL) { 399 m_freem(m); 400 return ENETUNREACH; 401 } 402 } 403 404 return ip_output(m, NULL, &sc->sc_ro, 0, NULL, NULL); 405 } 406 407 /* 408 * Parameters: 409 * inifp: incoming interface 410 */ 411 static int 412 stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) 413 { 414 struct in_ifaddr *ia4; 415 416 /* 417 * reject packets with the following address: 418 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8 419 */ 420 if (IN_MULTICAST(ntohl(in->s_addr))) 421 return -1; 422 switch ((ntohl(in->s_addr) & 0xff000000) >> 24) { 423 case 0: case 127: case 255: 424 return -1; 425 } 426 427 /* 428 * reject packets with broadcast 429 */ 430 TAILQ_FOREACH(ia4, &in_ifaddrhead, ia_link) 431 { 432 if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) 433 continue; 434 if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) 435 return -1; 436 } 437 438 /* 439 * perform ingress filter 440 */ 441 if (sc && (sc->sc_if.if_flags & IFF_LINK2) == 0 && inifp) { 442 struct sockaddr_in sin; 443 struct rtentry *rt; 444 445 bzero(&sin, sizeof(sin)); 446 sin.sin_family = AF_INET; 447 sin.sin_len = sizeof(struct sockaddr_in); 448 sin.sin_addr = *in; 449 rt = rtpurelookup((struct sockaddr *)&sin); 450 if (!rt || rt->rt_ifp != inifp) { 451 #if 0 452 log(LOG_WARNING, "%s: packet from 0x%x dropped " 453 "due to ingress filter\n", if_name(&sc->sc_if), 454 (u_int32_t)ntohl(sin.sin_addr.s_addr)); 455 #endif 456 if (rt) 457 rtfree(rt); 458 return -1; 459 } 460 rtfree(rt); 461 } 462 463 return 0; 464 } 465 466 /* 467 * Parameters: 468 * inifp: incoming interface 469 */ 470 static int 471 stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) 472 { 473 /* 474 * check 6to4 addresses 475 */ 476 if (IN6_IS_ADDR_6TO4(in6)) 477 return stf_checkaddr4(sc, GET_V4(in6), inifp); 478 479 /* 480 * reject anything that look suspicious. the test is implemented 481 * in ip6_input too, but we check here as well to 482 * (1) reject bad packets earlier, and 483 * (2) to be safe against future ip6_input change. 484 */ 485 if (IN6_IS_ADDR_V4COMPAT(in6) || IN6_IS_ADDR_V4MAPPED(in6)) 486 return -1; 487 488 return 0; 489 } 490 491 void 492 in_stf_input(struct mbuf *m, ...) 493 { 494 struct stf_softc *sc; 495 struct ip *ip; 496 struct ip6_hdr *ip6; 497 u_int8_t otos, itos; 498 struct ifnet *ifp; 499 int off, proto; 500 static const uint32_t af = AF_INET6; 501 __va_list ap; 502 503 __va_start(ap, m); 504 off = __va_arg(ap, int); 505 proto = __va_arg(ap, int); 506 __va_end(ap); 507 508 if (proto != IPPROTO_IPV6) { 509 m_freem(m); 510 return; 511 } 512 513 ip = mtod(m, struct ip *); 514 515 sc = (struct stf_softc *)encap_getarg(m); 516 517 if (sc == NULL || (sc->sc_if.if_flags & IFF_UP) == 0) { 518 m_freem(m); 519 return; 520 } 521 522 ifp = &sc->sc_if; 523 524 /* 525 * perform sanity check against outer src/dst. 526 * for source, perform ingress filter as well. 527 */ 528 if (stf_checkaddr4(sc, &ip->ip_dst, NULL) < 0 || 529 stf_checkaddr4(sc, &ip->ip_src, m->m_pkthdr.rcvif) < 0) { 530 m_freem(m); 531 return; 532 } 533 534 otos = ip->ip_tos; 535 m_adj(m, off); 536 537 if (m->m_len < sizeof(*ip6)) { 538 m = m_pullup(m, sizeof(*ip6)); 539 if (!m) 540 return; 541 } 542 ip6 = mtod(m, struct ip6_hdr *); 543 544 /* 545 * perform sanity check against inner src/dst. 546 * for source, perform ingress filter as well. 547 */ 548 if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 || 549 stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) { 550 m_freem(m); 551 return; 552 } 553 554 itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 555 if ((ifp->if_flags & IFF_LINK1) != 0) 556 ip_ecn_egress(ECN_ALLOWED, &otos, &itos); 557 else 558 ip_ecn_egress(ECN_NOCARE, &otos, &itos); 559 ip6->ip6_flow &= ~htonl(0xff << 20); 560 ip6->ip6_flow |= htonl((u_int32_t)itos << 20); 561 562 m->m_pkthdr.rcvif = ifp; 563 564 if (ifp->if_bpf) 565 bpf_ptap(ifp->if_bpf, m, &af, sizeof(af)); 566 567 /* 568 * Put the packet to the network layer input queue according to the 569 * specified address family. 570 * See net/if_gif.c for possible issues with packet processing 571 * reorder due to extra queueing. 572 */ 573 ifp->if_ipackets++; 574 ifp->if_ibytes += m->m_pkthdr.len; 575 netisr_dispatch(NETISR_IPV6, m); 576 } 577 578 /* ARGSUSED */ 579 static void 580 stf_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) 581 { 582 583 if (rt) 584 rt->rt_rmx.rmx_mtu = IPV6_MMTU; 585 } 586 587 static int 588 stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 589 { 590 struct ifaddr *ifa; 591 struct ifreq *ifr; 592 struct sockaddr_in6 *sin6; 593 int error; 594 595 error = 0; 596 switch (cmd) { 597 case SIOCSIFADDR: 598 ifa = (struct ifaddr *)data; 599 if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { 600 error = EAFNOSUPPORT; 601 break; 602 } 603 sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; 604 if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { 605 ifa->ifa_rtrequest = stf_rtrequest; 606 ifp->if_flags |= IFF_UP; 607 } else 608 error = EINVAL; 609 break; 610 611 case SIOCADDMULTI: 612 case SIOCDELMULTI: 613 ifr = (struct ifreq *)data; 614 if (ifr && ifr->ifr_addr.sa_family == AF_INET6) 615 ; 616 else 617 error = EAFNOSUPPORT; 618 break; 619 620 default: 621 error = EINVAL; 622 break; 623 } 624 625 return error; 626 } 627