1 /* $NetBSD: if_mpls.c,v 1.22 2016/04/28 00:16:56 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Mihai Chelaru <kefren@NetBSD.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.22 2016/04/28 00:16:56 ozaki-r Exp $"); 34 35 #ifdef _KERNEL_OPT 36 #include "opt_inet.h" 37 #include "opt_mpls.h" 38 #endif 39 40 #include <sys/param.h> 41 42 #include <sys/errno.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/sysctl.h> 46 47 #include <net/bpf.h> 48 #include <net/if.h> 49 #include <net/if_types.h> 50 #include <net/netisr.h> 51 #include <net/route.h> 52 53 #ifdef INET 54 #include <netinet/in.h> 55 #include <netinet/in_systm.h> 56 #include <netinet/in_var.h> 57 #include <netinet/ip.h> 58 #include <netinet/ip_var.h> 59 #endif 60 61 #ifdef INET6 62 #include <netinet/ip6.h> 63 #include <netinet6/in6_var.h> 64 #include <netinet6/ip6_var.h> 65 #endif 66 67 #include <netmpls/mpls.h> 68 #include <netmpls/mpls_var.h> 69 70 #include "if_mpls.h" 71 72 #include "ioconf.h" 73 74 #define TRIM_LABEL do { \ 75 m_adj(m, sizeof(union mpls_shim)); \ 76 if (m->m_len < sizeof(union mpls_shim) && \ 77 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) \ 78 goto done; \ 79 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); \ 80 } while (/* CONSTCOND */ 0) 81 82 83 static int mpls_clone_create(struct if_clone *, int); 84 static int mpls_clone_destroy(struct ifnet *); 85 86 static struct if_clone mpls_if_cloner = 87 IF_CLONE_INITIALIZER("mpls", mpls_clone_create, mpls_clone_destroy); 88 89 90 static void mpls_input(struct ifnet *, struct mbuf *); 91 static int mpls_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 92 const struct rtentry *); 93 static int mpls_ioctl(struct ifnet *, u_long, void *); 94 static int mpls_send_frame(struct mbuf *, struct ifnet *, 95 const struct rtentry *); 96 static int mpls_lse(struct mbuf *); 97 98 #ifdef INET 99 static int mpls_unlabel_inet(struct mbuf *); 100 static struct mbuf *mpls_label_inet(struct mbuf *, union mpls_shim *, uint); 101 #endif 102 103 #ifdef INET6 104 static int mpls_unlabel_inet6(struct mbuf *); 105 static struct mbuf *mpls_label_inet6(struct mbuf *, union mpls_shim *, uint); 106 #endif 107 108 static struct mbuf *mpls_prepend_shim(struct mbuf *, union mpls_shim *); 109 110 extern int mpls_defttl, mpls_mapttl_inet, mpls_mapttl_inet6, mpls_icmp_respond, 111 mpls_forwarding, mpls_frame_accept, mpls_mapprec_inet, mpls_mapclass_inet6, 112 mpls_rfc4182; 113 114 /* ARGSUSED */ 115 void 116 ifmplsattach(int count) 117 { 118 if_clone_attach(&mpls_if_cloner); 119 } 120 121 static int 122 mpls_clone_create(struct if_clone *ifc, int unit) 123 { 124 struct mpls_softc *sc; 125 126 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 127 128 if_initname(&sc->sc_if, ifc->ifc_name, unit); 129 sc->sc_if.if_softc = sc; 130 sc->sc_if.if_type = IFT_MPLS; 131 sc->sc_if.if_addrlen = 0; 132 sc->sc_if.if_hdrlen = sizeof(union mpls_shim); 133 sc->sc_if.if_dlt = DLT_NULL; 134 sc->sc_if.if_mtu = 1500; 135 sc->sc_if.if_flags = 0; 136 sc->sc_if._if_input = mpls_input; 137 sc->sc_if.if_output = mpls_output; 138 sc->sc_if.if_ioctl = mpls_ioctl; 139 140 if_attach(&sc->sc_if); 141 if_alloc_sadl(&sc->sc_if); 142 bpf_attach(&sc->sc_if, DLT_NULL, sizeof(uint32_t)); 143 return 0; 144 } 145 146 static int 147 mpls_clone_destroy(struct ifnet *ifp) 148 { 149 int s; 150 151 bpf_detach(ifp); 152 153 s = splnet(); 154 if_detach(ifp); 155 splx(s); 156 157 free(ifp->if_softc, M_DEVBUF); 158 return 0; 159 } 160 161 static void 162 mpls_input(struct ifnet *ifp, struct mbuf *m) 163 { 164 #if 0 165 /* 166 * TODO - kefren 167 * I'd love to unshim the packet, guess family 168 * and pass it to bpf 169 */ 170 bpf_mtap_af(ifp, AF_MPLS, m); 171 #endif 172 173 mpls_lse(m); 174 } 175 176 void 177 mplsintr(void) 178 { 179 struct mbuf *m; 180 int s; 181 182 while (!IF_IS_EMPTY(&mplsintrq)) { 183 s = splnet(); 184 IF_DEQUEUE(&mplsintrq, m); 185 splx(s); 186 187 if (!m) 188 return; 189 190 if (((m->m_flags & M_PKTHDR) == 0) || 191 (m->m_pkthdr.rcvif == 0)) 192 panic("mplsintr(): no pkthdr or rcvif"); 193 194 #ifdef MBUFTRACE 195 m_claimm(m, &mpls_owner); 196 #endif 197 mpls_input(m->m_pkthdr.rcvif, m); 198 } 199 } 200 201 /* 202 * prepend shim and deliver 203 */ 204 static int 205 mpls_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 206 const struct rtentry *rt) 207 { 208 union mpls_shim mh, *pms; 209 struct rtentry *rt1; 210 int err; 211 uint psize = sizeof(struct sockaddr_mpls); 212 213 KASSERT(KERNEL_LOCKED_P()); 214 215 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { 216 m_freem(m); 217 return ENETDOWN; 218 } 219 220 if (rt_gettag(rt) == NULL || rt_gettag(rt)->sa_family != AF_MPLS) { 221 m_freem(m); 222 return EINVAL; 223 } 224 225 bpf_mtap_af(ifp, dst->sa_family, m); 226 227 memset(&mh, 0, sizeof(mh)); 228 mh.s_addr = MPLS_GETSADDR(rt); 229 mh.shim.bos = 1; 230 mh.shim.exp = 0; 231 mh.shim.ttl = mpls_defttl; 232 233 pms = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 234 235 while (psize <= rt_gettag(rt)->sa_len - sizeof(mh)) { 236 pms++; 237 if (mh.shim.label != MPLS_LABEL_IMPLNULL && 238 ((m = mpls_prepend_shim(m, &mh)) == NULL)) 239 return ENOBUFS; 240 memset(&mh, 0, sizeof(mh)); 241 mh.s_addr = ntohl(pms->s_addr); 242 mh.shim.bos = mh.shim.exp = 0; 243 mh.shim.ttl = mpls_defttl; 244 psize += sizeof(mh); 245 } 246 247 switch(dst->sa_family) { 248 #ifdef INET 249 case AF_INET: 250 m = mpls_label_inet(m, &mh, psize - sizeof(struct sockaddr_mpls)); 251 break; 252 #endif 253 #ifdef INET6 254 case AF_INET6: 255 m = mpls_label_inet6(m, &mh, psize - sizeof(struct sockaddr_mpls)); 256 break; 257 #endif 258 default: 259 m = mpls_prepend_shim(m, &mh); 260 break; 261 } 262 263 if (m == NULL) { 264 IF_DROP(&ifp->if_snd); 265 ifp->if_oerrors++; 266 return ENOBUFS; 267 } 268 269 ifp->if_opackets++; 270 ifp->if_obytes += m->m_pkthdr.len; 271 272 if ((rt1=rtalloc1(rt->rt_gateway, 1)) == NULL) { 273 m_freem(m); 274 return EHOSTUNREACH; 275 } 276 277 err = mpls_send_frame(m, rt1->rt_ifp, rt); 278 rtfree(rt1); 279 return err; 280 } 281 282 static int 283 mpls_ioctl(struct ifnet *ifp, u_long cmd, void *data) 284 { 285 int error = 0, s = splnet(); 286 struct ifreq *ifr = data; 287 288 switch(cmd) { 289 case SIOCINITIFADDR: 290 ifp->if_flags |= IFF_UP | IFF_RUNNING; 291 break; 292 case SIOCSIFMTU: 293 if (ifr != NULL && ifr->ifr_mtu < 576) { 294 error = EINVAL; 295 break; 296 } 297 /* FALLTHROUGH */ 298 case SIOCGIFMTU: 299 if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET) 300 error = 0; 301 break; 302 case SIOCSIFFLAGS: 303 if ((error = ifioctl_common(ifp, cmd, data)) != 0) 304 break; 305 if (ifp->if_flags & IFF_UP) 306 ifp->if_flags |= IFF_RUNNING; 307 break; 308 default: 309 error = ifioctl_common(ifp, cmd, data); 310 break; 311 } 312 splx(s); 313 return error; 314 } 315 316 /* 317 * MPLS Label Switch Engine 318 */ 319 static int 320 mpls_lse(struct mbuf *m) 321 { 322 struct sockaddr_mpls dst; 323 union mpls_shim tshim, *htag; 324 struct rtentry *rt = NULL; 325 int error = ENOBUFS; 326 uint psize = sizeof(struct sockaddr_mpls); 327 bool push_back_alert = false; 328 329 if (m->m_len < sizeof(union mpls_shim) && 330 (m = m_pullup(m, sizeof(union mpls_shim))) == NULL) 331 goto done; 332 333 dst.smpls_len = sizeof(struct sockaddr_mpls); 334 dst.smpls_family = AF_MPLS; 335 dst.smpls_addr.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 336 337 /* Check if we're accepting MPLS Frames */ 338 error = EINVAL; 339 if (!mpls_frame_accept) 340 goto done; 341 342 /* TTL decrement */ 343 if ((m = mpls_ttl_dec(m)) == NULL) 344 goto done; 345 346 /* RFC 4182 */ 347 if (mpls_rfc4182 != 0) 348 while((dst.smpls_addr.shim.label == MPLS_LABEL_IPV4NULL || 349 dst.smpls_addr.shim.label == MPLS_LABEL_IPV6NULL) && 350 __predict_false(dst.smpls_addr.shim.bos == 0)) 351 TRIM_LABEL; 352 353 /* RFC 3032 Section 2.1 Page 4 */ 354 if (__predict_false(dst.smpls_addr.shim.label == MPLS_LABEL_RTALERT) && 355 dst.smpls_addr.shim.bos == 0) { 356 TRIM_LABEL; 357 push_back_alert = true; 358 } 359 360 if (dst.smpls_addr.shim.label <= MPLS_LABEL_RESMAX) { 361 /* Don't swap reserved labels */ 362 switch (dst.smpls_addr.shim.label) { 363 #ifdef INET 364 case MPLS_LABEL_IPV4NULL: 365 /* Pop shim and push mbuf to IP stack */ 366 if (dst.smpls_addr.shim.bos) 367 error = mpls_unlabel_inet(m); 368 break; 369 #endif 370 #ifdef INET6 371 case MPLS_LABEL_IPV6NULL: 372 /* Pop shim and push mbuf to IPv6 stack */ 373 if (dst.smpls_addr.shim.bos) 374 error = mpls_unlabel_inet6(m); 375 break; 376 #endif 377 case MPLS_LABEL_RTALERT: /* Yeah, I'm all alerted */ 378 case MPLS_LABEL_IMPLNULL: /* This is logical only */ 379 default: /* Rest are not allowed */ 380 break; 381 } 382 goto done; 383 } 384 385 /* Check if we should do MPLS forwarding */ 386 error = EHOSTUNREACH; 387 if (!mpls_forwarding) 388 goto done; 389 390 /* Get a route to dst */ 391 dst.smpls_addr.shim.ttl = 392 dst.smpls_addr.shim.bos = 393 dst.smpls_addr.shim.exp = 0; 394 dst.smpls_addr.s_addr = htonl(dst.smpls_addr.s_addr); 395 if ((rt = rtalloc1((const struct sockaddr*)&dst, 1)) == NULL) 396 goto done; 397 398 /* MPLS packet with no MPLS tagged route ? */ 399 if ((rt->rt_flags & RTF_GATEWAY) == 0 || 400 rt_gettag(rt) == NULL || 401 rt_gettag(rt)->sa_family != AF_MPLS) 402 goto done; 403 404 tshim.s_addr = MPLS_GETSADDR(rt); 405 406 /* Swap labels */ 407 if ((m->m_len < sizeof(union mpls_shim)) && 408 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) { 409 error = ENOBUFS; 410 goto done; 411 } 412 413 /* Replace only the label */ 414 htag = mtod(m, union mpls_shim *); 415 htag->s_addr = ntohl(htag->s_addr); 416 htag->shim.label = tshim.shim.label; 417 htag->s_addr = htonl(htag->s_addr); 418 419 /* check if there is anything more to prepend */ 420 htag = &((struct sockaddr_mpls*)rt_gettag(rt))->smpls_addr; 421 while (psize <= rt_gettag(rt)->sa_len - sizeof(tshim)) { 422 htag++; 423 memset(&tshim, 0, sizeof(tshim)); 424 tshim.s_addr = ntohl(htag->s_addr); 425 tshim.shim.bos = tshim.shim.exp = 0; 426 tshim.shim.ttl = mpls_defttl; 427 if (tshim.shim.label != MPLS_LABEL_IMPLNULL && 428 ((m = mpls_prepend_shim(m, &tshim)) == NULL)) 429 return ENOBUFS; 430 psize += sizeof(tshim); 431 } 432 433 if (__predict_false(push_back_alert == true)) { 434 /* re-add the router alert label */ 435 memset(&tshim, 0, sizeof(tshim)); 436 tshim.s_addr = MPLS_LABEL_RTALERT; 437 tshim.shim.bos = tshim.shim.exp = 0; 438 tshim.shim.ttl = mpls_defttl; 439 if ((m = mpls_prepend_shim(m, &tshim)) == NULL) 440 return ENOBUFS; 441 } 442 443 if ((rt->rt_flags & RTF_GATEWAY) == 0) { 444 error = EHOSTUNREACH; 445 goto done; 446 } 447 448 rt->rt_use++; 449 error = mpls_send_frame(m, rt->rt_ifp, rt); 450 451 done: 452 if (error != 0 && m != NULL) 453 m_freem(m); 454 if (rt != NULL) 455 rtfree(rt); 456 457 return error; 458 } 459 460 static int 461 mpls_send_frame(struct mbuf *m, struct ifnet *ifp, const struct rtentry *rt) 462 { 463 union mpls_shim msh; 464 int ret; 465 466 msh.s_addr = MPLS_GETSADDR(rt); 467 if (msh.shim.label == MPLS_LABEL_IMPLNULL || 468 (m->m_flags & (M_MCAST | M_BCAST))) { 469 m_adj(m, sizeof(union mpls_shim)); 470 m->m_pkthdr.csum_flags = 0; 471 } 472 473 switch(ifp->if_type) { 474 /* only these are supported for now */ 475 case IFT_ETHER: 476 case IFT_TUNNEL: 477 case IFT_LOOP: 478 #ifdef INET 479 ret = ip_if_output(ifp, m, rt->rt_gateway, rt); 480 #else 481 KERNEL_LOCK(1, NULL); 482 ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); 483 KERNEL_UNLOCK_ONE(NULL); 484 #endif 485 return ret; 486 break; 487 default: 488 return ENETUNREACH; 489 } 490 return 0; 491 } 492 493 494 495 #ifdef INET 496 static int 497 mpls_unlabel_inet(struct mbuf *m) 498 { 499 struct ip *iph; 500 union mpls_shim *ms; 501 int iphlen; 502 503 if (mpls_mapttl_inet || mpls_mapprec_inet) { 504 505 /* get shim info */ 506 ms = mtod(m, union mpls_shim *); 507 ms->s_addr = ntohl(ms->s_addr); 508 509 /* and get rid of it */ 510 m_adj(m, sizeof(union mpls_shim)); 511 512 /* get ip header */ 513 if (m->m_len < sizeof (struct ip) && 514 (m = m_pullup(m, sizeof(struct ip))) == NULL) 515 return ENOBUFS; 516 iph = mtod(m, struct ip *); 517 iphlen = iph->ip_hl << 2; 518 519 /* get it all */ 520 if (m->m_len < iphlen) { 521 if ((m = m_pullup(m, iphlen)) == NULL) 522 return ENOBUFS; 523 iph = mtod(m, struct ip *); 524 } 525 526 /* check ipsum */ 527 if (in_cksum(m, iphlen) != 0) { 528 m_freem(m); 529 return EINVAL; 530 } 531 532 /* set IP ttl from MPLS ttl */ 533 if (mpls_mapttl_inet) 534 iph->ip_ttl = ms->shim.ttl; 535 536 /* set IP Precedence from MPLS Exp */ 537 if (mpls_mapprec_inet) { 538 iph->ip_tos = (iph->ip_tos << 3) >> 3; 539 iph->ip_tos |= ms->shim.exp << 5; 540 } 541 542 /* reset ipsum because we modified TTL and TOS */ 543 iph->ip_sum = 0; 544 iph->ip_sum = in_cksum(m, iphlen); 545 } else 546 m_adj(m, sizeof(union mpls_shim)); 547 548 /* Put it on IP queue */ 549 if (__predict_false(!pktq_enqueue(ip_pktq, m, 0))) { 550 m_freem(m); 551 return ENOBUFS; 552 } 553 return 0; 554 } 555 556 /* 557 * Prepend MPLS label 558 */ 559 static struct mbuf * 560 mpls_label_inet(struct mbuf *m, union mpls_shim *ms, uint offset) 561 { 562 struct ip iphdr; 563 564 if (mpls_mapttl_inet || mpls_mapprec_inet) { 565 if ((m->m_len < sizeof(struct ip)) && 566 (m = m_pullup(m, offset + sizeof(struct ip))) == 0) 567 return NULL; /* XXX */ 568 m_copydata(m, offset, sizeof(struct ip), &iphdr); 569 570 /* Map TTL */ 571 if (mpls_mapttl_inet) 572 ms->shim.ttl = iphdr.ip_ttl; 573 574 /* Copy IP precedence to EXP */ 575 if (mpls_mapprec_inet) 576 ms->shim.exp = ((u_int8_t)iphdr.ip_tos) >> 5; 577 } 578 579 if ((m = mpls_prepend_shim(m, ms)) == NULL) 580 return NULL; 581 582 return m; 583 } 584 585 #endif /* INET */ 586 587 #ifdef INET6 588 589 static int 590 mpls_unlabel_inet6(struct mbuf *m) 591 { 592 struct ip6_hdr *ip6hdr; 593 union mpls_shim ms; 594 595 /* TODO: mapclass */ 596 if (mpls_mapttl_inet6) { 597 ms.s_addr = ntohl(mtod(m, union mpls_shim *)->s_addr); 598 m_adj(m, sizeof(union mpls_shim)); 599 600 if (m->m_len < sizeof (struct ip6_hdr) && 601 (m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) 602 return ENOBUFS; 603 ip6hdr = mtod(m, struct ip6_hdr *); 604 605 /* Because we just decremented this in mpls_lse */ 606 ip6hdr->ip6_hlim = ms.shim.ttl + 1; 607 } else 608 m_adj(m, sizeof(union mpls_shim)); 609 610 /* Put it back on IPv6 queue. */ 611 if (__predict_false(!pktq_enqueue(ip6_pktq, m, 0))) { 612 m_freem(m); 613 return ENOBUFS; 614 } 615 return 0; 616 } 617 618 static struct mbuf * 619 mpls_label_inet6(struct mbuf *m, union mpls_shim *ms, uint offset) 620 { 621 struct ip6_hdr ip6h; 622 623 if (mpls_mapttl_inet6 || mpls_mapclass_inet6) { 624 if (m->m_len < sizeof(struct ip6_hdr) && 625 (m = m_pullup(m, offset + sizeof(struct ip6_hdr))) == 0) 626 return NULL; 627 m_copydata(m, offset, sizeof(struct ip6_hdr), &ip6h); 628 629 if (mpls_mapttl_inet6) 630 ms->shim.ttl = ip6h.ip6_hlim; 631 632 if (mpls_mapclass_inet6) 633 ms->shim.exp = ip6h.ip6_vfc << 1 >> 5; 634 } 635 636 if ((m = mpls_prepend_shim(m, ms)) == NULL) 637 return NULL; 638 639 return m; 640 } 641 642 #endif /* INET6 */ 643 644 static struct mbuf * 645 mpls_prepend_shim(struct mbuf *m, union mpls_shim *ms) 646 { 647 union mpls_shim *shim; 648 649 M_PREPEND(m, sizeof(*ms), M_DONTWAIT); 650 if (m == NULL) 651 return NULL; 652 653 if (m->m_len < sizeof(union mpls_shim) && 654 (m = m_pullup(m, sizeof(union mpls_shim))) == 0) 655 return NULL; 656 657 shim = mtod(m, union mpls_shim *); 658 659 memcpy(shim, ms, sizeof(*shim)); 660 shim->s_addr = htonl(shim->s_addr); 661 662 return m; 663 } 664