1 /* $OpenBSD: if_mpe.c,v 1.94 2019/06/26 08:13:13 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "mpe.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/mbuf.h> 23 #include <sys/socket.h> 24 #include <sys/sockio.h> 25 #include <sys/ioctl.h> 26 27 #include <net/if.h> 28 #include <net/if_dl.h> 29 #include <net/if_var.h> 30 #include <net/if_types.h> 31 #include <net/netisr.h> 32 #include <net/route.h> 33 34 #include <netinet/in.h> 35 #include <netinet/ip.h> 36 37 #ifdef INET6 38 #include <netinet/ip6.h> 39 #endif /* INET6 */ 40 41 #include "bpfilter.h" 42 #if NBPFILTER > 0 43 #include <net/bpf.h> 44 #endif 45 46 #include <netmpls/mpls.h> 47 48 49 50 #ifdef MPLS_DEBUG 51 #define DPRINTF(x) do { if (mpedebug) printf x ; } while (0) 52 #else 53 #define DPRINTF(x) 54 #endif 55 56 struct mpe_softc { 57 struct ifnet sc_if; /* the interface */ 58 int sc_txhprio; 59 int sc_rxhprio; 60 unsigned int sc_rdomain; 61 struct ifaddr sc_ifa; 62 struct sockaddr_mpls sc_smpls; 63 64 int sc_dead; 65 }; 66 67 #define MPE_HDRLEN sizeof(struct shim_hdr) 68 #define MPE_MTU 1500 69 #define MPE_MTU_MIN 256 70 #define MPE_MTU_MAX 8192 71 72 void mpeattach(int); 73 int mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *, 74 struct rtentry *); 75 int mpe_ioctl(struct ifnet *, u_long, caddr_t); 76 void mpe_start(struct ifnet *); 77 int mpe_clone_create(struct if_clone *, int); 78 int mpe_clone_destroy(struct ifnet *); 79 void mpe_input(struct ifnet *, struct mbuf *); 80 81 struct if_clone mpe_cloner = 82 IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy); 83 84 extern int mpls_mapttl_ip; 85 #ifdef INET6 86 extern int mpls_mapttl_ip6; 87 #endif 88 89 void 90 mpeattach(int nmpe) 91 { 92 if_clone_attach(&mpe_cloner); 93 } 94 95 int 96 mpe_clone_create(struct if_clone *ifc, int unit) 97 { 98 struct mpe_softc *sc; 99 struct ifnet *ifp; 100 101 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 102 if (sc == NULL) 103 return (ENOMEM); 104 105 ifp = &sc->sc_if; 106 snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit); 107 ifp->if_flags = IFF_POINTOPOINT; 108 ifp->if_xflags = IFXF_CLONED; 109 ifp->if_softc = sc; 110 ifp->if_mtu = MPE_MTU; 111 ifp->if_ioctl = mpe_ioctl; 112 ifp->if_output = mpe_output; 113 ifp->if_start = mpe_start; 114 ifp->if_type = IFT_MPLS; 115 ifp->if_hdrlen = MPE_HDRLEN; 116 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 117 118 sc->sc_dead = 0; 119 120 if_attach(ifp); 121 if_alloc_sadl(ifp); 122 #if NBPFILTER > 0 123 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); 124 #endif 125 126 sc->sc_txhprio = 0; 127 sc->sc_rxhprio = IF_HDRPRIO_PACKET; 128 sc->sc_rdomain = 0; 129 sc->sc_ifa.ifa_ifp = ifp; 130 sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); 131 sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); 132 sc->sc_smpls.smpls_family = AF_MPLS; 133 134 return (0); 135 } 136 137 int 138 mpe_clone_destroy(struct ifnet *ifp) 139 { 140 struct mpe_softc *sc = ifp->if_softc; 141 142 NET_LOCK(); 143 CLR(ifp->if_flags, IFF_RUNNING); 144 sc->sc_dead = 1; 145 146 if (sc->sc_smpls.smpls_label) { 147 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 148 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 149 } 150 NET_UNLOCK(); 151 152 ifq_barrier(&ifp->if_snd); 153 154 if_detach(ifp); 155 free(sc, M_DEVBUF, sizeof *sc); 156 return (0); 157 } 158 159 /* 160 * Start output on the mpe interface. 161 */ 162 void 163 mpe_start(struct ifnet *ifp) 164 { 165 struct mpe_softc *sc = ifp->if_softc; 166 struct mbuf *m; 167 struct sockaddr *sa; 168 struct sockaddr smpls = { .sa_family = AF_MPLS }; 169 struct rtentry *rt; 170 struct ifnet *ifp0; 171 172 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 173 sa = mtod(m, struct sockaddr *); 174 rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain); 175 if (!rtisvalid(rt)) { 176 m_freem(m); 177 rtfree(rt); 178 continue; 179 } 180 181 ifp0 = if_get(rt->rt_ifidx); 182 if (ifp0 == NULL) { 183 m_freem(m); 184 rtfree(rt); 185 continue; 186 } 187 188 m_adj(m, sa->sa_len); 189 190 #if NBPFILTER > 0 191 if (ifp->if_bpf) { 192 /* remove MPLS label before passing packet to bpf */ 193 m->m_data += sizeof(struct shim_hdr); 194 m->m_len -= sizeof(struct shim_hdr); 195 m->m_pkthdr.len -= sizeof(struct shim_hdr); 196 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, 197 m, BPF_DIRECTION_OUT); 198 m->m_data -= sizeof(struct shim_hdr); 199 m->m_len += sizeof(struct shim_hdr); 200 m->m_pkthdr.len += sizeof(struct shim_hdr); 201 } 202 #endif 203 204 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 205 CLR(m->m_flags, M_BCAST|M_MCAST); 206 207 mpls_output(ifp0, m, &smpls, rt); 208 if_put(ifp0); 209 rtfree(rt); 210 } 211 } 212 213 int 214 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 215 struct rtentry *rt) 216 { 217 struct mpe_softc *sc; 218 struct rt_mpls *rtmpls; 219 struct shim_hdr shim; 220 int error; 221 int txprio; 222 uint8_t ttl = mpls_defttl; 223 uint8_t tos, prio; 224 size_t ttloff; 225 socklen_t slen; 226 227 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) { 228 m_freem(m); 229 return (ENETUNREACH); 230 } 231 232 if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) { 233 mpe_input(ifp, m); 234 return (0); 235 } 236 237 #ifdef DIAGNOSTIC 238 if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) { 239 printf("%s: trying to send packet on wrong domain. " 240 "if %d vs. mbuf %d\n", ifp->if_xname, 241 ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid)); 242 } 243 #endif 244 245 rtmpls = (struct rt_mpls *)rt->rt_llinfo; 246 if (rtmpls->mpls_operation != MPLS_OP_PUSH) { 247 m_freem(m); 248 return (ENETUNREACH); 249 } 250 251 error = 0; 252 switch (dst->sa_family) { 253 case AF_INET: { 254 struct ip *ip = mtod(m, struct ip *); 255 tos = ip->ip_tos; 256 ttloff = offsetof(struct ip, ip_ttl); 257 slen = sizeof(struct sockaddr_in); 258 break; 259 } 260 #ifdef INET6 261 case AF_INET6: { 262 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 263 uint32_t flow = bemtoh32(&ip6->ip6_flow); 264 tos = flow >> 20; 265 ttloff = offsetof(struct ip6_hdr, ip6_hlim); 266 slen = sizeof(struct sockaddr_in6); 267 break; 268 } 269 #endif 270 default: 271 m_freem(m); 272 return (EPFNOSUPPORT); 273 } 274 275 if (mpls_mapttl_ip) { 276 /* assumes the ip header is already contig */ 277 ttl = *(mtod(m, uint8_t *) + ttloff); 278 } 279 280 sc = ifp->if_softc; 281 txprio = sc->sc_txhprio; 282 283 switch (txprio) { 284 case IF_HDRPRIO_PACKET: 285 prio = m->m_pkthdr.pf.prio; 286 break; 287 case IF_HDRPRIO_PAYLOAD: 288 prio = IFQ_TOS2PRIO(tos); 289 break; 290 default: 291 prio = txprio; 292 break; 293 } 294 295 shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) | 296 MPLS_BOS_MASK | htonl(ttl); 297 298 m = m_prepend(m, sizeof(shim), M_NOWAIT); 299 if (m == NULL) { 300 error = ENOMEM; 301 goto out; 302 } 303 *mtod(m, struct shim_hdr *) = shim; 304 305 m = m_prepend(m, slen, M_WAITOK); 306 if (m == NULL) { 307 error = ENOMEM; 308 goto out; 309 } 310 memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen); 311 mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */ 312 313 m->m_pkthdr.ph_family = dst->sa_family; 314 315 error = if_enqueue(ifp, m); 316 out: 317 if (error) 318 ifp->if_oerrors++; 319 return (error); 320 } 321 322 int 323 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain) 324 { 325 int error; 326 327 if (sc->sc_dead) 328 return (ENXIO); 329 330 if (sc->sc_smpls.smpls_label) { 331 /* remove old MPLS route */ 332 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 333 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 334 } 335 336 /* add new MPLS route */ 337 sc->sc_smpls.smpls_label = label; 338 sc->sc_rdomain = rdomain; 339 340 error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 341 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 342 if (error) 343 sc->sc_smpls.smpls_label = 0; 344 345 return (error); 346 } 347 348 int 349 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 350 { 351 struct mpe_softc *sc = ifp->if_softc; 352 struct ifreq *ifr; 353 struct shim_hdr shim; 354 int error = 0; 355 356 ifr = (struct ifreq *)data; 357 switch (cmd) { 358 case SIOCSIFADDR: 359 break; 360 case SIOCSIFFLAGS: 361 if (ifp->if_flags & IFF_UP) 362 ifp->if_flags |= IFF_RUNNING; 363 else 364 ifp->if_flags &= ~IFF_RUNNING; 365 break; 366 case SIOCSIFMTU: 367 if (ifr->ifr_mtu < MPE_MTU_MIN || 368 ifr->ifr_mtu > MPE_MTU_MAX) 369 error = EINVAL; 370 else 371 ifp->if_mtu = ifr->ifr_mtu; 372 break; 373 case SIOCGETLABEL: 374 shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label); 375 if (shim.shim_label == 0) { 376 error = EADDRNOTAVAIL; 377 break; 378 } 379 error = copyout(&shim, ifr->ifr_data, sizeof(shim)); 380 break; 381 case SIOCSETLABEL: 382 error = copyin(ifr->ifr_data, &shim, sizeof(shim)); 383 if (error != 0) 384 break; 385 if (shim.shim_label > MPLS_LABEL_MAX || 386 shim.shim_label <= MPLS_LABEL_RESERVED_MAX) { 387 error = EINVAL; 388 break; 389 } 390 shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label); 391 if (sc->sc_smpls.smpls_label != shim.shim_label) { 392 error = mpe_set_label(sc, shim.shim_label, 393 sc->sc_rdomain); 394 } 395 break; 396 case SIOCDELLABEL: 397 if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) { 398 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 399 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 400 401 } 402 shim.shim_label = MPLS_LABEL2SHIM(0); 403 break; 404 405 case SIOCSLIFPHYRTABLE: 406 if (ifr->ifr_rdomainid < 0 || 407 ifr->ifr_rdomainid > RT_TABLEID_MAX || 408 !rtable_exists(ifr->ifr_rdomainid) || 409 ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) { 410 error = EINVAL; 411 break; 412 } 413 if (sc->sc_rdomain != ifr->ifr_rdomainid) { 414 error = mpe_set_label(sc, sc->sc_smpls.smpls_label, 415 ifr->ifr_rdomainid); 416 } 417 break; 418 case SIOCGLIFPHYRTABLE: 419 ifr->ifr_rdomainid = sc->sc_rdomain; 420 break; 421 422 case SIOCSTXHPRIO: 423 error = if_txhprio_l3_check(ifr->ifr_hdrprio); 424 if (error != 0) 425 break; 426 427 sc->sc_txhprio = ifr->ifr_hdrprio; 428 break; 429 case SIOCGTXHPRIO: 430 ifr->ifr_hdrprio = sc->sc_txhprio; 431 break; 432 433 case SIOCSRXHPRIO: 434 error = if_rxhprio_l3_check(ifr->ifr_hdrprio); 435 if (error != 0) 436 break; 437 438 sc->sc_rxhprio = ifr->ifr_hdrprio; 439 break; 440 case SIOCGRXHPRIO: 441 ifr->ifr_hdrprio = sc->sc_rxhprio; 442 break; 443 444 default: 445 return (ENOTTY); 446 } 447 448 return (error); 449 } 450 451 void 452 mpe_input(struct ifnet *ifp, struct mbuf *m) 453 { 454 struct mpe_softc *sc = ifp->if_softc; 455 struct shim_hdr *shim; 456 struct mbuf *n; 457 uint8_t ttl, tos; 458 uint32_t exp; 459 void (*input)(struct ifnet *, struct mbuf *); 460 int rxprio = sc->sc_rxhprio; 461 462 shim = mtod(m, struct shim_hdr *); 463 exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET; 464 if (!MPLS_BOS_ISSET(shim->shim_label)) 465 goto drop; 466 467 ttl = ntohl(shim->shim_label & MPLS_TTL_MASK); 468 m_adj(m, sizeof(*shim)); 469 470 n = m; 471 while (n->m_len == 0) { 472 n = n->m_next; 473 if (n == NULL) 474 goto drop; 475 } 476 477 switch (*mtod(n, uint8_t *) >> 4) { 478 case 4: { 479 struct ip *ip; 480 if (m->m_len < sizeof(*ip)) { 481 m = m_pullup(m, sizeof(*ip)); 482 if (m == NULL) 483 return; 484 } 485 ip = mtod(m, struct ip *); 486 tos = ip->ip_tos; 487 488 if (mpls_mapttl_ip) { 489 m = mpls_ip_adjttl(m, ttl); 490 if (m == NULL) 491 return; 492 } 493 input = ipv4_input; 494 m->m_pkthdr.ph_family = AF_INET; 495 break; 496 } 497 #ifdef INET6 498 case 6: { 499 struct ip6_hdr *ip6; 500 uint32_t flow; 501 if (m->m_len < sizeof(*ip6)) { 502 m = m_pullup(m, sizeof(*ip6)); 503 if (m == NULL) 504 return; 505 } 506 ip6 = mtod(m, struct ip6_hdr *); 507 flow = bemtoh32(&ip6->ip6_flow); 508 tos = flow >> 20; 509 510 if (mpls_mapttl_ip6) { 511 m = mpls_ip6_adjttl(m, ttl); 512 if (m == NULL) 513 return; 514 } 515 input = ipv6_input; 516 m->m_pkthdr.ph_family = AF_INET6; 517 break; 518 } 519 #endif /* INET6 */ 520 default: 521 goto drop; 522 } 523 524 switch (rxprio) { 525 case IF_HDRPRIO_PACKET: 526 /* nop */ 527 break; 528 case IF_HDRPRIO_OUTER: 529 m->m_pkthdr.pf.prio = exp; 530 break; 531 case IF_HDRPRIO_PAYLOAD: 532 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 533 break; 534 default: 535 m->m_pkthdr.pf.prio = rxprio; 536 break; 537 } 538 539 /* new receive if and move into correct rtable */ 540 m->m_pkthdr.ph_ifidx = ifp->if_index; 541 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 542 543 /* packet has not been processed by PF yet. */ 544 KASSERT(m->m_pkthdr.pf.statekey == NULL); 545 546 #if NBPFILTER > 0 547 if (ifp->if_bpf) { 548 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, 549 m, BPF_DIRECTION_IN); 550 } 551 #endif 552 553 (*input)(ifp, m); 554 return; 555 drop: 556 m_freem(m); 557 } 558