1 /* $OpenBSD: if_mpe.c,v 1.96 2020/07/10 13:26:41 patrick Exp $ */ 2 3 /* 4 * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/mbuf.h> 22 #include <sys/socket.h> 23 #include <sys/sockio.h> 24 #include <sys/ioctl.h> 25 26 #include <net/if.h> 27 #include <net/if_dl.h> 28 #include <net/if_var.h> 29 #include <net/if_types.h> 30 #include <net/netisr.h> 31 #include <net/route.h> 32 33 #include <netinet/in.h> 34 #include <netinet/ip.h> 35 36 #ifdef INET6 37 #include <netinet/ip6.h> 38 #endif /* INET6 */ 39 40 #include "bpfilter.h" 41 #if NBPFILTER > 0 42 #include <net/bpf.h> 43 #endif 44 45 #include <netmpls/mpls.h> 46 47 48 49 #ifdef MPLS_DEBUG 50 #define DPRINTF(x) do { if (mpedebug) printf x ; } while (0) 51 #else 52 #define DPRINTF(x) 53 #endif 54 55 struct mpe_softc { 56 struct ifnet sc_if; /* the interface */ 57 int sc_txhprio; 58 int sc_rxhprio; 59 unsigned int sc_rdomain; 60 struct ifaddr sc_ifa; 61 struct sockaddr_mpls sc_smpls; 62 63 int sc_dead; 64 }; 65 66 #define MPE_HDRLEN sizeof(struct shim_hdr) 67 #define MPE_MTU 1500 68 #define MPE_MTU_MIN 256 69 #define MPE_MTU_MAX 8192 70 71 void mpeattach(int); 72 int mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *, 73 struct rtentry *); 74 int mpe_ioctl(struct ifnet *, u_long, caddr_t); 75 void mpe_start(struct ifnet *); 76 int mpe_clone_create(struct if_clone *, int); 77 int mpe_clone_destroy(struct ifnet *); 78 void mpe_input(struct ifnet *, struct mbuf *); 79 80 struct if_clone mpe_cloner = 81 IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy); 82 83 extern int mpls_mapttl_ip; 84 #ifdef INET6 85 extern int mpls_mapttl_ip6; 86 #endif 87 88 void 89 mpeattach(int nmpe) 90 { 91 if_clone_attach(&mpe_cloner); 92 } 93 94 int 95 mpe_clone_create(struct if_clone *ifc, int unit) 96 { 97 struct mpe_softc *sc; 98 struct ifnet *ifp; 99 100 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 101 if (sc == NULL) 102 return (ENOMEM); 103 104 ifp = &sc->sc_if; 105 snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit); 106 ifp->if_flags = IFF_POINTOPOINT; 107 ifp->if_xflags = IFXF_CLONED; 108 ifp->if_softc = sc; 109 ifp->if_mtu = MPE_MTU; 110 ifp->if_ioctl = mpe_ioctl; 111 ifp->if_output = mpe_output; 112 ifp->if_start = mpe_start; 113 ifp->if_type = IFT_MPLS; 114 ifp->if_hdrlen = MPE_HDRLEN; 115 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN); 116 117 sc->sc_dead = 0; 118 119 if_attach(ifp); 120 if_alloc_sadl(ifp); 121 #if NBPFILTER > 0 122 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); 123 #endif 124 125 sc->sc_txhprio = 0; 126 sc->sc_rxhprio = IF_HDRPRIO_PACKET; 127 sc->sc_rdomain = 0; 128 sc->sc_ifa.ifa_ifp = ifp; 129 sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); 130 sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); 131 sc->sc_smpls.smpls_family = AF_MPLS; 132 133 return (0); 134 } 135 136 int 137 mpe_clone_destroy(struct ifnet *ifp) 138 { 139 struct mpe_softc *sc = ifp->if_softc; 140 141 NET_LOCK(); 142 CLR(ifp->if_flags, IFF_RUNNING); 143 sc->sc_dead = 1; 144 145 if (sc->sc_smpls.smpls_label) { 146 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 147 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 148 } 149 NET_UNLOCK(); 150 151 ifq_barrier(&ifp->if_snd); 152 153 if_detach(ifp); 154 free(sc, M_DEVBUF, sizeof *sc); 155 return (0); 156 } 157 158 /* 159 * Start output on the mpe interface. 160 */ 161 void 162 mpe_start(struct ifnet *ifp) 163 { 164 struct mpe_softc *sc = ifp->if_softc; 165 struct mbuf *m; 166 struct sockaddr *sa; 167 struct sockaddr smpls = { .sa_family = AF_MPLS }; 168 struct rtentry *rt; 169 struct ifnet *ifp0; 170 171 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 172 sa = mtod(m, struct sockaddr *); 173 rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain); 174 if (!rtisvalid(rt)) { 175 m_freem(m); 176 rtfree(rt); 177 continue; 178 } 179 180 ifp0 = if_get(rt->rt_ifidx); 181 if (ifp0 == NULL) { 182 m_freem(m); 183 rtfree(rt); 184 continue; 185 } 186 187 m_adj(m, sa->sa_len); 188 189 #if NBPFILTER > 0 190 if (ifp->if_bpf) { 191 /* remove MPLS label before passing packet to bpf */ 192 m->m_data += sizeof(struct shim_hdr); 193 m->m_len -= sizeof(struct shim_hdr); 194 m->m_pkthdr.len -= sizeof(struct shim_hdr); 195 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, 196 m, BPF_DIRECTION_OUT); 197 m->m_data -= sizeof(struct shim_hdr); 198 m->m_len += sizeof(struct shim_hdr); 199 m->m_pkthdr.len += sizeof(struct shim_hdr); 200 } 201 #endif 202 203 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 204 CLR(m->m_flags, M_BCAST|M_MCAST); 205 206 mpls_output(ifp0, m, &smpls, rt); 207 if_put(ifp0); 208 rtfree(rt); 209 } 210 } 211 212 int 213 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 214 struct rtentry *rt) 215 { 216 struct mpe_softc *sc; 217 struct rt_mpls *rtmpls; 218 struct shim_hdr shim; 219 int error; 220 int txprio; 221 uint8_t ttl = mpls_defttl; 222 uint8_t tos, prio; 223 size_t ttloff; 224 socklen_t slen; 225 226 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) { 227 m_freem(m); 228 return (ENETUNREACH); 229 } 230 231 if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) { 232 mpe_input(ifp, m); 233 return (0); 234 } 235 236 #ifdef DIAGNOSTIC 237 if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) { 238 printf("%s: trying to send packet on wrong domain. " 239 "if %d vs. mbuf %d\n", ifp->if_xname, 240 ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid)); 241 } 242 #endif 243 244 rtmpls = (struct rt_mpls *)rt->rt_llinfo; 245 if (rtmpls->mpls_operation != MPLS_OP_PUSH) { 246 m_freem(m); 247 return (ENETUNREACH); 248 } 249 250 error = 0; 251 switch (dst->sa_family) { 252 case AF_INET: { 253 struct ip *ip = mtod(m, struct ip *); 254 tos = ip->ip_tos; 255 ttloff = offsetof(struct ip, ip_ttl); 256 slen = sizeof(struct sockaddr_in); 257 break; 258 } 259 #ifdef INET6 260 case AF_INET6: { 261 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 262 uint32_t flow = bemtoh32(&ip6->ip6_flow); 263 tos = flow >> 20; 264 ttloff = offsetof(struct ip6_hdr, ip6_hlim); 265 slen = sizeof(struct sockaddr_in6); 266 break; 267 } 268 #endif 269 default: 270 m_freem(m); 271 return (EPFNOSUPPORT); 272 } 273 274 if (mpls_mapttl_ip) { 275 /* assumes the ip header is already contig */ 276 ttl = *(mtod(m, uint8_t *) + ttloff); 277 } 278 279 sc = ifp->if_softc; 280 txprio = sc->sc_txhprio; 281 282 switch (txprio) { 283 case IF_HDRPRIO_PACKET: 284 prio = m->m_pkthdr.pf.prio; 285 break; 286 case IF_HDRPRIO_PAYLOAD: 287 prio = IFQ_TOS2PRIO(tos); 288 break; 289 default: 290 prio = txprio; 291 break; 292 } 293 294 shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) | 295 MPLS_BOS_MASK | htonl(ttl); 296 297 m = m_prepend(m, sizeof(shim), M_NOWAIT); 298 if (m == NULL) { 299 error = ENOMEM; 300 goto out; 301 } 302 *mtod(m, struct shim_hdr *) = shim; 303 304 m = m_prepend(m, slen, M_WAITOK); 305 if (m == NULL) { 306 error = ENOMEM; 307 goto out; 308 } 309 memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen); 310 mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */ 311 312 m->m_pkthdr.ph_family = dst->sa_family; 313 314 error = if_enqueue(ifp, m); 315 out: 316 if (error) 317 ifp->if_oerrors++; 318 return (error); 319 } 320 321 int 322 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain) 323 { 324 int error; 325 326 if (sc->sc_dead) 327 return (ENXIO); 328 329 if (sc->sc_smpls.smpls_label) { 330 /* remove old MPLS route */ 331 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 332 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 333 } 334 335 /* add new MPLS route */ 336 sc->sc_smpls.smpls_label = label; 337 sc->sc_rdomain = rdomain; 338 339 error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 340 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 341 if (error) 342 sc->sc_smpls.smpls_label = 0; 343 344 return (error); 345 } 346 347 int 348 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 349 { 350 struct mpe_softc *sc = ifp->if_softc; 351 struct ifreq *ifr; 352 struct shim_hdr shim; 353 int error = 0; 354 355 ifr = (struct ifreq *)data; 356 switch (cmd) { 357 case SIOCSIFADDR: 358 break; 359 case SIOCSIFFLAGS: 360 if (ifp->if_flags & IFF_UP) 361 ifp->if_flags |= IFF_RUNNING; 362 else 363 ifp->if_flags &= ~IFF_RUNNING; 364 break; 365 case SIOCSIFMTU: 366 if (ifr->ifr_mtu < MPE_MTU_MIN || 367 ifr->ifr_mtu > MPE_MTU_MAX) 368 error = EINVAL; 369 else 370 ifp->if_mtu = ifr->ifr_mtu; 371 break; 372 case SIOCGETLABEL: 373 shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label); 374 if (shim.shim_label == 0) { 375 error = EADDRNOTAVAIL; 376 break; 377 } 378 error = copyout(&shim, ifr->ifr_data, sizeof(shim)); 379 break; 380 case SIOCSETLABEL: 381 error = copyin(ifr->ifr_data, &shim, sizeof(shim)); 382 if (error != 0) 383 break; 384 if (shim.shim_label > MPLS_LABEL_MAX || 385 shim.shim_label <= MPLS_LABEL_RESERVED_MAX) { 386 error = EINVAL; 387 break; 388 } 389 shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label); 390 if (sc->sc_smpls.smpls_label != shim.shim_label) { 391 error = mpe_set_label(sc, shim.shim_label, 392 sc->sc_rdomain); 393 } 394 break; 395 case SIOCDELLABEL: 396 if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) { 397 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL, 398 smplstosa(&sc->sc_smpls), sc->sc_rdomain); 399 400 } 401 shim.shim_label = MPLS_LABEL2SHIM(0); 402 break; 403 404 case SIOCSLIFPHYRTABLE: 405 if (ifr->ifr_rdomainid < 0 || 406 ifr->ifr_rdomainid > RT_TABLEID_MAX || 407 !rtable_exists(ifr->ifr_rdomainid) || 408 ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) { 409 error = EINVAL; 410 break; 411 } 412 if (sc->sc_rdomain != ifr->ifr_rdomainid) { 413 error = mpe_set_label(sc, sc->sc_smpls.smpls_label, 414 ifr->ifr_rdomainid); 415 } 416 break; 417 case SIOCGLIFPHYRTABLE: 418 ifr->ifr_rdomainid = sc->sc_rdomain; 419 break; 420 421 case SIOCSTXHPRIO: 422 error = if_txhprio_l3_check(ifr->ifr_hdrprio); 423 if (error != 0) 424 break; 425 426 sc->sc_txhprio = ifr->ifr_hdrprio; 427 break; 428 case SIOCGTXHPRIO: 429 ifr->ifr_hdrprio = sc->sc_txhprio; 430 break; 431 432 case SIOCSRXHPRIO: 433 error = if_rxhprio_l3_check(ifr->ifr_hdrprio); 434 if (error != 0) 435 break; 436 437 sc->sc_rxhprio = ifr->ifr_hdrprio; 438 break; 439 case SIOCGRXHPRIO: 440 ifr->ifr_hdrprio = sc->sc_rxhprio; 441 break; 442 443 default: 444 return (ENOTTY); 445 } 446 447 return (error); 448 } 449 450 void 451 mpe_input(struct ifnet *ifp, struct mbuf *m) 452 { 453 struct mpe_softc *sc = ifp->if_softc; 454 struct shim_hdr *shim; 455 struct mbuf *n; 456 uint8_t ttl, tos; 457 uint32_t exp; 458 void (*input)(struct ifnet *, struct mbuf *); 459 int rxprio = sc->sc_rxhprio; 460 461 shim = mtod(m, struct shim_hdr *); 462 exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET; 463 if (!MPLS_BOS_ISSET(shim->shim_label)) 464 goto drop; 465 466 ttl = ntohl(shim->shim_label & MPLS_TTL_MASK); 467 m_adj(m, sizeof(*shim)); 468 469 n = m; 470 while (n->m_len == 0) { 471 n = n->m_next; 472 if (n == NULL) 473 goto drop; 474 } 475 476 switch (*mtod(n, uint8_t *) >> 4) { 477 case 4: { 478 struct ip *ip; 479 if (m->m_len < sizeof(*ip)) { 480 m = m_pullup(m, sizeof(*ip)); 481 if (m == NULL) 482 return; 483 } 484 ip = mtod(m, struct ip *); 485 tos = ip->ip_tos; 486 487 if (mpls_mapttl_ip) { 488 m = mpls_ip_adjttl(m, ttl); 489 if (m == NULL) 490 return; 491 } 492 input = ipv4_input; 493 m->m_pkthdr.ph_family = AF_INET; 494 break; 495 } 496 #ifdef INET6 497 case 6: { 498 struct ip6_hdr *ip6; 499 uint32_t flow; 500 if (m->m_len < sizeof(*ip6)) { 501 m = m_pullup(m, sizeof(*ip6)); 502 if (m == NULL) 503 return; 504 } 505 ip6 = mtod(m, struct ip6_hdr *); 506 flow = bemtoh32(&ip6->ip6_flow); 507 tos = flow >> 20; 508 509 if (mpls_mapttl_ip6) { 510 m = mpls_ip6_adjttl(m, ttl); 511 if (m == NULL) 512 return; 513 } 514 input = ipv6_input; 515 m->m_pkthdr.ph_family = AF_INET6; 516 break; 517 } 518 #endif /* INET6 */ 519 default: 520 goto drop; 521 } 522 523 switch (rxprio) { 524 case IF_HDRPRIO_PACKET: 525 /* nop */ 526 break; 527 case IF_HDRPRIO_OUTER: 528 m->m_pkthdr.pf.prio = exp; 529 break; 530 case IF_HDRPRIO_PAYLOAD: 531 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 532 break; 533 default: 534 m->m_pkthdr.pf.prio = rxprio; 535 break; 536 } 537 538 /* new receive if and move into correct rtable */ 539 m->m_pkthdr.ph_ifidx = ifp->if_index; 540 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 541 542 /* packet has not been processed by PF yet. */ 543 KASSERT(m->m_pkthdr.pf.statekey == NULL); 544 545 #if NBPFILTER > 0 546 if (ifp->if_bpf) { 547 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, 548 m, BPF_DIRECTION_IN); 549 } 550 #endif 551 552 (*input)(ifp, m); 553 return; 554 drop: 555 m_freem(m); 556 } 557