1 /* $OpenBSD: if_mpe.c,v 1.105 2024/01/01 18:47:02 mvs Exp $ */
2
3 /*
4 * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/socket.h>
23 #include <sys/sockio.h>
24 #include <sys/ioctl.h>
25
26 #include <net/if.h>
27 #include <net/if_dl.h>
28 #include <net/if_var.h>
29 #include <net/if_types.h>
30 #include <net/netisr.h>
31 #include <net/route.h>
32
33 #include <netinet/in.h>
34 #include <netinet/ip.h>
35
36 #ifdef INET6
37 #include <netinet/ip6.h>
38 #endif /* INET6 */
39
40 #include "bpfilter.h"
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44
45 #include <netmpls/mpls.h>
46
47
48
/*
 * Debug logging helper.  Invoke as DPRINTF(("fmt", args)); the doubled
 * parentheses pass the complete argument list to printf.  With MPLS_DEBUG
 * defined, output is produced only while mpedebug is non-zero; without it
 * the macro expands to nothing.
 * NOTE(review): mpedebug is not declared in the visible part of this file
 * and nothing visible here invokes DPRINTF -- confirm before enabling.
 */
#ifdef MPLS_DEBUG
#define DPRINTF(x) do { if (mpedebug) printf x ; } while (0)
#else
#define DPRINTF(x)
#endif
54
/*
 * Per-interface state of an mpe(4) pseudo-interface.  Allocated zeroed in
 * mpe_clone_create() and released in mpe_clone_destroy().
 */
struct mpe_softc {
	struct ifnet sc_if; /* the interface */
	int sc_txhprio;		/* priority source for the shim on output */
	int sc_rxhprio;		/* priority source for packets on input */
	unsigned int sc_rdomain;	/* rtable for lookups and the label route */
	struct ifaddr sc_ifa;		/* ifaddr handed to rt_ifa_add()/rt_ifa_del() */
	struct sockaddr_mpls sc_smpls;	/* local label (shim form), 0 when unset */

	int sc_dead;		/* set by mpe_clone_destroy(); blocks mpe_set_label() */
};
65
/* Encapsulation overhead (one MPLS shim) and MTU limits of the interface. */
#define MPE_HDRLEN	(sizeof(struct shim_hdr))
#define MPE_MTU		1500	/* default, set in mpe_clone_create() */
#define MPE_MTU_MIN	256	/* lower bound accepted by SIOCSIFMTU */
#define MPE_MTU_MAX	8192	/* upper bound accepted by SIOCSIFMTU */
70
71 void mpeattach(int);
72 int mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *,
73 struct rtentry *);
74 int mpe_ioctl(struct ifnet *, u_long, caddr_t);
75 void mpe_start(struct ifnet *);
76 int mpe_clone_create(struct if_clone *, int);
77 int mpe_clone_destroy(struct ifnet *);
78 void mpe_input(struct ifnet *, struct mbuf *);
79
/*
 * Cloner descriptor for "mpe" interfaces; registered by mpeattach() and
 * wired to mpe_clone_create() / mpe_clone_destroy().
 */
struct if_clone mpe_cloner =
    IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy);
82
/*
 * TTL-mapping switches defined outside this file.  mpls_mapttl_ip is
 * consulted by mpe_output() and by the IPv4 path of mpe_input();
 * mpls_mapttl_ip6 by the IPv6 path of mpe_input().
 */
extern int mpls_mapttl_ip;
#ifdef INET6
extern int mpls_mapttl_ip6;
#endif
87
88 void
mpeattach(int nmpe)89 mpeattach(int nmpe)
90 {
91 if_clone_attach(&mpe_cloner);
92 }
93
94 int
mpe_clone_create(struct if_clone * ifc,int unit)95 mpe_clone_create(struct if_clone *ifc, int unit)
96 {
97 struct mpe_softc *sc;
98 struct ifnet *ifp;
99
100 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
101 if (sc == NULL)
102 return (ENOMEM);
103
104 ifp = &sc->sc_if;
105 snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
106 ifp->if_flags = IFF_POINTOPOINT;
107 ifp->if_xflags = IFXF_CLONED;
108 ifp->if_softc = sc;
109 ifp->if_mtu = MPE_MTU;
110 ifp->if_ioctl = mpe_ioctl;
111 ifp->if_bpf_mtap = p2p_bpf_mtap;
112 ifp->if_input = p2p_input;
113 ifp->if_output = mpe_output;
114 ifp->if_start = mpe_start;
115 ifp->if_type = IFT_MPLS;
116 ifp->if_hdrlen = MPE_HDRLEN;
117
118 sc->sc_dead = 0;
119
120 if_counters_alloc(ifp);
121 if_attach(ifp);
122 if_alloc_sadl(ifp);
123
124 #if NBPFILTER > 0
125 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
126 #endif
127
128 sc->sc_txhprio = 0;
129 sc->sc_rxhprio = IF_HDRPRIO_PACKET;
130 sc->sc_rdomain = 0;
131 refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
132 sc->sc_ifa.ifa_ifp = ifp;
133 sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
134 sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
135 sc->sc_smpls.smpls_family = AF_MPLS;
136
137 return (0);
138 }
139
140 int
mpe_clone_destroy(struct ifnet * ifp)141 mpe_clone_destroy(struct ifnet *ifp)
142 {
143 struct mpe_softc *sc = ifp->if_softc;
144
145 NET_LOCK();
146 CLR(ifp->if_flags, IFF_RUNNING);
147 sc->sc_dead = 1;
148
149 if (sc->sc_smpls.smpls_label) {
150 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
151 smplstosa(&sc->sc_smpls), sc->sc_rdomain);
152 }
153 NET_UNLOCK();
154
155 ifq_barrier(&ifp->if_snd);
156
157 if_detach(ifp);
158 if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) {
159 panic("%s: ifa refcnt has %u refs", __func__,
160 sc->sc_ifa.ifa_refcnt.r_refs);
161 }
162 free(sc, M_DEVBUF, sizeof *sc);
163 return (0);
164 }
165
166 /*
167 * Start output on the mpe interface.
168 */
169 void
mpe_start(struct ifnet * ifp)170 mpe_start(struct ifnet *ifp)
171 {
172 struct mpe_softc *sc = ifp->if_softc;
173 struct mbuf *m;
174 struct sockaddr *sa;
175 struct sockaddr smpls = { .sa_family = AF_MPLS };
176 struct rtentry *rt;
177 struct ifnet *ifp0;
178
179 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
180 sa = mtod(m, struct sockaddr *);
181 rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain);
182 if (!rtisvalid(rt)) {
183 m_freem(m);
184 rtfree(rt);
185 continue;
186 }
187
188 ifp0 = if_get(rt->rt_ifidx);
189 if (ifp0 == NULL) {
190 m_freem(m);
191 rtfree(rt);
192 continue;
193 }
194
195 m_adj(m, sa->sa_len);
196
197 #if NBPFILTER > 0
198 if (ifp->if_bpf) {
199 /* remove MPLS label before passing packet to bpf */
200 m->m_data += sizeof(struct shim_hdr);
201 m->m_len -= sizeof(struct shim_hdr);
202 m->m_pkthdr.len -= sizeof(struct shim_hdr);
203 bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
204 m, BPF_DIRECTION_OUT);
205 m->m_data -= sizeof(struct shim_hdr);
206 m->m_len += sizeof(struct shim_hdr);
207 m->m_pkthdr.len += sizeof(struct shim_hdr);
208 }
209 #endif
210
211 m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
212 CLR(m->m_flags, M_BCAST|M_MCAST);
213
214 mpls_output(ifp0, m, &smpls, rt);
215 if_put(ifp0);
216 rtfree(rt);
217 }
218 }
219
220 int
mpe_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * dst,struct rtentry * rt)221 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
222 struct rtentry *rt)
223 {
224 struct mpe_softc *sc;
225 struct rt_mpls *rtmpls;
226 struct shim_hdr shim;
227 int error;
228 int txprio;
229 uint8_t ttl = mpls_defttl;
230 uint8_t tos, prio;
231 size_t ttloff;
232 socklen_t slen;
233
234 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) {
235 m_freem(m);
236 return (ENETUNREACH);
237 }
238
239 if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) {
240 mpe_input(ifp, m);
241 return (0);
242 }
243
244 #ifdef DIAGNOSTIC
245 if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
246 printf("%s: trying to send packet on wrong domain. "
247 "if %d vs. mbuf %d\n", ifp->if_xname,
248 ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid));
249 }
250 #endif
251
252 rtmpls = (struct rt_mpls *)rt->rt_llinfo;
253 if (rtmpls->mpls_operation != MPLS_OP_PUSH) {
254 m_freem(m);
255 return (ENETUNREACH);
256 }
257
258 error = 0;
259 switch (dst->sa_family) {
260 case AF_INET: {
261 struct ip *ip = mtod(m, struct ip *);
262 tos = ip->ip_tos;
263 ttloff = offsetof(struct ip, ip_ttl);
264 slen = sizeof(struct sockaddr_in);
265 break;
266 }
267 #ifdef INET6
268 case AF_INET6: {
269 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
270 uint32_t flow = bemtoh32(&ip6->ip6_flow);
271 tos = flow >> 20;
272 ttloff = offsetof(struct ip6_hdr, ip6_hlim);
273 slen = sizeof(struct sockaddr_in6);
274 break;
275 }
276 #endif
277 default:
278 m_freem(m);
279 return (EPFNOSUPPORT);
280 }
281
282 if (mpls_mapttl_ip) {
283 /* assumes the ip header is already contig */
284 ttl = *(mtod(m, uint8_t *) + ttloff);
285 }
286
287 sc = ifp->if_softc;
288 txprio = sc->sc_txhprio;
289
290 switch (txprio) {
291 case IF_HDRPRIO_PACKET:
292 prio = m->m_pkthdr.pf.prio;
293 break;
294 case IF_HDRPRIO_PAYLOAD:
295 prio = IFQ_TOS2PRIO(tos);
296 break;
297 default:
298 prio = txprio;
299 break;
300 }
301
302 shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) |
303 MPLS_BOS_MASK | htonl(ttl);
304
305 m = m_prepend(m, sizeof(shim), M_NOWAIT);
306 if (m == NULL) {
307 error = ENOMEM;
308 goto out;
309 }
310 *mtod(m, struct shim_hdr *) = shim;
311
312 m = m_prepend(m, slen, M_WAITOK);
313 if (m == NULL) {
314 error = ENOMEM;
315 goto out;
316 }
317 memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen);
318 mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */
319
320 m->m_pkthdr.ph_family = dst->sa_family;
321
322 error = if_enqueue(ifp, m);
323 out:
324 if (error)
325 ifp->if_oerrors++;
326 return (error);
327 }
328
329 int
mpe_set_label(struct mpe_softc * sc,uint32_t label,unsigned int rdomain)330 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain)
331 {
332 int error;
333
334 if (sc->sc_dead)
335 return (ENXIO);
336
337 if (sc->sc_smpls.smpls_label) {
338 /* remove old MPLS route */
339 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
340 smplstosa(&sc->sc_smpls), sc->sc_rdomain);
341 }
342
343 /* add new MPLS route */
344 sc->sc_smpls.smpls_label = label;
345 sc->sc_rdomain = rdomain;
346
347 /* only install with a label or mpe_clone_destroy() will ignore it */
348 if (sc->sc_smpls.smpls_label == MPLS_LABEL2SHIM(0))
349 return 0;
350
351 error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
352 smplstosa(&sc->sc_smpls), sc->sc_rdomain);
353 if (error)
354 sc->sc_smpls.smpls_label = 0;
355
356 return (error);
357 }
358
359 int
mpe_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)360 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
361 {
362 struct mpe_softc *sc = ifp->if_softc;
363 struct ifreq *ifr;
364 struct shim_hdr shim;
365 int error = 0;
366
367 ifr = (struct ifreq *)data;
368 switch (cmd) {
369 case SIOCSIFADDR:
370 break;
371 case SIOCSIFFLAGS:
372 if (ifp->if_flags & IFF_UP)
373 ifp->if_flags |= IFF_RUNNING;
374 else
375 ifp->if_flags &= ~IFF_RUNNING;
376 break;
377 case SIOCSIFMTU:
378 if (ifr->ifr_mtu < MPE_MTU_MIN ||
379 ifr->ifr_mtu > MPE_MTU_MAX)
380 error = EINVAL;
381 else
382 ifp->if_mtu = ifr->ifr_mtu;
383 break;
384 case SIOCGETLABEL:
385 shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
386 if (shim.shim_label == 0) {
387 error = EADDRNOTAVAIL;
388 break;
389 }
390 error = copyout(&shim, ifr->ifr_data, sizeof(shim));
391 break;
392 case SIOCSETLABEL:
393 error = copyin(ifr->ifr_data, &shim, sizeof(shim));
394 if (error != 0)
395 break;
396 if (shim.shim_label > MPLS_LABEL_MAX ||
397 shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
398 error = EINVAL;
399 break;
400 }
401 shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
402 if (sc->sc_smpls.smpls_label != shim.shim_label) {
403 error = mpe_set_label(sc, shim.shim_label,
404 sc->sc_rdomain);
405 }
406 break;
407 case SIOCDELLABEL:
408 if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
409 rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
410 smplstosa(&sc->sc_smpls), sc->sc_rdomain);
411 }
412 sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0);
413 break;
414
415 case SIOCSLIFPHYRTABLE:
416 if (ifr->ifr_rdomainid < 0 ||
417 ifr->ifr_rdomainid > RT_TABLEID_MAX ||
418 !rtable_exists(ifr->ifr_rdomainid) ||
419 ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) {
420 error = EINVAL;
421 break;
422 }
423 if (sc->sc_rdomain != ifr->ifr_rdomainid) {
424 error = mpe_set_label(sc, sc->sc_smpls.smpls_label,
425 ifr->ifr_rdomainid);
426 }
427 break;
428 case SIOCGLIFPHYRTABLE:
429 ifr->ifr_rdomainid = sc->sc_rdomain;
430 break;
431
432 case SIOCSTXHPRIO:
433 error = if_txhprio_l3_check(ifr->ifr_hdrprio);
434 if (error != 0)
435 break;
436
437 sc->sc_txhprio = ifr->ifr_hdrprio;
438 break;
439 case SIOCGTXHPRIO:
440 ifr->ifr_hdrprio = sc->sc_txhprio;
441 break;
442
443 case SIOCSRXHPRIO:
444 error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
445 if (error != 0)
446 break;
447
448 sc->sc_rxhprio = ifr->ifr_hdrprio;
449 break;
450 case SIOCGRXHPRIO:
451 ifr->ifr_hdrprio = sc->sc_rxhprio;
452 break;
453
454 default:
455 return (ENOTTY);
456 }
457
458 return (error);
459 }
460
461 void
mpe_input(struct ifnet * ifp,struct mbuf * m)462 mpe_input(struct ifnet *ifp, struct mbuf *m)
463 {
464 struct mpe_softc *sc = ifp->if_softc;
465 struct shim_hdr *shim;
466 struct mbuf *n;
467 uint8_t ttl, tos;
468 uint32_t exp;
469 int rxprio = sc->sc_rxhprio;
470
471 shim = mtod(m, struct shim_hdr *);
472 exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
473 if (!MPLS_BOS_ISSET(shim->shim_label))
474 goto drop;
475
476 ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
477 m_adj(m, sizeof(*shim));
478
479 n = m;
480 while (n->m_len == 0) {
481 n = n->m_next;
482 if (n == NULL)
483 goto drop;
484 }
485
486 switch (*mtod(n, uint8_t *) >> 4) {
487 case 4: {
488 struct ip *ip;
489 if (m->m_len < sizeof(*ip)) {
490 m = m_pullup(m, sizeof(*ip));
491 if (m == NULL)
492 return;
493 }
494 ip = mtod(m, struct ip *);
495 tos = ip->ip_tos;
496
497 if (mpls_mapttl_ip) {
498 m = mpls_ip_adjttl(m, ttl);
499 if (m == NULL)
500 return;
501 }
502
503 m->m_pkthdr.ph_family = AF_INET;
504 break;
505 }
506 #ifdef INET6
507 case 6: {
508 struct ip6_hdr *ip6;
509 uint32_t flow;
510 if (m->m_len < sizeof(*ip6)) {
511 m = m_pullup(m, sizeof(*ip6));
512 if (m == NULL)
513 return;
514 }
515 ip6 = mtod(m, struct ip6_hdr *);
516 flow = bemtoh32(&ip6->ip6_flow);
517 tos = flow >> 20;
518
519 if (mpls_mapttl_ip6) {
520 m = mpls_ip6_adjttl(m, ttl);
521 if (m == NULL)
522 return;
523 }
524
525 m->m_pkthdr.ph_family = AF_INET6;
526 break;
527 }
528 #endif /* INET6 */
529 default:
530 goto drop;
531 }
532
533 switch (rxprio) {
534 case IF_HDRPRIO_PACKET:
535 /* nop */
536 break;
537 case IF_HDRPRIO_OUTER:
538 m->m_pkthdr.pf.prio = exp;
539 break;
540 case IF_HDRPRIO_PAYLOAD:
541 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
542 break;
543 default:
544 m->m_pkthdr.pf.prio = rxprio;
545 break;
546 }
547
548 if_vinput(ifp, m);
549 return;
550 drop:
551 m_freem(m);
552 }
553