xref: /openbsd/sys/net/if_mpe.c (revision b5b8bc53)
1 /* $OpenBSD: if_mpe.c,v 1.105 2024/01/01 18:47:02 mvs Exp $ */
2 
3 /*
4  * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/socket.h>
23 #include <sys/sockio.h>
24 #include <sys/ioctl.h>
25 
26 #include <net/if.h>
27 #include <net/if_dl.h>
28 #include <net/if_var.h>
29 #include <net/if_types.h>
30 #include <net/netisr.h>
31 #include <net/route.h>
32 
33 #include <netinet/in.h>
34 #include <netinet/ip.h>
35 
36 #ifdef INET6
37 #include <netinet/ip6.h>
38 #endif /* INET6 */
39 
40 #include "bpfilter.h"
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44 
45 #include <netmpls/mpls.h>
46 
47 
48 
/*
 * Debug tracing helper.  DPRINTF takes one parenthesized printf argument
 * list, e.g. DPRINTF(("label %u\n", label)).  Without MPLS_DEBUG the macro
 * expands to nothing, so its arguments are never evaluated.
 * NOTE(review): no declaration of mpedebug is visible in this file.
 */
#if defined(MPLS_DEBUG)
#define DPRINTF(x)	do { if (mpedebug) printf x; } while (0)
#else
#define DPRINTF(x)
#endif /* MPLS_DEBUG */
54 
55 struct mpe_softc {
56 	struct ifnet		sc_if;		/* the interface */
57 	int			sc_txhprio;
58 	int			sc_rxhprio;
59 	unsigned int		sc_rdomain;
60 	struct ifaddr		sc_ifa;
61 	struct sockaddr_mpls	sc_smpls;
62 
63 	int			sc_dead;
64 };
65 
/* Link-level header length stored in if_hdrlen: a single MPLS shim. */
#define MPE_HDRLEN	(sizeof(struct shim_hdr))

/* Default MTU, and the inclusive range accepted by SIOCSIFMTU. */
#define MPE_MTU		1500
#define MPE_MTU_MIN	256
#define MPE_MTU_MAX	8192
70 
71 void	mpeattach(int);
72 int	mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *,
73 	    struct rtentry *);
74 int	mpe_ioctl(struct ifnet *, u_long, caddr_t);
75 void	mpe_start(struct ifnet *);
76 int	mpe_clone_create(struct if_clone *, int);
77 int	mpe_clone_destroy(struct ifnet *);
78 void	mpe_input(struct ifnet *, struct mbuf *);
79 
80 struct if_clone	mpe_cloner =
81     IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy);
82 
83 extern int	mpls_mapttl_ip;
84 #ifdef INET6
85 extern int	mpls_mapttl_ip6;
86 #endif
87 
88 void
mpeattach(int nmpe)89 mpeattach(int nmpe)
90 {
91 	if_clone_attach(&mpe_cloner);
92 }
93 
94 int
mpe_clone_create(struct if_clone * ifc,int unit)95 mpe_clone_create(struct if_clone *ifc, int unit)
96 {
97 	struct mpe_softc	*sc;
98 	struct ifnet		*ifp;
99 
100 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
101 	if (sc == NULL)
102 		return (ENOMEM);
103 
104 	ifp = &sc->sc_if;
105 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
106 	ifp->if_flags = IFF_POINTOPOINT;
107 	ifp->if_xflags = IFXF_CLONED;
108 	ifp->if_softc = sc;
109 	ifp->if_mtu = MPE_MTU;
110 	ifp->if_ioctl = mpe_ioctl;
111 	ifp->if_bpf_mtap = p2p_bpf_mtap;
112 	ifp->if_input = p2p_input;
113 	ifp->if_output = mpe_output;
114 	ifp->if_start = mpe_start;
115 	ifp->if_type = IFT_MPLS;
116 	ifp->if_hdrlen = MPE_HDRLEN;
117 
118 	sc->sc_dead = 0;
119 
120 	if_counters_alloc(ifp);
121 	if_attach(ifp);
122 	if_alloc_sadl(ifp);
123 
124 #if NBPFILTER > 0
125 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
126 #endif
127 
128 	sc->sc_txhprio = 0;
129 	sc->sc_rxhprio = IF_HDRPRIO_PACKET;
130 	sc->sc_rdomain = 0;
131 	refcnt_init_trace(&sc->sc_ifa.ifa_refcnt, DT_REFCNT_IDX_IFADDR);
132 	sc->sc_ifa.ifa_ifp = ifp;
133 	sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
134 	sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
135 	sc->sc_smpls.smpls_family = AF_MPLS;
136 
137 	return (0);
138 }
139 
140 int
mpe_clone_destroy(struct ifnet * ifp)141 mpe_clone_destroy(struct ifnet *ifp)
142 {
143 	struct mpe_softc	*sc = ifp->if_softc;
144 
145 	NET_LOCK();
146 	CLR(ifp->if_flags, IFF_RUNNING);
147 	sc->sc_dead = 1;
148 
149 	if (sc->sc_smpls.smpls_label) {
150 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
151 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
152 	}
153 	NET_UNLOCK();
154 
155 	ifq_barrier(&ifp->if_snd);
156 
157 	if_detach(ifp);
158 	if (refcnt_rele(&sc->sc_ifa.ifa_refcnt) == 0) {
159 		panic("%s: ifa refcnt has %u refs", __func__,
160 		    sc->sc_ifa.ifa_refcnt.r_refs);
161 	}
162 	free(sc, M_DEVBUF, sizeof *sc);
163 	return (0);
164 }
165 
166 /*
167  * Start output on the mpe interface.
168  */
169 void
mpe_start(struct ifnet * ifp)170 mpe_start(struct ifnet *ifp)
171 {
172 	struct mpe_softc	*sc = ifp->if_softc;
173 	struct mbuf		*m;
174 	struct sockaddr		*sa;
175 	struct sockaddr		smpls = { .sa_family = AF_MPLS };
176 	struct rtentry		*rt;
177 	struct ifnet		*ifp0;
178 
179 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
180 		sa = mtod(m, struct sockaddr *);
181 		rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain);
182 		if (!rtisvalid(rt)) {
183 			m_freem(m);
184 			rtfree(rt);
185 			continue;
186 		}
187 
188 		ifp0 = if_get(rt->rt_ifidx);
189 		if (ifp0 == NULL) {
190 			m_freem(m);
191 			rtfree(rt);
192 			continue;
193 		}
194 
195 		m_adj(m, sa->sa_len);
196 
197 #if NBPFILTER > 0
198 		if (ifp->if_bpf) {
199 			/* remove MPLS label before passing packet to bpf */
200 			m->m_data += sizeof(struct shim_hdr);
201 			m->m_len -= sizeof(struct shim_hdr);
202 			m->m_pkthdr.len -= sizeof(struct shim_hdr);
203 			bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
204 			    m, BPF_DIRECTION_OUT);
205 			m->m_data -= sizeof(struct shim_hdr);
206 			m->m_len += sizeof(struct shim_hdr);
207 			m->m_pkthdr.len += sizeof(struct shim_hdr);
208 		}
209 #endif
210 
211 		m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
212 		CLR(m->m_flags, M_BCAST|M_MCAST);
213 
214 		mpls_output(ifp0, m, &smpls, rt);
215 		if_put(ifp0);
216 		rtfree(rt);
217 	}
218 }
219 
220 int
mpe_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * dst,struct rtentry * rt)221 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
222 	struct rtentry *rt)
223 {
224 	struct mpe_softc *sc;
225 	struct rt_mpls	*rtmpls;
226 	struct shim_hdr	shim;
227 	int		error;
228 	int		txprio;
229 	uint8_t		ttl = mpls_defttl;
230 	uint8_t		tos, prio;
231 	size_t		ttloff;
232 	socklen_t	slen;
233 
234 	if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) {
235 		m_freem(m);
236 		return (ENETUNREACH);
237 	}
238 
239 	if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) {
240 		mpe_input(ifp, m);
241 		return (0);
242 	}
243 
244 #ifdef DIAGNOSTIC
245 	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
246 		printf("%s: trying to send packet on wrong domain. "
247 		    "if %d vs. mbuf %d\n", ifp->if_xname,
248 		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid));
249 	}
250 #endif
251 
252 	rtmpls = (struct rt_mpls *)rt->rt_llinfo;
253 	if (rtmpls->mpls_operation != MPLS_OP_PUSH) {
254 		m_freem(m);
255 		return (ENETUNREACH);
256 	}
257 
258 	error = 0;
259 	switch (dst->sa_family) {
260 	case AF_INET: {
261 		struct ip *ip = mtod(m, struct ip *);
262 		tos = ip->ip_tos;
263 		ttloff = offsetof(struct ip, ip_ttl);
264 		slen = sizeof(struct sockaddr_in);
265 		break;
266 	}
267 #ifdef INET6
268 	case AF_INET6: {
269 		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
270 		uint32_t flow = bemtoh32(&ip6->ip6_flow);
271 		tos = flow >> 20;
272 		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
273 		slen = sizeof(struct sockaddr_in6);
274 		break;
275 	}
276 #endif
277 	default:
278 		m_freem(m);
279 		return (EPFNOSUPPORT);
280 	}
281 
282 	if (mpls_mapttl_ip) {
283 		/* assumes the ip header is already contig */
284 		ttl = *(mtod(m, uint8_t *) + ttloff);
285 	}
286 
287 	sc = ifp->if_softc;
288 	txprio = sc->sc_txhprio;
289 
290 	switch (txprio) {
291 	case IF_HDRPRIO_PACKET:
292 		prio = m->m_pkthdr.pf.prio;
293 		break;
294 	case IF_HDRPRIO_PAYLOAD:
295 		prio = IFQ_TOS2PRIO(tos);
296 		break;
297 	default:
298 		prio = txprio;
299 		break;
300 	}
301 
302 	shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) |
303 	    MPLS_BOS_MASK | htonl(ttl);
304 
305 	m = m_prepend(m, sizeof(shim), M_NOWAIT);
306 	if (m == NULL) {
307 		error = ENOMEM;
308 		goto out;
309 	}
310 	*mtod(m, struct shim_hdr *) = shim;
311 
312 	m = m_prepend(m, slen, M_WAITOK);
313 	if (m == NULL) {
314 		error = ENOMEM;
315 		goto out;
316 	}
317 	memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen);
318 	mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */
319 
320 	m->m_pkthdr.ph_family = dst->sa_family;
321 
322 	error = if_enqueue(ifp, m);
323 out:
324 	if (error)
325 		ifp->if_oerrors++;
326 	return (error);
327 }
328 
329 int
mpe_set_label(struct mpe_softc * sc,uint32_t label,unsigned int rdomain)330 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain)
331 {
332 	int error;
333 
334 	if (sc->sc_dead)
335 		return (ENXIO);
336 
337 	if (sc->sc_smpls.smpls_label) {
338 		/* remove old MPLS route */
339 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
340 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
341 	}
342 
343 	/* add new MPLS route */
344 	sc->sc_smpls.smpls_label = label;
345 	sc->sc_rdomain = rdomain;
346 
347 	/* only install with a label or mpe_clone_destroy() will ignore it */
348 	if (sc->sc_smpls.smpls_label == MPLS_LABEL2SHIM(0))
349 		return 0;
350 
351 	error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
352 	    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
353 	if (error)
354 		sc->sc_smpls.smpls_label = 0;
355 
356 	return (error);
357 }
358 
359 int
mpe_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)360 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
361 {
362 	struct mpe_softc	*sc = ifp->if_softc;
363 	struct ifreq		*ifr;
364 	struct shim_hdr		 shim;
365 	int			 error = 0;
366 
367 	ifr = (struct ifreq *)data;
368 	switch (cmd) {
369 	case SIOCSIFADDR:
370 		break;
371 	case SIOCSIFFLAGS:
372 		if (ifp->if_flags & IFF_UP)
373 			ifp->if_flags |= IFF_RUNNING;
374 		else
375 			ifp->if_flags &= ~IFF_RUNNING;
376 		break;
377 	case SIOCSIFMTU:
378 		if (ifr->ifr_mtu < MPE_MTU_MIN ||
379 		    ifr->ifr_mtu > MPE_MTU_MAX)
380 			error = EINVAL;
381 		else
382 			ifp->if_mtu = ifr->ifr_mtu;
383 		break;
384 	case SIOCGETLABEL:
385 		shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
386 		if (shim.shim_label == 0) {
387 			error = EADDRNOTAVAIL;
388 			break;
389 		}
390 		error = copyout(&shim, ifr->ifr_data, sizeof(shim));
391 		break;
392 	case SIOCSETLABEL:
393 		error = copyin(ifr->ifr_data, &shim, sizeof(shim));
394 		if (error != 0)
395 			break;
396 		if (shim.shim_label > MPLS_LABEL_MAX ||
397 		    shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
398 			error = EINVAL;
399 			break;
400 		}
401 		shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
402 		if (sc->sc_smpls.smpls_label != shim.shim_label) {
403 			error = mpe_set_label(sc, shim.shim_label,
404 			    sc->sc_rdomain);
405 		}
406 		break;
407 	case SIOCDELLABEL:
408 		if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
409 			rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
410 			    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
411 		}
412 		sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0);
413 		break;
414 
415 	case SIOCSLIFPHYRTABLE:
416 		if (ifr->ifr_rdomainid < 0 ||
417 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
418 		    !rtable_exists(ifr->ifr_rdomainid) ||
419 		    ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) {
420 			error = EINVAL;
421 			break;
422 		}
423 		if (sc->sc_rdomain != ifr->ifr_rdomainid) {
424 			error = mpe_set_label(sc, sc->sc_smpls.smpls_label,
425 			    ifr->ifr_rdomainid);
426 		}
427 		break;
428 	case SIOCGLIFPHYRTABLE:
429 		ifr->ifr_rdomainid = sc->sc_rdomain;
430 		break;
431 
432 	case SIOCSTXHPRIO:
433 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
434 		if (error != 0)
435 			break;
436 
437 		sc->sc_txhprio = ifr->ifr_hdrprio;
438 		break;
439 	case SIOCGTXHPRIO:
440 		ifr->ifr_hdrprio = sc->sc_txhprio;
441 		break;
442 
443 	case SIOCSRXHPRIO:
444 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
445 		if (error != 0)
446 			break;
447 
448 		sc->sc_rxhprio = ifr->ifr_hdrprio;
449 		break;
450 	case SIOCGRXHPRIO:
451 		ifr->ifr_hdrprio = sc->sc_rxhprio;
452 		break;
453 
454 	default:
455 		return (ENOTTY);
456 	}
457 
458 	return (error);
459 }
460 
461 void
mpe_input(struct ifnet * ifp,struct mbuf * m)462 mpe_input(struct ifnet *ifp, struct mbuf *m)
463 {
464 	struct mpe_softc *sc = ifp->if_softc;
465 	struct shim_hdr	*shim;
466 	struct mbuf	*n;
467 	uint8_t		 ttl, tos;
468 	uint32_t	 exp;
469 	int rxprio = sc->sc_rxhprio;
470 
471 	shim = mtod(m, struct shim_hdr *);
472 	exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
473 	if (!MPLS_BOS_ISSET(shim->shim_label))
474 		goto drop;
475 
476 	ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
477 	m_adj(m, sizeof(*shim));
478 
479 	n = m;
480 	while (n->m_len == 0) {
481 		n = n->m_next;
482 		if (n == NULL)
483 			goto drop;
484 	}
485 
486 	switch (*mtod(n, uint8_t *) >> 4) {
487 	case 4: {
488 		struct ip *ip;
489 		if (m->m_len < sizeof(*ip)) {
490 			m = m_pullup(m, sizeof(*ip));
491 			if (m == NULL)
492 				return;
493 		}
494 		ip = mtod(m, struct ip *);
495 		tos = ip->ip_tos;
496 
497 		if (mpls_mapttl_ip) {
498 			m = mpls_ip_adjttl(m, ttl);
499 			if (m == NULL)
500 				return;
501 		}
502 
503 		m->m_pkthdr.ph_family = AF_INET;
504 		break;
505 	}
506 #ifdef INET6
507 	case 6: {
508 		struct ip6_hdr *ip6;
509 		uint32_t flow;
510 		if (m->m_len < sizeof(*ip6)) {
511 			m = m_pullup(m, sizeof(*ip6));
512 			if (m == NULL)
513 				return;
514 		}
515 		ip6 = mtod(m, struct ip6_hdr *);
516 		flow = bemtoh32(&ip6->ip6_flow);
517 		tos = flow >> 20;
518 
519 		if (mpls_mapttl_ip6) {
520 			m = mpls_ip6_adjttl(m, ttl);
521 			if (m == NULL)
522 				return;
523 		}
524 
525 		m->m_pkthdr.ph_family = AF_INET6;
526 		break;
527 	}
528 #endif /* INET6 */
529 	default:
530 		goto drop;
531 	}
532 
533 	switch (rxprio) {
534 	case IF_HDRPRIO_PACKET:
535 		/* nop */
536 		break;
537 	case IF_HDRPRIO_OUTER:
538 		m->m_pkthdr.pf.prio = exp;
539 		break;
540 	case IF_HDRPRIO_PAYLOAD:
541 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
542 		break;
543 	default:
544 		m->m_pkthdr.pf.prio = rxprio;
545 		break;
546 	}
547 
548 	if_vinput(ifp, m);
549 	return;
550 drop:
551 	m_freem(m);
552 }
553