xref: /openbsd/sys/net/if_mpe.c (revision 771fbea0)
1 /* $OpenBSD: if_mpe.c,v 1.100 2021/03/26 19:00:21 kn Exp $ */
2 
3 /*
4  * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/socket.h>
23 #include <sys/sockio.h>
24 #include <sys/ioctl.h>
25 
26 #include <net/if.h>
27 #include <net/if_dl.h>
28 #include <net/if_var.h>
29 #include <net/if_types.h>
30 #include <net/netisr.h>
31 #include <net/route.h>
32 
33 #include <netinet/in.h>
34 #include <netinet/ip.h>
35 
36 #ifdef INET6
37 #include <netinet/ip6.h>
38 #endif /* INET6 */
39 
40 #include "bpfilter.h"
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44 
45 #include <netmpls/mpls.h>
46 
47 
48 
49 #ifdef MPLS_DEBUG
50 #define DPRINTF(x)    do { if (mpedebug) printf x ; } while (0)
51 #else
52 #define DPRINTF(x)
53 #endif
54 
/*
 * Per-interface state of one mpe interface.  Allocated zeroed in
 * mpe_clone_create() and released in mpe_clone_destroy().
 */
struct mpe_softc {
	struct ifnet		sc_if;		/* the interface */
	int			sc_txhprio;	/* tx header prio, see SIOCSTXHPRIO */
	int			sc_rxhprio;	/* rx header prio, see SIOCSRXHPRIO */
	unsigned int		sc_rdomain;	/* rdomain of the label route */
	struct ifaddr		sc_ifa;		/* ifaddr passed to rt_ifa_add/del */
	struct sockaddr_mpls	sc_smpls;	/* local label; 0 means not set */

	int			sc_dead;	/* set once destroy has started */
};
65 
/* Encapsulation size and MTU limits of an mpe interface. */
#define MPE_HDRLEN	sizeof(struct shim_hdr)	/* if_hdrlen: one MPLS shim */
#define MPE_MTU		1500	/* MTU assigned at create time */
#define MPE_MTU_MIN	256	/* smallest MTU SIOCSIFMTU accepts */
#define MPE_MTU_MAX	8192	/* largest MTU SIOCSIFMTU accepts */
70 
71 void	mpeattach(int);
72 int	mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *,
73 	    struct rtentry *);
74 int	mpe_ioctl(struct ifnet *, u_long, caddr_t);
75 void	mpe_start(struct ifnet *);
76 int	mpe_clone_create(struct if_clone *, int);
77 int	mpe_clone_destroy(struct ifnet *);
78 void	mpe_input(struct ifnet *, struct mbuf *);
79 
80 struct if_clone	mpe_cloner =
81     IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy);
82 
83 extern int	mpls_mapttl_ip;
84 #ifdef INET6
85 extern int	mpls_mapttl_ip6;
86 #endif
87 
88 void
89 mpeattach(int nmpe)
90 {
91 	if_clone_attach(&mpe_cloner);
92 }
93 
94 int
95 mpe_clone_create(struct if_clone *ifc, int unit)
96 {
97 	struct mpe_softc	*sc;
98 	struct ifnet		*ifp;
99 
100 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
101 	if (sc == NULL)
102 		return (ENOMEM);
103 
104 	ifp = &sc->sc_if;
105 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
106 	ifp->if_flags = IFF_POINTOPOINT;
107 	ifp->if_xflags = IFXF_CLONED;
108 	ifp->if_softc = sc;
109 	ifp->if_mtu = MPE_MTU;
110 	ifp->if_ioctl = mpe_ioctl;
111 	ifp->if_bpf_mtap = p2p_bpf_mtap;
112 	ifp->if_input = p2p_input;
113 	ifp->if_output = mpe_output;
114 	ifp->if_start = mpe_start;
115 	ifp->if_type = IFT_MPLS;
116 	ifp->if_hdrlen = MPE_HDRLEN;
117 
118 	sc->sc_dead = 0;
119 
120 	if_attach(ifp);
121 	if_alloc_sadl(ifp);
122 	if_counters_alloc(ifp);
123 
124 #if NBPFILTER > 0
125 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
126 #endif
127 
128 	sc->sc_txhprio = 0;
129 	sc->sc_rxhprio = IF_HDRPRIO_PACKET;
130 	sc->sc_rdomain = 0;
131 	sc->sc_ifa.ifa_ifp = ifp;
132 	sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
133 	sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
134 	sc->sc_smpls.smpls_family = AF_MPLS;
135 
136 	return (0);
137 }
138 
139 int
140 mpe_clone_destroy(struct ifnet *ifp)
141 {
142 	struct mpe_softc	*sc = ifp->if_softc;
143 
144 	NET_LOCK();
145 	CLR(ifp->if_flags, IFF_RUNNING);
146 	sc->sc_dead = 1;
147 
148 	if (sc->sc_smpls.smpls_label) {
149 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
150 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
151 	}
152 	NET_UNLOCK();
153 
154 	ifq_barrier(&ifp->if_snd);
155 
156 	if_detach(ifp);
157 	free(sc, M_DEVBUF, sizeof *sc);
158 	return (0);
159 }
160 
161 /*
162  * Start output on the mpe interface.
163  */
164 void
165 mpe_start(struct ifnet *ifp)
166 {
167 	struct mpe_softc	*sc = ifp->if_softc;
168 	struct mbuf		*m;
169 	struct sockaddr		*sa;
170 	struct sockaddr		smpls = { .sa_family = AF_MPLS };
171 	struct rtentry		*rt;
172 	struct ifnet		*ifp0;
173 
174 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
175 		sa = mtod(m, struct sockaddr *);
176 		rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain);
177 		if (!rtisvalid(rt)) {
178 			m_freem(m);
179 			rtfree(rt);
180 			continue;
181 		}
182 
183 		ifp0 = if_get(rt->rt_ifidx);
184 		if (ifp0 == NULL) {
185 			m_freem(m);
186 			rtfree(rt);
187 			continue;
188 		}
189 
190 		m_adj(m, sa->sa_len);
191 
192 #if NBPFILTER > 0
193 		if (ifp->if_bpf) {
194 			/* remove MPLS label before passing packet to bpf */
195 			m->m_data += sizeof(struct shim_hdr);
196 			m->m_len -= sizeof(struct shim_hdr);
197 			m->m_pkthdr.len -= sizeof(struct shim_hdr);
198 			bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
199 			    m, BPF_DIRECTION_OUT);
200 			m->m_data -= sizeof(struct shim_hdr);
201 			m->m_len += sizeof(struct shim_hdr);
202 			m->m_pkthdr.len += sizeof(struct shim_hdr);
203 		}
204 #endif
205 
206 		m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
207 		CLR(m->m_flags, M_BCAST|M_MCAST);
208 
209 		mpls_output(ifp0, m, &smpls, rt);
210 		if_put(ifp0);
211 		rtfree(rt);
212 	}
213 }
214 
215 int
216 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
217 	struct rtentry *rt)
218 {
219 	struct mpe_softc *sc;
220 	struct rt_mpls	*rtmpls;
221 	struct shim_hdr	shim;
222 	int		error;
223 	int		txprio;
224 	uint8_t		ttl = mpls_defttl;
225 	uint8_t		tos, prio;
226 	size_t		ttloff;
227 	socklen_t	slen;
228 
229 	if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) {
230 		m_freem(m);
231 		return (ENETUNREACH);
232 	}
233 
234 	if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) {
235 		mpe_input(ifp, m);
236 		return (0);
237 	}
238 
239 #ifdef DIAGNOSTIC
240 	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
241 		printf("%s: trying to send packet on wrong domain. "
242 		    "if %d vs. mbuf %d\n", ifp->if_xname,
243 		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid));
244 	}
245 #endif
246 
247 	rtmpls = (struct rt_mpls *)rt->rt_llinfo;
248 	if (rtmpls->mpls_operation != MPLS_OP_PUSH) {
249 		m_freem(m);
250 		return (ENETUNREACH);
251 	}
252 
253 	error = 0;
254 	switch (dst->sa_family) {
255 	case AF_INET: {
256 		struct ip *ip = mtod(m, struct ip *);
257 		tos = ip->ip_tos;
258 		ttloff = offsetof(struct ip, ip_ttl);
259 		slen = sizeof(struct sockaddr_in);
260 		break;
261 	}
262 #ifdef INET6
263 	case AF_INET6: {
264 		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
265 		uint32_t flow = bemtoh32(&ip6->ip6_flow);
266 		tos = flow >> 20;
267 		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
268 		slen = sizeof(struct sockaddr_in6);
269 		break;
270 	}
271 #endif
272 	default:
273 		m_freem(m);
274 		return (EPFNOSUPPORT);
275 	}
276 
277 	if (mpls_mapttl_ip) {
278 		/* assumes the ip header is already contig */
279 		ttl = *(mtod(m, uint8_t *) + ttloff);
280 	}
281 
282 	sc = ifp->if_softc;
283 	txprio = sc->sc_txhprio;
284 
285 	switch (txprio) {
286 	case IF_HDRPRIO_PACKET:
287 		prio = m->m_pkthdr.pf.prio;
288 		break;
289 	case IF_HDRPRIO_PAYLOAD:
290 		prio = IFQ_TOS2PRIO(tos);
291 		break;
292 	default:
293 		prio = txprio;
294 		break;
295 	}
296 
297 	shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) |
298 	    MPLS_BOS_MASK | htonl(ttl);
299 
300 	m = m_prepend(m, sizeof(shim), M_NOWAIT);
301 	if (m == NULL) {
302 		error = ENOMEM;
303 		goto out;
304 	}
305 	*mtod(m, struct shim_hdr *) = shim;
306 
307 	m = m_prepend(m, slen, M_WAITOK);
308 	if (m == NULL) {
309 		error = ENOMEM;
310 		goto out;
311 	}
312 	memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen);
313 	mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */
314 
315 	m->m_pkthdr.ph_family = dst->sa_family;
316 
317 	error = if_enqueue(ifp, m);
318 out:
319 	if (error)
320 		ifp->if_oerrors++;
321 	return (error);
322 }
323 
324 int
325 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain)
326 {
327 	int error;
328 
329 	if (sc->sc_dead)
330 		return (ENXIO);
331 
332 	if (sc->sc_smpls.smpls_label) {
333 		/* remove old MPLS route */
334 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
335 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
336 	}
337 
338 	/* add new MPLS route */
339 	sc->sc_smpls.smpls_label = label;
340 	sc->sc_rdomain = rdomain;
341 
342 	/* only install with a label or mpe_clone_destroy() will ignore it */
343 	if (sc->sc_smpls.smpls_label == MPLS_LABEL2SHIM(0))
344 		return 0;
345 
346 	error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
347 	    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
348 	if (error)
349 		sc->sc_smpls.smpls_label = 0;
350 
351 	return (error);
352 }
353 
354 int
355 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
356 {
357 	struct mpe_softc	*sc = ifp->if_softc;
358 	struct ifreq		*ifr;
359 	struct shim_hdr		 shim;
360 	int			 error = 0;
361 
362 	ifr = (struct ifreq *)data;
363 	switch (cmd) {
364 	case SIOCSIFADDR:
365 		break;
366 	case SIOCSIFFLAGS:
367 		if (ifp->if_flags & IFF_UP)
368 			ifp->if_flags |= IFF_RUNNING;
369 		else
370 			ifp->if_flags &= ~IFF_RUNNING;
371 		break;
372 	case SIOCSIFMTU:
373 		if (ifr->ifr_mtu < MPE_MTU_MIN ||
374 		    ifr->ifr_mtu > MPE_MTU_MAX)
375 			error = EINVAL;
376 		else
377 			ifp->if_mtu = ifr->ifr_mtu;
378 		break;
379 	case SIOCGETLABEL:
380 		shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
381 		if (shim.shim_label == 0) {
382 			error = EADDRNOTAVAIL;
383 			break;
384 		}
385 		error = copyout(&shim, ifr->ifr_data, sizeof(shim));
386 		break;
387 	case SIOCSETLABEL:
388 		error = copyin(ifr->ifr_data, &shim, sizeof(shim));
389 		if (error != 0)
390 			break;
391 		if (shim.shim_label > MPLS_LABEL_MAX ||
392 		    shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
393 			error = EINVAL;
394 			break;
395 		}
396 		shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
397 		if (sc->sc_smpls.smpls_label != shim.shim_label) {
398 			error = mpe_set_label(sc, shim.shim_label,
399 			    sc->sc_rdomain);
400 		}
401 		break;
402 	case SIOCDELLABEL:
403 		if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
404 			rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
405 			    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
406 
407 		}
408 		sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0);
409 		break;
410 
411 	case SIOCSLIFPHYRTABLE:
412 		if (ifr->ifr_rdomainid < 0 ||
413 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
414 		    !rtable_exists(ifr->ifr_rdomainid) ||
415 		    ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) {
416 			error = EINVAL;
417 			break;
418 		}
419 		if (sc->sc_rdomain != ifr->ifr_rdomainid) {
420 			error = mpe_set_label(sc, sc->sc_smpls.smpls_label,
421 			    ifr->ifr_rdomainid);
422 		}
423 		break;
424 	case SIOCGLIFPHYRTABLE:
425 		ifr->ifr_rdomainid = sc->sc_rdomain;
426 		break;
427 
428 	case SIOCSTXHPRIO:
429 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
430 		if (error != 0)
431 			break;
432 
433 		sc->sc_txhprio = ifr->ifr_hdrprio;
434 		break;
435 	case SIOCGTXHPRIO:
436 		ifr->ifr_hdrprio = sc->sc_txhprio;
437 		break;
438 
439 	case SIOCSRXHPRIO:
440 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
441 		if (error != 0)
442 			break;
443 
444 		sc->sc_rxhprio = ifr->ifr_hdrprio;
445 		break;
446 	case SIOCGRXHPRIO:
447 		ifr->ifr_hdrprio = sc->sc_rxhprio;
448 		break;
449 
450 	default:
451 		return (ENOTTY);
452 	}
453 
454 	return (error);
455 }
456 
457 void
458 mpe_input(struct ifnet *ifp, struct mbuf *m)
459 {
460 	struct mpe_softc *sc = ifp->if_softc;
461 	struct shim_hdr	*shim;
462 	struct mbuf 	*n;
463 	uint8_t		 ttl, tos;
464 	uint32_t	 exp;
465 	int rxprio = sc->sc_rxhprio;
466 
467 	shim = mtod(m, struct shim_hdr *);
468 	exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
469 	if (!MPLS_BOS_ISSET(shim->shim_label))
470 		goto drop;
471 
472 	ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
473 	m_adj(m, sizeof(*shim));
474 
475 	n = m;
476 	while (n->m_len == 0) {
477 		n = n->m_next;
478 		if (n == NULL)
479 			goto drop;
480 	}
481 
482 	switch (*mtod(n, uint8_t *) >> 4) {
483 	case 4: {
484 		struct ip *ip;
485 		if (m->m_len < sizeof(*ip)) {
486 			m = m_pullup(m, sizeof(*ip));
487 			if (m == NULL)
488 				return;
489 		}
490 		ip = mtod(m, struct ip *);
491 		tos = ip->ip_tos;
492 
493 		if (mpls_mapttl_ip) {
494 			m = mpls_ip_adjttl(m, ttl);
495 			if (m == NULL)
496 				return;
497 		}
498 
499 		m->m_pkthdr.ph_family = AF_INET;
500 		break;
501 	}
502 #ifdef INET6
503 	case 6: {
504 		struct ip6_hdr *ip6;
505 		uint32_t flow;
506 		if (m->m_len < sizeof(*ip6)) {
507 			m = m_pullup(m, sizeof(*ip6));
508 			if (m == NULL)
509 				return;
510 		}
511 		ip6 = mtod(m, struct ip6_hdr *);
512 		flow = bemtoh32(&ip6->ip6_flow);
513 		tos = flow >> 20;
514 
515 		if (mpls_mapttl_ip6) {
516 			m = mpls_ip6_adjttl(m, ttl);
517 			if (m == NULL)
518 				return;
519 		}
520 
521 		m->m_pkthdr.ph_family = AF_INET6;
522 		break;
523 	}
524 #endif /* INET6 */
525 	default:
526 		goto drop;
527 	}
528 
529 	switch (rxprio) {
530 	case IF_HDRPRIO_PACKET:
531 		/* nop */
532 		break;
533 	case IF_HDRPRIO_OUTER:
534 		m->m_pkthdr.pf.prio = exp;
535 		break;
536 	case IF_HDRPRIO_PAYLOAD:
537 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
538 		break;
539 	default:
540 		m->m_pkthdr.pf.prio = rxprio;
541 		break;
542 	}
543 
544 	if_vinput(ifp, m);
545 	return;
546 drop:
547 	m_freem(m);
548 }
549