xref: /openbsd/sys/net/if_mpe.c (revision 3cab2bb3)
1 /* $OpenBSD: if_mpe.c,v 1.96 2020/07/10 13:26:41 patrick Exp $ */
2 
3 /*
4  * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/mbuf.h>
22 #include <sys/socket.h>
23 #include <sys/sockio.h>
24 #include <sys/ioctl.h>
25 
26 #include <net/if.h>
27 #include <net/if_dl.h>
28 #include <net/if_var.h>
29 #include <net/if_types.h>
30 #include <net/netisr.h>
31 #include <net/route.h>
32 
33 #include <netinet/in.h>
34 #include <netinet/ip.h>
35 
36 #ifdef INET6
37 #include <netinet/ip6.h>
38 #endif /* INET6 */
39 
40 #include "bpfilter.h"
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44 
45 #include <netmpls/mpls.h>
46 
47 
48 
/*
 * Debug printf wrapper: call as DPRINTF(("fmt", args...)) with double
 * parentheses.  Expands to nothing unless MPLS_DEBUG is defined.
 * NOTE(review): mpedebug is not declared in this file; it must be
 * provided elsewhere when MPLS_DEBUG is enabled -- confirm.
 */
#ifdef MPLS_DEBUG
#define DPRINTF(x)	do { if (mpedebug) printf x ; } while (0)
#else
#define DPRINTF(x)
#endif
54 
55 struct mpe_softc {
56 	struct ifnet		sc_if;		/* the interface */
57 	int			sc_txhprio;
58 	int			sc_rxhprio;
59 	unsigned int		sc_rdomain;
60 	struct ifaddr		sc_ifa;
61 	struct sockaddr_mpls	sc_smpls;
62 
63 	int			sc_dead;
64 };
65 
/* Link header length advertised by the interface: one MPLS shim. */
#define MPE_HDRLEN	(sizeof(struct shim_hdr))
/* Default MTU and the bounds accepted by SIOCSIFMTU. */
#define MPE_MTU		1500
#define MPE_MTU_MIN	256
#define MPE_MTU_MAX	8192
70 
71 void	mpeattach(int);
72 int	mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *,
73 	    struct rtentry *);
74 int	mpe_ioctl(struct ifnet *, u_long, caddr_t);
75 void	mpe_start(struct ifnet *);
76 int	mpe_clone_create(struct if_clone *, int);
77 int	mpe_clone_destroy(struct ifnet *);
78 void	mpe_input(struct ifnet *, struct mbuf *);
79 
80 struct if_clone	mpe_cloner =
81     IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy);
82 
/* TTL mapping knobs defined outside this file, one per address family. */
extern int	mpls_mapttl_ip;
#ifdef INET6
extern int	mpls_mapttl_ip6;
#endif /* INET6 */
87 
88 void
89 mpeattach(int nmpe)
90 {
91 	if_clone_attach(&mpe_cloner);
92 }
93 
94 int
95 mpe_clone_create(struct if_clone *ifc, int unit)
96 {
97 	struct mpe_softc	*sc;
98 	struct ifnet		*ifp;
99 
100 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
101 	if (sc == NULL)
102 		return (ENOMEM);
103 
104 	ifp = &sc->sc_if;
105 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
106 	ifp->if_flags = IFF_POINTOPOINT;
107 	ifp->if_xflags = IFXF_CLONED;
108 	ifp->if_softc = sc;
109 	ifp->if_mtu = MPE_MTU;
110 	ifp->if_ioctl = mpe_ioctl;
111 	ifp->if_output = mpe_output;
112 	ifp->if_start = mpe_start;
113 	ifp->if_type = IFT_MPLS;
114 	ifp->if_hdrlen = MPE_HDRLEN;
115 	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
116 
117 	sc->sc_dead = 0;
118 
119 	if_attach(ifp);
120 	if_alloc_sadl(ifp);
121 #if NBPFILTER > 0
122 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
123 #endif
124 
125 	sc->sc_txhprio = 0;
126 	sc->sc_rxhprio = IF_HDRPRIO_PACKET;
127 	sc->sc_rdomain = 0;
128 	sc->sc_ifa.ifa_ifp = ifp;
129 	sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
130 	sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
131 	sc->sc_smpls.smpls_family = AF_MPLS;
132 
133 	return (0);
134 }
135 
136 int
137 mpe_clone_destroy(struct ifnet *ifp)
138 {
139 	struct mpe_softc	*sc = ifp->if_softc;
140 
141 	NET_LOCK();
142 	CLR(ifp->if_flags, IFF_RUNNING);
143 	sc->sc_dead = 1;
144 
145 	if (sc->sc_smpls.smpls_label) {
146 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
147 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
148 	}
149 	NET_UNLOCK();
150 
151 	ifq_barrier(&ifp->if_snd);
152 
153 	if_detach(ifp);
154 	free(sc, M_DEVBUF, sizeof *sc);
155 	return (0);
156 }
157 
158 /*
159  * Start output on the mpe interface.
160  */
161 void
162 mpe_start(struct ifnet *ifp)
163 {
164 	struct mpe_softc	*sc = ifp->if_softc;
165 	struct mbuf		*m;
166 	struct sockaddr		*sa;
167 	struct sockaddr		smpls = { .sa_family = AF_MPLS };
168 	struct rtentry		*rt;
169 	struct ifnet		*ifp0;
170 
171 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
172 		sa = mtod(m, struct sockaddr *);
173 		rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain);
174 		if (!rtisvalid(rt)) {
175 			m_freem(m);
176 			rtfree(rt);
177 			continue;
178 		}
179 
180 		ifp0 = if_get(rt->rt_ifidx);
181 		if (ifp0 == NULL) {
182 			m_freem(m);
183 			rtfree(rt);
184 			continue;
185 		}
186 
187 		m_adj(m, sa->sa_len);
188 
189 #if NBPFILTER > 0
190 		if (ifp->if_bpf) {
191 			/* remove MPLS label before passing packet to bpf */
192 			m->m_data += sizeof(struct shim_hdr);
193 			m->m_len -= sizeof(struct shim_hdr);
194 			m->m_pkthdr.len -= sizeof(struct shim_hdr);
195 			bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
196 			    m, BPF_DIRECTION_OUT);
197 			m->m_data -= sizeof(struct shim_hdr);
198 			m->m_len += sizeof(struct shim_hdr);
199 			m->m_pkthdr.len += sizeof(struct shim_hdr);
200 		}
201 #endif
202 
203 		m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
204 		CLR(m->m_flags, M_BCAST|M_MCAST);
205 
206 		mpls_output(ifp0, m, &smpls, rt);
207 		if_put(ifp0);
208 		rtfree(rt);
209 	}
210 }
211 
212 int
213 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
214 	struct rtentry *rt)
215 {
216 	struct mpe_softc *sc;
217 	struct rt_mpls	*rtmpls;
218 	struct shim_hdr	shim;
219 	int		error;
220 	int		txprio;
221 	uint8_t		ttl = mpls_defttl;
222 	uint8_t		tos, prio;
223 	size_t		ttloff;
224 	socklen_t	slen;
225 
226 	if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) {
227 		m_freem(m);
228 		return (ENETUNREACH);
229 	}
230 
231 	if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) {
232 		mpe_input(ifp, m);
233 		return (0);
234 	}
235 
236 #ifdef DIAGNOSTIC
237 	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
238 		printf("%s: trying to send packet on wrong domain. "
239 		    "if %d vs. mbuf %d\n", ifp->if_xname,
240 		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid));
241 	}
242 #endif
243 
244 	rtmpls = (struct rt_mpls *)rt->rt_llinfo;
245 	if (rtmpls->mpls_operation != MPLS_OP_PUSH) {
246 		m_freem(m);
247 		return (ENETUNREACH);
248 	}
249 
250 	error = 0;
251 	switch (dst->sa_family) {
252 	case AF_INET: {
253 		struct ip *ip = mtod(m, struct ip *);
254 		tos = ip->ip_tos;
255 		ttloff = offsetof(struct ip, ip_ttl);
256 		slen = sizeof(struct sockaddr_in);
257 		break;
258 	}
259 #ifdef INET6
260 	case AF_INET6: {
261 		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
262 		uint32_t flow = bemtoh32(&ip6->ip6_flow);
263 		tos = flow >> 20;
264 		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
265 		slen = sizeof(struct sockaddr_in6);
266 		break;
267 	}
268 #endif
269 	default:
270 		m_freem(m);
271 		return (EPFNOSUPPORT);
272 	}
273 
274 	if (mpls_mapttl_ip) {
275 		/* assumes the ip header is already contig */
276 		ttl = *(mtod(m, uint8_t *) + ttloff);
277 	}
278 
279 	sc = ifp->if_softc;
280 	txprio = sc->sc_txhprio;
281 
282 	switch (txprio) {
283 	case IF_HDRPRIO_PACKET:
284 		prio = m->m_pkthdr.pf.prio;
285 		break;
286 	case IF_HDRPRIO_PAYLOAD:
287 		prio = IFQ_TOS2PRIO(tos);
288 		break;
289 	default:
290 		prio = txprio;
291 		break;
292 	}
293 
294 	shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) |
295 	    MPLS_BOS_MASK | htonl(ttl);
296 
297 	m = m_prepend(m, sizeof(shim), M_NOWAIT);
298 	if (m == NULL) {
299 		error = ENOMEM;
300 		goto out;
301 	}
302 	*mtod(m, struct shim_hdr *) = shim;
303 
304 	m = m_prepend(m, slen, M_WAITOK);
305 	if (m == NULL) {
306 		error = ENOMEM;
307 		goto out;
308 	}
309 	memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen);
310 	mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */
311 
312 	m->m_pkthdr.ph_family = dst->sa_family;
313 
314 	error = if_enqueue(ifp, m);
315 out:
316 	if (error)
317 		ifp->if_oerrors++;
318 	return (error);
319 }
320 
321 int
322 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain)
323 {
324 	int error;
325 
326 	if (sc->sc_dead)
327 		return (ENXIO);
328 
329 	if (sc->sc_smpls.smpls_label) {
330 		/* remove old MPLS route */
331 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
332 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
333 	}
334 
335 	/* add new MPLS route */
336 	sc->sc_smpls.smpls_label = label;
337 	sc->sc_rdomain = rdomain;
338 
339 	error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
340 	    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
341 	if (error)
342 		sc->sc_smpls.smpls_label = 0;
343 
344 	return (error);
345 }
346 
347 int
348 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
349 {
350 	struct mpe_softc	*sc = ifp->if_softc;
351 	struct ifreq		*ifr;
352 	struct shim_hdr		 shim;
353 	int			 error = 0;
354 
355 	ifr = (struct ifreq *)data;
356 	switch (cmd) {
357 	case SIOCSIFADDR:
358 		break;
359 	case SIOCSIFFLAGS:
360 		if (ifp->if_flags & IFF_UP)
361 			ifp->if_flags |= IFF_RUNNING;
362 		else
363 			ifp->if_flags &= ~IFF_RUNNING;
364 		break;
365 	case SIOCSIFMTU:
366 		if (ifr->ifr_mtu < MPE_MTU_MIN ||
367 		    ifr->ifr_mtu > MPE_MTU_MAX)
368 			error = EINVAL;
369 		else
370 			ifp->if_mtu = ifr->ifr_mtu;
371 		break;
372 	case SIOCGETLABEL:
373 		shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
374 		if (shim.shim_label == 0) {
375 			error = EADDRNOTAVAIL;
376 			break;
377 		}
378 		error = copyout(&shim, ifr->ifr_data, sizeof(shim));
379 		break;
380 	case SIOCSETLABEL:
381 		error = copyin(ifr->ifr_data, &shim, sizeof(shim));
382 		if (error != 0)
383 			break;
384 		if (shim.shim_label > MPLS_LABEL_MAX ||
385 		    shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
386 			error = EINVAL;
387 			break;
388 		}
389 		shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
390 		if (sc->sc_smpls.smpls_label != shim.shim_label) {
391 			error = mpe_set_label(sc, shim.shim_label,
392 			    sc->sc_rdomain);
393 		}
394 		break;
395 	case SIOCDELLABEL:
396 		if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
397 			rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
398 			    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
399 
400 		}
401 		shim.shim_label = MPLS_LABEL2SHIM(0);
402 		break;
403 
404 	case SIOCSLIFPHYRTABLE:
405 		if (ifr->ifr_rdomainid < 0 ||
406 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
407 		    !rtable_exists(ifr->ifr_rdomainid) ||
408 		    ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) {
409 			error = EINVAL;
410 			break;
411 		}
412 		if (sc->sc_rdomain != ifr->ifr_rdomainid) {
413 			error = mpe_set_label(sc, sc->sc_smpls.smpls_label,
414 			    ifr->ifr_rdomainid);
415 		}
416 		break;
417 	case SIOCGLIFPHYRTABLE:
418 		ifr->ifr_rdomainid = sc->sc_rdomain;
419 		break;
420 
421 	case SIOCSTXHPRIO:
422 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
423 		if (error != 0)
424 			break;
425 
426 		sc->sc_txhprio = ifr->ifr_hdrprio;
427 		break;
428 	case SIOCGTXHPRIO:
429 		ifr->ifr_hdrprio = sc->sc_txhprio;
430 		break;
431 
432 	case SIOCSRXHPRIO:
433 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
434 		if (error != 0)
435 			break;
436 
437 		sc->sc_rxhprio = ifr->ifr_hdrprio;
438 		break;
439 	case SIOCGRXHPRIO:
440 		ifr->ifr_hdrprio = sc->sc_rxhprio;
441 		break;
442 
443 	default:
444 		return (ENOTTY);
445 	}
446 
447 	return (error);
448 }
449 
450 void
451 mpe_input(struct ifnet *ifp, struct mbuf *m)
452 {
453 	struct mpe_softc *sc = ifp->if_softc;
454 	struct shim_hdr	*shim;
455 	struct mbuf 	*n;
456 	uint8_t		 ttl, tos;
457 	uint32_t	 exp;
458 	void (*input)(struct ifnet *, struct mbuf *);
459 	int rxprio = sc->sc_rxhprio;
460 
461 	shim = mtod(m, struct shim_hdr *);
462 	exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
463 	if (!MPLS_BOS_ISSET(shim->shim_label))
464 		goto drop;
465 
466 	ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
467 	m_adj(m, sizeof(*shim));
468 
469 	n = m;
470 	while (n->m_len == 0) {
471 		n = n->m_next;
472 		if (n == NULL)
473 			goto drop;
474 	}
475 
476 	switch (*mtod(n, uint8_t *) >> 4) {
477 	case 4: {
478 		struct ip *ip;
479 		if (m->m_len < sizeof(*ip)) {
480 			m = m_pullup(m, sizeof(*ip));
481 			if (m == NULL)
482 				return;
483 		}
484 		ip = mtod(m, struct ip *);
485 		tos = ip->ip_tos;
486 
487 		if (mpls_mapttl_ip) {
488 			m = mpls_ip_adjttl(m, ttl);
489 			if (m == NULL)
490 				return;
491 		}
492 		input = ipv4_input;
493 		m->m_pkthdr.ph_family = AF_INET;
494 		break;
495 	}
496 #ifdef INET6
497 	case 6: {
498 		struct ip6_hdr *ip6;
499 		uint32_t flow;
500 		if (m->m_len < sizeof(*ip6)) {
501 			m = m_pullup(m, sizeof(*ip6));
502 			if (m == NULL)
503 				return;
504 		}
505 		ip6 = mtod(m, struct ip6_hdr *);
506 		flow = bemtoh32(&ip6->ip6_flow);
507 		tos = flow >> 20;
508 
509 		if (mpls_mapttl_ip6) {
510 			m = mpls_ip6_adjttl(m, ttl);
511 			if (m == NULL)
512 				return;
513 		}
514 		input = ipv6_input;
515 		m->m_pkthdr.ph_family = AF_INET6;
516 		break;
517 	}
518 #endif /* INET6 */
519 	default:
520 		goto drop;
521 	}
522 
523 	switch (rxprio) {
524 	case IF_HDRPRIO_PACKET:
525 		/* nop */
526 		break;
527 	case IF_HDRPRIO_OUTER:
528 		m->m_pkthdr.pf.prio = exp;
529 		break;
530 	case IF_HDRPRIO_PAYLOAD:
531 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
532 		break;
533 	default:
534 		m->m_pkthdr.pf.prio = rxprio;
535 		break;
536 	}
537 
538 	/* new receive if and move into correct rtable */
539 	m->m_pkthdr.ph_ifidx = ifp->if_index;
540 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
541 
542 	/* packet has not been processed by PF yet. */
543 	KASSERT(m->m_pkthdr.pf.statekey == NULL);
544 
545 #if NBPFILTER > 0
546 	if (ifp->if_bpf) {
547 		bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
548 		    m, BPF_DIRECTION_IN);
549 	}
550 #endif
551 
552 	(*input)(ifp, m);
553 	return;
554 drop:
555 	m_freem(m);
556 }
557