xref: /openbsd/sys/net/if_mpe.c (revision 274d7c50)
1 /* $OpenBSD: if_mpe.c,v 1.94 2019/06/26 08:13:13 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2008 Pierre-Yves Ritschard <pyr@spootnik.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "mpe.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/mbuf.h>
23 #include <sys/socket.h>
24 #include <sys/sockio.h>
25 #include <sys/ioctl.h>
26 
27 #include <net/if.h>
28 #include <net/if_dl.h>
29 #include <net/if_var.h>
30 #include <net/if_types.h>
31 #include <net/netisr.h>
32 #include <net/route.h>
33 
34 #include <netinet/in.h>
35 #include <netinet/ip.h>
36 
37 #ifdef INET6
38 #include <netinet/ip6.h>
39 #endif /* INET6 */
40 
41 #include "bpfilter.h"
42 #if NBPFILTER > 0
43 #include <net/bpf.h>
44 #endif
45 
46 #include <netmpls/mpls.h>
47 
48 
49 
/*
 * Debug output helper.  With MPLS_DEBUG defined, DPRINTF((fmt, ...)) calls
 * printf with the given argument list when mpedebug is non-zero; without
 * MPLS_DEBUG it expands to nothing.  The argument must be wrapped in an
 * extra pair of parentheses because it is pasted directly after "printf".
 *
 * NOTE(review): mpedebug is not declared in the visible part of this file;
 * confirm it exists before building with MPLS_DEBUG.
 */
#ifdef MPLS_DEBUG
#define DPRINTF(x)    do { if (mpedebug) printf x ; } while (0)
#else
#define DPRINTF(x)
#endif
55 
/*
 * Per-interface state of an mpe(4) instance.  Allocated in
 * mpe_clone_create() and released in mpe_clone_destroy().
 */
struct mpe_softc {
	struct ifnet		sc_if;		/* the interface */
	int			sc_txhprio;	/* prio source for outgoing shim, see mpe_output() */
	int			sc_rxhprio;	/* prio source for received packets, see mpe_input() */
	unsigned int		sc_rdomain;	/* rdomain holding the label route */
	struct ifaddr		sc_ifa;		/* ifaddr handed to rt_ifa_add()/rt_ifa_del() */
	struct sockaddr_mpls	sc_smpls;	/* local label in shim format, 0 if unset */

	int			sc_dead;	/* set on destroy; mpe_set_label() then fails */
};
66 
/* Header length advertised in if_hdrlen: one MPLS shim. */
#define MPE_HDRLEN	sizeof(struct shim_hdr)
/* Default MTU assigned at create time. */
#define MPE_MTU		1500
/* Inclusive bounds accepted by SIOCSIFMTU. */
#define MPE_MTU_MIN	256
#define MPE_MTU_MAX	8192
71 
/* Entry points implemented in this file. */
void	mpeattach(int);
int	mpe_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	mpe_ioctl(struct ifnet *, u_long, caddr_t);
void	mpe_start(struct ifnet *);
int	mpe_clone_create(struct if_clone *, int);
int	mpe_clone_destroy(struct ifnet *);
void	mpe_input(struct ifnet *, struct mbuf *);

/* Cloner for "mpe" interfaces; registered by mpeattach(). */
struct if_clone	mpe_cloner =
    IF_CLONE_INITIALIZER("mpe", mpe_clone_create, mpe_clone_destroy);

/*
 * TTL mapping switches defined outside this file; when non-zero the
 * IP TTL / IPv6 hop limit is carried over to or from the MPLS TTL.
 */
extern int	mpls_mapttl_ip;
#ifdef INET6
extern int	mpls_mapttl_ip6;
#endif
88 
/*
 * Attach hook: registers the mpe cloner.  The nmpe argument is not used.
 */
void
mpeattach(int nmpe)
{
	if_clone_attach(&mpe_cloner);
}
94 
95 int
96 mpe_clone_create(struct if_clone *ifc, int unit)
97 {
98 	struct mpe_softc	*sc;
99 	struct ifnet		*ifp;
100 
101 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
102 	if (sc == NULL)
103 		return (ENOMEM);
104 
105 	ifp = &sc->sc_if;
106 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
107 	ifp->if_flags = IFF_POINTOPOINT;
108 	ifp->if_xflags = IFXF_CLONED;
109 	ifp->if_softc = sc;
110 	ifp->if_mtu = MPE_MTU;
111 	ifp->if_ioctl = mpe_ioctl;
112 	ifp->if_output = mpe_output;
113 	ifp->if_start = mpe_start;
114 	ifp->if_type = IFT_MPLS;
115 	ifp->if_hdrlen = MPE_HDRLEN;
116 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
117 
118 	sc->sc_dead = 0;
119 
120 	if_attach(ifp);
121 	if_alloc_sadl(ifp);
122 #if NBPFILTER > 0
123 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
124 #endif
125 
126 	sc->sc_txhprio = 0;
127 	sc->sc_rxhprio = IF_HDRPRIO_PACKET;
128 	sc->sc_rdomain = 0;
129 	sc->sc_ifa.ifa_ifp = ifp;
130 	sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl);
131 	sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls);
132 	sc->sc_smpls.smpls_family = AF_MPLS;
133 
134 	return (0);
135 }
136 
137 int
138 mpe_clone_destroy(struct ifnet *ifp)
139 {
140 	struct mpe_softc	*sc = ifp->if_softc;
141 
142 	NET_LOCK();
143 	CLR(ifp->if_flags, IFF_RUNNING);
144 	sc->sc_dead = 1;
145 
146 	if (sc->sc_smpls.smpls_label) {
147 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
148 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
149 	}
150 	NET_UNLOCK();
151 
152 	ifq_barrier(&ifp->if_snd);
153 
154 	if_detach(ifp);
155 	free(sc, M_DEVBUF, sizeof *sc);
156 	return (0);
157 }
158 
159 /*
160  * Start output on the mpe interface.
161  */
162 void
163 mpe_start(struct ifnet *ifp)
164 {
165 	struct mpe_softc	*sc = ifp->if_softc;
166 	struct mbuf		*m;
167 	struct sockaddr		*sa;
168 	struct sockaddr		smpls = { .sa_family = AF_MPLS };
169 	struct rtentry		*rt;
170 	struct ifnet		*ifp0;
171 
172 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
173 		sa = mtod(m, struct sockaddr *);
174 		rt = rtalloc(sa, RT_RESOLVE, sc->sc_rdomain);
175 		if (!rtisvalid(rt)) {
176 			m_freem(m);
177 			rtfree(rt);
178 			continue;
179 		}
180 
181 		ifp0 = if_get(rt->rt_ifidx);
182 		if (ifp0 == NULL) {
183 			m_freem(m);
184 			rtfree(rt);
185 			continue;
186 		}
187 
188 		m_adj(m, sa->sa_len);
189 
190 #if NBPFILTER > 0
191 		if (ifp->if_bpf) {
192 			/* remove MPLS label before passing packet to bpf */
193 			m->m_data += sizeof(struct shim_hdr);
194 			m->m_len -= sizeof(struct shim_hdr);
195 			m->m_pkthdr.len -= sizeof(struct shim_hdr);
196 			bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
197 			    m, BPF_DIRECTION_OUT);
198 			m->m_data -= sizeof(struct shim_hdr);
199 			m->m_len += sizeof(struct shim_hdr);
200 			m->m_pkthdr.len += sizeof(struct shim_hdr);
201 		}
202 #endif
203 
204 		m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
205 		CLR(m->m_flags, M_BCAST|M_MCAST);
206 
207 		mpls_output(ifp0, m, &smpls, rt);
208 		if_put(ifp0);
209 		rtfree(rt);
210 	}
211 }
212 
213 int
214 mpe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
215 	struct rtentry *rt)
216 {
217 	struct mpe_softc *sc;
218 	struct rt_mpls	*rtmpls;
219 	struct shim_hdr	shim;
220 	int		error;
221 	int		txprio;
222 	uint8_t		ttl = mpls_defttl;
223 	uint8_t		tos, prio;
224 	size_t		ttloff;
225 	socklen_t	slen;
226 
227 	if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPLS)) {
228 		m_freem(m);
229 		return (ENETUNREACH);
230 	}
231 
232 	if (dst->sa_family == AF_LINK && ISSET(rt->rt_flags, RTF_LOCAL)) {
233 		mpe_input(ifp, m);
234 		return (0);
235 	}
236 
237 #ifdef DIAGNOSTIC
238 	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
239 		printf("%s: trying to send packet on wrong domain. "
240 		    "if %d vs. mbuf %d\n", ifp->if_xname,
241 		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid));
242 	}
243 #endif
244 
245 	rtmpls = (struct rt_mpls *)rt->rt_llinfo;
246 	if (rtmpls->mpls_operation != MPLS_OP_PUSH) {
247 		m_freem(m);
248 		return (ENETUNREACH);
249 	}
250 
251 	error = 0;
252 	switch (dst->sa_family) {
253 	case AF_INET: {
254 		struct ip *ip = mtod(m, struct ip *);
255 		tos = ip->ip_tos;
256 		ttloff = offsetof(struct ip, ip_ttl);
257 		slen = sizeof(struct sockaddr_in);
258 		break;
259 	}
260 #ifdef INET6
261 	case AF_INET6: {
262 		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
263 		uint32_t flow = bemtoh32(&ip6->ip6_flow);
264 		tos = flow >> 20;
265 		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
266 		slen = sizeof(struct sockaddr_in6);
267 		break;
268 	}
269 #endif
270 	default:
271 		m_freem(m);
272 		return (EPFNOSUPPORT);
273 	}
274 
275 	if (mpls_mapttl_ip) {
276 		/* assumes the ip header is already contig */
277 		ttl = *(mtod(m, uint8_t *) + ttloff);
278 	}
279 
280 	sc = ifp->if_softc;
281 	txprio = sc->sc_txhprio;
282 
283 	switch (txprio) {
284 	case IF_HDRPRIO_PACKET:
285 		prio = m->m_pkthdr.pf.prio;
286 		break;
287 	case IF_HDRPRIO_PAYLOAD:
288 		prio = IFQ_TOS2PRIO(tos);
289 		break;
290 	default:
291 		prio = txprio;
292 		break;
293 	}
294 
295 	shim.shim_label = rtmpls->mpls_label | htonl(prio << MPLS_EXP_OFFSET) |
296 	    MPLS_BOS_MASK | htonl(ttl);
297 
298 	m = m_prepend(m, sizeof(shim), M_NOWAIT);
299 	if (m == NULL) {
300 		error = ENOMEM;
301 		goto out;
302 	}
303 	*mtod(m, struct shim_hdr *) = shim;
304 
305 	m = m_prepend(m, slen, M_WAITOK);
306 	if (m == NULL) {
307 		error = ENOMEM;
308 		goto out;
309 	}
310 	memcpy(mtod(m, struct sockaddr *), rt->rt_gateway, slen);
311 	mtod(m, struct sockaddr *)->sa_len = slen; /* to be sure */
312 
313 	m->m_pkthdr.ph_family = dst->sa_family;
314 
315 	error = if_enqueue(ifp, m);
316 out:
317 	if (error)
318 		ifp->if_oerrors++;
319 	return (error);
320 }
321 
322 int
323 mpe_set_label(struct mpe_softc *sc, uint32_t label, unsigned int rdomain)
324 {
325 	int error;
326 
327 	if (sc->sc_dead)
328 		return (ENXIO);
329 
330 	if (sc->sc_smpls.smpls_label) {
331 		/* remove old MPLS route */
332 		rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
333 		    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
334 	}
335 
336 	/* add new MPLS route */
337 	sc->sc_smpls.smpls_label = label;
338 	sc->sc_rdomain = rdomain;
339 
340 	error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
341 	    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
342 	if (error)
343 		sc->sc_smpls.smpls_label = 0;
344 
345 	return (error);
346 }
347 
348 int
349 mpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
350 {
351 	struct mpe_softc	*sc = ifp->if_softc;
352 	struct ifreq		*ifr;
353 	struct shim_hdr		 shim;
354 	int			 error = 0;
355 
356 	ifr = (struct ifreq *)data;
357 	switch (cmd) {
358 	case SIOCSIFADDR:
359 		break;
360 	case SIOCSIFFLAGS:
361 		if (ifp->if_flags & IFF_UP)
362 			ifp->if_flags |= IFF_RUNNING;
363 		else
364 			ifp->if_flags &= ~IFF_RUNNING;
365 		break;
366 	case SIOCSIFMTU:
367 		if (ifr->ifr_mtu < MPE_MTU_MIN ||
368 		    ifr->ifr_mtu > MPE_MTU_MAX)
369 			error = EINVAL;
370 		else
371 			ifp->if_mtu = ifr->ifr_mtu;
372 		break;
373 	case SIOCGETLABEL:
374 		shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
375 		if (shim.shim_label == 0) {
376 			error = EADDRNOTAVAIL;
377 			break;
378 		}
379 		error = copyout(&shim, ifr->ifr_data, sizeof(shim));
380 		break;
381 	case SIOCSETLABEL:
382 		error = copyin(ifr->ifr_data, &shim, sizeof(shim));
383 		if (error != 0)
384 			break;
385 		if (shim.shim_label > MPLS_LABEL_MAX ||
386 		    shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
387 			error = EINVAL;
388 			break;
389 		}
390 		shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
391 		if (sc->sc_smpls.smpls_label != shim.shim_label) {
392 			error = mpe_set_label(sc, shim.shim_label,
393 			    sc->sc_rdomain);
394 		}
395 		break;
396 	case SIOCDELLABEL:
397 		if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
398 			rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
399 			    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
400 
401 		}
402 		shim.shim_label = MPLS_LABEL2SHIM(0);
403 		break;
404 
405 	case SIOCSLIFPHYRTABLE:
406 		if (ifr->ifr_rdomainid < 0 ||
407 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
408 		    !rtable_exists(ifr->ifr_rdomainid) ||
409 		    ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) {
410 			error = EINVAL;
411 			break;
412 		}
413 		if (sc->sc_rdomain != ifr->ifr_rdomainid) {
414 			error = mpe_set_label(sc, sc->sc_smpls.smpls_label,
415 			    ifr->ifr_rdomainid);
416 		}
417 		break;
418 	case SIOCGLIFPHYRTABLE:
419 		ifr->ifr_rdomainid = sc->sc_rdomain;
420 		break;
421 
422 	case SIOCSTXHPRIO:
423 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
424 		if (error != 0)
425 			break;
426 
427 		sc->sc_txhprio = ifr->ifr_hdrprio;
428 		break;
429 	case SIOCGTXHPRIO:
430 		ifr->ifr_hdrprio = sc->sc_txhprio;
431 		break;
432 
433 	case SIOCSRXHPRIO:
434 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
435 		if (error != 0)
436 			break;
437 
438 		sc->sc_rxhprio = ifr->ifr_hdrprio;
439 		break;
440 	case SIOCGRXHPRIO:
441 		ifr->ifr_hdrprio = sc->sc_rxhprio;
442 		break;
443 
444 	default:
445 		return (ENOTTY);
446 	}
447 
448 	return (error);
449 }
450 
451 void
452 mpe_input(struct ifnet *ifp, struct mbuf *m)
453 {
454 	struct mpe_softc *sc = ifp->if_softc;
455 	struct shim_hdr	*shim;
456 	struct mbuf 	*n;
457 	uint8_t		 ttl, tos;
458 	uint32_t	 exp;
459 	void (*input)(struct ifnet *, struct mbuf *);
460 	int rxprio = sc->sc_rxhprio;
461 
462 	shim = mtod(m, struct shim_hdr *);
463 	exp = ntohl(shim->shim_label & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET;
464 	if (!MPLS_BOS_ISSET(shim->shim_label))
465 		goto drop;
466 
467 	ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
468 	m_adj(m, sizeof(*shim));
469 
470 	n = m;
471 	while (n->m_len == 0) {
472 		n = n->m_next;
473 		if (n == NULL)
474 			goto drop;
475 	}
476 
477 	switch (*mtod(n, uint8_t *) >> 4) {
478 	case 4: {
479 		struct ip *ip;
480 		if (m->m_len < sizeof(*ip)) {
481 			m = m_pullup(m, sizeof(*ip));
482 			if (m == NULL)
483 				return;
484 		}
485 		ip = mtod(m, struct ip *);
486 		tos = ip->ip_tos;
487 
488 		if (mpls_mapttl_ip) {
489 			m = mpls_ip_adjttl(m, ttl);
490 			if (m == NULL)
491 				return;
492 		}
493 		input = ipv4_input;
494 		m->m_pkthdr.ph_family = AF_INET;
495 		break;
496 	}
497 #ifdef INET6
498 	case 6: {
499 		struct ip6_hdr *ip6;
500 		uint32_t flow;
501 		if (m->m_len < sizeof(*ip6)) {
502 			m = m_pullup(m, sizeof(*ip6));
503 			if (m == NULL)
504 				return;
505 		}
506 		ip6 = mtod(m, struct ip6_hdr *);
507 		flow = bemtoh32(&ip6->ip6_flow);
508 		tos = flow >> 20;
509 
510 		if (mpls_mapttl_ip6) {
511 			m = mpls_ip6_adjttl(m, ttl);
512 			if (m == NULL)
513 				return;
514 		}
515 		input = ipv6_input;
516 		m->m_pkthdr.ph_family = AF_INET6;
517 		break;
518 	}
519 #endif /* INET6 */
520 	default:
521 		goto drop;
522 	}
523 
524 	switch (rxprio) {
525 	case IF_HDRPRIO_PACKET:
526 		/* nop */
527 		break;
528 	case IF_HDRPRIO_OUTER:
529 		m->m_pkthdr.pf.prio = exp;
530 		break;
531 	case IF_HDRPRIO_PAYLOAD:
532 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
533 		break;
534 	default:
535 		m->m_pkthdr.pf.prio = rxprio;
536 		break;
537 	}
538 
539 	/* new receive if and move into correct rtable */
540 	m->m_pkthdr.ph_ifidx = ifp->if_index;
541 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
542 
543 	/* packet has not been processed by PF yet. */
544 	KASSERT(m->m_pkthdr.pf.statekey == NULL);
545 
546 #if NBPFILTER > 0
547 	if (ifp->if_bpf) {
548 		bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
549 		    m, BPF_DIRECTION_IN);
550 	}
551 #endif
552 
553 	(*input)(ifp, m);
554 	return;
555 drop:
556 	m_freem(m);
557 }
558