xref: /openbsd/sys/net/if_gif.c (revision 4cfece93)
1 /*	$OpenBSD: if_gif.c,v 1.130 2020/07/10 13:26:41 patrick Exp $	*/
2 /*	$KAME: if_gif.c,v 1.43 2001/02/20 08:51:07 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/mbuf.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 #include <sys/queue.h>
40 
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_types.h>
44 #include <net/route.h>
45 
46 #include <netinet/in.h>
47 #include <netinet/in_var.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip_var.h>
50 #include <netinet/ip_ipip.h>
51 #include <netinet/ip_ecn.h>
52 
53 #ifdef INET6
54 #include <netinet6/in6_var.h>
55 #include <netinet/ip6.h>
56 #include <netinet6/ip6_var.h>
57 #endif /* INET6 */
58 
59 #include <net/if_gif.h>
60 
61 #include "bpfilter.h"
62 #if NBPFILTER > 0
63 #include <net/bpf.h>
64 #endif
65 
66 #ifdef MPLS
67 #include <netmpls/mpls.h>
68 #endif
69 
70 #include "pf.h"
71 #if NPF > 0
72 #include <net/pfvar.h>
73 #endif
74 
75 #define GIF_MTU		(1280)	/* Default MTU */
76 #define GIF_MTU_MIN	(1280)	/* Minimum MTU */
77 #define GIF_MTU_MAX	(8192)	/* Maximum MTU */
78 
/*
 * Tunnel endpoint address.  Which member is meaningful is selected by an
 * address family kept alongside it (see gif_ip_cmp(), which switches on
 * the family before comparing in4 or in6).
 */
union gif_addr {
	struct in6_addr		in6;	/* endpoint when the family is AF_INET6 */
	struct in_addr		in4;	/* endpoint when the family is AF_INET */
};
83 
84 struct gif_tunnel {
85 	TAILQ_ENTRY(gif_tunnel)	t_entry;
86 
87 	union gif_addr		t_src;
88 #define t_src4		t_src.in4
89 #define t_src6		t_src.in6
90 	union gif_addr		t_dst;
91 #define t_dst4		t_dst.in4
92 #define t_dst6		t_dst.in6
93 	u_int			t_rtableid;
94 
95 	sa_family_t		t_af;
96 };
97 
98 TAILQ_HEAD(gif_list, gif_tunnel);
99 
100 static inline int	gif_cmp(const struct gif_tunnel *,
101 			    const struct gif_tunnel *);
102 
/*
 * Per-interface state.  The tunnel description is deliberately the first
 * member: gif_find() casts a struct gif_tunnel pointer taken from gif_list
 * back to the enclosing struct gif_softc.
 */
struct gif_softc {
	struct gif_tunnel	sc_tunnel; /* must be first */
	struct ifnet		sc_if;		/* the network interface itself */
	uint16_t		sc_df;		/* htons(IP_DF) or 0, copied to ip_off */
	int			sc_ttl;		/* outer TTL; -1 copies it from the payload */
	int			sc_txhprio;	/* how the outer TOS is chosen on transmit */
	int			sc_rxhprio;	/* how the mbuf priority is chosen on receive */
	int			sc_ecn;		/* ECN_ALLOWED or ECN_FORBIDDEN */
};
112 
/*
 * All gif tunnels.  Entries are inserted by gif_clone_create() and removed
 * by gif_clone_destroy(), both while holding NET_LOCK(); gif_find() walks
 * the list on input.
 */
struct gif_list gif_list = TAILQ_HEAD_INITIALIZER(gif_list);
114 
/* pseudo-device attach and interface cloning */
void	gifattach(int);
int	gif_clone_create(struct if_clone *, int);
int	gif_clone_destroy(struct ifnet *);

/* interface entry points and the transmit path */
void	gif_start(struct ifnet *);
int	gif_ioctl(struct ifnet *, u_long, caddr_t);
int	gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	gif_send(struct gif_softc *, struct mbuf *, uint8_t, uint8_t, uint8_t);

/* interface state and tunnel endpoint configuration */
int	gif_up(struct gif_softc *);
int	gif_down(struct gif_softc *);
int	gif_set_tunnel(struct gif_softc *, struct if_laddrreq *);
int	gif_get_tunnel(struct gif_softc *, struct if_laddrreq *);
int	gif_del_tunnel(struct gif_softc *);
/*
 * NOTE(review): no definition of in_gif_output() or in6_gif_output() is
 * visible in this file; these two prototypes may be stale -- confirm.
 */
int	in_gif_output(struct ifnet *, int, struct mbuf **);
int	in6_gif_output(struct ifnet *, int, struct mbuf **);
/* common receive path shared by in_gif_input() and in6_gif_input() */
int	gif_input(struct gif_tunnel *, struct mbuf **, int *, int, int,
	    uint8_t);
134 
135 /*
136  * gif global variable definitions
137  */
/* Cloner for "gif" interfaces; registered by gifattach(). */
struct if_clone gif_cloner =
    IF_CLONE_INITIALIZER("gif", gif_clone_create, gif_clone_destroy);
140 
141 void
142 gifattach(int count)
143 {
144 	if_clone_attach(&gif_cloner);
145 }
146 
147 int
148 gif_clone_create(struct if_clone *ifc, int unit)
149 {
150 	struct gif_softc *sc;
151 	struct ifnet *ifp;
152 
153 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
154 	ifp = &sc->sc_if;
155 
156 	sc->sc_df = htons(0);
157 	sc->sc_ttl = ip_defttl;
158 	sc->sc_txhprio = IF_HDRPRIO_PAYLOAD;
159 	sc->sc_rxhprio = IF_HDRPRIO_PAYLOAD;
160 	sc->sc_ecn = ECN_ALLOWED;
161 
162 	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
163 	    "%s%d", ifc->ifc_name, unit);
164 
165 	ifp->if_mtu    = GIF_MTU;
166 	ifp->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
167 	ifp->if_xflags = IFXF_CLONED;
168 	ifp->if_ioctl  = gif_ioctl;
169 	ifp->if_start  = gif_start;
170 	ifp->if_output = gif_output;
171 	ifp->if_rtrequest = p2p_rtrequest;
172 	ifp->if_type   = IFT_GIF;
173 	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
174 	ifp->if_softc = sc;
175 
176 	if_attach(ifp);
177 	if_alloc_sadl(ifp);
178 
179 #if NBPFILTER > 0
180 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
181 #endif
182 
183 	NET_LOCK();
184 	TAILQ_INSERT_TAIL(&gif_list, &sc->sc_tunnel, t_entry);
185 	NET_UNLOCK();
186 
187 	return (0);
188 }
189 
190 int
191 gif_clone_destroy(struct ifnet *ifp)
192 {
193 	struct gif_softc *sc = ifp->if_softc;
194 
195 	NET_LOCK();
196 	if (ISSET(ifp->if_flags, IFF_RUNNING))
197 		gif_down(sc);
198 
199 	TAILQ_REMOVE(&gif_list, &sc->sc_tunnel, t_entry);
200 	NET_UNLOCK();
201 
202 	if_detach(ifp);
203 
204 	free(sc, M_DEVBUF, sizeof(*sc));
205 
206 	return (0);
207 }
208 
209 void
210 gif_start(struct ifnet *ifp)
211 {
212 	struct gif_softc *sc = ifp->if_softc;
213 	struct mbuf *m;
214 #if NBPFILTER > 0
215 	caddr_t if_bpf;
216 #endif
217 	uint8_t proto, ttl, tos;
218 	int ttloff, tttl;
219 
220 	tttl = sc->sc_ttl;
221 
222 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
223 #if NBPFILTER > 0
224 		if_bpf = ifp->if_bpf;
225 		if (if_bpf) {
226 			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m,
227 			    BPF_DIRECTION_OUT);
228 		}
229 #endif
230 
231 		switch (m->m_pkthdr.ph_family) {
232 		case AF_INET: {
233 			struct ip *ip;
234 
235 			m = m_pullup(m, sizeof(*ip));
236 			if (m == NULL)
237 				continue;
238 
239 			ip = mtod(m, struct ip *);
240 			tos = ip->ip_tos;
241 
242 			ttloff = offsetof(struct ip, ip_ttl);
243 			proto = IPPROTO_IPV4;
244 			break;
245 		}
246 #ifdef INET6
247 		case AF_INET6: {
248 			struct ip6_hdr *ip6;
249 
250 			m = m_pullup(m, sizeof(*ip6));
251 			if (m == NULL)
252 				continue;
253 
254 			ip6 = mtod(m, struct ip6_hdr *);
255 			tos = ntohl(ip6->ip6_flow >> 20);
256 
257 			ttloff = offsetof(struct ip6_hdr, ip6_hlim);
258 			proto = IPPROTO_IPV6;
259 			break;
260 		}
261 #endif
262 #ifdef MPLS
263 		case AF_MPLS: {
264 			uint32_t shim;
265 
266 			m = m_pullup(m, sizeof(shim));
267 			if (m == NULL)
268 				continue;
269 
270 			shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
271 			tos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
272 
273 			ttloff = 3;
274 
275 			proto = IPPROTO_MPLS;
276 			break;
277 		}
278 #endif
279 		default:
280 			unhandled_af(m->m_pkthdr.ph_family);
281 		}
282 
283 		if (tttl == -1) {
284 			KASSERT(m->m_len > ttloff);
285 
286 			ttl = *(m->m_data + ttloff);
287 		} else
288 			ttl = tttl;
289 
290 		switch (sc->sc_txhprio) {
291 		case IF_HDRPRIO_PAYLOAD:
292 			/* tos is already set */
293 			break;
294 		case IF_HDRPRIO_PACKET:
295 			tos = IFQ_PRIO2TOS(m->m_pkthdr.pf.prio);
296 			break;
297 		default:
298 			tos = IFQ_PRIO2TOS(sc->sc_txhprio);
299 			break;
300 		}
301 
302 		gif_send(sc, m, proto, ttl, tos);
303 	}
304 }
305 
306 int
307 gif_send(struct gif_softc *sc, struct mbuf *m,
308     uint8_t proto, uint8_t ttl, uint8_t itos)
309 {
310 	uint8_t otos;
311 
312 	m->m_flags &= ~(M_BCAST|M_MCAST);
313 	m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid;
314 
315 #if NPF > 0
316 	pf_pkt_addr_changed(m);
317 #endif
318 
319 	ip_ecn_ingress(sc->sc_ecn, &otos, &itos);
320 
321 	switch (sc->sc_tunnel.t_af) {
322 	case AF_INET: {
323 		struct ip *ip;
324 
325 		if (in_nullhost(sc->sc_tunnel.t_dst4))
326 			goto drop;
327 
328 		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
329 		if (m == NULL)
330 			return (-1);
331 
332 		ip = mtod(m, struct ip *);
333 		ip->ip_off = sc->sc_df;
334 		ip->ip_tos = otos;
335 		ip->ip_len = htons(m->m_pkthdr.len);
336 		ip->ip_ttl = ttl;
337 		ip->ip_p = proto;
338 		ip->ip_src = sc->sc_tunnel.t_src4;
339 		ip->ip_dst = sc->sc_tunnel.t_dst4;
340 
341 		ip_send(m);
342 		break;
343 	}
344 #ifdef INET6
345 	case AF_INET6: {
346 		struct ip6_hdr *ip6;
347 		int len = m->m_pkthdr.len;
348 		uint32_t flow;
349 
350 		if (IN6_IS_ADDR_UNSPECIFIED(&sc->sc_tunnel.t_dst6))
351 			goto drop;
352 
353 		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
354 		if (m == NULL)
355 			return (-1);
356 
357 		flow = otos << 20;
358 		if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
359 			flow |= m->m_pkthdr.ph_flowid;
360 
361 		ip6 = mtod(m, struct ip6_hdr *);
362 		ip6->ip6_flow = htonl(flow);
363 		ip6->ip6_vfc |= IPV6_VERSION;
364 		ip6->ip6_plen = htons(len);
365 		ip6->ip6_nxt = proto;
366 		ip6->ip6_hlim = ttl;
367 		ip6->ip6_src = sc->sc_tunnel.t_src6;
368 		ip6->ip6_dst = sc->sc_tunnel.t_dst6;
369 
370 		if (sc->sc_df)
371 			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
372 
373 		ip6_send(m);
374 		break;
375 	}
376 #endif
377 	default:
378 		m_freem(m);
379 		break;
380 	}
381 
382 	return (0);
383 
384 drop:
385 	m_freem(m);
386 	return (0);
387 }
388 
389 int
390 gif_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
391     struct rtentry *rt)
392 {
393 	struct m_tag *mtag;
394 	int error = 0;
395 
396 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
397 		error = ENETDOWN;
398 		goto drop;
399 	}
400 
401 	switch (dst->sa_family) {
402 	case AF_INET:
403 #ifdef INET6
404 	case AF_INET6:
405 #endif
406 #ifdef MPLS
407 	case AF_MPLS:
408 #endif
409 		break;
410 	default:
411 		error = EAFNOSUPPORT;
412 		goto drop;
413 	}
414 
415 	/* Try to limit infinite recursion through misconfiguration. */
416 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
417 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
418 		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
419 		    sizeof(ifp->if_index)) == 0) {
420 			error = EIO;
421 			goto drop;
422 		}
423 	}
424 
425 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
426 	if (mtag == NULL) {
427 		error = ENOBUFS;
428 		goto drop;
429 	}
430 	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
431 	m_tag_prepend(m, mtag);
432 
433 	m->m_pkthdr.ph_family = dst->sa_family;
434 
435 	error = if_enqueue(ifp, m);
436 
437 	if (error)
438 		ifp->if_oerrors++;
439 	return (error);
440 
441 drop:
442 	m_freem(m);
443 	return (error);
444 }
445 
446 int
447 gif_up(struct gif_softc *sc)
448 {
449 	NET_ASSERT_LOCKED();
450 
451 	SET(sc->sc_if.if_flags, IFF_RUNNING);
452 
453 	return (0);
454 }
455 
456 int
457 gif_down(struct gif_softc *sc)
458 {
459 	NET_ASSERT_LOCKED();
460 
461 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
462 
463 	/* barrier? */
464 
465 	return (0);
466 }
467 
468 int
469 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
470 {
471 	struct gif_softc *sc = ifp->if_softc;
472 	struct ifreq *ifr = (struct ifreq *)data;
473 	int error = 0;
474 
475 	switch (cmd) {
476 	case SIOCSIFADDR:
477 		SET(ifp->if_flags, IFF_UP);
478 		/* FALLTHROUGH */
479 	case SIOCSIFFLAGS:
480 		if (ISSET(ifp->if_flags, IFF_UP)) {
481 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
482 				error = gif_up(sc);
483 			else
484 				error = 0;
485 		} else {
486 			if (ISSET(ifp->if_flags, IFF_RUNNING))
487 				error = gif_down(sc);
488 		}
489 		break;
490 
491 	case SIOCADDMULTI:
492 	case SIOCDELMULTI:
493 		break;
494 
495 	case SIOCSLIFPHYADDR:
496 		error = gif_set_tunnel(sc, (struct if_laddrreq *)data);
497 		break;
498 	case SIOCGLIFPHYADDR:
499 		error = gif_get_tunnel(sc, (struct if_laddrreq *)data);
500 		break;
501 	case SIOCDIFPHYADDR:
502 		error = gif_del_tunnel(sc);
503 		break;
504 
505 	case SIOCSIFMTU:
506 		if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) {
507 			error = EINVAL;
508 			break;
509 		}
510 
511 		ifp->if_mtu = ifr->ifr_mtu;
512 		break;
513 
514 	case SIOCSLIFPHYRTABLE:
515 		if (ifr->ifr_rdomainid < 0 ||
516 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
517 		    !rtable_exists(ifr->ifr_rdomainid)) {
518 			error = EINVAL;
519 			break;
520 		}
521 		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
522 		break;
523 	case SIOCGLIFPHYRTABLE:
524 		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
525 		break;
526 
527 	case SIOCSLIFPHYTTL:
528 		if (ifr->ifr_ttl != -1 &&
529 		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
530 			error = EINVAL;
531 			break;
532 		}
533 
534 		/* commit */
535 		sc->sc_ttl = ifr->ifr_ttl;
536 		break;
537 	case SIOCGLIFPHYTTL:
538 		ifr->ifr_ttl = sc->sc_ttl;
539 		break;
540 
541 	case SIOCSLIFPHYDF:
542 		/* commit */
543 		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
544 		break;
545 	case SIOCGLIFPHYDF:
546 		ifr->ifr_df = sc->sc_df ? 1 : 0;
547 		break;
548 
549 	case SIOCSLIFPHYECN:
550 		sc->sc_ecn = ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
551 		break;
552 	case SIOCGLIFPHYECN:
553 		ifr->ifr_metric = (sc->sc_ecn == ECN_ALLOWED);
554 		break;
555 
556 	case SIOCSTXHPRIO:
557 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
558 		if (error != 0)
559 			break;
560 
561 		sc->sc_txhprio = ifr->ifr_hdrprio;
562 		break;
563 	case SIOCGTXHPRIO:
564 		ifr->ifr_hdrprio = sc->sc_txhprio;
565 		break;
566 
567 	case SIOCSRXHPRIO:
568 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
569 		if (error != 0)
570 			break;
571 
572 		sc->sc_rxhprio = ifr->ifr_hdrprio;
573 		break;
574 	case SIOCGRXHPRIO:
575 		ifr->ifr_hdrprio = sc->sc_rxhprio;
576 		break;
577 
578 	default:
579 		error = ENOTTY;
580 		break;
581 	}
582 
583 	return (error);
584 }
585 
586 int
587 gif_get_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
588 {
589 	struct gif_tunnel *tunnel = &sc->sc_tunnel;
590 	struct sockaddr *src = (struct sockaddr *)&req->addr;
591 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
592 	struct sockaddr_in *sin;
593 #ifdef INET6 /* ifconfig already embeds the scopeid */
594 	struct sockaddr_in6 *sin6;
595 #endif
596 
597 	switch (tunnel->t_af) {
598 	case AF_UNSPEC:
599 		return (EADDRNOTAVAIL);
600 	case AF_INET:
601 		sin = (struct sockaddr_in *)src;
602 		memset(sin, 0, sizeof(*sin));
603 		sin->sin_family = AF_INET;
604 		sin->sin_len = sizeof(*sin);
605 		sin->sin_addr = tunnel->t_src4;
606 
607 		sin = (struct sockaddr_in *)dst;
608 		memset(sin, 0, sizeof(*sin));
609 		sin->sin_family = AF_INET;
610 		sin->sin_len = sizeof(*sin);
611 		sin->sin_addr = tunnel->t_dst4;
612 
613 		break;
614 
615 #ifdef INET6
616 	case AF_INET6:
617 		sin6 = (struct sockaddr_in6 *)src;
618 		memset(sin6, 0, sizeof(*sin6));
619 		sin6->sin6_family = AF_INET6;
620 		sin6->sin6_len = sizeof(*sin6);
621 		in6_recoverscope(sin6, &tunnel->t_src6);
622 
623 		sin6 = (struct sockaddr_in6 *)dst;
624 		memset(sin6, 0, sizeof(*sin6));
625 		sin6->sin6_family = AF_INET6;
626 		sin6->sin6_len = sizeof(*sin6);
627 		in6_recoverscope(sin6, &tunnel->t_dst6);
628 
629 		break;
630 #endif
631 	default:
632 		return (EAFNOSUPPORT);
633 	}
634 
635 	return (0);
636 }
637 
638 int
639 gif_set_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
640 {
641 	struct gif_tunnel *tunnel = &sc->sc_tunnel;
642 	struct sockaddr *src = (struct sockaddr *)&req->addr;
643 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
644 	struct sockaddr_in *src4, *dst4;
645 #ifdef INET6
646 	struct sockaddr_in6 *src6, *dst6;
647 	int error;
648 #endif
649 
650 	/* sa_family and sa_len must be equal */
651 	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
652 		return (EINVAL);
653 
654 	/* validate */
655 	switch (dst->sa_family) {
656 	case AF_INET:
657 		if (dst->sa_len != sizeof(*dst4))
658 			return (EINVAL);
659 
660 		src4 = (struct sockaddr_in *)src;
661 		if (in_nullhost(src4->sin_addr) ||
662 		    IN_MULTICAST(src4->sin_addr.s_addr))
663 			return (EINVAL);
664 
665 		dst4 = (struct sockaddr_in *)dst;
666 		/* dst4 can be 0.0.0.0 */
667 		if (IN_MULTICAST(dst4->sin_addr.s_addr))
668 			return (EINVAL);
669 
670 		tunnel->t_src4 = src4->sin_addr;
671 		tunnel->t_dst4 = dst4->sin_addr;
672 
673 		break;
674 #ifdef INET6
675 	case AF_INET6:
676 		if (dst->sa_len != sizeof(*dst6))
677 			return (EINVAL);
678 
679 		src6 = (struct sockaddr_in6 *)src;
680 		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
681 		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
682 			return (EINVAL);
683 
684 		dst6 = (struct sockaddr_in6 *)dst;
685 		if (IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
686 			return (EINVAL);
687 
688 		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
689 		if (error != 0)
690 			return (error);
691 
692 		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
693 		if (error != 0)
694 			return (error);
695 
696 		break;
697 #endif
698 	default:
699 		return (EAFNOSUPPORT);
700 	}
701 
702 	/* commit */
703 	tunnel->t_af = dst->sa_family;
704 
705 	return (0);
706 }
707 
708 int
709 gif_del_tunnel(struct gif_softc *sc)
710 {
711 	/* commit */
712 	sc->sc_tunnel.t_af = AF_UNSPEC;
713 
714 	return (0);
715 }
716 
717 int
718 in_gif_input(struct mbuf **mp, int *offp, int proto, int af)
719 {
720 	struct mbuf *m = *mp;
721 	struct gif_tunnel key;
722 	struct ip *ip;
723 	int rv;
724 
725 	ip = mtod(m, struct ip *);
726 
727 	key.t_af = AF_INET;
728 	key.t_src4 = ip->ip_dst;
729 	key.t_dst4 = ip->ip_src;
730 
731 	rv = gif_input(&key, mp, offp, proto, af, ip->ip_tos);
732 	if (rv == -1)
733 		rv = ipip_input(mp, offp, proto, af);
734 
735 	return (rv);
736 }
737 
738 #ifdef INET6
739 int
740 in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
741 {
742 	struct mbuf *m = *mp;
743 	struct gif_tunnel key;
744 	struct ip6_hdr *ip6;
745 	uint32_t flow;
746 	int rv;
747 
748 	ip6 = mtod(m, struct ip6_hdr *);
749 
750 	key.t_af = AF_INET6;
751 	key.t_src6 = ip6->ip6_dst;
752 	key.t_dst6 = ip6->ip6_src;
753 
754 	flow = ntohl(ip6->ip6_flow);
755 
756 	rv = gif_input(&key, mp, offp, proto, af, flow >> 20);
757 	if (rv == -1)
758 		rv = ipip_input(mp, offp, proto, af);
759 
760 	return (rv);
761 }
762 #endif /* INET6 */
763 
764 struct gif_softc *
765 gif_find(const struct gif_tunnel *key)
766 {
767 	struct gif_tunnel *t;
768 	struct gif_softc *sc;
769 
770 	TAILQ_FOREACH(t, &gif_list, t_entry) {
771 		if (gif_cmp(key, t) != 0)
772 			continue;
773 
774 		sc = (struct gif_softc *)t;
775 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
776 			continue;
777 
778 		return (sc);
779 	}
780 
781 	return (NULL);
782 }
783 
784 int
785 gif_input(struct gif_tunnel *key, struct mbuf **mp, int *offp, int proto,
786     int af, uint8_t otos)
787 {
788 	struct mbuf *m = *mp;
789 	struct gif_softc *sc;
790 	struct ifnet *ifp;
791 	void (*input)(struct ifnet *, struct mbuf *);
792 	uint8_t itos;
793 	int rxhprio;
794 
795 	/* IP-in-IP header is caused by tunnel mode, so skip gif lookup */
796 	if (m->m_flags & M_TUNNEL) {
797 		m->m_flags &= ~M_TUNNEL;
798 		return (-1);
799 	}
800 
801 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
802 
803 	sc = gif_find(key);
804 	if (sc == NULL) {
805 		memset(&key->t_dst, 0, sizeof(key->t_dst));
806 		sc = gif_find(key);
807 		if (sc == NULL)
808 			return (-1);
809 	}
810 
811 	m_adj(m, *offp); /* this is ours now */
812 
813 	ifp = &sc->sc_if;
814 	rxhprio = sc->sc_rxhprio;
815 
816 	switch (proto) {
817 	case IPPROTO_IPV4: {
818 		struct ip *ip;
819 
820 		m = *mp = m_pullup(m, sizeof(*ip));
821 		if (m == NULL)
822 			return (IPPROTO_DONE);
823 
824 		ip = mtod(m, struct ip *);
825 
826 		itos = ip->ip_tos;
827 		if (ip_ecn_egress(sc->sc_ecn, &otos, &itos) == 0)
828 			goto drop;
829 
830 		if (itos != ip->ip_tos)
831 			ip_tos_patch(ip, itos);
832 
833 		m->m_pkthdr.ph_family = AF_INET;
834 		input = ipv4_input;
835 		break;
836 	}
837 #ifdef INET6
838 	case IPPROTO_IPV6: {
839 		struct ip6_hdr *ip6;
840 
841 		m = *mp = m_pullup(m, sizeof(*ip6));
842 		if (m == NULL)
843 			return (IPPROTO_DONE);
844 
845 		ip6 = mtod(m, struct ip6_hdr *);
846 
847 		itos = ntohl(ip6->ip6_flow) >> 20;
848 		if (!ip_ecn_egress(sc->sc_ecn, &otos, &itos))
849 			goto drop;
850 
851 		CLR(ip6->ip6_flow, htonl(0xff << 20));
852 		SET(ip6->ip6_flow, htonl(itos << 20));
853 
854 		m->m_pkthdr.ph_family = AF_INET6;
855 		input = ipv6_input;
856 		break;
857 	}
858 #endif /* INET6 */
859 #ifdef MPLS
860 	case IPPROTO_MPLS: {
861 		uint32_t shim;
862 		m = *mp = m_pullup(m, sizeof(shim));
863 		if (m == NULL)
864 			return (IPPROTO_DONE);
865 
866 		shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
867 		itos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
868 
869 		m->m_pkthdr.ph_family = AF_MPLS;
870 		input = mpls_input;
871 		break;
872 	}
873 #endif /* MPLS */
874 	default:
875 		return (-1);
876 	}
877 
878 	m->m_flags &= ~(M_MCAST|M_BCAST);
879 	m->m_pkthdr.ph_ifidx = ifp->if_index;
880 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
881 
882 	switch (rxhprio) {
883 	case IF_HDRPRIO_PACKET:
884 		/* nop */
885 		break;
886 	case IF_HDRPRIO_PAYLOAD:
887 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
888 		break;
889 	case IF_HDRPRIO_OUTER:
890 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
891 		break;
892 	default:
893 		m->m_pkthdr.pf.prio = rxhprio;
894 		break;
895 	}
896 
897 #if NPF > 0
898 	pf_pkt_addr_changed(m);
899 #endif
900 
901 	ifp->if_ipackets++;
902 	ifp->if_ibytes += m->m_pkthdr.len;
903 
904 #if NBPFILTER > 0
905 	{
906 		caddr_t if_bpf = ifp->if_bpf;
907 		if (if_bpf) {
908 			bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
909 			    m, BPF_DIRECTION_IN);
910 		}
911 	}
912 #endif
913 
914 	*mp = NULL;
915 	(*input)(ifp, m);
916 	return (IPPROTO_DONE);
917 
918  drop:
919 	m_freemp(mp);
920 	return (IPPROTO_DONE);
921 }
922 
923 static inline int
924 gif_ip_cmp(int af, const union gif_addr *a, const union gif_addr *b)
925 {
926 	switch (af) {
927 #ifdef INET6
928 	case AF_INET6:
929 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
930 #endif /* INET6 */
931 	case AF_INET:
932 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
933 	default:
934 		panic("%s: unsupported af %d\n", __func__, af);
935 	}
936 
937 	return (0);
938 }
939 
940 
941 static inline int
942 gif_cmp(const struct gif_tunnel *a, const struct gif_tunnel *b)
943 {
944 	int rv;
945 
946 	/* sort by routing table */
947 	if (a->t_rtableid > b->t_rtableid)
948 		return (1);
949 	if (a->t_rtableid < b->t_rtableid)
950 		return (-1);
951 
952 	/* sort by address */
953 	if (a->t_af > b->t_af)
954 		return (1);
955 	if (a->t_af < b->t_af)
956 		return (-1);
957 
958 	rv = gif_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
959 	if (rv != 0)
960 		return (rv);
961 
962 	rv = gif_ip_cmp(a->t_af, &a->t_src, &b->t_src);
963 	if (rv != 0)
964 		return (rv);
965 
966 	return (0);
967 }
968