xref: /openbsd/sys/net/if_gif.c (revision d89ec533)
1 /*	$OpenBSD: if_gif.c,v 1.133 2021/05/16 15:10:20 deraadt Exp $	*/
2 /*	$KAME: if_gif.c,v 1.43 2001/02/20 08:51:07 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/mbuf.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 #include <sys/queue.h>
40 
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_types.h>
44 #include <net/route.h>
45 
46 #include <netinet/in.h>
47 #include <netinet/in_var.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip_var.h>
50 #include <netinet/ip_ipip.h>
51 #include <netinet/ip_ecn.h>
52 
53 #ifdef INET6
54 #include <netinet6/in6_var.h>
55 #include <netinet/ip6.h>
56 #include <netinet6/ip6_var.h>
57 #endif /* INET6 */
58 
59 #include <net/if_gif.h>
60 
61 #include "bpfilter.h"
62 #if NBPFILTER > 0
63 #include <net/bpf.h>
64 #endif
65 
66 #ifdef MPLS
67 #include <netmpls/mpls.h>
68 #endif
69 
70 #include "pf.h"
71 #if NPF > 0
72 #include <net/pfvar.h>
73 #endif
74 
/*
 * Interface MTU bounds.  gif_clone_create() installs GIF_MTU on new
 * interfaces; the SIOCSIFMTU handler in gif_ioctl() rejects values
 * outside [GIF_MTU_MIN, GIF_MTU_MAX] with EINVAL.
 */
75 #define GIF_MTU		(1280)	/* Default MTU */
76 #define GIF_MTU_MIN	(1280)	/* Minimum MTU */
77 #define GIF_MTU_MAX	(8192)	/* Maximum MTU */
78 
/*
 * Storage for one tunnel endpoint address.  Which member is meaningful
 * is decided by the address family kept next to it (gif_tunnel.t_af);
 * see gif_ip_cmp() for how the two variants are compared.
 */
79 union gif_addr {
80 	struct in6_addr		in6;
81 	struct in_addr		in4;
82 };
83 
/*
 * Outer-header description of a tunnel.  It is both the configured
 * state embedded in every gif_softc and the lookup key that the input
 * path builds on the stack from a received outer header.
 */
84 struct gif_tunnel {
	/* linkage on the global gif_list */
85 	TAILQ_ENTRY(gif_tunnel)	t_entry;
86 
	/* local endpoint; copied into the outer source address on output */
87 	union gif_addr		t_src;
88 #define t_src4		t_src.in4
89 #define t_src6		t_src.in6
	/* remote endpoint; an all-zero value makes gif_send() drop packets */
90 	union gif_addr		t_dst;
91 #define t_dst4		t_dst.in4
92 #define t_dst6		t_dst.in6
	/* routing table stamped on encapsulated packets (ph_rtableid) */
93 	u_int			t_rtableid;
94 
	/* AF_INET or AF_INET6; gif_del_tunnel() resets it to AF_UNSPEC */
95 	sa_family_t		t_af;
96 };
97 
/* List head type for the set of gif tunnels. */
98 TAILQ_HEAD(gif_list, gif_tunnel);
99 
/* Three-way comparison of two tunnel keys; defined at the end of the file. */
100 static inline int	gif_cmp(const struct gif_tunnel *,
101 			    const struct gif_tunnel *);
102 
/*
 * Per-interface state.  sc_tunnel has to stay the first member because
 * gif_find() casts a struct gif_tunnel pointer taken from gif_list
 * straight back to the enclosing struct gif_softc.
 */
103 struct gif_softc {
104 	struct gif_tunnel	sc_tunnel; /* must be first */
105 	struct ifnet		sc_if;
	/* htons(IP_DF) or htons(0); stored pre-swapped for ip_off */
106 	uint16_t		sc_df;
	/* outer TTL/hop limit; -1 means copy it from the inner packet */
107 	int			sc_ttl;
	/* IF_HDRPRIO_* policy or fixed priority for the outer TOS on output */
108 	int			sc_txhprio;
	/* IF_HDRPRIO_* policy or fixed priority for pf.prio on input */
109 	int			sc_rxhprio;
	/* ECN_ALLOWED or ECN_FORBIDDEN, passed to ip_ecn_ingress/egress */
110 	int			sc_ecn;
111 };
112 
/*
 * All gif tunnels.  Entries are added and removed by the clone
 * create/destroy handlers while holding NET_LOCK, and searched by
 * gif_find().
 */
113 struct gif_list gif_list = TAILQ_HEAD_INITIALIZER(gif_list);
114 
/* pseudo-device attach and interface cloning */
115 void	gifattach(int);
116 int	gif_clone_create(struct if_clone *, int);
117 int	gif_clone_destroy(struct ifnet *);
118 
/* transmit path: if_output -> send queue -> if_start -> encapsulation */
119 void	gif_start(struct ifnet *);
120 int	gif_ioctl(struct ifnet *, u_long, caddr_t);
121 int	gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
122 	    struct rtentry *);
123 int	gif_send(struct gif_softc *, struct mbuf *, uint8_t, uint8_t, uint8_t);
124 
/* interface state and tunnel endpoint configuration */
125 int	gif_up(struct gif_softc *);
126 int	gif_down(struct gif_softc *);
127 int	gif_set_tunnel(struct gif_softc *, struct if_laddrreq *);
128 int	gif_get_tunnel(struct gif_softc *, struct if_laddrreq *);
129 int	gif_del_tunnel(struct gif_softc *);
/* NOTE(review): no definitions of in_gif_output/in6_gif_output are visible in this file */
130 int	in_gif_output(struct ifnet *, int, struct mbuf **);
131 int	in6_gif_output(struct ifnet *, int, struct mbuf **);
/* receive path shared by in_gif_input() and in6_gif_input() */
132 int	gif_input(struct gif_tunnel *, struct mbuf **, int *, int, int,
133 	    uint8_t);
134 
135 /*
136  * gif global variable definitions
137  */
/*
 * Cloner for "gif" interfaces: binds the name to the create/destroy
 * handlers.  It is registered by gifattach().
 */
138 struct if_clone gif_cloner =
139     IF_CLONE_INITIALIZER("gif", gif_clone_create, gif_clone_destroy);
140 
/*
 * Pseudo-device attach hook: registers the gif cloner so that gif
 * interfaces can be created on demand.  The count argument is unused.
 */
141 void
142 gifattach(int count)
143 {
144 	if_clone_attach(&gif_cloner);
145 }
146 
147 int
148 gif_clone_create(struct if_clone *ifc, int unit)
149 {
150 	struct gif_softc *sc;
151 	struct ifnet *ifp;
152 
153 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
154 	ifp = &sc->sc_if;
155 
156 	sc->sc_df = htons(0);
157 	sc->sc_ttl = ip_defttl;
158 	sc->sc_txhprio = IF_HDRPRIO_PAYLOAD;
159 	sc->sc_rxhprio = IF_HDRPRIO_PAYLOAD;
160 	sc->sc_ecn = ECN_ALLOWED;
161 
162 	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
163 	    "%s%d", ifc->ifc_name, unit);
164 
165 	ifp->if_mtu    = GIF_MTU;
166 	ifp->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
167 	ifp->if_xflags = IFXF_CLONED;
168 	ifp->if_ioctl  = gif_ioctl;
169 	ifp->if_bpf_mtap = p2p_bpf_mtap;
170 	ifp->if_input  = p2p_input;
171 	ifp->if_start  = gif_start;
172 	ifp->if_output = gif_output;
173 	ifp->if_rtrequest = p2p_rtrequest;
174 	ifp->if_type   = IFT_GIF;
175 	ifp->if_softc = sc;
176 
177 	if_attach(ifp);
178 	if_alloc_sadl(ifp);
179 	if_counters_alloc(ifp);
180 
181 #if NBPFILTER > 0
182 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
183 #endif
184 
185 	NET_LOCK();
186 	TAILQ_INSERT_TAIL(&gif_list, &sc->sc_tunnel, t_entry);
187 	NET_UNLOCK();
188 
189 	return (0);
190 }
191 
192 int
193 gif_clone_destroy(struct ifnet *ifp)
194 {
195 	struct gif_softc *sc = ifp->if_softc;
196 
197 	NET_LOCK();
198 	if (ISSET(ifp->if_flags, IFF_RUNNING))
199 		gif_down(sc);
200 
201 	TAILQ_REMOVE(&gif_list, &sc->sc_tunnel, t_entry);
202 	NET_UNLOCK();
203 
204 	if_detach(ifp);
205 
206 	free(sc, M_DEVBUF, sizeof(*sc));
207 
208 	return (0);
209 }
210 
211 void
212 gif_start(struct ifnet *ifp)
213 {
214 	struct gif_softc *sc = ifp->if_softc;
215 	struct mbuf *m;
216 #if NBPFILTER > 0
217 	caddr_t if_bpf;
218 #endif
219 	uint8_t proto, ttl, tos;
220 	int ttloff, tttl;
221 
222 	tttl = sc->sc_ttl;
223 
224 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
225 #if NBPFILTER > 0
226 		if_bpf = ifp->if_bpf;
227 		if (if_bpf) {
228 			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m,
229 			    BPF_DIRECTION_OUT);
230 		}
231 #endif
232 
233 		switch (m->m_pkthdr.ph_family) {
234 		case AF_INET: {
235 			struct ip *ip;
236 
237 			m = m_pullup(m, sizeof(*ip));
238 			if (m == NULL)
239 				continue;
240 
241 			ip = mtod(m, struct ip *);
242 			tos = ip->ip_tos;
243 
244 			ttloff = offsetof(struct ip, ip_ttl);
245 			proto = IPPROTO_IPV4;
246 			break;
247 		}
248 #ifdef INET6
249 		case AF_INET6: {
250 			struct ip6_hdr *ip6;
251 
252 			m = m_pullup(m, sizeof(*ip6));
253 			if (m == NULL)
254 				continue;
255 
256 			ip6 = mtod(m, struct ip6_hdr *);
257 			tos = ntohl(ip6->ip6_flow >> 20);
258 
259 			ttloff = offsetof(struct ip6_hdr, ip6_hlim);
260 			proto = IPPROTO_IPV6;
261 			break;
262 		}
263 #endif
264 #ifdef MPLS
265 		case AF_MPLS: {
266 			uint32_t shim;
267 
268 			m = m_pullup(m, sizeof(shim));
269 			if (m == NULL)
270 				continue;
271 
272 			shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
273 			tos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
274 
275 			ttloff = 3;
276 
277 			proto = IPPROTO_MPLS;
278 			break;
279 		}
280 #endif
281 		default:
282 			unhandled_af(m->m_pkthdr.ph_family);
283 		}
284 
285 		if (tttl == -1) {
286 			KASSERT(m->m_len > ttloff);
287 
288 			ttl = *(m->m_data + ttloff);
289 		} else
290 			ttl = tttl;
291 
292 		switch (sc->sc_txhprio) {
293 		case IF_HDRPRIO_PAYLOAD:
294 			/* tos is already set */
295 			break;
296 		case IF_HDRPRIO_PACKET:
297 			tos = IFQ_PRIO2TOS(m->m_pkthdr.pf.prio);
298 			break;
299 		default:
300 			tos = IFQ_PRIO2TOS(sc->sc_txhprio);
301 			break;
302 		}
303 
304 		gif_send(sc, m, proto, ttl, tos);
305 	}
306 }
307 
308 int
309 gif_send(struct gif_softc *sc, struct mbuf *m,
310     uint8_t proto, uint8_t ttl, uint8_t itos)
311 {
312 	uint8_t otos;
313 
314 	m->m_flags &= ~(M_BCAST|M_MCAST);
315 	m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid;
316 
317 #if NPF > 0
318 	pf_pkt_addr_changed(m);
319 #endif
320 
321 	ip_ecn_ingress(sc->sc_ecn, &otos, &itos);
322 
323 	switch (sc->sc_tunnel.t_af) {
324 	case AF_INET: {
325 		struct ip *ip;
326 
327 		if (in_nullhost(sc->sc_tunnel.t_dst4))
328 			goto drop;
329 
330 		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
331 		if (m == NULL)
332 			return (-1);
333 
334 		ip = mtod(m, struct ip *);
335 		ip->ip_off = sc->sc_df;
336 		ip->ip_tos = otos;
337 		ip->ip_len = htons(m->m_pkthdr.len);
338 		ip->ip_ttl = ttl;
339 		ip->ip_p = proto;
340 		ip->ip_src = sc->sc_tunnel.t_src4;
341 		ip->ip_dst = sc->sc_tunnel.t_dst4;
342 
343 		ip_send(m);
344 		break;
345 	}
346 #ifdef INET6
347 	case AF_INET6: {
348 		struct ip6_hdr *ip6;
349 		int len = m->m_pkthdr.len;
350 		uint32_t flow;
351 
352 		if (IN6_IS_ADDR_UNSPECIFIED(&sc->sc_tunnel.t_dst6))
353 			goto drop;
354 
355 		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
356 		if (m == NULL)
357 			return (-1);
358 
359 		flow = otos << 20;
360 		if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
361 			flow |= m->m_pkthdr.ph_flowid;
362 
363 		ip6 = mtod(m, struct ip6_hdr *);
364 		ip6->ip6_flow = htonl(flow);
365 		ip6->ip6_vfc |= IPV6_VERSION;
366 		ip6->ip6_plen = htons(len);
367 		ip6->ip6_nxt = proto;
368 		ip6->ip6_hlim = ttl;
369 		ip6->ip6_src = sc->sc_tunnel.t_src6;
370 		ip6->ip6_dst = sc->sc_tunnel.t_dst6;
371 
372 		if (sc->sc_df)
373 			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
374 
375 		ip6_send(m);
376 		break;
377 	}
378 #endif
379 	default:
380 		m_freem(m);
381 		break;
382 	}
383 
384 	return (0);
385 
386 drop:
387 	m_freem(m);
388 	return (0);
389 }
390 
391 int
392 gif_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
393     struct rtentry *rt)
394 {
395 	struct m_tag *mtag;
396 	int error = 0;
397 
398 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
399 		error = ENETDOWN;
400 		goto drop;
401 	}
402 
403 	switch (dst->sa_family) {
404 	case AF_INET:
405 #ifdef INET6
406 	case AF_INET6:
407 #endif
408 #ifdef MPLS
409 	case AF_MPLS:
410 #endif
411 		break;
412 	default:
413 		error = EAFNOSUPPORT;
414 		goto drop;
415 	}
416 
417 	/* Try to limit infinite recursion through misconfiguration. */
418 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
419 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
420 		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
421 		    sizeof(ifp->if_index)) == 0) {
422 			error = EIO;
423 			goto drop;
424 		}
425 	}
426 
427 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
428 	if (mtag == NULL) {
429 		error = ENOBUFS;
430 		goto drop;
431 	}
432 	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
433 	m_tag_prepend(m, mtag);
434 
435 	m->m_pkthdr.ph_family = dst->sa_family;
436 
437 	error = if_enqueue(ifp, m);
438 
439 	if (error)
440 		ifp->if_oerrors++;
441 	return (error);
442 
443 drop:
444 	m_freem(m);
445 	return (error);
446 }
447 
448 int
449 gif_up(struct gif_softc *sc)
450 {
451 	NET_ASSERT_LOCKED();
452 
453 	SET(sc->sc_if.if_flags, IFF_RUNNING);
454 
455 	return (0);
456 }
457 
458 int
459 gif_down(struct gif_softc *sc)
460 {
461 	NET_ASSERT_LOCKED();
462 
463 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
464 
465 	/* barrier? */
466 
467 	return (0);
468 }
469 
470 int
471 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
472 {
473 	struct gif_softc *sc = ifp->if_softc;
474 	struct ifreq *ifr = (struct ifreq *)data;
475 	int error = 0;
476 
477 	switch (cmd) {
478 	case SIOCSIFADDR:
479 		SET(ifp->if_flags, IFF_UP);
480 		/* FALLTHROUGH */
481 	case SIOCSIFFLAGS:
482 		if (ISSET(ifp->if_flags, IFF_UP)) {
483 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
484 				error = gif_up(sc);
485 			else
486 				error = 0;
487 		} else {
488 			if (ISSET(ifp->if_flags, IFF_RUNNING))
489 				error = gif_down(sc);
490 		}
491 		break;
492 
493 	case SIOCADDMULTI:
494 	case SIOCDELMULTI:
495 		break;
496 
497 	case SIOCSLIFPHYADDR:
498 		error = gif_set_tunnel(sc, (struct if_laddrreq *)data);
499 		break;
500 	case SIOCGLIFPHYADDR:
501 		error = gif_get_tunnel(sc, (struct if_laddrreq *)data);
502 		break;
503 	case SIOCDIFPHYADDR:
504 		error = gif_del_tunnel(sc);
505 		break;
506 
507 	case SIOCSIFMTU:
508 		if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) {
509 			error = EINVAL;
510 			break;
511 		}
512 
513 		ifp->if_mtu = ifr->ifr_mtu;
514 		break;
515 
516 	case SIOCSLIFPHYRTABLE:
517 		if (ifr->ifr_rdomainid < 0 ||
518 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
519 		    !rtable_exists(ifr->ifr_rdomainid)) {
520 			error = EINVAL;
521 			break;
522 		}
523 		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
524 		break;
525 	case SIOCGLIFPHYRTABLE:
526 		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
527 		break;
528 
529 	case SIOCSLIFPHYTTL:
530 		if (ifr->ifr_ttl != -1 &&
531 		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
532 			error = EINVAL;
533 			break;
534 		}
535 
536 		/* commit */
537 		sc->sc_ttl = ifr->ifr_ttl;
538 		break;
539 	case SIOCGLIFPHYTTL:
540 		ifr->ifr_ttl = sc->sc_ttl;
541 		break;
542 
543 	case SIOCSLIFPHYDF:
544 		/* commit */
545 		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
546 		break;
547 	case SIOCGLIFPHYDF:
548 		ifr->ifr_df = sc->sc_df ? 1 : 0;
549 		break;
550 
551 	case SIOCSLIFPHYECN:
552 		sc->sc_ecn = ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
553 		break;
554 	case SIOCGLIFPHYECN:
555 		ifr->ifr_metric = (sc->sc_ecn == ECN_ALLOWED);
556 		break;
557 
558 	case SIOCSTXHPRIO:
559 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
560 		if (error != 0)
561 			break;
562 
563 		sc->sc_txhprio = ifr->ifr_hdrprio;
564 		break;
565 	case SIOCGTXHPRIO:
566 		ifr->ifr_hdrprio = sc->sc_txhprio;
567 		break;
568 
569 	case SIOCSRXHPRIO:
570 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
571 		if (error != 0)
572 			break;
573 
574 		sc->sc_rxhprio = ifr->ifr_hdrprio;
575 		break;
576 	case SIOCGRXHPRIO:
577 		ifr->ifr_hdrprio = sc->sc_rxhprio;
578 		break;
579 
580 	default:
581 		error = ENOTTY;
582 		break;
583 	}
584 
585 	return (error);
586 }
587 
588 int
589 gif_get_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
590 {
591 	struct gif_tunnel *tunnel = &sc->sc_tunnel;
592 	struct sockaddr *src = (struct sockaddr *)&req->addr;
593 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
594 	struct sockaddr_in *sin;
595 #ifdef INET6 /* ifconfig already embeds the scopeid */
596 	struct sockaddr_in6 *sin6;
597 #endif
598 
599 	switch (tunnel->t_af) {
600 	case AF_UNSPEC:
601 		return (EADDRNOTAVAIL);
602 	case AF_INET:
603 		sin = (struct sockaddr_in *)src;
604 		memset(sin, 0, sizeof(*sin));
605 		sin->sin_family = AF_INET;
606 		sin->sin_len = sizeof(*sin);
607 		sin->sin_addr = tunnel->t_src4;
608 
609 		sin = (struct sockaddr_in *)dst;
610 		memset(sin, 0, sizeof(*sin));
611 		sin->sin_family = AF_INET;
612 		sin->sin_len = sizeof(*sin);
613 		sin->sin_addr = tunnel->t_dst4;
614 
615 		break;
616 
617 #ifdef INET6
618 	case AF_INET6:
619 		sin6 = (struct sockaddr_in6 *)src;
620 		memset(sin6, 0, sizeof(*sin6));
621 		sin6->sin6_family = AF_INET6;
622 		sin6->sin6_len = sizeof(*sin6);
623 		in6_recoverscope(sin6, &tunnel->t_src6);
624 
625 		sin6 = (struct sockaddr_in6 *)dst;
626 		memset(sin6, 0, sizeof(*sin6));
627 		sin6->sin6_family = AF_INET6;
628 		sin6->sin6_len = sizeof(*sin6);
629 		in6_recoverscope(sin6, &tunnel->t_dst6);
630 
631 		break;
632 #endif
633 	default:
634 		return (EAFNOSUPPORT);
635 	}
636 
637 	return (0);
638 }
639 
640 int
641 gif_set_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
642 {
643 	struct gif_tunnel *tunnel = &sc->sc_tunnel;
644 	struct sockaddr *src = (struct sockaddr *)&req->addr;
645 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
646 	struct sockaddr_in *src4, *dst4;
647 #ifdef INET6
648 	struct sockaddr_in6 *src6, *dst6;
649 	int error;
650 #endif
651 
652 	/* sa_family and sa_len must be equal */
653 	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
654 		return (EINVAL);
655 
656 	/* validate */
657 	switch (dst->sa_family) {
658 	case AF_INET:
659 		if (dst->sa_len != sizeof(*dst4))
660 			return (EINVAL);
661 
662 		src4 = (struct sockaddr_in *)src;
663 		if (in_nullhost(src4->sin_addr) ||
664 		    IN_MULTICAST(src4->sin_addr.s_addr))
665 			return (EINVAL);
666 
667 		dst4 = (struct sockaddr_in *)dst;
668 		/* dst4 can be 0.0.0.0 */
669 		if (IN_MULTICAST(dst4->sin_addr.s_addr))
670 			return (EINVAL);
671 
672 		tunnel->t_src4 = src4->sin_addr;
673 		tunnel->t_dst4 = dst4->sin_addr;
674 
675 		break;
676 #ifdef INET6
677 	case AF_INET6:
678 		if (dst->sa_len != sizeof(*dst6))
679 			return (EINVAL);
680 
681 		src6 = (struct sockaddr_in6 *)src;
682 		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
683 		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
684 			return (EINVAL);
685 
686 		dst6 = (struct sockaddr_in6 *)dst;
687 		if (IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
688 			return (EINVAL);
689 
690 		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
691 		if (error != 0)
692 			return (error);
693 
694 		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
695 		if (error != 0)
696 			return (error);
697 
698 		break;
699 #endif
700 	default:
701 		return (EAFNOSUPPORT);
702 	}
703 
704 	/* commit */
705 	tunnel->t_af = dst->sa_family;
706 
707 	return (0);
708 }
709 
/*
 * Remove the tunnel configuration by resetting the address family to
 * AF_UNSPEC.  The stored endpoint addresses are left as they are.
 * Always returns 0.
 */
710 int
711 gif_del_tunnel(struct gif_softc *sc)
712 {
713 	/* commit */
714 	sc->sc_tunnel.t_af = AF_UNSPEC;
715 
716 	return (0);
717 }
718 
719 int
720 in_gif_input(struct mbuf **mp, int *offp, int proto, int af)
721 {
722 	struct mbuf *m = *mp;
723 	struct gif_tunnel key;
724 	struct ip *ip;
725 	int rv;
726 
727 	ip = mtod(m, struct ip *);
728 
729 	key.t_af = AF_INET;
730 	key.t_src4 = ip->ip_dst;
731 	key.t_dst4 = ip->ip_src;
732 
733 	rv = gif_input(&key, mp, offp, proto, af, ip->ip_tos);
734 	if (rv == -1)
735 		rv = ipip_input(mp, offp, proto, af);
736 
737 	return (rv);
738 }
739 
#ifdef INET6
/*
 * IPv6 input entry point for encapsulated packets.
 *
 * Builds a lookup key from the outer IPv6 header, with source and
 * destination swapped, and offers the packet to gif_input() together
 * with the outer traffic class.  When gif_input() reports -1 (not
 * taken) the packet is passed on to ipip_input().
 */
int
in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct ip6_hdr *oip6 = mtod(*mp, struct ip6_hdr *);
	struct gif_tunnel key;
	uint32_t hflow;
	int rv;

	key.t_af = AF_INET6;
	key.t_dst6 = oip6->ip6_src;
	key.t_src6 = oip6->ip6_dst;

	/* host-order flow word; the traffic class sits above bit 20 */
	hflow = ntohl(oip6->ip6_flow);

	rv = gif_input(&key, mp, offp, proto, af, hflow >> 20);
	if (rv != -1)
		return (rv);

	return (ipip_input(mp, offp, proto, af));
}
#endif /* INET6 */
765 
766 struct gif_softc *
767 gif_find(const struct gif_tunnel *key)
768 {
769 	struct gif_tunnel *t;
770 	struct gif_softc *sc;
771 
772 	TAILQ_FOREACH(t, &gif_list, t_entry) {
773 		if (gif_cmp(key, t) != 0)
774 			continue;
775 
776 		sc = (struct gif_softc *)t;
777 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
778 			continue;
779 
780 		return (sc);
781 	}
782 
783 	return (NULL);
784 }
785 
786 int
787 gif_input(struct gif_tunnel *key, struct mbuf **mp, int *offp, int proto,
788     int af, uint8_t otos)
789 {
790 	struct mbuf *m = *mp;
791 	struct gif_softc *sc;
792 	struct ifnet *ifp;
793 	uint8_t itos;
794 	int rxhprio;
795 
796 	/* IP-in-IP header is caused by tunnel mode, so skip gif lookup */
797 	if (m->m_flags & M_TUNNEL) {
798 		m->m_flags &= ~M_TUNNEL;
799 		return (-1);
800 	}
801 
802 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
803 
804 	sc = gif_find(key);
805 	if (sc == NULL) {
806 		memset(&key->t_dst, 0, sizeof(key->t_dst));
807 		sc = gif_find(key);
808 		if (sc == NULL)
809 			return (-1);
810 	}
811 
812 	m_adj(m, *offp); /* this is ours now */
813 
814 	ifp = &sc->sc_if;
815 	rxhprio = sc->sc_rxhprio;
816 
817 	switch (proto) {
818 	case IPPROTO_IPV4: {
819 		struct ip *ip;
820 
821 		m = *mp = m_pullup(m, sizeof(*ip));
822 		if (m == NULL)
823 			return (IPPROTO_DONE);
824 
825 		ip = mtod(m, struct ip *);
826 
827 		itos = ip->ip_tos;
828 		if (ip_ecn_egress(sc->sc_ecn, &otos, &itos) == 0)
829 			goto drop;
830 
831 		if (itos != ip->ip_tos)
832 			ip_tos_patch(ip, itos);
833 
834 		m->m_pkthdr.ph_family = AF_INET;
835 		break;
836 	}
837 #ifdef INET6
838 	case IPPROTO_IPV6: {
839 		struct ip6_hdr *ip6;
840 
841 		m = *mp = m_pullup(m, sizeof(*ip6));
842 		if (m == NULL)
843 			return (IPPROTO_DONE);
844 
845 		ip6 = mtod(m, struct ip6_hdr *);
846 
847 		itos = ntohl(ip6->ip6_flow) >> 20;
848 		if (!ip_ecn_egress(sc->sc_ecn, &otos, &itos))
849 			goto drop;
850 
851 		CLR(ip6->ip6_flow, htonl(0xff << 20));
852 		SET(ip6->ip6_flow, htonl(itos << 20));
853 
854 		m->m_pkthdr.ph_family = AF_INET6;
855 		break;
856 	}
857 #endif /* INET6 */
858 #ifdef MPLS
859 	case IPPROTO_MPLS: {
860 		uint32_t shim;
861 		m = *mp = m_pullup(m, sizeof(shim));
862 		if (m == NULL)
863 			return (IPPROTO_DONE);
864 
865 		shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
866 		itos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
867 
868 		m->m_pkthdr.ph_family = AF_MPLS;
869 		break;
870 	}
871 #endif /* MPLS */
872 	default:
873 		return (-1);
874 	}
875 
876 	m->m_flags &= ~(M_MCAST|M_BCAST);
877 
878 	switch (rxhprio) {
879 	case IF_HDRPRIO_PACKET:
880 		/* nop */
881 		break;
882 	case IF_HDRPRIO_PAYLOAD:
883 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
884 		break;
885 	case IF_HDRPRIO_OUTER:
886 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
887 		break;
888 	default:
889 		m->m_pkthdr.pf.prio = rxhprio;
890 		break;
891 	}
892 
893 	*mp = NULL;
894 	if_vinput(ifp, m);
895 	return (IPPROTO_DONE);
896 
897  drop:
898 	m_freemp(mp);
899 	return (IPPROTO_DONE);
900 }
901 
902 static inline int
903 gif_ip_cmp(int af, const union gif_addr *a, const union gif_addr *b)
904 {
905 	switch (af) {
906 #ifdef INET6
907 	case AF_INET6:
908 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
909 #endif /* INET6 */
910 	case AF_INET:
911 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
912 	default:
913 		panic("%s: unsupported af %d", __func__, af);
914 	}
915 
916 	return (0);
917 }
918 
919 
920 static inline int
921 gif_cmp(const struct gif_tunnel *a, const struct gif_tunnel *b)
922 {
923 	int rv;
924 
925 	/* sort by routing table */
926 	if (a->t_rtableid > b->t_rtableid)
927 		return (1);
928 	if (a->t_rtableid < b->t_rtableid)
929 		return (-1);
930 
931 	/* sort by address */
932 	if (a->t_af > b->t_af)
933 		return (1);
934 	if (a->t_af < b->t_af)
935 		return (-1);
936 
937 	rv = gif_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
938 	if (rv != 0)
939 		return (rv);
940 
941 	rv = gif_ip_cmp(a->t_af, &a->t_src, &b->t_src);
942 	if (rv != 0)
943 		return (rv);
944 
945 	return (0);
946 }
947