xref: /openbsd/sys/net/if_gre.c (revision 09467b48)
1 /*	$OpenBSD: if_gre.c,v 1.158 2020/07/10 13:26:41 patrick Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "bpfilter.h"
42 #include "pf.h"
43 
44 #include <sys/param.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/kernel.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/timeout.h>
52 #include <sys/queue.h>
53 #include <sys/tree.h>
54 #include <sys/pool.h>
55 #include <sys/rwlock.h>
56 
57 #include <crypto/siphash.h>
58 
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/route.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_ecn.h>
71 
72 #ifdef INET6
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/in6_var.h>
76 #endif
77 
78 #ifdef PIPEX
79 #include <net/pipex.h>
80 #endif
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif /* MPLS */
85 
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89 
90 #if NPF > 0
91 #include <net/pfvar.h>
92 #endif
93 
94 #include <net/if_gre.h>
95 
96 #include <netinet/ip_gre.h>
97 #include <sys/sysctl.h>
98 
99 /* for nvgre bridge shizz */
100 #include <sys/socket.h>
101 #include <net/if_bridge.h>
102 
103 /*
104  * packet formats
105  */
/*
 * GRE base header (RFC 2784/2890): a flags/version word followed by
 * the EtherType of the encapsulated payload.  Optional fields
 * (checksum, key, sequence) follow when the matching flag is set.
 */
struct gre_header {
	uint16_t		gre_flags;
#define GRE_CP				0x8000  /* Checksum Present */
#define GRE_KP				0x2000  /* Key Present */
#define GRE_SP				0x1000  /* Sequence Present */

#define GRE_VERS_MASK			0x0007
#define GRE_VERS_0			0x0000	/* RFC 2784/2890 GRE */
#define GRE_VERS_1			0x0001	/* PPTP/EoIP style GRE */

	uint16_t		gre_proto;	/* payload EtherType */
} __packed __aligned(4);

/* optional checksum field, present when GRE_CP is set */
struct gre_h_cksum {
	uint16_t		gre_cksum;
	uint16_t		gre_reserved1;
} __packed __aligned(4);

/* optional key field, present when GRE_KP is set */
struct gre_h_key {
	uint32_t		gre_key;
} __packed __aligned(4);

/* gre_proto value used by MikroTik Ethernet over IP (EoIP) */
#define GRE_EOIP		0x6400

/* EoIP reuses the GRE key field as a payload length and a tunnel id */
struct gre_h_key_eoip {
	uint16_t		eoip_len;	/* network order */
	uint16_t		eoip_tunnel_id;	/* little endian */
} __packed __aligned(4);

#define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
#define NVGRE_VSID_RES_MAX	0x000fff
#define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */

/* optional sequence number field, present when GRE_SP is set */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);

/* WCCP redirect header that may precede the IP payload */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);

/* gre_proto value used by WCCP encapsulation */
#define GRE_WCCP 0x883e

/* IPv4 delivery header plus the base GRE header */
#define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
153 
154 /*
155  * GRE tunnel metadata
156  */
157 
/* keepalive state machine states (sc_ka_state) */
#define GRE_KA_NONE		0	/* keepalives disabled */
#define GRE_KA_DOWN		1
#define GRE_KA_HOLD		2
#define GRE_KA_UP		3

/* tunnel endpoint address; which member is valid depends on t_af */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

static inline int
		gre_ip_cmp(int, const union gre_addr *,
		    const union gre_addr *);

/* host-order bounds/shift for a plain 32-bit GRE key */
#define GRE_KEY_MIN		0x00000000U
#define GRE_KEY_MAX		0xffffffffU
#define GRE_KEY_SHIFT		0

/* bounds/shift when the low 8 key bits carry flow entropy */
#define GRE_KEY_ENTROPY_MIN	0x00000000U
#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
#define GRE_KEY_ENTROPY_SHIFT	8

/*
 * Shared tunnel state.  Every softc in this file embeds one of these
 * as its first member so the input path can treat if_softc as tunnel
 * state for comparisons and lookups.
 */
struct gre_tunnel {
	uint32_t		t_key_mask;	/* which key bits to match */
#define GRE_KEY_NONE			htonl(0x00000000U)
#define GRE_KEY_ENTROPY			htonl(0xffffff00U)
#define GRE_KEY_MASK			htonl(0xffffffffU)
	uint32_t		t_key;		/* GRE key, network order */

	u_int			t_rtableid;	/* routing table id */
	union gre_addr		t_src;		/* local endpoint */
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	union gre_addr		t_dst;		/* remote endpoint */
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;
	int			t_txhprio;	/* prio for the outer header */
	int			t_rxhprio;	/* prio applied on receive */
	int			t_ecn;
	uint16_t		t_df;		/* htons(IP_DF) or 0 */
	sa_family_t		t_af;		/* outer address family */
};
201 
202 static int
203 		gre_cmp_src(const struct gre_tunnel *,
204 		    const struct gre_tunnel *);
205 static int
206 		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
207 
208 static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
209 static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
210 static int	gre_del_tunnel(struct gre_tunnel *);
211 
212 static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
213 static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
214 static int	gre_del_vnetid(struct gre_tunnel *);
215 
216 static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
217 static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);
218 
219 static struct mbuf *
220 		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
221 		    struct mbuf *, uint16_t, uint8_t, uint8_t);
222 #define gre_encap(_t, _m, _p, _ttl, _tos) \
223 		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))
224 
225 static struct mbuf *
226 		gre_encap_dst_ip(const struct gre_tunnel *,
227 		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
228 #define gre_encap_ip(_t, _m, _ttl, _tos) \
229 		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))
230 
231 static int
232 		gre_ip_output(const struct gre_tunnel *, struct mbuf *);
233 
234 static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
235 		    u_long, void *);
236 
237 static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
238 static uint8_t	gre_l3_tos(const struct gre_tunnel *,
239 		    const struct mbuf *, uint8_t);
240 
241 /*
242  * layer 3 GRE tunnels
243  */
244 
/*
 * gre(4) interface softc.  sc_tunnel must stay first: the input path
 * casts if_softc to struct gre_tunnel for lookups.
 */
struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;	/* on gre_list */

	struct ifnet		sc_if;

	/* keepalive probe/hold timers and their state */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;	/* authenticates keepalives */
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/* wire format of a gre(4) keepalive payload */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);
273 
274 static int	gre_clone_create(struct if_clone *, int);
275 static int	gre_clone_destroy(struct ifnet *);
276 
277 struct if_clone gre_cloner =
278     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
279 
280 /* protected by NET_LOCK */
281 struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
282 
283 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
284 		    struct rtentry *);
285 static void	gre_start(struct ifnet *);
286 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
287 
288 static int	gre_up(struct gre_softc *);
289 static int	gre_down(struct gre_softc *);
290 static void	gre_link_state(struct ifnet *, unsigned int);
291 
292 static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
293 		    struct gre_tunnel *);
294 
295 static struct mbuf *
296 		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
297 		    uint8_t *, uint8_t);
298 #ifdef INET6
299 static struct mbuf *
300 		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
301 		    uint8_t *, uint8_t);
302 #endif
303 #ifdef MPLS
304 static struct mbuf *
305 		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
306 		    uint8_t *, uint8_t);
307 #endif
308 static void	gre_keepalive_send(void *);
309 static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
310 static void	gre_keepalive_hold(void *);
311 
312 static struct mbuf *
313 		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
314 		    struct mbuf *m, sa_family_t);
315 
316 #define gre_l3_encap(_t, _m, _af) \
317 		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
318 
/*
 * mgre(4) (point-to-multipoint gre) softc; entries live in mgre_tree.
 * sc_tunnel must stay first so the input path can cast if_softc.
 */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;

	struct ifnet		sc_if;
};
325 
326 RBT_HEAD(mgre_tree, mgre_softc);
327 
328 static inline int
329 		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);
330 
331 RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
332 
333 static int	mgre_clone_create(struct if_clone *, int);
334 static int	mgre_clone_destroy(struct ifnet *);
335 
336 struct if_clone mgre_cloner =
337     IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);
338 
339 static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
340 static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
341 		    struct rtentry *);
342 static void	mgre_start(struct ifnet *);
343 static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);
344 
345 static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
346 static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
347 static int	mgre_up(struct mgre_softc *);
348 static int	mgre_down(struct mgre_softc *);
349 
350 /* protected by NET_LOCK */
351 struct mgre_tree mgre_tree = RBT_INITIALIZER();
352 
353 /*
354  * Ethernet GRE tunnels
355  */
356 
357 static struct mbuf *
358 		gre_ether_align(struct mbuf *, int);
359 
/*
 * egre(4) (Ethernet over GRE) softc; entries live in egre_tree.
 * sc_tunnel must stay first so the input path can cast if_softc.
 */
struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;

	struct arpcom		sc_ac;		/* embeds the ifnet */
	struct ifmedia		sc_media;
};
367 
368 RBT_HEAD(egre_tree, egre_softc);
369 
370 static inline int
371 		egre_cmp(const struct egre_softc *, const struct egre_softc *);
372 
373 RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);
374 
375 static int	egre_clone_create(struct if_clone *, int);
376 static int	egre_clone_destroy(struct ifnet *);
377 
378 static void	egre_start(struct ifnet *);
379 static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
380 static int	egre_media_change(struct ifnet *);
381 static void	egre_media_status(struct ifnet *, struct ifmediareq *);
382 
383 static int	egre_up(struct egre_softc *);
384 static int	egre_down(struct egre_softc *);
385 
386 static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
387 		    uint8_t);
388 struct if_clone egre_cloner =
389     IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);
390 
391 /* protected by NET_LOCK */
392 struct egre_tree egre_tree = RBT_INITIALIZER();
393 
394 /*
395  * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
396  */
397 
398 #define NVGRE_AGE_TMO		100	/* seconds */
399 
/* an entry in an nvgre interface's inner-MAC → outer-IP map */
struct nvgre_entry {
	RB_ENTRY(nvgre_entry)	 nv_entry;
	struct ether_addr	 nv_dst;	/* inner Ethernet address */
	uint8_t			 nv_type;
#define NVGRE_ENTRY_DYNAMIC		0	/* learned, subject to aging */
#define NVGRE_ENTRY_STATIC		1
	union gre_addr		 nv_gateway;	/* outer address to send via */
	struct refcnt		 nv_refs;
	int			 nv_age;
};

RBT_HEAD(nvgre_map, nvgre_entry);

static inline int
		nvgre_entry_cmp(const struct nvgre_entry *,
		    const struct nvgre_entry *);

RBT_PROTOTYPE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);

/*
 * nvgre(4) softc.  Sits in both the unicast and multicast lookup
 * trees.  sc_tunnel must stay first (input path casts if_softc).
 */
struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;	/* parent interface index */
	RBT_ENTRY(nvgre_softc)	 sc_uentry;	/* nvgre_ucast_tree */
	RBT_ENTRY(nvgre_softc)	 sc_mentry;	/* nvgre_mcast_tree */

	struct arpcom		 sc_ac;		/* embeds the ifnet */
	struct ifmedia		 sc_media;

	/* deferred transmit of flooded/unknown traffic */
	struct mbuf_queue	 sc_send_list;
	struct task		 sc_send_task;

	void			*sc_inm;	/* multicast membership */
	struct task		 sc_ltask;	/* parent link state hook */
	struct task		 sc_dtask;	/* parent detach hook */

	/* learned Ethernet address map and its aging state */
	struct rwlock		 sc_ether_lock;
	struct nvgre_map	 sc_ether_map;
	unsigned int		 sc_ether_num;	/* current entries */
	unsigned int		 sc_ether_max;	/* entry limit */
	int			 sc_ether_tmo;	/* age interval, ticks */
	struct timeout		 sc_ether_age;
};
442 
443 RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
444 RBT_HEAD(nvgre_mcast_tree, nvgre_softc);
445 
446 static inline int
447 		nvgre_cmp_ucast(const struct nvgre_softc *,
448 		    const struct nvgre_softc *);
449 static int
450 		nvgre_cmp_mcast(const struct gre_tunnel *,
451 		    const union gre_addr *, unsigned int,
452 		    const struct gre_tunnel *, const union gre_addr *,
453 		    unsigned int);
454 static inline int
455 		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
456 		    const struct nvgre_softc *);
457 
458 RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
459 RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
460 
461 static int	nvgre_clone_create(struct if_clone *, int);
462 static int	nvgre_clone_destroy(struct ifnet *);
463 
464 static void	nvgre_start(struct ifnet *);
465 static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);
466 
467 static int	nvgre_up(struct nvgre_softc *);
468 static int	nvgre_down(struct nvgre_softc *);
469 static int	nvgre_set_parent(struct nvgre_softc *, const char *);
470 static void	nvgre_link_change(void *);
471 static void	nvgre_detach(void *);
472 
473 static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
474 		    uint8_t);
475 static void	nvgre_send(void *);
476 
477 static int	nvgre_rtfind(struct nvgre_softc *, struct ifbaconf *);
478 static void	nvgre_flush_map(struct nvgre_softc *);
479 static void	nvgre_input_map(struct nvgre_softc *,
480 		    const struct gre_tunnel *, const struct ether_header *);
481 static void	nvgre_age(void *);
482 
483 struct if_clone nvgre_cloner =
484     IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);
485 
486 struct pool nvgre_pool;
487 
488 /* protected by NET_LOCK */
489 struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
490 struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
491 
492 /*
493  * MikroTik Ethernet over IP protocol (eoip)
494  */
495 
/*
 * eoip(4) (MikroTik Ethernet over IP) softc; entries live in
 * eoip_tree keyed in part by sc_tunnel_id.  sc_tunnel must stay
 * first (input path casts if_softc).
 */
struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;
	RBT_ENTRY(eoip_softc)	sc_entry;

	struct arpcom		sc_ac;		/* embeds the ifnet */
	struct ifmedia		sc_media;

	/* keepalive probe/hold timers and their state */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};
514 
515 RBT_HEAD(eoip_tree, eoip_softc);
516 
517 static inline int
518 		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);
519 
520 RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
521 
522 static int	eoip_clone_create(struct if_clone *, int);
523 static int	eoip_clone_destroy(struct ifnet *);
524 
525 static void	eoip_start(struct ifnet *);
526 static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);
527 
528 static void	eoip_keepalive_send(void *);
529 static void	eoip_keepalive_recv(struct eoip_softc *);
530 static void	eoip_keepalive_hold(void *);
531 
532 static int	eoip_up(struct eoip_softc *);
533 static int	eoip_down(struct eoip_softc *);
534 
535 static struct mbuf *
536 		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);
537 
538 static struct mbuf *
539 		eoip_input(struct gre_tunnel *, struct mbuf *,
540 		    const struct gre_header *, uint8_t, int);
541 struct if_clone eoip_cloner =
542     IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);
543 
544 /* protected by NET_LOCK */
545 struct eoip_tree eoip_tree = RBT_INITIALIZER();
546 
547 /*
548  * It is not easy to calculate the right value for a GRE MTU.
549  * We leave this task to the admin and use the same default that
550  * other vendors use.
551  */
552 #define GREMTU 1476
553 
/*
 * We can control the acceptance of GRE and MobileIP packets by
 * altering the sysctl net.inet.gre.allow value.  Zero means drop
 * them, all else is acceptance.  We can also control acceptance of
 * WCCPv1-style GRE packets through the net.inet.gre.wccp value, but
 * be aware it depends upon normal GRE being allowed as well.
 */
563 int gre_allow = 0;
564 int gre_wccp = 0;
565 
566 void
567 greattach(int n)
568 {
569 	if_clone_attach(&gre_cloner);
570 	if_clone_attach(&mgre_cloner);
571 	if_clone_attach(&egre_cloner);
572 	if_clone_attach(&nvgre_cloner);
573 	if_clone_attach(&eoip_cloner);
574 }
575 
/*
 * Create a gre(4) point-to-point interface.  The tunnel state lives
 * at the front of the softc so the input path can use it directly.
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	/* tunnel defaults: system TTL, payload-derived tx prio, ECN ok */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* keepalives start disabled; hold timer runs in process context */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	/* gre_list is protected by NET_LOCK */
	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}
624 
/*
 * Destroy a gre(4) interface: bring it down if running, unlink it
 * from the global list, then detach and free the softc.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	/* gre_list is protected by NET_LOCK */
	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
643 
/*
 * Create an mgre(4) point-to-multipoint interface.  Unlike gre(4) it
 * is not listed here; mgre_up() is expected to insert it into
 * mgre_tree once a tunnel address is configured.
 */
static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	/* tunnel defaults: system TTL, payload-derived tx prio, ECN ok */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}
683 
/*
 * Destroy an mgre(4) interface: bring it down if running (which also
 * removes it from mgre_tree), then detach and free the softc.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		mgre_down(sc);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
700 
/*
 * Create an egre(4) Ethernet-over-GRE interface with a generated MAC
 * address and a fixed autoselect media.
 */
static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tunnel defaults */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
737 
/*
 * Destroy an egre(4) interface: bring it down if running, then tear
 * down media, Ethernet state, and the interface before freeing.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		egre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
756 
757 static int
758 nvgre_clone_create(struct if_clone *ifc, int unit)
759 {
760 	struct nvgre_softc *sc;
761 	struct ifnet *ifp;
762 	struct gre_tunnel *tunnel;
763 
764 	if (nvgre_pool.pr_size == 0) {
765 		pool_init(&nvgre_pool, sizeof(struct nvgre_entry), 0,
766 		    IPL_SOFTNET, 0, "nvgren", NULL);
767 	}
768 
769 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
770 	ifp = &sc->sc_ac.ac_if;
771 
772 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
773 	    ifc->ifc_name, unit);
774 
775 	ifp->if_softc = sc;
776 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
777 	ifp->if_ioctl = nvgre_ioctl;
778 	ifp->if_start = nvgre_start;
779 	ifp->if_xflags = IFXF_CLONED;
780 	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
781 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
782 	ether_fakeaddr(ifp);
783 
784 	tunnel = &sc->sc_tunnel;
785 	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
786 	tunnel->t_txhprio = 0;
787 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
788 	tunnel->t_df = htons(IP_DF);
789 	tunnel->t_key_mask = GRE_KEY_ENTROPY;
790 	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
791 	    GRE_KEY_ENTROPY_SHIFT);
792 
793 	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
794 	task_set(&sc->sc_send_task, nvgre_send, sc);
795 	task_set(&sc->sc_ltask, nvgre_link_change, sc);
796 	task_set(&sc->sc_dtask, nvgre_detach, sc);
797 
798 	rw_init(&sc->sc_ether_lock, "nvgrelk");
799 	RBT_INIT(nvgre_map, &sc->sc_ether_map);
800 	sc->sc_ether_num = 0;
801 	sc->sc_ether_max = 100;
802 	sc->sc_ether_tmo = 240 * hz;
803 	timeout_set_proc(&sc->sc_ether_age, nvgre_age, sc); /* ugh */
804 
805 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
806 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
807 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
808 
809 	if_counters_alloc(ifp);
810 	if_attach(ifp);
811 	ether_ifattach(ifp);
812 
813 	return (0);
814 }
815 
/*
 * Destroy an nvgre(4) interface: bring it down if running, then tear
 * down media, Ethernet state, and the interface before freeing.
 * The shared nvgre_pool is deliberately left initialized.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		nvgre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
834 
/*
 * Create an eoip(4) interface.  Unlike gre(4), keepalives default to
 * enabled (state GRE_KA_DOWN) with a 10s interval and count of 10.
 */
static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tunnel defaults */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	/* hold timer runs in process context */
	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
878 
/*
 * Destroy an eoip(4) interface: bring it down if running, then tear
 * down media, Ethernet state, and the interface before freeing.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		eoip_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
897 
/*
 * IPPROTO_GRE input handler for IPv4.  Builds a tunnel lookup key
 * from the outer header and hands off to gre_input_key(); if no
 * tunnel claims the packet it falls back to raw IP input.
 */
int
gre_input(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip *ip;

	ip = mtod(m, struct ip *);

	/* XXX check if ip_src is sane for nvgre? */

	/* swapped on purpose: the tunnel's local address is our dst */
	key.t_af = AF_INET;
	key.t_src4 = ip->ip_dst;
	key.t_dst4 = ip->ip_src;

	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
		return (rip_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
918 
919 #ifdef INET6
/*
 * IPPROTO_GRE input handler for IPv6.  Same shape as gre_input():
 * build a swapped lookup key, try the tunnels, fall back to raw
 * IPv6 input if nothing claims the packet.
 */
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct mbuf *m = *mp;
	struct gre_tunnel key;
	struct ip6_hdr *ip6;
	uint32_t flow;

	ip6 = mtod(m, struct ip6_hdr *);

	/* XXX check if ip6_src is sane for nvgre? */

	/* swapped on purpose: the tunnel's local address is our dst */
	key.t_af = AF_INET6;
	key.t_src6 = ip6->ip6_dst;
	key.t_dst6 = ip6->ip6_src;

	flow = bemtoh32(&ip6->ip6_flow);

	/*
	 * flow >> 20 leaves the version nibble plus the 8-bit traffic
	 * class; gre_input_key()'s uint8_t otos parameter truncates
	 * that down to just the traffic class.
	 */
	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
		return (rip6_input(mp, offp, type, af));

	return (IPPROTO_DONE);
}
943 #endif /* INET6 */
944 
945 static inline struct ifnet *
946 gre_find(const struct gre_tunnel *key)
947 {
948 	struct gre_softc *sc;
949 
950 	TAILQ_FOREACH(sc, &gre_list, sc_entry) {
951 		if (gre_cmp(key, &sc->sc_tunnel) != 0)
952 			continue;
953 
954 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
955 			continue;
956 
957 		return (&sc->sc_if);
958 	}
959 
960 	return (NULL);
961 }
962 
963 static inline struct ifnet *
964 mgre_find(const struct gre_tunnel *key)
965 {
966 	struct mgre_softc *sc;
967 
968 	NET_ASSERT_LOCKED();
969 	sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
970 	if (sc != NULL)
971 		return (&sc->sc_if);
972 
973 	return (NULL);
974 }
975 
976 static struct mbuf *
977 gre_input_1(struct gre_tunnel *key, struct mbuf *m,
978     const struct gre_header *gh, uint8_t otos, int iphlen)
979 {
980 	switch (gh->gre_proto) {
981 	case htons(ETHERTYPE_PPP):
982 #ifdef PIPEX
983 		if (pipex_enable) {
984 			struct pipex_session *session;
985 
986 			session = pipex_pptp_lookup_session(m);
987 			if (session != NULL &&
988 			    pipex_pptp_input(m, session) == NULL)
989 				return (NULL);
990 		}
991 #endif
992 		break;
993 	case htons(GRE_EOIP):
994 		return (eoip_input(key, m, gh, otos, iphlen));
995 		break;
996 	}
997 
998 	return (m);
999 }
1000 
1001 static int
1002 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
1003     struct gre_tunnel *key)
1004 {
1005 	struct mbuf *m = *mp;
1006 	int iphlen = *offp, hlen, rxprio;
1007 	struct ifnet *ifp;
1008 	const struct gre_tunnel *tunnel;
1009 	caddr_t buf;
1010 	struct gre_header *gh;
1011 	struct gre_h_key *gkh;
1012 	void (*input)(struct ifnet *, struct mbuf *);
1013 	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
1014 	    uint8_t *, uint8_t);
1015 	int bpf_af = AF_UNSPEC; /* bpf */
1016 	int mcast = 0;
1017 	uint8_t itos;
1018 
1019 	if (!gre_allow)
1020 		goto decline;
1021 
1022 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
1023 
1024 	hlen = iphlen + sizeof(*gh);
1025 	if (m->m_pkthdr.len < hlen)
1026 		goto decline;
1027 
1028 	m = m_pullup(m, hlen);
1029 	if (m == NULL)
1030 		return (IPPROTO_DONE);
1031 
1032 	buf = mtod(m, caddr_t);
1033 	gh = (struct gre_header *)(buf + iphlen);
1034 
1035 	/* check the version */
1036 	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
1037 	case htons(GRE_VERS_0):
1038 		break;
1039 
1040 	case htons(GRE_VERS_1):
1041 		m = gre_input_1(key, m, gh, otos, iphlen);
1042 		if (m == NULL)
1043 			return (IPPROTO_DONE);
1044 		/* FALLTHROUGH */
1045 	default:
1046 		goto decline;
1047 	}
1048 
1049 	/* the only optional bit in the header is K flag */
1050 	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
1051 		goto decline;
1052 
1053 	if (gh->gre_flags & htons(GRE_KP)) {
1054 		hlen += sizeof(*gkh);
1055 		if (m->m_pkthdr.len < hlen)
1056 			goto decline;
1057 
1058 		m = m_pullup(m, hlen);
1059 		if (m == NULL)
1060 			return (IPPROTO_DONE);
1061 
1062 		buf = mtod(m, caddr_t);
1063 		gh = (struct gre_header *)(buf + iphlen);
1064 		gkh = (struct gre_h_key *)(gh + 1);
1065 
1066 		key->t_key_mask = GRE_KEY_MASK;
1067 		key->t_key = gkh->gre_key;
1068 	} else
1069 		key->t_key_mask = GRE_KEY_NONE;
1070 
1071 	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
1072 		if (egre_input(key, m, hlen, otos) == -1 &&
1073 		    nvgre_input(key, m, hlen, otos) == -1)
1074 			goto decline;
1075 
1076 		return (IPPROTO_DONE);
1077 	}
1078 
1079 	ifp = gre_find(key);
1080 	if (ifp == NULL) {
1081 		ifp = mgre_find(key);
1082 		if (ifp == NULL)
1083 			goto decline;
1084 	}
1085 
1086 	switch (gh->gre_proto) {
1087 	case htons(GRE_WCCP): {
1088 		struct mbuf *n;
1089 		int off;
1090 
1091 		/* WCCP/GRE:
1092 		 *   So far as I can see (and test) it seems that Cisco's WCCP
1093 		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
1094 		 *   only in its protocol number.  At least, it works for me.
1095 		 *
1096 		 *   The Internet Drafts can be found if you look for
1097 		 *   the following:
1098 		 *     draft-forster-wrec-wccp-v1-00.txt
1099 		 *     draft-wilson-wrec-wccp-v2-01.txt
1100 		 */
1101 
1102 		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
1103 			goto decline;
1104 
1105 		/*
1106 		 * If the first nibble of the payload does not look like
1107 		 * IPv4, assume it is WCCP v2.
1108 		 */
1109 		n = m_getptr(m, hlen, &off);
1110 		if (n == NULL)
1111 			goto decline;
1112 		if (n->m_data[off] >> 4 != IPVERSION)
1113 			hlen += sizeof(gre_wccp);
1114 
1115 		/* FALLTHROUGH */
1116 	}
1117 	case htons(ETHERTYPE_IP):
1118 #if NBPFILTER > 0
1119 		bpf_af = AF_INET;
1120 #endif
1121 		patch = gre_ipv4_patch;
1122 		input = ipv4_input;
1123 		break;
1124 #ifdef INET6
1125 	case htons(ETHERTYPE_IPV6):
1126 #if NBPFILTER > 0
1127 		bpf_af = AF_INET6;
1128 #endif
1129 		patch = gre_ipv6_patch;
1130 		input = ipv6_input;
1131 		break;
1132 #endif
1133 #ifdef MPLS
1134 	case htons(ETHERTYPE_MPLS_MCAST):
1135 		mcast = M_MCAST|M_BCAST;
1136 		/* fallthrough */
1137 	case htons(ETHERTYPE_MPLS):
1138 #if NBPFILTER > 0
1139 		bpf_af = AF_MPLS;
1140 #endif
1141 		patch = gre_mpls_patch;
1142 		input = mpls_input;
1143 		break;
1144 #endif
1145 	case htons(0):
1146 		if (ifp->if_type != IFT_TUNNEL) {
1147 			/* keepalives dont make sense for mgre */
1148 			goto decline;
1149 		}
1150 
1151 		m_adj(m, hlen);
1152 		gre_keepalive_recv(ifp, m);
1153 		return (IPPROTO_DONE);
1154 
1155 	default:
1156 		goto decline;
1157 	}
1158 
1159 	/* it's ours now */
1160 
1161 	m_adj(m, hlen);
1162 
1163 	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */
1164 
1165 	m = (*patch)(tunnel, m, &itos, otos);
1166 	if (m == NULL)
1167 		return (IPPROTO_DONE);
1168 
1169 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
1170 		SET(m->m_pkthdr.csum_flags, M_FLOWID);
1171 		m->m_pkthdr.ph_flowid =
1172 		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1173 	}
1174 
1175 	rxprio = tunnel->t_rxhprio;
1176 	switch (rxprio) {
1177 	case IF_HDRPRIO_PACKET:
1178 		/* nop */
1179 		break;
1180 	case IF_HDRPRIO_OUTER:
1181 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
1182 		break;
1183 	case IF_HDRPRIO_PAYLOAD:
1184 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
1185 		break;
1186 	default:
1187 		m->m_pkthdr.pf.prio = rxprio;
1188 		break;
1189 	}
1190 
1191 	m->m_flags &= ~(M_MCAST|M_BCAST);
1192 	m->m_flags |= mcast;
1193 	m->m_pkthdr.ph_ifidx = ifp->if_index;
1194 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
1195 
1196 #if NPF > 0
1197 	pf_pkt_addr_changed(m);
1198 #endif
1199 
1200 	counters_pkt(ifp->if_counters,
1201 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
1202 
1203 #if NBPFILTER > 0
1204 	if (ifp->if_bpf)
1205 		bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
1206 #endif
1207 
1208 	(*input)(ifp, m);
1209 	return (IPPROTO_DONE);
1210 decline:
1211 	*mp = m;
1212 	return (-1);
1213 }
1214 
1215 static struct mbuf *
1216 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1217     uint8_t *itosp, uint8_t otos)
1218 {
1219 	struct ip *ip;
1220 	uint8_t itos;
1221 
1222 	m = m_pullup(m, sizeof(*ip));
1223 	if (m == NULL)
1224 		return (NULL);
1225 
1226 	ip = mtod(m, struct ip *);
1227 
1228 	itos = ip->ip_tos;
1229 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1230 		m_freem(m);
1231 		return (NULL);
1232 	}
1233 	if (itos != ip->ip_tos)
1234 		ip_tos_patch(ip, itos);
1235 
1236 	*itosp = itos;
1237 
1238 	return (m);
1239 }
1240 
#ifdef INET6
/*
 * Apply the tunnel ECN egress policy to a decapsulated IPv6 packet:
 * extract the traffic class from the flow word, combine it with the
 * outer TOS, write the result back, and report it via *itosp.
 * Returns NULL (mbuf consumed) on pullup failure or ECN drop.
 */
static struct mbuf *
gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	struct ip6_hdr *ip6;
	uint32_t flow;
	uint8_t itos;

	m = m_pullup(m, sizeof(*ip6));
	if (m == NULL)
		return (NULL);

	ip6 = mtod(m, struct ip6_hdr *);

	/* the traffic class sits in bits 20..27 of the flow word */
	flow = bemtoh32(&ip6->ip6_flow);
	itos = flow >> 20;
	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
		m_freem(m);
		return (NULL);
	}

	/* splice the (possibly updated) traffic class back in */
	CLR(flow, 0xff << 20);
	SET(flow, itos << 20);
	htobem32(&ip6->ip6_flow, flow);

	*itosp = itos;

	return (m);
}
#endif
1272 
#ifdef MPLS
/*
 * ECN handling for an MPLS payload: derive a TOS-like value from the
 * EXP bits of the top shim and run the tunnel ECN egress policy on it.
 * The shim itself is not rewritten; the derived value is reported via
 * *itosp.  Returns NULL (mbuf consumed) on pullup failure or ECN drop.
 */
static struct mbuf *
gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
    uint8_t *itosp, uint8_t otos)
{
	uint8_t itos;
	uint32_t shim;

	m = m_pullup(m, sizeof(shim));
	if (m == NULL)
		return (NULL);

	/* scale the 3 EXP bits up into the top of a TOS byte */
	shim = *mtod(m, uint32_t *);
	itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;

	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
		m_freem(m);
		return (NULL);
	}

	*itosp = itos;

	return (m);
}
#endif
1298 
/*
 * Set the pf priority of an inbound L2 (egre/nvgre) packet according
 * to the tunnel rxhprio setting: keep the mbuf's existing priority
 * (PACKET), derive it from the outer TOS (OUTER), or force the
 * configured value.  Unlike the L3 path there is no PAYLOAD case,
 * since an Ethernet payload carries no TOS of its own.
 */
#define gre_l2_prio(_t, _m, _otos) do {					\
	int rxprio = (_t)->t_rxhprio;					\
	switch (rxprio) {						\
	case IF_HDRPRIO_PACKET:						\
		/* nop */						\
		break;							\
	case IF_HDRPRIO_OUTER:						\
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
		break;							\
	default:							\
		(_m)->m_pkthdr.pf.prio = rxprio;			\
		break;							\
	}								\
} while (0)
1313 
/*
 * Deliver a GRE Transparent Ethernet Bridging packet to the matching
 * egre(4) interface.  Returns -1 if no egre interface matches the
 * tunnel key (so the caller can try nvgre next) and 0 once the packet
 * has been consumed, whether delivered or dropped.
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* with an entropy key, reuse the sender's entropy as our flowid */
	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		m->m_pkthdr.ph_flowid =
		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	}

	/* outer mcast/bcast flags don't apply to the inner frame */
	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1347 
1348 static int
1349 nvgre_rtfind(struct nvgre_softc *sc, struct ifbaconf *baconf)
1350 {
1351 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1352 	struct nvgre_entry *nv;
1353 	struct ifbareq bareq;
1354 	caddr_t uaddr, end;
1355 	int error;
1356 	int age;
1357 
1358 	if (baconf->ifbac_len == 0) {
1359 		/* single read is atomic */
1360 		baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
1361 		return (0);
1362 	}
1363 
1364 	uaddr = baconf->ifbac_buf;
1365 	end = uaddr + baconf->ifbac_len;
1366 
1367 	rw_enter_read(&sc->sc_ether_lock);
1368 	RBT_FOREACH(nv, nvgre_map, &sc->sc_ether_map) {
1369 		if (uaddr >= end)
1370 			break;
1371 
1372 		memcpy(bareq.ifba_name, ifp->if_xname,
1373 		    sizeof(bareq.ifba_name));
1374 		memcpy(bareq.ifba_ifsname, ifp->if_xname,
1375 		    sizeof(bareq.ifba_ifsname));
1376 		memcpy(&bareq.ifba_dst, &nv->nv_dst,
1377 		    sizeof(bareq.ifba_dst));
1378 
1379 		memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa));
1380 		switch (sc->sc_tunnel.t_af) {
1381 		case AF_INET: {
1382 			struct sockaddr_in *sin;
1383 
1384 			sin = (struct sockaddr_in *)&bareq.ifba_dstsa;
1385 			sin->sin_len = sizeof(*sin);
1386 			sin->sin_family = AF_INET;
1387 			sin->sin_addr = nv->nv_gateway.in4;
1388 
1389 			break;
1390 		}
1391 #ifdef INET6
1392 		case AF_INET6: {
1393 			struct sockaddr_in6 *sin6;
1394 
1395 			sin6 = (struct sockaddr_in6 *)&bareq.ifba_dstsa;
1396 			sin6->sin6_len = sizeof(*sin6);
1397 			sin6->sin6_family = AF_INET6;
1398 			sin6->sin6_addr = nv->nv_gateway.in6;
1399 
1400 			break;
1401 		}
1402 #endif /* INET6 */
1403 		default:
1404 			unhandled_af(sc->sc_tunnel.t_af);
1405 		}
1406 
1407 		switch (nv->nv_type) {
1408 		case NVGRE_ENTRY_DYNAMIC:
1409 			age = (ticks - nv->nv_age) / hz;
1410 			bareq.ifba_age = MIN(age, 0xff);
1411 			bareq.ifba_flags = IFBAF_DYNAMIC;
1412 			break;
1413 		case NVGRE_ENTRY_STATIC:
1414 			bareq.ifba_age = 0;
1415 			bareq.ifba_flags = IFBAF_STATIC;
1416 			break;
1417 		}
1418 
1419 		error = copyout(&bareq, uaddr, sizeof(bareq));
1420 		if (error != 0) {
1421 			rw_exit_read(&sc->sc_ether_lock);
1422 			return (error);
1423 		}
1424 
1425 		uaddr += sizeof(bareq);
1426 	}
1427 	baconf->ifbac_len = sc->sc_ether_num * sizeof(bareq);
1428 	rw_exit_read(&sc->sc_ether_lock);
1429 
1430 	return (0);
1431 }
1432 
/*
 * Empty the learned-address map.  The tree head is copied out and
 * reinitialised under the write lock, so the (possibly slow) walk that
 * releases each entry runs on the private copy without holding the
 * lock.  Entries still referenced elsewhere survive until their last
 * refcnt_rele().
 */
static void
nvgre_flush_map(struct nvgre_softc *sc)
{
	struct nvgre_map map;
	struct nvgre_entry *nv, *nnv;

	rw_enter_write(&sc->sc_ether_lock);
	map = sc->sc_ether_map;
	RBT_INIT(nvgre_map, &sc->sc_ether_map);
	sc->sc_ether_num = 0;
	rw_exit_write(&sc->sc_ether_lock);

	RBT_FOREACH_SAFE(nv, nvgre_map, &map, nnv) {
		RBT_REMOVE(nvgre_map, &map, nv);
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
}
1451 
/*
 * Learn (or refresh) the mapping from the inner Ethernet source
 * address to the outer tunnel endpoint it arrived from, like a
 * bridge's learning path.  Lookup happens under the read lock; an
 * insert or a gateway update retakes the lock for writing.  Best
 * effort: allocation failure or a lost insert race is silently
 * tolerated.
 */
static void
nvgre_input_map(struct nvgre_softc *sc, const struct gre_tunnel *key,
    const struct ether_header *eh)
{
	struct nvgre_entry *nv, nkey;
	int new = 0;

	/* never learn broadcast/multicast source addresses */
	if (ETHER_IS_BROADCAST(eh->ether_shost) ||
	    ETHER_IS_MULTICAST(eh->ether_shost))
		return;

	memcpy(&nkey.nv_dst, eh->ether_shost, ETHER_ADDR_LEN);

	/* remember where it came from */
	rw_enter_read(&sc->sc_ether_lock);
	nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &nkey);
	if (nv == NULL)
		new = 1;
	else {
		nv->nv_age = ticks;

		/*
		 * Only dynamic entries whose gateway actually changed
		 * need a write-locked update; otherwise leave nv NULL
		 * so nothing more happens below.
		 */
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC ||
		    gre_ip_cmp(key->t_af, &key->t_dst, &nv->nv_gateway))
			nv = NULL;
		else
			refcnt_take(&nv->nv_refs);
	}
	rw_exit_read(&sc->sc_ether_lock);

	if (new) {
		struct nvgre_entry *onv;
		unsigned int num;

		nv = pool_get(&nvgre_pool, PR_NOWAIT);
		if (nv == NULL) {
			/* oh well */
			return;
		}

		memcpy(&nv->nv_dst, eh->ether_shost, ETHER_ADDR_LEN);
		nv->nv_type = NVGRE_ENTRY_DYNAMIC;
		nv->nv_gateway = key->t_dst;
		refcnt_init(&nv->nv_refs);
		nv->nv_age = ticks;

		rw_enter_write(&sc->sc_ether_lock);
		num = sc->sc_ether_num;
		if (++num > sc->sc_ether_max)
			onv = nv;	/* table full, discard the entry */
		else {
			/* try to give the ref to the map */
			onv = RBT_INSERT(nvgre_map, &sc->sc_ether_map, nv);
			if (onv == NULL) {
				/* count the successful insert */
				sc->sc_ether_num = num;
			}
		}
		rw_exit_write(&sc->sc_ether_lock);

		/* full table or lost a race with another inserter */
		if (onv != NULL)
			pool_put(&nvgre_pool, nv);
	} else if (nv != NULL) {
		/* existing dynamic entry moved to a new gateway */
		rw_enter_write(&sc->sc_ether_lock);
		nv->nv_gateway = key->t_dst;
		rw_exit_write(&sc->sc_ether_lock);

		if (refcnt_rele(&nv->nv_refs)) {
			/* ioctl may have deleted the entry */
			pool_put(&nvgre_pool, nv);
		}
	}
}
1524 
/*
 * Look up the nvgre interface for a packet that arrived on the
 * multicast group: match the outer source address (the peer) against
 * each tunnel's destination, scoped to the receiving interface.
 */
static inline struct nvgre_softc *
nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
{
	struct nvgre_softc *sc;
	int rv;

	/*
	 * building an nvgre_softc to use with RBT_FIND is expensive, and
	 * would need to swap the src and dst addresses in the key. so do the
	 * find by hand.
	 */

	NET_ASSERT_LOCKED();
	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
	while (sc != NULL) {
		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
		if (rv == 0)
			return (sc);
		if (rv < 0)
			sc = RBT_LEFT(nvgre_mcast_tree, sc);
		else
			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
	}

	return (NULL);
}
1552 
1553 static inline struct nvgre_softc *
1554 nvgre_ucast_find(const struct gre_tunnel *key)
1555 {
1556 	NET_ASSERT_LOCKED();
1557 	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
1558 	    (struct nvgre_softc *)key));
1559 }
1560 
/*
 * Deliver an NVGRE packet to the matching nvgre(4) interface, learning
 * the inner source MAC on the way.  Returns -1 if no interface matches
 * (the packet is not ours) and 0 once the mbuf has been consumed.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;

	/* group traffic and unicast traffic live in separate trees */
	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
	else
		sc = nvgre_ucast_find(key);

	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	nvgre_input_map(sc, key, mtod(m, struct ether_header *));

	/* NVGRE always carries entropy in the key; use it as flowid */
	SET(m->m_pkthdr.csum_flags, M_FLOWID);
	m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1597 
/*
 * Strip the encapsulation headers and make sure the inner Ethernet
 * header is contiguous and the payload behind it is 32-bit aligned,
 * duplicating the packet with an ETHER_ALIGN offset if it is not.
 * Consumes the mbuf and returns NULL on any failure.
 */
static struct mbuf *
gre_ether_align(struct mbuf *m, int hlen)
{
	struct mbuf *n;
	int off;

	m_adj(m, hlen);

	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
		m_freem(m);
		return (NULL);
	}

	m = m_pullup(m, sizeof(struct ether_header));
	if (m == NULL)
		return (NULL);

	/* find where the payload after the Ethernet header starts */
	n = m_getptr(m, sizeof(struct ether_header), &off);
	if (n == NULL) {
		m_freem(m);
		return (NULL);
	}

	/* realign by copying if the payload is not 32-bit aligned */
	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			return (NULL);
		m = n;
	}

	return (m);
}
1631 
/*
 * Process a keepalive packet that was looped back to us through the
 * tunnel.  The packet carries a timestamp and a SipHash digest keyed
 * with our secret; after validating both, advance the keepalive state
 * machine (DOWN -> HOLD -> UP) and rearm the hold timeout.  Always
 * consumes the mbuf.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;

	/* keepalives must be disabled, or the packet left our rdomain */
	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;

	/* reject packets whose timestamp is in the future or too old */
	gk = mtod(m, struct gre_keepalive *);
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10) /* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/* first sign of life: hold before declaring the link up */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* stable link: slowly relax the hold threshold */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

drop:
	m_freem(m);
}
1701 
/*
 * if_output handler for gre(4): validate the address family, tag the
 * packet against tunnel-in-tunnel loops, and queue it for gre_start()
 * to encapsulate.  Returns 0 or an errno; the mbuf is always consumed.
 */
static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;
	int error = 0;

	/* honour the net.inet.gre.allow sysctl */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dst->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			m_freem(m);
			error = EIO;
			goto end;
		}
	}

	/* tag the packet with our ifindex so the loop check above works */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		m_freem(m);
		error = ENOBUFS;
		goto end;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* remember the inner af for gre_start() */
	m->m_pkthdr.ph_family = dst->sa_family;

	error = if_enqueue(ifp, m);
end:
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1765 
1766 void
1767 gre_start(struct ifnet *ifp)
1768 {
1769 	struct gre_softc *sc = ifp->if_softc;
1770 	struct mbuf *m;
1771 	int af;
1772 #if NBPFILTER > 0
1773 	caddr_t if_bpf;
1774 #endif
1775 
1776 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1777 		af = m->m_pkthdr.ph_family;
1778 
1779 #if NBPFILTER > 0
1780 		if_bpf = ifp->if_bpf;
1781 		if (if_bpf)
1782 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
1783 #endif
1784 
1785 		m = gre_l3_encap(&sc->sc_tunnel, m, af);
1786 		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1787 			ifp->if_oerrors++;
1788 			continue;
1789 		}
1790 	}
1791 }
1792 
/*
 * Route request hook for mgre(4).  On RTM_ADD of a local route for one
 * of our own addresses, clear RTF_LLINFO so the local address is not
 * treated as a neighbour entry; mirrors that lo0 carries an address of
 * the same family first.  Other requests are ignored.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* is the route's destination one of our addresses? */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* require an address of the same family on lo0 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1835 
/*
 * if_output handler for mgre(4).  The tunnel destination is taken from
 * the route's gateway (a host route installed for the peer), so the
 * packet can be encapsulated immediately and queued; mgre_start() only
 * has to transmit.  Returns 0 or an errno; the mbuf is always consumed.
 */
static int
mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
    struct rtentry *rt0)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct sockaddr *gate;
	struct rtentry *rt;
	struct m_tag *mtag;
	int error = 0;
	sa_family_t af;
	const void *addr;

	/* honour the net.inet.gre.allow sysctl */
	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dest->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
		error = ENETUNREACH;
		goto drop;
	}

	rt = rt_getll(rt0);

	/* check rt_expire? */
	if (ISSET(rt->rt_flags, RTF_REJECT)) {
		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
		goto drop;
	}
	/* mgre only works with host routes whose gateway is the peer */
	if (!ISSET(rt->rt_flags, RTF_HOST)) {
		error = EHOSTUNREACH;
		goto drop;
	}
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		error = EINVAL;
		goto drop;
	}

	gate = rt->rt_gateway;
	af = gate->sa_family;
	if (af != sc->sc_tunnel.t_af) {
		error = EAGAIN;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			error = EIO;
			goto drop;
		}
	}

	/* tag the packet with our ifindex so the loop check above works */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* extract the outer destination address from the gateway */
	switch (af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
		addr = &sin->sin_addr;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
		addr = &sin6->sin6_addr;
		break;
	}
 #endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
	if (m == NULL) {
		ifp->if_oerrors++;
		return (ENOBUFS);
	}

	m->m_pkthdr.ph_family = dest->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1953 
/*
 * Send queue handler for mgre(4).  Packets were already encapsulated
 * by mgre_output(), so only bpf tapping and transmission remain.  The
 * bpf tap uses a temporary mbuf header to skip the encapsulation and
 * show the payload starting at if_hdrlen.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			struct m_hdr mh;
			struct mbuf *n;
			int off;

			/* point a fake mbuf at the inner payload */
			n = m_getptr(m, ifp->if_hdrlen, &off);
			KASSERT(n != NULL);

			mh.mh_flags = 0;
			mh.mh_next = n->m_next;
			mh.mh_len = n->m_len - off;
			mh.mh_data = n->m_data + off;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1990 
/*
 * Send queue handler for egre(4): encapsulate each queued Ethernet
 * frame in GRE (Transparent Ethernet Bridging) and transmit it.  A
 * fresh mbuf is prepended for the headers to keep the payload's
 * alignment intact.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* honour the net.inet.gre.allow sysctl */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* start with an empty mbuf; gre_encap fills it in */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
2033 
/*
 * Encapsulate an L3 payload (IPv4, IPv6 or MPLS) for transmission to
 * dst: pick the GRE protocol, derive the outer TTL (copied from the
 * payload when the tunnel ttl is -1) and TOS (via the txhprio policy
 * plus ECN ingress), then prepend the GRE and outer IP headers.
 * Returns NULL (mbuf consumed) on failure.
 */
static struct mbuf *
gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
    struct mbuf *m, sa_family_t af)
{
	uint16_t proto;
	uint8_t ttl, itos, otos;
	int tttl = tunnel->t_ttl;
	int ttloff;

	switch (af) {
	case AF_INET: {
		struct ip *ip;

		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;

		ttloff = offsetof(struct ip, ip_ttl);
		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;

		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);

		/* traffic class lives in bits 20..27 of the flow word */
		ip6 = mtod(m, struct ip6_hdr *);
		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;

		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
		proto = htons(ETHERTYPE_IPV6);
		break;
	}
 #endif
#ifdef MPLS
	case AF_MPLS: {
		uint32_t shim;

		m = m_pullup(m, sizeof(shim));
		if (m == NULL)
			return (NULL);

		/* scale the 3 EXP bits up into the top of a TOS byte */
		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
		itos = (shim >> MPLS_EXP_OFFSET) << 5;

		/* the shim TTL is the last byte of the 32-bit word */
		ttloff = 3;

		if (m->m_flags & (M_BCAST | M_MCAST))
			proto = htons(ETHERTYPE_MPLS_MCAST);
		else
			proto = htons(ETHERTYPE_MPLS);
		break;
	}
#endif
	default:
		unhandled_af(af);
	}

	/* tunnel ttl of -1 means copy the payload's TTL/hop limit */
	if (tttl == -1) {
		KASSERT(m->m_len > ttloff); /* m_pullup has happened */

		ttl = *(m->m_data + ttloff);
	} else
		ttl = tttl;

	itos = gre_l3_tos(tunnel, m, itos);
	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);

	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
}
2110 
/*
 * Prepend the GRE header (and the optional key field) in front of the
 * payload, then hand off to gre_encap_dst_ip() for the outer IP
 * header.  Returns NULL (mbuf consumed) if allocation fails.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	hlen = sizeof(*gh);
	if (tunnel->t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(*gkh);

	m = m_prepend(m, hlen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = GRE_VERS_0;
	gh->gre_proto = proto;
	if (tunnel->t_key_mask != GRE_KEY_NONE) {
		gh->gre_flags |= htons(GRE_KP);

		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;

		/* fold the packet's flowid into the entropy bits */
		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    m->m_pkthdr.ph_flowid);
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}
2145 
/*
 * Prepend the outer IPv4 or IPv6 header according to the tunnel's
 * address family.  Returns NULL (mbuf consumed) if the tunnel is not
 * configured yet or allocation fails.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* packets may arrive before tunnel is set up */
		m_freem(m);
		return (NULL);
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		/* t_df already holds htons(IP_DF) or htons(0) */
		ip->ip_off = tunnel->t_df;
		ip->ip_tos = tos;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		/* seed the flow label from the flowid, then add tclass */
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
		    htonl(m->m_pkthdr.ph_flowid) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	return (m);
}
2206 
2207 static int
2208 gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
2209 {
2210 	m->m_flags &= ~(M_BCAST|M_MCAST);
2211 	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
2212 
2213 #if NPF > 0
2214 	pf_pkt_addr_changed(m);
2215 #endif
2216 
2217 	switch (tunnel->t_af) {
2218 	case AF_INET:
2219 		ip_send(m);
2220 		break;
2221 #ifdef INET6
2222 	case AF_INET6:
2223 		ip6_send(m);
2224 		break;
2225 #endif
2226 	default:
2227 		unhandled_af(tunnel->t_af);
2228 	}
2229 
2230 	return (0);
2231 }
2232 
/*
 * Handle the ioctls shared by every GRE flavour: MTU, vnetid (GRE
 * key), vnetflowid, tunnel endpoint addresses, routing table, and the
 * outer DF bit.  Returns ENOTTY for anything it does not recognise so
 * the caller can fall through to the generic interface ioctls.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* refuse MTUs below the historic IPv4 minimum of 576 */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
2309 
2310 static uint8_t
2311 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2312 {
2313 	uint8_t prio;
2314 
2315 	switch (t->t_txhprio) {
2316 	case IF_HDRPRIO_PACKET:
2317 		prio = m->m_pkthdr.pf.prio;
2318 		break;
2319 	default:
2320 		prio = t->t_txhprio;
2321 		break;
2322 	}
2323 
2324 	return (IFQ_PRIO2TOS(prio));
2325 }
2326 
2327 static uint8_t
2328 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2329 {
2330 	uint8_t prio;
2331 
2332 	switch (t->t_txhprio) {
2333 	case IF_HDRPRIO_PAYLOAD:
2334 		return (tos);
2335 	case IF_HDRPRIO_PACKET:
2336 		prio = m->m_pkthdr.pf.prio;
2337 		break;
2338 	default:
2339 		prio = t->t_txhprio;
2340 		break;
2341 	}
2342 
2343 	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2344 }
2345 
/*
 * ioctl handler for gre(4): interface up/down, keepalive
 * configuration, TTL/ECN/priority knobs, falling back to
 * gre_tunnel_ioctl() for the shared tunnel ioctls.
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/* timeout and count must both be zero or both be set */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* disable keepalives */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh secret and time bias for this session */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means copy the payload TTL into the outer header */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2457 
/*
 * mgre(4) (point-to-multipoint GRE) interface ioctl handler.
 * Endpoint, vnetid and rtable changes are refused with EBUSY while
 * the interface is running, since mgre_up() hashes the tunnel
 * configuration into the global mgre tree.  Unknown commands fall
 * through to gre_tunnel_ioctl().
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 is special (default/copy), otherwise 1..255 */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these change the tunnel identity; only when down */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2553 
/*
 * Configure the local endpoint of an mgre(4) interface.  mgre is
 * point-to-multipoint, so only a source address may be supplied;
 * the destination must be AF_UNSPEC and is stored as the wildcard.
 * Returns 0, EINVAL on a bad address, or EAFNOSUPPORT.
 */
static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	int error;
#endif

	/* a fixed destination makes no sense on a multipoint interface */
	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		addr4 = (struct sockaddr_in *)addr;
		/* the local address must be a unicast host address */
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		/* fold the scope id into the stored address */
		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
		if (error != 0)
			return (error);

		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}
2611 
/*
 * Report the configured mgre(4) endpoint to userland.  The source
 * address is copied out; the destination is always returned as a
 * minimal AF_UNSPEC sockaddr since mgre is point-to-multipoint.
 */
static int
mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* no endpoint has been configured yet */
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* recover the scope id embedded in the stored address */
		in6_recoverscope(sin6, &tunnel->t_src6);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	/* sa_len 2 covers just sa_len and sa_family */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	return (0);
}
2651 
/*
 * egre(4) (Ethernet-over-GRE) interface ioctl handler.  Endpoint
 * and key changes are refused with EBUSY while running because
 * egre_up() inserts the tunnel config into the global egre tree.
 * Unknown commands go to gre_tunnel_ioctl() and then, if still
 * unhandled, to the generic Ethernet ioctl path.
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* no -1 here: an L2 tunnel needs a concrete TTL */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these change the tunnel identity; only when down */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2736 
/*
 * nvgre(4) interface ioctl handler.  On top of the usual tunnel
 * knobs this manages the parent (multicast underlay) interface and
 * the learned-address table, which is flushed whenever the tunnel
 * configuration changes.  Unknown commands fall through to the
 * generic Ethernet ioctl path.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* ucast=0: the destination must be a multicast group */
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			nvgre_flush_map(sc);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			nvgre_flush_map(sc);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		/* if_put(NULL) is harmless */
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		nvgre_flush_map(sc);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* nvgre keys carry entropy bits in the low word */
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		nvgre_flush_map(sc);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		nvgre_flush_map(sc);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCBRDGSCACHE:
		/* maximum number of learned ethernet addresses */
		if (bparam->ifbrp_csize < 1) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ether_max = bparam->ifbrp_csize;
		break;
	case SIOCBRDGGCACHE:
		bparam->ifbrp_csize = sc->sc_ether_max;
		break;

	case SIOCBRDGSTO:
		/* learned address timeout, kept internally in ticks */
		if (bparam->ifbrp_ctime < 0 ||
		    bparam->ifbrp_ctime > INT_MAX / hz) {
			error = EINVAL;
			break;
		}
		sc->sc_ether_tmo = bparam->ifbrp_ctime * hz;
		break;
	case SIOCBRDGGTO:
		bparam->ifbrp_ctime = sc->sc_ether_tmo / hz;
		break;

	case SIOCBRDGRTS:
		error = nvgre_rtfind(sc, (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		nvgre_flush_map(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2941 
/*
 * eoip(4) (MikroTik Ethernet-over-IP) interface ioctl handler.
 * Tunnel identity (endpoints, tunnel id, rtable, keepalive) may
 * only change while the interface is down.  Unknown commands fall
 * through to the generic Ethernet ioctl path.
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/*
		 * NOTE(review): unlike gre_ioctl() there is no check
		 * that timeo and cnt are both zero or both non-zero;
		 * either being zero simply disables keepalives below.
		 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* eoip tunnel ids are 16 bits on the wire */
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* ucast=1: the destination must be a unicast host */
		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		/* no -1 here: an L2 tunnel needs a concrete TTL */
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
3106 
3107 static int
3108 gre_up(struct gre_softc *sc)
3109 {
3110 	NET_ASSERT_LOCKED();
3111 	SET(sc->sc_if.if_flags, IFF_RUNNING);
3112 
3113 	if (sc->sc_ka_state != GRE_KA_NONE)
3114 		gre_keepalive_send(sc);
3115 
3116 	return (0);
3117 }
3118 
3119 static int
3120 gre_down(struct gre_softc *sc)
3121 {
3122 	NET_ASSERT_LOCKED();
3123 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3124 
3125 	if (sc->sc_ka_state != GRE_KA_NONE) {
3126 		timeout_del_barrier(&sc->sc_ka_hold);
3127 		timeout_del_barrier(&sc->sc_ka_send);
3128 
3129 		sc->sc_ka_state = GRE_KA_DOWN;
3130 		gre_link_state(&sc->sc_if, sc->sc_ka_state);
3131 	}
3132 
3133 	return (0);
3134 }
3135 
3136 static void
3137 gre_link_state(struct ifnet *ifp, unsigned int state)
3138 {
3139 	int link_state = LINK_STATE_UNKNOWN;
3140 
3141 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
3142 		switch (state) {
3143 		case GRE_KA_NONE:
3144 			/* maybe up? or down? it's unknown, really */
3145 			break;
3146 		case GRE_KA_UP:
3147 			link_state = LINK_STATE_UP;
3148 			break;
3149 		default:
3150 			link_state = LINK_STATE_KALIVE_DOWN;
3151 			break;
3152 		}
3153 	}
3154 
3155 	if (ifp->if_link_state != link_state) {
3156 		ifp->if_link_state = link_state;
3157 		if_link_state_change(ifp);
3158 	}
3159 }
3160 
/*
 * Timeout handler that emits a gre(4) keepalive.  The keepalive is
 * a signed GRE packet built with the tunnel endpoints swapped, then
 * wrapped in a normal outbound GRE packet: the peer decapsulates
 * the outer layer and the inner packet routes straight back to us.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuration
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/* nothing to do unless running with a usable tunnel config */
	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/* reserve headroom for the headers prepended by gre_encap() */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* sign uptime+random so replies can be authenticated */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* inner tunnel: endpoints swapped so the peer echoes it back */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		/* finish the inner IPv4 header by hand */
		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, sizeof(*ip));

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}
3271 
3272 static void
3273 gre_keepalive_hold(void *arg)
3274 {
3275 	struct gre_softc *sc = arg;
3276 	struct ifnet *ifp = &sc->sc_if;
3277 
3278 	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
3279 	    sc->sc_ka_state == GRE_KA_NONE)
3280 		return;
3281 
3282 	NET_LOCK();
3283 	sc->sc_ka_state = GRE_KA_DOWN;
3284 	gre_link_state(ifp, sc->sc_ka_state);
3285 	NET_UNLOCK();
3286 }
3287 
/*
 * Validate and install a tunnel's source and destination addresses.
 * Both addresses must share family and length.  The source must be
 * a unicast host address; the destination must be unicast when
 * "ucast" is set, multicast otherwise (nvgre's underlay group).
 * Returns 0, EINVAL, EAFNOSUPPORT, or an in6_embedscope() error.
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		dst4 = (struct sockaddr_in *)dst;
		/* dst must be multicast iff a multicast dst is wanted */
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		/* both endpoints must live in the same scope */
		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}
3360 
/*
 * Copy a tunnel's source and destination addresses out to userland.
 * Returns EADDRNOTAVAIL when no tunnel is configured, EAFNOSUPPORT
 * for an unexpected family, 0 otherwise.
 */
static int
gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6 /* ifconfig already embeds the scopeid */
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* no endpoint has been configured yet */
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)src;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;

		sin = (struct sockaddr_in *)dst;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_dst4;

		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)src;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* recover the scope id embedded in the stored address */
		in6_recoverscope(sin6, &tunnel->t_src6);

		sin6 = (struct sockaddr_in6 *)dst;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_dst6);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	return (0);
}
3411 
3412 static int
3413 gre_del_tunnel(struct gre_tunnel *tunnel)
3414 {
3415 	/* commit */
3416 	tunnel->t_af = AF_UNSPEC;
3417 
3418 	return (0);
3419 }
3420 
3421 static int
3422 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3423 {
3424 	uint32_t key;
3425 	uint32_t min = GRE_KEY_MIN;
3426 	uint32_t max = GRE_KEY_MAX;
3427 	unsigned int shift = GRE_KEY_SHIFT;
3428 	uint32_t mask = GRE_KEY_MASK;
3429 
3430 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
3431 		min = GRE_KEY_ENTROPY_MIN;
3432 		max = GRE_KEY_ENTROPY_MAX;
3433 		shift = GRE_KEY_ENTROPY_SHIFT;
3434 		mask = GRE_KEY_ENTROPY;
3435 	}
3436 
3437 	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
3438 		return (EINVAL);
3439 
3440 	key = htonl(ifr->ifr_vnetid << shift);
3441 
3442 	/* commit */
3443 	tunnel->t_key_mask = mask;
3444 	tunnel->t_key = key;
3445 
3446 	return (0);
3447 }
3448 
3449 static int
3450 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3451 {
3452 	int shift;
3453 
3454 	switch (tunnel->t_key_mask) {
3455 	case GRE_KEY_NONE:
3456 		return (EADDRNOTAVAIL);
3457 	case GRE_KEY_ENTROPY:
3458 		shift = GRE_KEY_ENTROPY_SHIFT;
3459 		break;
3460 	case GRE_KEY_MASK:
3461 		shift = GRE_KEY_SHIFT;
3462 		break;
3463 	}
3464 
3465 	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
3466 
3467 	return (0);
3468 }
3469 
3470 static int
3471 gre_del_vnetid(struct gre_tunnel *tunnel)
3472 {
3473 	tunnel->t_key_mask = GRE_KEY_NONE;
3474 
3475 	return (0);
3476 }
3477 
3478 static int
3479 gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3480 {
3481 	uint32_t mask, key;
3482 
3483 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3484 		return (EADDRNOTAVAIL);
3485 
3486 	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
3487 	if (tunnel->t_key_mask == mask) {
3488 		/* nop */
3489 		return (0);
3490 	}
3491 
3492 	key = ntohl(tunnel->t_key);
3493 	if (mask == GRE_KEY_ENTROPY) {
3494 		if (key > GRE_KEY_ENTROPY_MAX)
3495 			return (ERANGE);
3496 
3497 		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
3498 	} else
3499 		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);
3500 
3501 	/* commit */
3502 	tunnel->t_key_mask = mask;
3503 	tunnel->t_key = key;
3504 
3505 	return (0);
3506 }
3507 
3508 static int
3509 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3510 {
3511 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3512 		return (EADDRNOTAVAIL);
3513 
3514 	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;
3515 
3516 	return (0);
3517 }
3518 
/*
 * Bring an mgre(4) interface up: compute the encapsulation header
 * length for the configured address family and insert the softc
 * into the global mgre tree so incoming packets can be matched.
 * Returns EDESTADDRREQ without an endpoint, EADDRINUSE when an
 * identical tunnel is already running.
 */
static int
mgre_up(struct mgre_softc *sc)
{
	unsigned int hlen;

	switch (sc->sc_tunnel.t_af) {
	case AF_UNSPEC:
		/* can't run without a configured local endpoint */
		return (EDESTADDRREQ);
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}

	hlen += sizeof(struct gre_header);
	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(struct gre_h_key);

	NET_ASSERT_LOCKED();

	/* collision means the same tunnel config is already up */
	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
		return (EADDRINUSE);

	sc->sc_if.if_hdrlen = hlen;
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	return (0);
}
3553 
/*
 * Take an mgre(4) interface down: clear the running flag, restore
 * the default header length set at attach, and remove the softc
 * from the global mgre tree.  Always succeeds.
 */
static int
mgre_down(struct mgre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_if.if_flags, IFF_RUNNING);
	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */

	RBT_REMOVE(mgre_tree, &mgre_tree, sc);

	/* barrier? */

	return (0);
}
3568 
/*
 * Bring an egre(4) interface up by inserting its softc into the
 * global egre tree used to match incoming packets.  Returns
 * EDESTADDRREQ without a configured endpoint, EADDRINUSE when an
 * identical tunnel is already running.
 */
static int
egre_up(struct egre_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	/* collision means the same tunnel config is already up */
	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);
}
3584 
/*
 * Take an egre(4) interface down: clear the running flag and remove
 * the softc from the global egre tree.  Always succeeds.
 */
static int
egre_down(struct egre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	RBT_REMOVE(egre_tree, &egre_tree, sc);

	/* barrier? */

	return (0);
}
3598 
3599 static int
3600 egre_media_change(struct ifnet *ifp)
3601 {
3602 	return (ENOTTY);
3603 }
3604 
3605 static void
3606 egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
3607 {
3608 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
3609 	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
3610 }
3611 
/*
 * Bring an nvgre(4) interface up: insert the softc into both the
 * multicast and unicast lookup trees, join the underlay multicast
 * group on the parent interface, and hook parent link-state and
 * detach events.  On failure, everything done so far is unwound
 * via the goto ladder at the bottom.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	/* the parent must be able to join the underlay group */
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	/* join the configured multicast group on the parent */
	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	if_linkstatehook_add(ifp0, &sc->sc_ltask);
	if_detachhook_add(ifp0, &sc->sc_dtask);

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	/* start aging the learned ethernet addresses */
	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);

	return (0);

remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}
3683 
/*
 * Bring an nvgre(4) interface down.  Clears IFF_RUNNING, drops the
 * NET_LOCK while the barriers wait for the aging timeout, the send
 * queue and the send task to drain, then purges pending packets,
 * unhooks from the parent, leaves the multicast group and removes
 * the interface from both lookup trees.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	/* release the NET_LOCK while waiting for pending work */
	NET_UNLOCK();
	timeout_del_barrier(&sc->sc_ether_age);
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	/* nothing will run nvgre_send() now; discard queued packets */
	mq_purge(&sc->sc_send_list);

	/* the parent may already have gone away */
	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		if_detachhook_del(ifp0, &sc->sc_dtask);
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
	}
	if_put(ifp0);

	/* leave the multicast group joined in nvgre_up() */
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}
3731 
/*
 * Link state hook for the parent interface (presumably registered
 * via sc_ltask in nvgre_up()); nvgre does not need to react to
 * parent link changes, so this is deliberately empty.
 */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}
3737 
3738 static void
3739 nvgre_detach(void *arg)
3740 {
3741 	struct nvgre_softc *sc = arg;
3742 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3743 
3744 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
3745 		nvgre_down(sc);
3746 		if_down(ifp);
3747 	}
3748 
3749 	sc->sc_ifp0 = 0;
3750 }
3751 
3752 static int
3753 nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
3754 {
3755 	struct ifnet *ifp0;
3756 
3757 	ifp0 = ifunit(parent); /* doesn't need an if_put */
3758 	if (ifp0 == NULL)
3759 		return (EINVAL);
3760 
3761 	if (!ISSET(ifp0->if_flags, IFF_MULTICAST))
3762 		return (EPROTONOSUPPORT);
3763 
3764 	/* commit */
3765 	sc->sc_ifp0 = ifp0->if_index;
3766 
3767 	return (0);
3768 }
3769 
/*
 * Timeout handler that expires dynamically learnt entries from the
 * nvgre MAC address map.  Dynamic entries older than twice the
 * learning timeout are unlinked; the timeout rearms itself while the
 * interface is running.
 */
static void
nvgre_age(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct nvgre_entry *nv, *nnv;
	int tmo = sc->sc_ether_tmo * 2;
	int diff;

	/* stop rearming once the interface is brought down */
	if (!ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING))
		return;

	rw_enter_write(&sc->sc_ether_lock); /* XXX */
	RBT_FOREACH_SAFE(nv, nvgre_map, &sc->sc_ether_map, nnv) {
		/* static entries never age out */
		if (nv->nv_type != NVGRE_ENTRY_DYNAMIC)
			continue;

		diff = ticks - nv->nv_age;
		if (diff < tmo)
			continue;

		/* expired: unlink, drop the map's reference, and free
		 * the entry if that was the last one */
		sc->sc_ether_num--;
		RBT_REMOVE(nvgre_map, &sc->sc_ether_map, nv);
		if (refcnt_rele(&nv->nv_refs))
			pool_put(&nvgre_pool, nv);
	}
	rw_exit_write(&sc->sc_ether_lock);

	timeout_add_sec(&sc->sc_ether_age, NVGRE_AGE_TMO);
}
3799 
3800 static inline int
3801 nvgre_entry_valid(struct nvgre_softc *sc, const struct nvgre_entry *nv)
3802 {
3803 	int diff;
3804 
3805 	if (nv == NULL)
3806 		return (0);
3807 
3808 	if (nv->nv_type == NVGRE_ENTRY_STATIC)
3809 		return (1);
3810 
3811 	diff = ticks - nv->nv_age;
3812 	if (diff < sc->sc_ether_tmo)
3813 		return (1);
3814 
3815 	return (0);
3816 }
3817 
/*
 * ifq start routine for nvgre(4).  For each queued ethernet frame,
 * pick the tunnel endpoint by looking the destination MAC up in the
 * learnt address map (falling back to the configured destination for
 * broadcast and unknown MACs), prepend the GRE encapsulation, and
 * queue the result for nvgre_send() to transmit from the softnet
 * task.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct nvgre_entry *nv, key;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* gre tunnelling may be administratively disabled */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* select the endpoint address for this frame */
		eh = mtod(m0, struct ether_header *);
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			memcpy(&key.nv_dst, eh->ether_dhost,
			    sizeof(key.nv_dst));

			rw_enter_read(&sc->sc_ether_lock);
			nv = RBT_FIND(nvgre_map, &sc->sc_ether_map, &key);
			if (nvgre_entry_valid(sc, nv))
				gateway = nv->nv_gateway;
			else {
				/* "flood" to unknown hosts */
				gateway = tunnel->t_dst;
			}
			rw_exit_read(&sc->sc_ether_lock);
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* start with an empty leading mbuf; the encapsulation
		 * headers will be prepended into it */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	/* hand the batch to the softnet task for transmission */
	if (!ml_empty(&ml)) {
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}
3897 
3898 static uint64_t
3899 nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
3900 {
3901 	struct ip_moptions imo;
3902 	struct mbuf *m;
3903 	uint64_t oerrors = 0;
3904 
3905 	imo.imo_ifidx = sc->sc_ifp0;
3906 	imo.imo_ttl = sc->sc_tunnel.t_ttl;
3907 	imo.imo_loop = 0;
3908 
3909 	NET_LOCK();
3910 	while ((m = ml_dequeue(ml)) != NULL) {
3911 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
3912 			oerrors++;
3913 	}
3914 	NET_UNLOCK();
3915 
3916 	return (oerrors);
3917 }
3918 
#ifdef INET6
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	uint64_t errors = 0;
	struct mbuf *m;

	/* multicast options: send via the parent, no local loopback */
	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	for (m = ml_dequeue(ml); m != NULL; m = ml_dequeue(ml)) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			errors++;
	}
	NET_UNLOCK();

	return (errors);
}
#endif /* INET6 */
3941 
/*
 * Task (scheduled by nvgre_start() on the softnet taskq) that
 * transmits the queued, already-encapsulated packets, dispatching on
 * the tunnel address family.
 */
static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;
	uint64_t oerrors;

	/* the interface may have gone down since the task was queued */
	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* grab everything queued so far in one go */
	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	switch (af) {
	case AF_INET:
		oerrors = nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		oerrors = nvgre_send6(sc, &ml);
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
}
3974 
3975 static int
3976 eoip_up(struct eoip_softc *sc)
3977 {
3978 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3979 		return (EDESTADDRREQ);
3980 
3981 	NET_ASSERT_LOCKED();
3982 
3983 	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
3984 		return (EADDRINUSE);
3985 
3986 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3987 
3988 	if (sc->sc_ka_state != GRE_KA_NONE) {
3989 		sc->sc_ka_holdmax = sc->sc_ka_count;
3990 		eoip_keepalive_send(sc);
3991 	}
3992 
3993 	return (0);
3994 }
3995 
3996 static int
3997 eoip_down(struct eoip_softc *sc)
3998 {
3999 	NET_ASSERT_LOCKED();
4000 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
4001 
4002 	if (sc->sc_ka_state != GRE_KA_NONE) {
4003 		timeout_del_barrier(&sc->sc_ka_hold);
4004 		timeout_del_barrier(&sc->sc_ka_send);
4005 
4006 		sc->sc_ka_state = GRE_KA_DOWN;
4007 		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
4008 	}
4009 
4010 	RBT_REMOVE(eoip_tree, &eoip_tree, sc);
4011 
4012 	return (0);
4013 }
4014 
/*
 * ifq start routine for eoip(4).  Dequeues ethernet frames, prepends
 * the EoIP/GRE encapsulation and transmits them directly with
 * gre_ip_output().
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* gre tunnelling may be administratively disabled */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* start with an empty leading mbuf; the encapsulation
		 * headers will be prepended into it */
		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
4056 
/*
 * Prepend the GRE and EoIP headers to a frame and hand the result to
 * the generic GRE-over-IP encapsulation.  The EoIP header carries
 * the original payload length (0 means keepalive, see eoip_input())
 * and the configured tunnel id.  Returns NULL if memory for the
 * headers could not be allocated.
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len; /* payload length before headers */

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	/* EoIP uses GRE version 1 with the key-present flag set */
	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	htobem16(&eoiph->eoip_len, len);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}
4078 
/*
 * Timeout handler that emits a single EoIP keepalive: an EoIP packet
 * with a zero-length payload.  The mbuf is built with linkhdr bytes
 * of leading space and then trimmed empty, so all the headers to be
 * prepended later fit without further allocations.  Rearms itself
 * every sc_ka_timeo seconds while the interface is running.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	/* get a cluster if the header estimate doesn't fit the mbuf */
	if (linkhdr > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	/* reserve linkhdr bytes, then trim to an empty payload */
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}
4122 
4123 static void
4124 eoip_keepalive_hold(void *arg)
4125 {
4126 	struct eoip_softc *sc = arg;
4127 	struct ifnet *ifp = &sc->sc_ac.ac_if;
4128 
4129 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
4130 		return;
4131 
4132 	NET_LOCK();
4133 	sc->sc_ka_state = GRE_KA_DOWN;
4134 	gre_link_state(ifp, sc->sc_ka_state);
4135 	NET_UNLOCK();
4136 }
4137 
/*
 * Account a received keepalive and drive the keepalive state machine:
 *
 *   DOWN -> HOLD: first keepalive after an outage; require a run of
 *	sc_ka_holdcnt further keepalives before declaring the link up,
 *	doubling sc_ka_holdmax (capped) to damp a flapping peer.
 *   HOLD -> UP: enough consecutive keepalives arrived.
 *   UP: slowly decay sc_ka_holdmax back towards sc_ka_count.
 *
 * Each keepalive also rearms the hold timeout; if it expires the
 * link is dropped back to DOWN (see eoip_keepalive_hold()).
 */
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	switch (sc->sc_ka_state) {
	case GRE_KA_NONE:
		return;
	case GRE_KA_DOWN:
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}
4166 
/*
 * Attempt to take a received GRE packet as EoIP.  Validates the GRE
 * flags, extracts the tunnel id into the lookup key and finds the
 * matching eoip(4) interface.  Zero-length payloads are keepalives;
 * anything else is realigned, trimmed to the advertised length and
 * fed into the stack as an ethernet frame.  Returns the mbuf
 * untouched ("decline") when the packet is not for us, or NULL when
 * it has been consumed or freed.
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	/* EoIP is always GRE version 1 with (only) the key flag set */
	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	/* the data may have moved during m_pullup; refetch pointers */
	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		/* a zero-length payload is a keepalive */
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	/* the frame must be at least as long as advertised... */
	if (m->m_pkthdr.len < len)
		goto drop;
	/* ...and any excess is trimmed off */
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	m->m_flags &= ~(M_MCAST|M_BCAST);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}
4232 
4233 int
4234 gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
4235     size_t newlen)
4236 {
4237 	int error;
4238 
4239 	/* All sysctl names at this level are terminal. */
4240 	if (namelen != 1)
4241 		return (ENOTDIR);
4242 
4243 	switch (name[0]) {
4244 	case GRECTL_ALLOW:
4245 		NET_LOCK();
4246 		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow);
4247 		NET_UNLOCK();
4248 		return (error);
4249 	case GRECTL_WCCP:
4250 		NET_LOCK();
4251 		error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp);
4252 		NET_UNLOCK();
4253 		return (error);
4254 	default:
4255 		return (ENOPROTOOPT);
4256 	}
4257 	/* NOTREACHED */
4258 }
4259 
4260 static inline int
4261 gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
4262 {
4263 	switch (af) {
4264 #ifdef INET6
4265 	case AF_INET6:
4266 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
4267 #endif /* INET6 */
4268 	case AF_INET:
4269 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
4270 	default:
4271 		unhandled_af(af);
4272 	}
4273 
4274 	return (0);
4275 }
4276 
/*
 * Comparator ordering GRE tunnels by their local half only: whether
 * a key is set, the key bits under the common prefix mask, the
 * routing table, the address family, and finally the source address.
 * Returns a negative/zero/positive value for RBT ordering; used by
 * mgre_cmp() and as the first half of gre_cmp().
 */
static int
gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;
	uint32_t mask;
	int rv;

	/* is K set at all? */
	ka = a->t_key_mask & GRE_KEY_ENTROPY;
	kb = b->t_key_mask & GRE_KEY_ENTROPY;

	/* sort by whether K is set */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* is K set on both? */
	if (ka != GRE_KEY_NONE) {
		/* get common prefix */
		mask = a->t_key_mask & b->t_key_mask;

		ka = a->t_key & mask;
		kb = b->t_key & mask;

		/* sort by common prefix */
		if (ka > kb)
			return (1);
		if (ka < kb)
			return (-1);
	}

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	return (0);
}
4327 
4328 static int
4329 gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
4330 {
4331 	int rv;
4332 
4333 	rv = gre_cmp_src(a, b);
4334 	if (rv != 0)
4335 		return (rv);
4336 
4337 	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
4338 }
4339 
4340 static inline int
4341 mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
4342 {
4343 	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
4344 }
4345 
4346 RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
4347 
4348 static inline int
4349 egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
4350 {
4351 	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
4352 }
4353 
4354 RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);
4355 
4356 static inline int
4357 nvgre_entry_cmp(const struct nvgre_entry *a, const struct nvgre_entry *b)
4358 {
4359 	return (memcmp(&a->nv_dst, &b->nv_dst, sizeof(a->nv_dst)));
4360 }
4361 
4362 RBT_GENERATE(nvgre_map, nvgre_entry, nv_entry, nvgre_entry_cmp);
4363 
4364 static int
4365 nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
4366 {
4367 	uint32_t ka, kb;
4368 
4369 	ka = a->t_key & GRE_KEY_ENTROPY;
4370 	kb = b->t_key & GRE_KEY_ENTROPY;
4371 
4372 	/* sort by common prefix */
4373 	if (ka > kb)
4374 		return (1);
4375 	if (ka < kb)
4376 		return (-1);
4377 
4378 	/* sort by routing table */
4379 	if (a->t_rtableid > b->t_rtableid)
4380 		return (1);
4381 	if (a->t_rtableid < b->t_rtableid)
4382 		return (-1);
4383 
4384 	/* sort by address */
4385 	if (a->t_af > b->t_af)
4386 		return (1);
4387 	if (a->t_af < b->t_af)
4388 		return (-1);
4389 
4390 	return (0);
4391 }
4392 
4393 static inline int
4394 nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4395 {
4396 	const struct gre_tunnel *a = &na->sc_tunnel;
4397 	const struct gre_tunnel *b = &nb->sc_tunnel;
4398 	int rv;
4399 
4400 	rv = nvgre_cmp_tunnel(a, b);
4401 	if (rv != 0)
4402 		return (rv);
4403 
4404 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4405 	if (rv != 0)
4406 		return (rv);
4407 
4408 	return (0);
4409 }
4410 
4411 static int
4412 nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
4413     unsigned int if0idxa, const struct gre_tunnel *b,
4414     const union gre_addr *ab,unsigned int if0idxb)
4415 {
4416 	int rv;
4417 
4418 	rv = nvgre_cmp_tunnel(a, b);
4419 	if (rv != 0)
4420 		return (rv);
4421 
4422 	rv = gre_ip_cmp(a->t_af, aa, ab);
4423 	if (rv != 0)
4424 		return (rv);
4425 
4426 	if (if0idxa > if0idxb)
4427 		return (1);
4428 	if (if0idxa < if0idxb)
4429 		return (-1);
4430 
4431 	return (0);
4432 }
4433 
4434 static inline int
4435 nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4436 {
4437 	const struct gre_tunnel *a = &na->sc_tunnel;
4438 	const struct gre_tunnel *b = &nb->sc_tunnel;
4439 
4440 	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
4441 	    b, &b->t_dst, nb->sc_ifp0));
4442 }
4443 
4444 RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
4445 RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
4446 
4447 static inline int
4448 eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
4449 {
4450 	const struct gre_tunnel *a = &ea->sc_tunnel;
4451 	const struct gre_tunnel *b = &eb->sc_tunnel;
4452 	int rv;
4453 
4454 	if (a->t_key > b->t_key)
4455 		return (1);
4456 	if (a->t_key < b->t_key)
4457 		return (-1);
4458 
4459 	/* sort by routing table */
4460 	if (a->t_rtableid > b->t_rtableid)
4461 		return (1);
4462 	if (a->t_rtableid < b->t_rtableid)
4463 		return (-1);
4464 
4465 	/* sort by address */
4466 	if (a->t_af > b->t_af)
4467 		return (1);
4468 	if (a->t_af < b->t_af)
4469 		return (-1);
4470 
4471 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4472 	if (rv != 0)
4473 		return (rv);
4474 
4475 	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
4476 	if (rv != 0)
4477 		return (rv);
4478 
4479 	return (0);
4480 }
4481 
4482 RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
4483