xref: /openbsd/sys/net/if_gre.c (revision 097a140d)
1 /*	$OpenBSD: if_gre.c,v 1.171 2021/03/10 10:21:47 jsg Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "bpfilter.h"
42 #include "pf.h"
43 
44 #include <sys/param.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/kernel.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/timeout.h>
52 #include <sys/queue.h>
53 #include <sys/tree.h>
54 #include <sys/pool.h>
55 #include <sys/rwlock.h>
56 
57 #include <crypto/siphash.h>
58 
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/route.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_ecn.h>
71 
72 #ifdef INET6
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/in6_var.h>
76 #endif
77 
78 #ifdef PIPEX
79 #include <net/pipex.h>
80 #endif
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif /* MPLS */
85 
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89 
90 #if NPF > 0
91 #include <net/pfvar.h>
92 #endif
93 
94 #include <net/if_gre.h>
95 
96 #include <netinet/ip_gre.h>
97 #include <sys/sysctl.h>
98 
99 /* for nvgre bridge shizz */
100 #include <sys/socket.h>
101 #include <net/if_bridge.h>
102 #include <net/if_etherbridge.h>
103 
104 /*
105  * packet formats
106  */
107 struct gre_header {
108 	uint16_t		gre_flags;
109 #define GRE_CP				0x8000  /* Checksum Present */
110 #define GRE_KP				0x2000  /* Key Present */
111 #define GRE_SP				0x1000  /* Sequence Present */
112 
113 #define GRE_VERS_MASK			0x0007
114 #define GRE_VERS_0			0x0000
115 #define GRE_VERS_1			0x0001
116 
117 	uint16_t		gre_proto;
118 } __packed __aligned(4);
119 
120 struct gre_h_cksum {
121 	uint16_t		gre_cksum;
122 	uint16_t		gre_reserved1;
123 } __packed __aligned(4);
124 
125 struct gre_h_key {
126 	uint32_t		gre_key;
127 } __packed __aligned(4);
128 
129 #define GRE_EOIP		0x6400
130 
131 struct gre_h_key_eoip {
132 	uint16_t		eoip_len;	/* network order */
133 	uint16_t		eoip_tunnel_id;	/* little endian */
134 } __packed __aligned(4);
135 
136 #define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
137 #define NVGRE_VSID_RES_MAX	0x000fff
138 #define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */
139 
140 struct gre_h_seq {
141 	uint32_t		gre_seq;
142 } __packed __aligned(4);
143 
144 struct gre_h_wccp {
145 	uint8_t			wccp_flags;
146 	uint8_t			service_id;
147 	uint8_t			alt_bucket;
148 	uint8_t			pri_bucket;
149 } __packed __aligned(4);
150 
151 #define GRE_WCCP 0x883e
152 
153 #define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
154 
155 /*
156  * GRE tunnel metadata
157  */
158 
159 #define GRE_KA_NONE		0
160 #define GRE_KA_DOWN		1
161 #define GRE_KA_HOLD		2
162 #define GRE_KA_UP		3
163 
164 union gre_addr {
165 	struct in_addr		in4;
166 	struct in6_addr		in6;
167 };
168 
169 static inline int
170 		gre_ip_cmp(int, const union gre_addr *,
171 		    const union gre_addr *);
172 
173 #define GRE_KEY_MIN		0x00000000U
174 #define GRE_KEY_MAX		0xffffffffU
175 #define GRE_KEY_SHIFT		0
176 
177 #define GRE_KEY_ENTROPY_MIN	0x00000000U
178 #define GRE_KEY_ENTROPY_MAX	0x00ffffffU
179 #define GRE_KEY_ENTROPY_SHIFT	8
180 
181 struct gre_tunnel {
182 	uint32_t		t_key_mask;
183 #define GRE_KEY_NONE			htonl(0x00000000U)
184 #define GRE_KEY_ENTROPY			htonl(0xffffff00U)
185 #define GRE_KEY_MASK			htonl(0xffffffffU)
186 	uint32_t		t_key;
187 
188 	u_int			t_rtableid;
189 	union gre_addr		t_src;
190 #define t_src4	t_src.in4
191 #define t_src6	t_src.in6
192 	union gre_addr		t_dst;
193 #define t_dst4	t_dst.in4
194 #define t_dst6	t_dst.in6
195 	int			t_ttl;
196 	int			t_txhprio;
197 	int			t_rxhprio;
198 	int			t_ecn;
199 	uint16_t		t_df;
200 	sa_family_t		t_af;
201 };
202 
203 static int
204 		gre_cmp_src(const struct gre_tunnel *,
205 		    const struct gre_tunnel *);
206 static int
207 		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
208 
209 static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
210 static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
211 static int	gre_del_tunnel(struct gre_tunnel *);
212 
213 static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
214 static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
215 static int	gre_del_vnetid(struct gre_tunnel *);
216 
217 static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
218 static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);
219 
220 static struct mbuf *
221 		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
222 		    struct mbuf *, uint16_t, uint8_t, uint8_t);
223 #define gre_encap(_t, _m, _p, _ttl, _tos) \
224 		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))
225 
226 static struct mbuf *
227 		gre_encap_dst_ip(const struct gre_tunnel *,
228 		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
229 #define gre_encap_ip(_t, _m, _ttl, _tos) \
230 		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))
231 
232 static int
233 		gre_ip_output(const struct gre_tunnel *, struct mbuf *);
234 
235 static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
236 		    u_long, void *);
237 
238 static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
239 static uint8_t	gre_l3_tos(const struct gre_tunnel *,
240 		    const struct mbuf *, uint8_t);
241 
242 /*
243  * layer 3 GRE tunnels
244  */
245 
246 struct gre_softc {
247 	struct gre_tunnel	sc_tunnel; /* must be first */
248 	TAILQ_ENTRY(gre_softc)	sc_entry;
249 
250 	struct ifnet		sc_if;
251 
252 	struct timeout		sc_ka_send;
253 	struct timeout		sc_ka_hold;
254 
255 	unsigned int		sc_ka_state;
256 	unsigned int		sc_ka_timeo;
257 	unsigned int		sc_ka_count;
258 
259 	unsigned int		sc_ka_holdmax;
260 	unsigned int		sc_ka_holdcnt;
261 
262 	SIPHASH_KEY		sc_ka_key;
263 	uint32_t		sc_ka_bias;
264 	int			sc_ka_recvtm;
265 };
266 
267 TAILQ_HEAD(gre_list, gre_softc);
268 
269 struct gre_keepalive {
270 	uint32_t		gk_uptime;
271 	uint32_t		gk_random;
272 	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
273 } __packed __aligned(4);
274 
275 static int	gre_clone_create(struct if_clone *, int);
276 static int	gre_clone_destroy(struct ifnet *);
277 
278 struct if_clone gre_cloner =
279     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
280 
281 /* protected by NET_LOCK */
282 struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
283 
284 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
285 		    struct rtentry *);
286 static void	gre_start(struct ifnet *);
287 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
288 
289 static int	gre_up(struct gre_softc *);
290 static int	gre_down(struct gre_softc *);
291 static void	gre_link_state(struct ifnet *, unsigned int);
292 
293 static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
294 		    struct gre_tunnel *);
295 
296 static struct mbuf *
297 		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
298 		    uint8_t *, uint8_t);
299 #ifdef INET6
300 static struct mbuf *
301 		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
302 		    uint8_t *, uint8_t);
303 #endif
304 #ifdef MPLS
305 static struct mbuf *
306 		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
307 		    uint8_t *, uint8_t);
308 #endif
309 static void	gre_keepalive_send(void *);
310 static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
311 static void	gre_keepalive_hold(void *);
312 
313 static struct mbuf *
314 		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
315 		    struct mbuf *m, sa_family_t);
316 
317 #define gre_l3_encap(_t, _m, _af) \
318 		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
319 
320 struct mgre_softc {
321 	struct gre_tunnel	sc_tunnel; /* must be first */
322 	RBT_ENTRY(mgre_softc)	sc_entry;
323 
324 	struct ifnet		sc_if;
325 };
326 
327 RBT_HEAD(mgre_tree, mgre_softc);
328 
329 static inline int
330 		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);
331 
332 RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
333 
334 static int	mgre_clone_create(struct if_clone *, int);
335 static int	mgre_clone_destroy(struct ifnet *);
336 
337 struct if_clone mgre_cloner =
338     IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);
339 
340 static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
341 static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
342 		    struct rtentry *);
343 static void	mgre_start(struct ifnet *);
344 static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);
345 
346 static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
347 static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
348 static int	mgre_up(struct mgre_softc *);
349 static int	mgre_down(struct mgre_softc *);
350 
351 /* protected by NET_LOCK */
352 struct mgre_tree mgre_tree = RBT_INITIALIZER();
353 
354 /*
355  * Ethernet GRE tunnels
356  */
357 
358 static struct mbuf *
359 		gre_ether_align(struct mbuf *, int);
360 
361 struct egre_softc {
362 	struct gre_tunnel	sc_tunnel; /* must be first */
363 	RBT_ENTRY(egre_softc)	sc_entry;
364 
365 	struct arpcom		sc_ac;
366 	struct ifmedia		sc_media;
367 };
368 
369 RBT_HEAD(egre_tree, egre_softc);
370 
371 static inline int
372 		egre_cmp(const struct egre_softc *, const struct egre_softc *);
373 
374 RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);
375 
376 static int	egre_clone_create(struct if_clone *, int);
377 static int	egre_clone_destroy(struct ifnet *);
378 
379 static void	egre_start(struct ifnet *);
380 static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
381 static int	egre_media_change(struct ifnet *);
382 static void	egre_media_status(struct ifnet *, struct ifmediareq *);
383 
384 static int	egre_up(struct egre_softc *);
385 static int	egre_down(struct egre_softc *);
386 
387 static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
388 		    uint8_t);
389 struct if_clone egre_cloner =
390     IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);
391 
392 /* protected by NET_LOCK */
393 struct egre_tree egre_tree = RBT_INITIALIZER();
394 
395 /*
396  * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
397  */
398 
399 struct nvgre_softc {
400 	struct gre_tunnel	 sc_tunnel; /* must be first */
401 	unsigned int		 sc_ifp0;
402 	RBT_ENTRY(nvgre_softc)	 sc_uentry;
403 	RBT_ENTRY(nvgre_softc)	 sc_mentry;
404 
405 	struct arpcom		 sc_ac;
406 	struct ifmedia		 sc_media;
407 
408 	struct mbuf_queue	 sc_send_list;
409 	struct task		 sc_send_task;
410 
411 	void			*sc_inm;
412 	struct task		 sc_ltask;
413 	struct task		 sc_dtask;
414 
415 	struct etherbridge	 sc_eb;
416 };
417 
418 RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
419 RBT_HEAD(nvgre_mcast_tree, nvgre_softc);
420 
421 static inline int
422 		nvgre_cmp_ucast(const struct nvgre_softc *,
423 		    const struct nvgre_softc *);
424 static int
425 		nvgre_cmp_mcast(const struct gre_tunnel *,
426 		    const union gre_addr *, unsigned int,
427 		    const struct gre_tunnel *, const union gre_addr *,
428 		    unsigned int);
429 static inline int
430 		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
431 		    const struct nvgre_softc *);
432 
433 RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
434 RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
435 
436 static int	nvgre_clone_create(struct if_clone *, int);
437 static int	nvgre_clone_destroy(struct ifnet *);
438 
439 static void	nvgre_start(struct ifnet *);
440 static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);
441 
442 static int	nvgre_up(struct nvgre_softc *);
443 static int	nvgre_down(struct nvgre_softc *);
444 static int	nvgre_set_parent(struct nvgre_softc *, const char *);
445 static void	nvgre_link_change(void *);
446 static void	nvgre_detach(void *);
447 
448 static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
449 		    uint8_t);
450 static void	nvgre_send(void *);
451 
452 static int	nvgre_add_addr(struct nvgre_softc *, const struct ifbareq *);
453 static int	nvgre_del_addr(struct nvgre_softc *, const struct ifbareq *);
454 
455 static int	 nvgre_eb_port_eq(void *, void *, void *);
456 static void	*nvgre_eb_port_take(void *, void *);
457 static void	 nvgre_eb_port_rele(void *, void *);
458 static size_t	 nvgre_eb_port_ifname(void *, char *, size_t, void *);
459 static void	 nvgre_eb_port_sa(void *, struct sockaddr_storage *, void *);
460 
461 static const struct etherbridge_ops nvgre_etherbridge_ops = {
462 	nvgre_eb_port_eq,
463 	nvgre_eb_port_take,
464 	nvgre_eb_port_rele,
465 	nvgre_eb_port_ifname,
466 	nvgre_eb_port_sa,
467 };
468 
469 struct if_clone nvgre_cloner =
470     IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);
471 
472 struct pool nvgre_endpoint_pool;
473 
474 /* protected by NET_LOCK */
475 struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
476 struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
477 
478 /*
479  * MikroTik Ethernet over IP protocol (eoip)
480  */
481 
482 struct eoip_softc {
483 	struct gre_tunnel	sc_tunnel; /* must be first */
484 	uint16_t		sc_tunnel_id;
485 	RBT_ENTRY(eoip_softc)	sc_entry;
486 
487 	struct arpcom		sc_ac;
488 	struct ifmedia		sc_media;
489 
490 	struct timeout		sc_ka_send;
491 	struct timeout		sc_ka_hold;
492 
493 	unsigned int		sc_ka_state;
494 	unsigned int		sc_ka_timeo;
495 	unsigned int		sc_ka_count;
496 
497 	unsigned int		sc_ka_holdmax;
498 	unsigned int		sc_ka_holdcnt;
499 };
500 
501 RBT_HEAD(eoip_tree, eoip_softc);
502 
503 static inline int
504 		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);
505 
506 RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
507 
508 static int	eoip_clone_create(struct if_clone *, int);
509 static int	eoip_clone_destroy(struct ifnet *);
510 
511 static void	eoip_start(struct ifnet *);
512 static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);
513 
514 static void	eoip_keepalive_send(void *);
515 static void	eoip_keepalive_recv(struct eoip_softc *);
516 static void	eoip_keepalive_hold(void *);
517 
518 static int	eoip_up(struct eoip_softc *);
519 static int	eoip_down(struct eoip_softc *);
520 
521 static struct mbuf *
522 		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);
523 
524 static struct mbuf *
525 		eoip_input(struct gre_tunnel *, struct mbuf *,
526 		    const struct gre_header *, uint8_t, int);
527 struct if_clone eoip_cloner =
528     IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);
529 
530 /* protected by NET_LOCK */
531 struct eoip_tree eoip_tree = RBT_INITIALIZER();
532 
533 /*
534  * It is not easy to calculate the right value for a GRE MTU.
535  * We leave this task to the admin and use the same default that
536  * other vendors use.
537  */
538 #define GREMTU 1476
539 
540 /*
541  * We can control the acceptance of GRE and MobileIP packets by
542  * altering the sysctl net.inet.gre.allow values
543  * respectively. Zero means drop them, all else is acceptance.  We can also
544  * control acceptance of WCCPv1-style GRE packets through the
545  * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
546  * allowed as well.
547  *
548  */
549 int gre_allow = 0;
550 int gre_wccp = 0;
551 
552 void
553 greattach(int n)
554 {
555 	if_clone_attach(&gre_cloner);
556 	if_clone_attach(&mgre_cloner);
557 	if_clone_attach(&egre_cloner);
558 	if_clone_attach(&nvgre_cloner);
559 	if_clone_attach(&eoip_cloner);
560 }
561 
562 static int
563 gre_clone_create(struct if_clone *ifc, int unit)
564 {
565 	struct gre_softc *sc;
566 	struct ifnet *ifp;
567 
568 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
569 	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
570 	    ifc->ifc_name, unit);
571 
572 	ifp = &sc->sc_if;
573 	ifp->if_softc = sc;
574 	ifp->if_type = IFT_TUNNEL;
575 	ifp->if_hdrlen = GRE_HDRLEN;
576 	ifp->if_mtu = GREMTU;
577 	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
578 	ifp->if_xflags = IFXF_CLONED;
579 	ifp->if_bpf_mtap = p2p_bpf_mtap;
580 	ifp->if_input = p2p_input;
581 	ifp->if_output = gre_output;
582 	ifp->if_start = gre_start;
583 	ifp->if_ioctl = gre_ioctl;
584 	ifp->if_rtrequest = p2p_rtrequest;
585 
586 	sc->sc_tunnel.t_ttl = ip_defttl;
587 	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
588 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
589 	sc->sc_tunnel.t_df = htons(0);
590 	sc->sc_tunnel.t_ecn = ECN_ALLOWED;
591 
592 	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
593 	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
594 	sc->sc_ka_state = GRE_KA_NONE;
595 
596 	if_counters_alloc(ifp);
597 	if_attach(ifp);
598 	if_alloc_sadl(ifp);
599 
600 #if NBPFILTER > 0
601 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
602 #endif
603 
604 	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);
605 
606 	NET_LOCK();
607 	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
608 	NET_UNLOCK();
609 
610 	return (0);
611 }
612 
613 static int
614 gre_clone_destroy(struct ifnet *ifp)
615 {
616 	struct gre_softc *sc = ifp->if_softc;
617 
618 	NET_LOCK();
619 	if (ISSET(ifp->if_flags, IFF_RUNNING))
620 		gre_down(sc);
621 
622 	TAILQ_REMOVE(&gre_list, sc, sc_entry);
623 	NET_UNLOCK();
624 
625 	if_detach(ifp);
626 
627 	free(sc, M_DEVBUF, sizeof(*sc));
628 
629 	return (0);
630 }
631 
632 static int
633 mgre_clone_create(struct if_clone *ifc, int unit)
634 {
635 	struct mgre_softc *sc;
636 	struct ifnet *ifp;
637 
638 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
639 	ifp = &sc->sc_if;
640 
641 	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
642 	    "%s%d", ifc->ifc_name, unit);
643 
644 	ifp->if_softc = sc;
645 	ifp->if_type = IFT_L3IPVLAN;
646 	ifp->if_hdrlen = GRE_HDRLEN;
647 	ifp->if_mtu = GREMTU;
648 	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
649 	ifp->if_xflags = IFXF_CLONED;
650 	ifp->if_bpf_mtap = p2p_bpf_mtap;
651 	ifp->if_input = p2p_input;
652 	ifp->if_rtrequest = mgre_rtrequest;
653 	ifp->if_output = mgre_output;
654 	ifp->if_start = mgre_start;
655 	ifp->if_ioctl = mgre_ioctl;
656 
657 	sc->sc_tunnel.t_ttl = ip_defttl;
658 	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
659 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
660 	sc->sc_tunnel.t_df = htons(0);
661 	sc->sc_tunnel.t_ecn = ECN_ALLOWED;
662 
663 	if_counters_alloc(ifp);
664 	if_attach(ifp);
665 	if_alloc_sadl(ifp);
666 
667 #if NBPFILTER > 0
668 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
669 #endif
670 
671 	return (0);
672 }
673 
674 static int
675 mgre_clone_destroy(struct ifnet *ifp)
676 {
677 	struct mgre_softc *sc = ifp->if_softc;
678 
679 	NET_LOCK();
680 	if (ISSET(ifp->if_flags, IFF_RUNNING))
681 		mgre_down(sc);
682 	NET_UNLOCK();
683 
684 	if_detach(ifp);
685 
686 	free(sc, M_DEVBUF, sizeof(*sc));
687 
688 	return (0);
689 }
690 
691 static int
692 egre_clone_create(struct if_clone *ifc, int unit)
693 {
694 	struct egre_softc *sc;
695 	struct ifnet *ifp;
696 
697 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
698 	ifp = &sc->sc_ac.ac_if;
699 
700 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
701 	    ifc->ifc_name, unit);
702 
703 	ifp->if_softc = sc;
704 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
705 	ifp->if_ioctl = egre_ioctl;
706 	ifp->if_start = egre_start;
707 	ifp->if_xflags = IFXF_CLONED;
708 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
709 	ether_fakeaddr(ifp);
710 
711 	sc->sc_tunnel.t_ttl = ip_defttl;
712 	sc->sc_tunnel.t_txhprio = 0;
713 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
714 	sc->sc_tunnel.t_df = htons(0);
715 
716 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
717 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
718 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
719 
720 	if_counters_alloc(ifp);
721 	if_attach(ifp);
722 	ether_ifattach(ifp);
723 
724 	return (0);
725 }
726 
727 static int
728 egre_clone_destroy(struct ifnet *ifp)
729 {
730 	struct egre_softc *sc = ifp->if_softc;
731 
732 	NET_LOCK();
733 	if (ISSET(ifp->if_flags, IFF_RUNNING))
734 		egre_down(sc);
735 	NET_UNLOCK();
736 
737 	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
738 	ether_ifdetach(ifp);
739 	if_detach(ifp);
740 
741 	free(sc, M_DEVBUF, sizeof(*sc));
742 
743 	return (0);
744 }
745 
746 static int
747 nvgre_clone_create(struct if_clone *ifc, int unit)
748 {
749 	struct nvgre_softc *sc;
750 	struct ifnet *ifp;
751 	struct gre_tunnel *tunnel;
752 	int error;
753 
754 	if (nvgre_endpoint_pool.pr_size == 0) {
755 		pool_init(&nvgre_endpoint_pool, sizeof(union gre_addr),
756 		    0, IPL_SOFTNET, 0, "nvgreep", NULL);
757 	}
758 
759 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
760 	ifp = &sc->sc_ac.ac_if;
761 
762 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
763 	    ifc->ifc_name, unit);
764 
765 	error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
766 	    &nvgre_etherbridge_ops, sc);
767 	if (error != 0) {
768 		free(sc, M_DEVBUF, sizeof(*sc));
769 		return (error);
770 	}
771 
772 	ifp->if_softc = sc;
773 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
774 	ifp->if_ioctl = nvgre_ioctl;
775 	ifp->if_start = nvgre_start;
776 	ifp->if_xflags = IFXF_CLONED;
777 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
778 	ether_fakeaddr(ifp);
779 
780 	tunnel = &sc->sc_tunnel;
781 	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
782 	tunnel->t_txhprio = 0;
783 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
784 	tunnel->t_df = htons(IP_DF);
785 	tunnel->t_key_mask = GRE_KEY_ENTROPY;
786 	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
787 	    GRE_KEY_ENTROPY_SHIFT);
788 
789 	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
790 	task_set(&sc->sc_send_task, nvgre_send, sc);
791 	task_set(&sc->sc_ltask, nvgre_link_change, sc);
792 	task_set(&sc->sc_dtask, nvgre_detach, sc);
793 
794 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
795 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
796 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
797 
798 	if_counters_alloc(ifp);
799 	if_attach(ifp);
800 	ether_ifattach(ifp);
801 
802 	return (0);
803 }
804 
805 static int
806 nvgre_clone_destroy(struct ifnet *ifp)
807 {
808 	struct nvgre_softc *sc = ifp->if_softc;
809 
810 	NET_LOCK();
811 	if (ISSET(ifp->if_flags, IFF_RUNNING))
812 		nvgre_down(sc);
813 	NET_UNLOCK();
814 
815 	etherbridge_destroy(&sc->sc_eb);
816 
817 	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
818 	ether_ifdetach(ifp);
819 	if_detach(ifp);
820 
821 	free(sc, M_DEVBUF, sizeof(*sc));
822 
823 	return (0);
824 }
825 
826 static int
827 eoip_clone_create(struct if_clone *ifc, int unit)
828 {
829 	struct eoip_softc *sc;
830 	struct ifnet *ifp;
831 
832 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
833 	ifp = &sc->sc_ac.ac_if;
834 
835 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
836 	    ifc->ifc_name, unit);
837 
838 	ifp->if_softc = sc;
839 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
840 	ifp->if_ioctl = eoip_ioctl;
841 	ifp->if_start = eoip_start;
842 	ifp->if_xflags = IFXF_CLONED;
843 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
844 	ether_fakeaddr(ifp);
845 
846 	sc->sc_tunnel.t_ttl = ip_defttl;
847 	sc->sc_tunnel.t_txhprio = 0;
848 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
849 	sc->sc_tunnel.t_df = htons(0);
850 
851 	sc->sc_ka_timeo = 10;
852 	sc->sc_ka_count = 10;
853 
854 	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
855 	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
856 	sc->sc_ka_state = GRE_KA_DOWN;
857 
858 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
859 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
860 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
861 
862 	if_counters_alloc(ifp);
863 	if_attach(ifp);
864 	ether_ifattach(ifp);
865 
866 	return (0);
867 }
868 
869 static int
870 eoip_clone_destroy(struct ifnet *ifp)
871 {
872 	struct eoip_softc *sc = ifp->if_softc;
873 
874 	NET_LOCK();
875 	if (ISSET(ifp->if_flags, IFF_RUNNING))
876 		eoip_down(sc);
877 	NET_UNLOCK();
878 
879 	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
880 	ether_ifdetach(ifp);
881 	if_detach(ifp);
882 
883 	free(sc, M_DEVBUF, sizeof(*sc));
884 
885 	return (0);
886 }
887 
888 int
889 gre_input(struct mbuf **mp, int *offp, int type, int af)
890 {
891 	struct mbuf *m = *mp;
892 	struct gre_tunnel key;
893 	struct ip *ip;
894 
895 	ip = mtod(m, struct ip *);
896 
897 	/* XXX check if ip_src is sane for nvgre? */
898 
899 	key.t_af = AF_INET;
900 	key.t_src4 = ip->ip_dst;
901 	key.t_dst4 = ip->ip_src;
902 
903 	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
904 		return (rip_input(mp, offp, type, af));
905 
906 	return (IPPROTO_DONE);
907 }
908 
909 #ifdef INET6
910 int
911 gre_input6(struct mbuf **mp, int *offp, int type, int af)
912 {
913 	struct mbuf *m = *mp;
914 	struct gre_tunnel key;
915 	struct ip6_hdr *ip6;
916 	uint32_t flow;
917 
918 	ip6 = mtod(m, struct ip6_hdr *);
919 
920 	/* XXX check if ip6_src is sane for nvgre? */
921 
922 	key.t_af = AF_INET6;
923 	key.t_src6 = ip6->ip6_dst;
924 	key.t_dst6 = ip6->ip6_src;
925 
926 	flow = bemtoh32(&ip6->ip6_flow);
927 
928 	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
929 		return (rip6_input(mp, offp, type, af));
930 
931 	return (IPPROTO_DONE);
932 }
933 #endif /* INET6 */
934 
935 static inline struct ifnet *
936 gre_find(const struct gre_tunnel *key)
937 {
938 	struct gre_softc *sc;
939 
940 	TAILQ_FOREACH(sc, &gre_list, sc_entry) {
941 		if (gre_cmp(key, &sc->sc_tunnel) != 0)
942 			continue;
943 
944 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
945 			continue;
946 
947 		return (&sc->sc_if);
948 	}
949 
950 	return (NULL);
951 }
952 
953 static inline struct ifnet *
954 mgre_find(const struct gre_tunnel *key)
955 {
956 	struct mgre_softc *sc;
957 
958 	NET_ASSERT_LOCKED();
959 	sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
960 	if (sc != NULL)
961 		return (&sc->sc_if);
962 
963 	return (NULL);
964 }
965 
966 static struct mbuf *
967 gre_input_1(struct gre_tunnel *key, struct mbuf *m,
968     const struct gre_header *gh, uint8_t otos, int iphlen)
969 {
970 	switch (gh->gre_proto) {
971 	case htons(ETHERTYPE_PPP):
972 #ifdef PIPEX
973 		if (pipex_enable) {
974 			struct pipex_session *session;
975 
976 			session = pipex_pptp_lookup_session(m);
977 			if (session != NULL &&
978 			    pipex_pptp_input(m, session) == NULL)
979 				return (NULL);
980 		}
981 #endif
982 		break;
983 	case htons(GRE_EOIP):
984 		return (eoip_input(key, m, gh, otos, iphlen));
985 		break;
986 	}
987 
988 	return (m);
989 }
990 
991 static int
992 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
993     struct gre_tunnel *key)
994 {
995 	struct mbuf *m = *mp;
996 	int iphlen = *offp, hlen, rxprio;
997 	struct ifnet *ifp;
998 	const struct gre_tunnel *tunnel;
999 	caddr_t buf;
1000 	struct gre_header *gh;
1001 	struct gre_h_key *gkh;
1002 	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
1003 	    uint8_t *, uint8_t);
1004 	int mcast = 0;
1005 	uint8_t itos;
1006 
1007 	if (!gre_allow)
1008 		goto decline;
1009 
1010 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
1011 
1012 	hlen = iphlen + sizeof(*gh);
1013 	if (m->m_pkthdr.len < hlen)
1014 		goto decline;
1015 
1016 	m = m_pullup(m, hlen);
1017 	if (m == NULL)
1018 		return (IPPROTO_DONE);
1019 
1020 	buf = mtod(m, caddr_t);
1021 	gh = (struct gre_header *)(buf + iphlen);
1022 
1023 	/* check the version */
1024 	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
1025 	case htons(GRE_VERS_0):
1026 		break;
1027 
1028 	case htons(GRE_VERS_1):
1029 		m = gre_input_1(key, m, gh, otos, iphlen);
1030 		if (m == NULL)
1031 			return (IPPROTO_DONE);
1032 		/* FALLTHROUGH */
1033 	default:
1034 		goto decline;
1035 	}
1036 
1037 	/* the only optional bit in the header is K flag */
1038 	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
1039 		goto decline;
1040 
1041 	if (gh->gre_flags & htons(GRE_KP)) {
1042 		hlen += sizeof(*gkh);
1043 		if (m->m_pkthdr.len < hlen)
1044 			goto decline;
1045 
1046 		m = m_pullup(m, hlen);
1047 		if (m == NULL)
1048 			return (IPPROTO_DONE);
1049 
1050 		buf = mtod(m, caddr_t);
1051 		gh = (struct gre_header *)(buf + iphlen);
1052 		gkh = (struct gre_h_key *)(gh + 1);
1053 
1054 		key->t_key_mask = GRE_KEY_MASK;
1055 		key->t_key = gkh->gre_key;
1056 	} else
1057 		key->t_key_mask = GRE_KEY_NONE;
1058 
1059 	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
1060 		if (egre_input(key, m, hlen, otos) == -1 &&
1061 		    nvgre_input(key, m, hlen, otos) == -1)
1062 			goto decline;
1063 
1064 		return (IPPROTO_DONE);
1065 	}
1066 
1067 	ifp = gre_find(key);
1068 	if (ifp == NULL) {
1069 		ifp = mgre_find(key);
1070 		if (ifp == NULL)
1071 			goto decline;
1072 	}
1073 
1074 	switch (gh->gre_proto) {
1075 	case htons(GRE_WCCP): {
1076 		struct mbuf *n;
1077 		int off;
1078 
1079 		/* WCCP/GRE:
1080 		 *   So far as I can see (and test) it seems that Cisco's WCCP
1081 		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
1082 		 *   only in its protocol number.  At least, it works for me.
1083 		 *
1084 		 *   The Internet Drafts can be found if you look for
1085 		 *   the following:
1086 		 *     draft-forster-wrec-wccp-v1-00.txt
1087 		 *     draft-wilson-wrec-wccp-v2-01.txt
1088 		 */
1089 
1090 		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
1091 			goto decline;
1092 
1093 		/*
1094 		 * If the first nibble of the payload does not look like
1095 		 * IPv4, assume it is WCCP v2.
1096 		 */
1097 		n = m_getptr(m, hlen, &off);
1098 		if (n == NULL)
1099 			goto decline;
1100 		if (n->m_data[off] >> 4 != IPVERSION)
1101 			hlen += 4;  /* four-octet Redirect header */
1102 
1103 		/* FALLTHROUGH */
1104 	}
1105 	case htons(ETHERTYPE_IP):
1106 		m->m_pkthdr.ph_family = AF_INET;
1107 		patch = gre_ipv4_patch;
1108 		break;
1109 #ifdef INET6
1110 	case htons(ETHERTYPE_IPV6):
1111 		m->m_pkthdr.ph_family = AF_INET6;
1112 		patch = gre_ipv6_patch;
1113 		break;
1114 #endif
1115 #ifdef MPLS
1116 	case htons(ETHERTYPE_MPLS_MCAST):
1117 		mcast = M_MCAST|M_BCAST;
1118 		/* fallthrough */
1119 	case htons(ETHERTYPE_MPLS):
1120 		m->m_pkthdr.ph_family = AF_MPLS;
1121 		patch = gre_mpls_patch;
1122 		break;
1123 #endif
1124 	case htons(0):
1125 		if (ifp->if_type != IFT_TUNNEL) {
1126 			/* keepalives dont make sense for mgre */
1127 			goto decline;
1128 		}
1129 
1130 		m_adj(m, hlen);
1131 		gre_keepalive_recv(ifp, m);
1132 		return (IPPROTO_DONE);
1133 
1134 	default:
1135 		goto decline;
1136 	}
1137 
1138 	/* it's ours now */
1139 
1140 	m_adj(m, hlen);
1141 
1142 	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */
1143 
1144 	m = (*patch)(tunnel, m, &itos, otos);
1145 	if (m == NULL)
1146 		return (IPPROTO_DONE);
1147 
1148 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
1149 		SET(m->m_pkthdr.csum_flags, M_FLOWID);
1150 		m->m_pkthdr.ph_flowid =
1151 		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1152 	}
1153 
1154 	rxprio = tunnel->t_rxhprio;
1155 	switch (rxprio) {
1156 	case IF_HDRPRIO_PACKET:
1157 		/* nop */
1158 		break;
1159 	case IF_HDRPRIO_OUTER:
1160 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
1161 		break;
1162 	case IF_HDRPRIO_PAYLOAD:
1163 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
1164 		break;
1165 	default:
1166 		m->m_pkthdr.pf.prio = rxprio;
1167 		break;
1168 	}
1169 
1170 	m->m_flags &= ~(M_MCAST|M_BCAST);
1171 	m->m_flags |= mcast;
1172 
1173 	if_vinput(ifp, m);
1174 	return (IPPROTO_DONE);
1175 decline:
1176 	*mp = m;
1177 	return (-1);
1178 }
1179 
1180 static struct mbuf *
1181 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1182     uint8_t *itosp, uint8_t otos)
1183 {
1184 	struct ip *ip;
1185 	uint8_t itos;
1186 
1187 	m = m_pullup(m, sizeof(*ip));
1188 	if (m == NULL)
1189 		return (NULL);
1190 
1191 	ip = mtod(m, struct ip *);
1192 
1193 	itos = ip->ip_tos;
1194 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1195 		m_freem(m);
1196 		return (NULL);
1197 	}
1198 	if (itos != ip->ip_tos)
1199 		ip_tos_patch(ip, itos);
1200 
1201 	*itosp = itos;
1202 
1203 	return (m);
1204 }
1205 
1206 #ifdef INET6
1207 static struct mbuf *
1208 gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1209     uint8_t *itosp, uint8_t otos)
1210 {
1211 	struct ip6_hdr *ip6;
1212 	uint32_t flow;
1213 	uint8_t itos;
1214 
1215 	m = m_pullup(m, sizeof(*ip6));
1216 	if (m == NULL)
1217 		return (NULL);
1218 
1219 	ip6 = mtod(m, struct ip6_hdr *);
1220 
1221 	flow = bemtoh32(&ip6->ip6_flow);
1222 	itos = flow >> 20;
1223 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1224 		m_freem(m);
1225 		return (NULL);
1226 	}
1227 
1228 	CLR(flow, 0xff << 20);
1229 	SET(flow, itos << 20);
1230 	htobem32(&ip6->ip6_flow, flow);
1231 
1232 	*itosp = itos;
1233 
1234 	return (m);
1235 }
1236 #endif
1237 
1238 #ifdef MPLS
1239 static struct mbuf *
1240 gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1241     uint8_t *itosp, uint8_t otos)
1242 {
1243 	uint8_t itos;
1244 	uint32_t shim;
1245 
1246 	m = m_pullup(m, sizeof(shim));
1247 	if (m == NULL)
1248 		return (NULL);
1249 
1250 	shim = *mtod(m, uint32_t *);
1251 	itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;
1252 
1253 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1254 		m_freem(m);
1255 		return (NULL);
1256 	}
1257 
1258 	*itosp = itos;
1259 
1260 	return (m);
1261 }
1262 #endif
1263 
1264 #define gre_l2_prio(_t, _m, _otos) do {					\
1265 	int rxprio = (_t)->t_rxhprio;					\
1266 	switch (rxprio) {						\
1267 	case IF_HDRPRIO_PACKET:						\
1268 		/* nop */						\
1269 		break;							\
1270 	case IF_HDRPRIO_OUTER:						\
1271 		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
1272 		break;							\
1273 	default:							\
1274 		(_m)->m_pkthdr.pf.prio = rxprio;			\
1275 		break;							\
1276 	}								\
1277 } while (0)
1278 
1279 static int
1280 egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
1281 {
1282 	struct egre_softc *sc;
1283 
1284 	NET_ASSERT_LOCKED();
1285 	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
1286 	if (sc == NULL)
1287 		return (-1);
1288 
1289 	/* it's ours now */
1290 	m = gre_ether_align(m, hlen);
1291 	if (m == NULL)
1292 		return (0);
1293 
1294 	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
1295 		SET(m->m_pkthdr.csum_flags, M_FLOWID);
1296 		m->m_pkthdr.ph_flowid =
1297 		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1298 	}
1299 
1300 	m->m_flags &= ~(M_MCAST|M_BCAST);
1301 
1302 	gre_l2_prio(&sc->sc_tunnel, m, otos);
1303 
1304 	if_vinput(&sc->sc_ac.ac_if, m);
1305 
1306 	return (0);
1307 }
1308 
1309 static inline struct nvgre_softc *
1310 nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
1311 {
1312 	struct nvgre_softc *sc;
1313 	int rv;
1314 
1315 	/*
1316 	 * building an nvgre_softc to use with RBT_FIND is expensive, and
1317 	 * would need to swap the src and dst addresses in the key. so do the
1318 	 * find by hand.
1319 	 */
1320 
1321 	NET_ASSERT_LOCKED();
1322 	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
1323 	while (sc != NULL) {
1324 		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
1325 		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
1326 		if (rv == 0)
1327 			return (sc);
1328 		if (rv < 0)
1329 			sc = RBT_LEFT(nvgre_mcast_tree, sc);
1330 		else
1331 			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
1332 	}
1333 
1334 	return (NULL);
1335 }
1336 
1337 static inline struct nvgre_softc *
1338 nvgre_ucast_find(const struct gre_tunnel *key)
1339 {
1340 	NET_ASSERT_LOCKED();
1341 	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
1342 	    (struct nvgre_softc *)key));
1343 }
1344 
1345 static int
1346 nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
1347     uint8_t otos)
1348 {
1349 	struct nvgre_softc *sc;
1350 	struct ether_header *eh;
1351 
1352 	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
1353 		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
1354 	else
1355 		sc = nvgre_ucast_find(key);
1356 
1357 	if (sc == NULL)
1358 		return (-1);
1359 
1360 	/* it's ours now */
1361 	m = gre_ether_align(m, hlen);
1362 	if (m == NULL)
1363 		return (0);
1364 
1365 	eh = mtod(m, struct ether_header *);
1366 	etherbridge_map_ea(&sc->sc_eb, (void *)&key->t_dst,
1367 	    (struct ether_addr *)eh->ether_shost);
1368 
1369 	SET(m->m_pkthdr.csum_flags, M_FLOWID);
1370 	m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1371 
1372 	m->m_flags &= ~(M_MCAST|M_BCAST);
1373 
1374 	gre_l2_prio(&sc->sc_tunnel, m, otos);
1375 
1376 	if_vinput(&sc->sc_ac.ac_if, m);
1377 
1378 	return (0);
1379 }
1380 
1381 static struct mbuf *
1382 gre_ether_align(struct mbuf *m, int hlen)
1383 {
1384 	struct mbuf *n;
1385 	int off;
1386 
1387 	m_adj(m, hlen);
1388 
1389 	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
1390 		m_freem(m);
1391 		return (NULL);
1392 	}
1393 
1394 	m = m_pullup(m, sizeof(struct ether_header));
1395 	if (m == NULL)
1396 		return (NULL);
1397 
1398 	n = m_getptr(m, sizeof(struct ether_header), &off);
1399 	if (n == NULL) {
1400 		m_freem(m);
1401 		return (NULL);
1402 	}
1403 
1404 	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
1405 		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
1406 		m_freem(m);
1407 		if (n == NULL)
1408 			return (NULL);
1409 		m = n;
1410 	}
1411 
1412 	return (m);
1413 }
1414 
1415 static void
1416 gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
1417 {
1418 	struct gre_softc *sc = ifp->if_softc;
1419 	struct gre_keepalive *gk;
1420 	SIPHASH_CTX ctx;
1421 	uint8_t digest[SIPHASH_DIGEST_LENGTH];
1422 	int uptime, delta;
1423 	int tick = ticks;
1424 
1425 	if (sc->sc_ka_state == GRE_KA_NONE ||
1426 	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
1427 		goto drop;
1428 
1429 	if (m->m_pkthdr.len < sizeof(*gk))
1430 		goto drop;
1431 	m = m_pullup(m, sizeof(*gk));
1432 	if (m == NULL)
1433 		return;
1434 
1435 	gk = mtod(m, struct gre_keepalive *);
1436 	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
1437 	delta = tick - uptime;
1438 	if (delta < 0)
1439 		goto drop;
1440 	if (delta > hz * 10) /* magic */
1441 		goto drop;
1442 
1443 	/* avoid too much siphash work */
1444 	delta = tick - sc->sc_ka_recvtm;
1445 	if (delta > 0 && delta < (hz / 10))
1446 		goto drop;
1447 
1448 	SipHash24_Init(&ctx, &sc->sc_ka_key);
1449 	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
1450 	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
1451 	SipHash24_Final(digest, &ctx);
1452 
1453 	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
1454 		goto drop;
1455 
1456 	sc->sc_ka_recvtm = tick;
1457 
1458 	switch (sc->sc_ka_state) {
1459 	case GRE_KA_DOWN:
1460 		sc->sc_ka_state = GRE_KA_HOLD;
1461 		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
1462 		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
1463 		    16 * sc->sc_ka_count);
1464 		break;
1465 	case GRE_KA_HOLD:
1466 		if (--sc->sc_ka_holdcnt > 0)
1467 			break;
1468 
1469 		sc->sc_ka_state = GRE_KA_UP;
1470 		gre_link_state(&sc->sc_if, sc->sc_ka_state);
1471 		break;
1472 
1473 	case GRE_KA_UP:
1474 		sc->sc_ka_holdmax--;
1475 		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
1476 		break;
1477 	}
1478 
1479 	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
1480 
1481 drop:
1482 	m_freem(m);
1483 }
1484 
1485 static int
1486 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1487     struct rtentry *rt)
1488 {
1489 	struct m_tag *mtag;
1490 	int error = 0;
1491 
1492 	if (!gre_allow) {
1493 		error = EACCES;
1494 		goto drop;
1495 	}
1496 
1497 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
1498 		error = ENETDOWN;
1499 		goto drop;
1500 	}
1501 
1502 	switch (dst->sa_family) {
1503 	case AF_INET:
1504 #ifdef INET6
1505 	case AF_INET6:
1506 #endif
1507 #ifdef MPLS
1508 	case AF_MPLS:
1509 #endif
1510 		break;
1511 	default:
1512 		error = EAFNOSUPPORT;
1513 		goto drop;
1514 	}
1515 
1516 	/* Try to limit infinite recursion through misconfiguration. */
1517 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
1518 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
1519 		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
1520 		    sizeof(ifp->if_index)) == 0) {
1521 			m_freem(m);
1522 			error = EIO;
1523 			goto end;
1524 		}
1525 	}
1526 
1527 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
1528 	if (mtag == NULL) {
1529 		m_freem(m);
1530 		error = ENOBUFS;
1531 		goto end;
1532 	}
1533 	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
1534 	m_tag_prepend(m, mtag);
1535 
1536 	m->m_pkthdr.ph_family = dst->sa_family;
1537 
1538 	error = if_enqueue(ifp, m);
1539 end:
1540 	if (error)
1541 		ifp->if_oerrors++;
1542 	return (error);
1543 
1544 drop:
1545 	m_freem(m);
1546 	return (error);
1547 }
1548 
1549 void
1550 gre_start(struct ifnet *ifp)
1551 {
1552 	struct gre_softc *sc = ifp->if_softc;
1553 	struct mbuf *m;
1554 	int af;
1555 #if NBPFILTER > 0
1556 	caddr_t if_bpf;
1557 #endif
1558 
1559 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1560 		af = m->m_pkthdr.ph_family;
1561 
1562 #if NBPFILTER > 0
1563 		if_bpf = ifp->if_bpf;
1564 		if (if_bpf)
1565 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
1566 #endif
1567 
1568 		m = gre_l3_encap(&sc->sc_tunnel, m, af);
1569 		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1570 			ifp->if_oerrors++;
1571 			continue;
1572 		}
1573 	}
1574 }
1575 
1576 void
1577 mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1578 {
1579 	struct ifnet *lo0ifp;
1580 	struct ifaddr *ifa, *lo0ifa;
1581 
1582 	switch (req) {
1583 	case RTM_ADD:
1584 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1585 			break;
1586 
1587 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1588 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1589 			    rt_key(rt)->sa_len) == 0)
1590 				break;
1591 		}
1592 
1593 		if (ifa == NULL)
1594 			break;
1595 
1596 		KASSERT(ifa == rt->rt_ifa);
1597 
1598 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1599 		KASSERT(lo0ifp != NULL);
1600 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1601 			if (lo0ifa->ifa_addr->sa_family ==
1602 			    ifa->ifa_addr->sa_family)
1603 				break;
1604 		}
1605 		if_put(lo0ifp);
1606 
1607 		if (lo0ifa == NULL)
1608 			break;
1609 
1610 		rt->rt_flags &= ~RTF_LLINFO;
1611 		break;
1612 	case RTM_DELETE:
1613 	case RTM_RESOLVE:
1614 	default:
1615 		break;
1616 	}
1617 }
1618 
1619 static int
1620 mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
1621     struct rtentry *rt0)
1622 {
1623 	struct mgre_softc *sc = ifp->if_softc;
1624 	struct sockaddr *gate;
1625 	struct rtentry *rt;
1626 	struct m_tag *mtag;
1627 	int error = 0;
1628 	sa_family_t af;
1629 	const void *addr;
1630 
1631 	if (!gre_allow) {
1632 		error = EACCES;
1633 		goto drop;
1634 	}
1635 
1636 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
1637 		error = ENETDOWN;
1638 		goto drop;
1639 	}
1640 
1641 	switch (dest->sa_family) {
1642 	case AF_INET:
1643 #ifdef INET6
1644 	case AF_INET6:
1645 #endif
1646 #ifdef MPLS
1647 	case AF_MPLS:
1648 #endif
1649 		break;
1650 	default:
1651 		error = EAFNOSUPPORT;
1652 		goto drop;
1653 	}
1654 
1655 	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
1656 		error = ENETUNREACH;
1657 		goto drop;
1658 	}
1659 
1660 	rt = rt_getll(rt0);
1661 
1662 	/* check rt_expire? */
1663 	if (ISSET(rt->rt_flags, RTF_REJECT)) {
1664 		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
1665 		goto drop;
1666 	}
1667 	if (!ISSET(rt->rt_flags, RTF_HOST)) {
1668 		error = EHOSTUNREACH;
1669 		goto drop;
1670 	}
1671 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1672 		error = EINVAL;
1673 		goto drop;
1674 	}
1675 
1676 	gate = rt->rt_gateway;
1677 	af = gate->sa_family;
1678 	if (af != sc->sc_tunnel.t_af) {
1679 		error = EAGAIN;
1680 		goto drop;
1681 	}
1682 
1683 	/* Try to limit infinite recursion through misconfiguration. */
1684 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
1685 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
1686 		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
1687 		    sizeof(ifp->if_index)) == 0) {
1688 			error = EIO;
1689 			goto drop;
1690 		}
1691 	}
1692 
1693 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
1694 	if (mtag == NULL) {
1695 		error = ENOBUFS;
1696 		goto drop;
1697 	}
1698 	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
1699 	m_tag_prepend(m, mtag);
1700 
1701 	switch (af) {
1702 	case AF_INET: {
1703 		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
1704 		addr = &sin->sin_addr;
1705 		break;
1706 	}
1707 #ifdef INET6
1708 	case AF_INET6: {
1709 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
1710 		addr = &sin6->sin6_addr;
1711 		break;
1712 	}
1713  #endif
1714 	default:
1715 		unhandled_af(af);
1716 		/* NOTREACHED */
1717 	}
1718 
1719 	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
1720 	if (m == NULL) {
1721 		ifp->if_oerrors++;
1722 		return (ENOBUFS);
1723 	}
1724 
1725 	m->m_pkthdr.ph_family = dest->sa_family;
1726 
1727 	error = if_enqueue(ifp, m);
1728 	if (error)
1729 		ifp->if_oerrors++;
1730 	return (error);
1731 
1732 drop:
1733 	m_freem(m);
1734 	return (error);
1735 }
1736 
1737 static void
1738 mgre_start(struct ifnet *ifp)
1739 {
1740 	struct mgre_softc *sc = ifp->if_softc;
1741 	struct mbuf *m;
1742 #if NBPFILTER > 0
1743 	caddr_t if_bpf;
1744 #endif
1745 
1746 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1747 #if NBPFILTER > 0
1748 		if_bpf = ifp->if_bpf;
1749 		if (if_bpf) {
1750 			struct m_hdr mh;
1751 			struct mbuf *n;
1752 			int off;
1753 
1754 			n = m_getptr(m, ifp->if_hdrlen, &off);
1755 			KASSERT(n != NULL);
1756 
1757 			mh.mh_flags = 0;
1758 			mh.mh_next = n->m_next;
1759 			mh.mh_len = n->m_len - off;
1760 			mh.mh_data = n->m_data + off;
1761 
1762 			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
1763 			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
1764 		}
1765 #endif
1766 
1767 		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
1768 			ifp->if_oerrors++;
1769 			continue;
1770 		}
1771 	}
1772 }
1773 
1774 static void
1775 egre_start(struct ifnet *ifp)
1776 {
1777 	struct egre_softc *sc = ifp->if_softc;
1778 	struct mbuf *m0, *m;
1779 #if NBPFILTER > 0
1780 	caddr_t if_bpf;
1781 #endif
1782 
1783 	if (!gre_allow) {
1784 		ifq_purge(&ifp->if_snd);
1785 		return;
1786 	}
1787 
1788 	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
1789 #if NBPFILTER > 0
1790 		if_bpf = ifp->if_bpf;
1791 		if (if_bpf)
1792 			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
1793 #endif
1794 
1795 		/* force prepend mbuf because of alignment problems */
1796 		m = m_get(M_DONTWAIT, m0->m_type);
1797 		if (m == NULL) {
1798 			m_freem(m0);
1799 			continue;
1800 		}
1801 
1802 		M_MOVE_PKTHDR(m, m0);
1803 		m->m_next = m0;
1804 
1805 		m_align(m, 0);
1806 		m->m_len = 0;
1807 
1808 		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
1809 		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
1810 		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1811 			ifp->if_oerrors++;
1812 			continue;
1813 		}
1814 	}
1815 }
1816 
1817 static struct mbuf *
1818 gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
1819     struct mbuf *m, sa_family_t af)
1820 {
1821 	uint16_t proto;
1822 	uint8_t ttl, itos, otos;
1823 	int tttl = tunnel->t_ttl;
1824 	int ttloff;
1825 
1826 	switch (af) {
1827 	case AF_INET: {
1828 		struct ip *ip;
1829 
1830 		m = m_pullup(m, sizeof(*ip));
1831 		if (m == NULL)
1832 			return (NULL);
1833 
1834 		ip = mtod(m, struct ip *);
1835 		itos = ip->ip_tos;
1836 
1837 		ttloff = offsetof(struct ip, ip_ttl);
1838 		proto = htons(ETHERTYPE_IP);
1839 		break;
1840 	}
1841 #ifdef INET6
1842 	case AF_INET6: {
1843 		struct ip6_hdr *ip6;
1844 
1845 		m = m_pullup(m, sizeof(*ip6));
1846 		if (m == NULL)
1847 			return (NULL);
1848 
1849 		ip6 = mtod(m, struct ip6_hdr *);
1850 		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;
1851 
1852 		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
1853 		proto = htons(ETHERTYPE_IPV6);
1854 		break;
1855 	}
1856  #endif
1857 #ifdef MPLS
1858 	case AF_MPLS: {
1859 		uint32_t shim;
1860 
1861 		m = m_pullup(m, sizeof(shim));
1862 		if (m == NULL)
1863 			return (NULL);
1864 
1865 		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
1866 		itos = (shim >> MPLS_EXP_OFFSET) << 5;
1867 
1868 		ttloff = 3;
1869 
1870 		if (m->m_flags & (M_BCAST | M_MCAST))
1871 			proto = htons(ETHERTYPE_MPLS_MCAST);
1872 		else
1873 			proto = htons(ETHERTYPE_MPLS);
1874 		break;
1875 	}
1876 #endif
1877 	default:
1878 		unhandled_af(af);
1879 	}
1880 
1881 	if (tttl == -1) {
1882 		KASSERT(m->m_len > ttloff); /* m_pullup has happened */
1883 
1884 		ttl = *(m->m_data + ttloff);
1885 	} else
1886 		ttl = tttl;
1887 
1888 	itos = gre_l3_tos(tunnel, m, itos);
1889 	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);
1890 
1891 	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
1892 }
1893 
1894 static struct mbuf *
1895 gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
1896     struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
1897 {
1898 	struct gre_header *gh;
1899 	struct gre_h_key *gkh;
1900 	int hlen;
1901 
1902 	hlen = sizeof(*gh);
1903 	if (tunnel->t_key_mask != GRE_KEY_NONE)
1904 		hlen += sizeof(*gkh);
1905 
1906 	m = m_prepend(m, hlen, M_DONTWAIT);
1907 	if (m == NULL)
1908 		return (NULL);
1909 
1910 	gh = mtod(m, struct gre_header *);
1911 	gh->gre_flags = GRE_VERS_0;
1912 	gh->gre_proto = proto;
1913 	if (tunnel->t_key_mask != GRE_KEY_NONE) {
1914 		gh->gre_flags |= htons(GRE_KP);
1915 
1916 		gkh = (struct gre_h_key *)(gh + 1);
1917 		gkh->gre_key = tunnel->t_key;
1918 
1919 		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
1920 		    ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
1921 			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
1922 			    m->m_pkthdr.ph_flowid);
1923 		}
1924 	}
1925 
1926 	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
1927 }
1928 
1929 static struct mbuf *
1930 gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
1931     struct mbuf *m, uint8_t ttl, uint8_t tos)
1932 {
1933 	switch (tunnel->t_af) {
1934 	case AF_UNSPEC:
1935 		/* packets may arrive before tunnel is set up */
1936 		m_freem(m);
1937 		return (NULL);
1938 	case AF_INET: {
1939 		struct ip *ip;
1940 
1941 		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
1942 		if (m == NULL)
1943 			return (NULL);
1944 
1945 		ip = mtod(m, struct ip *);
1946 		ip->ip_v = IPVERSION;
1947 		ip->ip_hl = sizeof(*ip) >> 2;
1948 		ip->ip_off = tunnel->t_df;
1949 		ip->ip_tos = tos;
1950 		ip->ip_len = htons(m->m_pkthdr.len);
1951 		ip->ip_ttl = ttl;
1952 		ip->ip_p = IPPROTO_GRE;
1953 		ip->ip_src = tunnel->t_src4;
1954 		ip->ip_dst = dst->in4;
1955 		break;
1956 	}
1957 #ifdef INET6
1958 	case AF_INET6: {
1959 		struct ip6_hdr *ip6;
1960 		int len = m->m_pkthdr.len;
1961 
1962 		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
1963 		if (m == NULL)
1964 			return (NULL);
1965 
1966 		ip6 = mtod(m, struct ip6_hdr *);
1967 		ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
1968 		    htonl(m->m_pkthdr.ph_flowid) : 0;
1969 		ip6->ip6_vfc |= IPV6_VERSION;
1970 		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
1971 		ip6->ip6_plen = htons(len);
1972 		ip6->ip6_nxt = IPPROTO_GRE;
1973 		ip6->ip6_hlim = ttl;
1974 		ip6->ip6_src = tunnel->t_src6;
1975 		ip6->ip6_dst = dst->in6;
1976 
1977 		if (tunnel->t_df)
1978 			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
1979 
1980 		break;
1981 	}
1982 #endif /* INET6 */
1983 	default:
1984 		unhandled_af(tunnel->t_af);
1985 	}
1986 
1987 	return (m);
1988 }
1989 
1990 static int
1991 gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
1992 {
1993 	m->m_flags &= ~(M_BCAST|M_MCAST);
1994 	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
1995 
1996 #if NPF > 0
1997 	pf_pkt_addr_changed(m);
1998 #endif
1999 
2000 	switch (tunnel->t_af) {
2001 	case AF_INET:
2002 		ip_send(m);
2003 		break;
2004 #ifdef INET6
2005 	case AF_INET6:
2006 		ip6_send(m);
2007 		break;
2008 #endif
2009 	default:
2010 		unhandled_af(tunnel->t_af);
2011 	}
2012 
2013 	return (0);
2014 }
2015 
2016 static int
2017 gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
2018     u_long cmd, void *data)
2019 {
2020 	struct ifreq *ifr = (struct ifreq *)data;
2021 	int error = 0;
2022 
2023 	switch(cmd) {
2024 	case SIOCSIFMTU:
2025 		if (ifr->ifr_mtu < 576) {
2026 			error = EINVAL;
2027 			break;
2028 		}
2029 		ifp->if_mtu = ifr->ifr_mtu;
2030 		break;
2031 	case SIOCADDMULTI:
2032 	case SIOCDELMULTI:
2033 		break;
2034 
2035 	case SIOCSVNETID:
2036 		error = gre_set_vnetid(tunnel, ifr);
2037 		break;
2038 
2039 	case SIOCGVNETID:
2040 		error = gre_get_vnetid(tunnel, ifr);
2041 		break;
2042 	case SIOCDVNETID:
2043 		error = gre_del_vnetid(tunnel);
2044 		break;
2045 
2046 	case SIOCSVNETFLOWID:
2047 		error = gre_set_vnetflowid(tunnel, ifr);
2048 		break;
2049 
2050 	case SIOCGVNETFLOWID:
2051 		error = gre_get_vnetflowid(tunnel, ifr);
2052 		break;
2053 
2054 	case SIOCSLIFPHYADDR:
2055 		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
2056 		break;
2057 	case SIOCGLIFPHYADDR:
2058 		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
2059 		break;
2060 	case SIOCDIFPHYADDR:
2061 		error = gre_del_tunnel(tunnel);
2062 		break;
2063 
2064 	case SIOCSLIFPHYRTABLE:
2065 		if (ifr->ifr_rdomainid < 0 ||
2066 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
2067 		    !rtable_exists(ifr->ifr_rdomainid)) {
2068 			error = EINVAL;
2069 			break;
2070 		}
2071 		tunnel->t_rtableid = ifr->ifr_rdomainid;
2072 		break;
2073 	case SIOCGLIFPHYRTABLE:
2074 		ifr->ifr_rdomainid = tunnel->t_rtableid;
2075 		break;
2076 
2077 	case SIOCSLIFPHYDF:
2078 		/* commit */
2079 		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
2080 		break;
2081 	case SIOCGLIFPHYDF:
2082 		ifr->ifr_df = tunnel->t_df ? 1 : 0;
2083 		break;
2084 
2085 	default:
2086 		error = ENOTTY;
2087 		break;
2088 	}
2089 
2090 	return (error);
2091 }
2092 
2093 static uint8_t
2094 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2095 {
2096 	uint8_t prio;
2097 
2098 	switch (t->t_txhprio) {
2099 	case IF_HDRPRIO_PACKET:
2100 		prio = m->m_pkthdr.pf.prio;
2101 		break;
2102 	default:
2103 		prio = t->t_txhprio;
2104 		break;
2105 	}
2106 
2107 	return (IFQ_PRIO2TOS(prio));
2108 }
2109 
2110 static uint8_t
2111 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2112 {
2113 	uint8_t prio;
2114 
2115 	switch (t->t_txhprio) {
2116 	case IF_HDRPRIO_PAYLOAD:
2117 		return (tos);
2118 	case IF_HDRPRIO_PACKET:
2119 		prio = m->m_pkthdr.pf.prio;
2120 		break;
2121 	default:
2122 		prio = t->t_txhprio;
2123 		break;
2124 	}
2125 
2126 	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2127 }
2128 
2129 static int
2130 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2131 {
2132 	struct gre_softc *sc = ifp->if_softc;
2133 	struct ifreq *ifr = (struct ifreq *)data;
2134 	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
2135 	int error = 0;
2136 
2137 	switch(cmd) {
2138 	case SIOCSIFADDR:
2139 		ifp->if_flags |= IFF_UP;
2140 		/* FALLTHROUGH */
2141 	case SIOCSIFFLAGS:
2142 		if (ISSET(ifp->if_flags, IFF_UP)) {
2143 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
2144 				error = gre_up(sc);
2145 			else
2146 				error = 0;
2147 		} else {
2148 			if (ISSET(ifp->if_flags, IFF_RUNNING))
2149 				error = gre_down(sc);
2150 		}
2151 		break;
2152 	case SIOCSIFRDOMAIN:
2153 		/* let if_rdomain do its thing */
2154 		error = ENOTTY;
2155 		break;
2156 
2157 	case SIOCSETKALIVE:
2158 		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
2159 		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
2160 		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
2161 			return (EINVAL);
2162 
2163 		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
2164 			sc->sc_ka_count = 0;
2165 			sc->sc_ka_timeo = 0;
2166 			sc->sc_ka_state = GRE_KA_NONE;
2167 		} else {
2168 			sc->sc_ka_count = ikar->ikar_cnt;
2169 			sc->sc_ka_timeo = ikar->ikar_timeo;
2170 			sc->sc_ka_state = GRE_KA_DOWN;
2171 
2172 			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
2173 			sc->sc_ka_bias = arc4random();
2174 			sc->sc_ka_holdmax = sc->sc_ka_count;
2175 
2176 			sc->sc_ka_recvtm = ticks - hz;
2177 			timeout_add(&sc->sc_ka_send, 1);
2178 			timeout_add_sec(&sc->sc_ka_hold,
2179 			    sc->sc_ka_timeo * sc->sc_ka_count);
2180 		}
2181 		break;
2182 
2183 	case SIOCGETKALIVE:
2184 		ikar->ikar_cnt = sc->sc_ka_count;
2185 		ikar->ikar_timeo = sc->sc_ka_timeo;
2186 		break;
2187 
2188 	case SIOCSLIFPHYTTL:
2189 		if (ifr->ifr_ttl != -1 &&
2190 		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
2191 			error = EINVAL;
2192 			break;
2193 		}
2194 
2195 		/* commit */
2196 		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
2197 		break;
2198 
2199 	case SIOCGLIFPHYTTL:
2200 		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
2201 		break;
2202 
2203 	case SIOCSLIFPHYECN:
2204 		sc->sc_tunnel.t_ecn =
2205 		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
2206 		break;
2207 	case SIOCGLIFPHYECN:
2208 		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
2209 		break;
2210 
2211 	case SIOCSTXHPRIO:
2212 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
2213 		if (error != 0)
2214 			break;
2215 
2216 		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
2217 		break;
2218 	case SIOCGTXHPRIO:
2219 		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
2220 		break;
2221 
2222 	case SIOCSRXHPRIO:
2223 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
2224 		if (error != 0)
2225 			break;
2226 
2227 		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
2228 		break;
2229 	case SIOCGRXHPRIO:
2230 		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
2231 		break;
2232 
2233 	default:
2234 		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
2235 		break;
2236 	}
2237 
2238 	return (error);
2239 }
2240 
2241 static int
2242 mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2243 {
2244 	struct mgre_softc *sc = ifp->if_softc;
2245 	struct ifreq *ifr = (struct ifreq *)data;
2246 	int error = 0;
2247 
2248 	switch(cmd) {
2249 	case SIOCSIFADDR:
2250 		break;
2251 	case SIOCSIFFLAGS:
2252 		if (ISSET(ifp->if_flags, IFF_UP)) {
2253 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
2254 				error = mgre_up(sc);
2255 			else
2256 				error = 0;
2257 		} else {
2258 			if (ISSET(ifp->if_flags, IFF_RUNNING))
2259 				error = mgre_down(sc);
2260 		}
2261 		break;
2262 
2263 	case SIOCSLIFPHYTTL:
2264 		if (ifr->ifr_ttl != -1 &&
2265 		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
2266 			error = EINVAL;
2267 			break;
2268 		}
2269 
2270 		/* commit */
2271 		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
2272 		break;
2273 
2274 	case SIOCGLIFPHYTTL:
2275 		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
2276 		break;
2277 
2278 	case SIOCSLIFPHYECN:
2279 		sc->sc_tunnel.t_ecn =
2280 		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
2281 		break;
2282 	case SIOCGLIFPHYECN:
2283 		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
2284 		break;
2285 
2286 	case SIOCSLIFPHYADDR:
2287 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2288 			error = EBUSY;
2289 			break;
2290 		}
2291 		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
2292 		break;
2293 	case SIOCGLIFPHYADDR:
2294 		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
2295 		break;
2296 
2297 	case SIOCSTXHPRIO:
2298 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
2299 		if (error != 0)
2300 			break;
2301 
2302 		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
2303 		break;
2304 	case SIOCGTXHPRIO:
2305 		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
2306 		break;
2307 
2308 	case SIOCSRXHPRIO:
2309 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
2310 		if (error != 0)
2311 			break;
2312 
2313 		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
2314 		break;
2315 	case SIOCGRXHPRIO:
2316 		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
2317 		break;
2318 
2319 	case SIOCSVNETID:
2320 	case SIOCDVNETID:
2321 	case SIOCDIFPHYADDR:
2322 	case SIOCSLIFPHYRTABLE:
2323 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2324 			error = EBUSY;
2325 			break;
2326 		}
2327 
2328 		/* FALLTHROUGH */
2329 	default:
2330 		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
2331 		break;
2332 	}
2333 
2334 	return (error);
2335 }
2336 
2337 static int
2338 mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
2339 {
2340 	struct gre_tunnel *tunnel = &sc->sc_tunnel;
2341 	struct sockaddr *addr = (struct sockaddr *)&req->addr;
2342 	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
2343 	struct sockaddr_in *addr4;
2344 #ifdef INET6
2345 	struct sockaddr_in6 *addr6;
2346 	int error;
2347 #endif
2348 
2349 	if (dstaddr->sa_family != AF_UNSPEC)
2350 		return (EINVAL);
2351 
2352 	/* validate */
2353 	switch (addr->sa_family) {
2354 	case AF_INET:
2355 		if (addr->sa_len != sizeof(*addr4))
2356 			return (EINVAL);
2357 
2358 		addr4 = (struct sockaddr_in *)addr;
2359 		if (in_nullhost(addr4->sin_addr) ||
2360 		    IN_MULTICAST(addr4->sin_addr.s_addr))
2361 			return (EINVAL);
2362 
2363 		tunnel->t_src4 = addr4->sin_addr;
2364 		tunnel->t_dst4.s_addr = INADDR_ANY;
2365 
2366 		break;
2367 #ifdef INET6
2368 	case AF_INET6:
2369 		if (addr->sa_len != sizeof(*addr6))
2370 			return (EINVAL);
2371 
2372 		addr6 = (struct sockaddr_in6 *)addr;
2373 		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
2374 		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
2375 			return (EINVAL);
2376 
2377 		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
2378 		if (error != 0)
2379 			return (error);
2380 
2381 		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));
2382 
2383 		break;
2384 #endif
2385 	default:
2386 		return (EAFNOSUPPORT);
2387 	}
2388 
2389 	/* commit */
2390 	tunnel->t_af = addr->sa_family;
2391 
2392 	return (0);
2393 }
2394 
2395 static int
2396 mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
2397 {
2398 	struct gre_tunnel *tunnel = &sc->sc_tunnel;
2399 	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
2400 	struct sockaddr_in *sin;
2401 #ifdef INET6
2402 	struct sockaddr_in6 *sin6;
2403 #endif
2404 
2405 	switch (tunnel->t_af) {
2406 	case AF_UNSPEC:
2407 		return (EADDRNOTAVAIL);
2408 	case AF_INET:
2409 		sin = (struct sockaddr_in *)&req->addr;
2410 		memset(sin, 0, sizeof(*sin));
2411 		sin->sin_family = AF_INET;
2412 		sin->sin_len = sizeof(*sin);
2413 		sin->sin_addr = tunnel->t_src4;
2414 		break;
2415 
2416 #ifdef INET6
2417 	case AF_INET6:
2418 		sin6 = (struct sockaddr_in6 *)&req->addr;
2419 		memset(sin6, 0, sizeof(*sin6));
2420 		sin6->sin6_family = AF_INET6;
2421 		sin6->sin6_len = sizeof(*sin6);
2422 		in6_recoverscope(sin6, &tunnel->t_src6);
2423 		break;
2424 #endif
2425 	default:
2426 		unhandled_af(tunnel->t_af);
2427 	}
2428 
2429 	dstaddr->sa_len = 2;
2430 	dstaddr->sa_family = AF_UNSPEC;
2431 
2432 	return (0);
2433 }
2434 
2435 static int
2436 egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2437 {
2438 	struct egre_softc *sc = ifp->if_softc;
2439 	struct ifreq *ifr = (struct ifreq *)data;
2440 	int error = 0;
2441 
2442 	switch(cmd) {
2443 	case SIOCSIFADDR:
2444 		break;
2445 	case SIOCSIFFLAGS:
2446 		if (ISSET(ifp->if_flags, IFF_UP)) {
2447 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
2448 				error = egre_up(sc);
2449 			else
2450 				error = 0;
2451 		} else {
2452 			if (ISSET(ifp->if_flags, IFF_RUNNING))
2453 				error = egre_down(sc);
2454 		}
2455 		break;
2456 
2457 	case SIOCSLIFPHYTTL:
2458 		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
2459 			error = EINVAL;
2460 			break;
2461 		}
2462 
2463 		/* commit */
2464 		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
2465 		break;
2466 
2467 	case SIOCGLIFPHYTTL:
2468 		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
2469 		break;
2470 
2471 	case SIOCSTXHPRIO:
2472 		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
2473 		if (error != 0)
2474 			break;
2475 
2476 		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
2477 		break;
2478 	case SIOCGTXHPRIO:
2479 		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
2480 		break;
2481 
2482 	case SIOCSRXHPRIO:
2483 		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
2484 		if (error != 0)
2485 			break;
2486 
2487 		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
2488 		break;
2489 	case SIOCGRXHPRIO:
2490 		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
2491 		break;
2492 
2493 	case SIOCSVNETID:
2494 	case SIOCDVNETID:
2495 	case SIOCSVNETFLOWID:
2496 	case SIOCSLIFPHYADDR:
2497 	case SIOCDIFPHYADDR:
2498 	case SIOCSLIFPHYRTABLE:
2499 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2500 			error = EBUSY;
2501 			break;
2502 		}
2503 
2504 		/* FALLTHROUGH */
2505 	default:
2506 		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
2507 		if (error == ENOTTY)
2508 			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
2509 		break;
2510 	}
2511 
2512 	if (error == ENETRESET) {
2513 		/* no hardware to program */
2514 		error = 0;
2515 	}
2516 
2517 	return (error);
2518 }
2519 
2520 static int
2521 nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2522 {
2523 	struct nvgre_softc *sc = ifp->if_softc;
2524 	struct gre_tunnel *tunnel = &sc->sc_tunnel;
2525 
2526 	struct ifreq *ifr = (struct ifreq *)data;
2527 	struct if_parent *parent = (struct if_parent *)data;
2528 	struct ifbrparam *bparam = (struct ifbrparam *)data;
2529 	struct ifnet *ifp0;
2530 
2531 	int error = 0;
2532 
2533 	switch (cmd) {
2534 	case SIOCSIFADDR:
2535 		break;
2536 	case SIOCSIFFLAGS:
2537 		if (ISSET(ifp->if_flags, IFF_UP)) {
2538 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
2539 				error = nvgre_up(sc);
2540 			else
2541 				error = ENETRESET;
2542 		} else {
2543 			if (ISSET(ifp->if_flags, IFF_RUNNING))
2544 				error = nvgre_down(sc);
2545 		}
2546 		break;
2547 
2548 	case SIOCSLIFPHYADDR:
2549 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2550 			error = EBUSY;
2551 			break;
2552 		}
2553 		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
2554 		if (error == 0)
2555 			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
2556 		break;
2557 	case SIOCGLIFPHYADDR:
2558 		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
2559 		break;
2560 	case SIOCDIFPHYADDR:
2561 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2562 			error = EBUSY;
2563 			break;
2564 		}
2565 		error = gre_del_tunnel(tunnel);
2566 		if (error == 0)
2567 			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
2568 		break;
2569 
2570 	case SIOCSIFPARENT:
2571 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2572 			error = EBUSY;
2573 			break;
2574 		}
2575 		error = nvgre_set_parent(sc, parent->ifp_parent);
2576 		if (error == 0)
2577 			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
2578 		break;
2579 	case SIOCGIFPARENT:
2580 		ifp0 = if_get(sc->sc_ifp0);
2581 		if (ifp0 == NULL)
2582 			error = EADDRNOTAVAIL;
2583 		else {
2584 			memcpy(parent->ifp_parent, ifp0->if_xname,
2585 			    sizeof(parent->ifp_parent));
2586 		}
2587 		if_put(ifp0);
2588 		break;
2589 	case SIOCDIFPARENT:
2590 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2591 			error = EBUSY;
2592 			break;
2593 		}
2594 		/* commit */
2595 		sc->sc_ifp0 = 0;
2596 		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
2597 		break;
2598 
2599 	case SIOCSVNETID:
2600 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2601 			error = EBUSY;
2602 			break;
2603 		}
2604 		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
2605 		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
2606 			error = EINVAL;
2607 			break;
2608 		}
2609 
2610 		/* commit */
2611 		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
2612 		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
2613 		break;
2614 	case SIOCGVNETID:
2615 		error = gre_get_vnetid(tunnel, ifr);
2616 		break;
2617 
2618 	case SIOCSLIFPHYRTABLE:
2619 		if (ifr->ifr_rdomainid < 0 ||
2620 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
2621 		    !rtable_exists(ifr->ifr_rdomainid)) {
2622 			error = EINVAL;
2623 			break;
2624 		}
2625 		tunnel->t_rtableid = ifr->ifr_rdomainid;
2626 		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
2627 		break;
2628 	case SIOCGLIFPHYRTABLE:
2629 		ifr->ifr_rdomainid = tunnel->t_rtableid;
2630 		break;
2631 
2632 	case SIOCSLIFPHYDF:
2633 		/* commit */
2634 		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
2635 		break;
2636 	case SIOCGLIFPHYDF:
2637 		ifr->ifr_df = tunnel->t_df ? 1 : 0;
2638 		break;
2639 
2640 	case SIOCSLIFPHYTTL:
2641 		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
2642 			error = EINVAL;
2643 			break;
2644 		}
2645 
2646 		/* commit */
2647 		tunnel->t_ttl = ifr->ifr_ttl;
2648 		break;
2649 
2650 	case SIOCGLIFPHYTTL:
2651 		ifr->ifr_ttl = tunnel->t_ttl;
2652 		break;
2653 
2654 	case SIOCSTXHPRIO:
2655 		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
2656 		if (error != 0)
2657 			break;
2658 
2659 		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
2660 		break;
2661 	case SIOCGTXHPRIO:
2662 		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
2663 		break;
2664 
2665 	case SIOCSRXHPRIO:
2666 		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
2667 		if (error != 0)
2668 			break;
2669 
2670 		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
2671 		break;
2672 	case SIOCGRXHPRIO:
2673 		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
2674 		break;
2675 
2676 	case SIOCBRDGSCACHE:
2677 		error = etherbridge_set_max(&sc->sc_eb, bparam);
2678 		break;
2679 	case SIOCBRDGGCACHE:
2680 		error = etherbridge_get_max(&sc->sc_eb, bparam);
2681 		break;
2682 
2683 	case SIOCBRDGSTO:
2684 		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
2685 		break;
2686 	case SIOCBRDGGTO:
2687 		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
2688 		break;
2689 
2690 	case SIOCBRDGRTS:
2691 		error = etherbridge_rtfind(&sc->sc_eb,
2692 		    (struct ifbaconf *)data);
2693 		break;
2694 	case SIOCBRDGFLUSH:
2695 		etherbridge_flush(&sc->sc_eb,
2696 		    ((struct ifbreq *)data)->ifbr_ifsflags);
2697 		break;
2698 	case SIOCBRDGSADDR:
2699 		error = nvgre_add_addr(sc, (struct ifbareq *)data);
2700 		break;
2701 	case SIOCBRDGDADDR:
2702 		error = nvgre_del_addr(sc, (struct ifbareq *)data);
2703 		break;
2704 
2705 	case SIOCADDMULTI:
2706 	case SIOCDELMULTI:
2707 		break;
2708 
2709 	default:
2710 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
2711 		break;
2712 	}
2713 
2714 	if (error == ENETRESET) {
2715 		/* no hardware to program */
2716 		error = 0;
2717 	}
2718 
2719 	return (error);
2720 }
2721 
2722 static int
2723 eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2724 {
2725 	struct eoip_softc *sc = ifp->if_softc;
2726 	struct ifreq *ifr = (struct ifreq *)data;
2727 	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
2728 	int error = 0;
2729 
2730 	switch(cmd) {
2731 	case SIOCSIFADDR:
2732 		break;
2733 	case SIOCSIFFLAGS:
2734 		if (ISSET(ifp->if_flags, IFF_UP)) {
2735 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
2736 				error = eoip_up(sc);
2737 			else
2738 				error = 0;
2739 		} else {
2740 			if (ISSET(ifp->if_flags, IFF_RUNNING))
2741 				error = eoip_down(sc);
2742 		}
2743 		break;
2744 
2745 	case SIOCSETKALIVE:
2746 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2747 			error = EBUSY;
2748 			break;
2749 		}
2750 
2751 		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
2752 		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
2753 			return (EINVAL);
2754 
2755 		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
2756 			sc->sc_ka_count = 0;
2757 			sc->sc_ka_timeo = 0;
2758 			sc->sc_ka_state = GRE_KA_NONE;
2759 		} else {
2760 			sc->sc_ka_count = ikar->ikar_cnt;
2761 			sc->sc_ka_timeo = ikar->ikar_timeo;
2762 			sc->sc_ka_state = GRE_KA_DOWN;
2763 		}
2764 		break;
2765 
2766 	case SIOCGETKALIVE:
2767 		ikar->ikar_cnt = sc->sc_ka_count;
2768 		ikar->ikar_timeo = sc->sc_ka_timeo;
2769 		break;
2770 
2771 	case SIOCSVNETID:
2772 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2773 			error = EBUSY;
2774 			break;
2775 		}
2776 		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
2777 			return (EINVAL);
2778 
2779 		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
2780 		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
2781 		break;
2782 
2783 	case SIOCGVNETID:
2784 		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
2785 		break;
2786 
2787 	case SIOCSLIFPHYADDR:
2788 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2789 			error = EBUSY;
2790 			break;
2791 		}
2792 
2793 		error = gre_set_tunnel(&sc->sc_tunnel,
2794 		    (struct if_laddrreq *)data, 1);
2795 		break;
2796 	case SIOCGLIFPHYADDR:
2797 		error = gre_get_tunnel(&sc->sc_tunnel,
2798 		    (struct if_laddrreq *)data);
2799 		break;
2800 	case SIOCDIFPHYADDR:
2801 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2802 			error = EBUSY;
2803 			break;
2804 		}
2805 
2806 		error = gre_del_tunnel(&sc->sc_tunnel);
2807 		break;
2808 
2809 	case SIOCSLIFPHYRTABLE:
2810 		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2811 			error = EBUSY;
2812 			break;
2813 		}
2814 
2815 		if (ifr->ifr_rdomainid < 0 ||
2816 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
2817 		    !rtable_exists(ifr->ifr_rdomainid)) {
2818 			error = EINVAL;
2819 			break;
2820 		}
2821 		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
2822 		break;
2823 	case SIOCGLIFPHYRTABLE:
2824 		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
2825 		break;
2826 
2827 	case SIOCSLIFPHYTTL:
2828 		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
2829 			error = EINVAL;
2830 			break;
2831 		}
2832 
2833 		/* commit */
2834 		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
2835 		break;
2836 	case SIOCGLIFPHYTTL:
2837 		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
2838 		break;
2839 
2840 	case SIOCSLIFPHYDF:
2841 		/* commit */
2842 		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
2843 		break;
2844 	case SIOCGLIFPHYDF:
2845 		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
2846 		break;
2847 
2848 	case SIOCSTXHPRIO:
2849 		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
2850 		if (error != 0)
2851 			break;
2852 
2853 		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
2854 		break;
2855 	case SIOCGTXHPRIO:
2856 		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
2857 		break;
2858 
2859 	case SIOCSRXHPRIO:
2860 		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
2861 		if (error != 0)
2862 			break;
2863 
2864 		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
2865 		break;
2866 	case SIOCGRXHPRIO:
2867 		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
2868 		break;
2869 
2870 	case SIOCADDMULTI:
2871 	case SIOCDELMULTI:
2872 		break;
2873 
2874 	default:
2875 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
2876 		break;
2877 	}
2878 
2879 	if (error == ENETRESET) {
2880 		/* no hardware to program */
2881 		error = 0;
2882 	}
2883 
2884 	return (error);
2885 }
2886 
2887 static int
2888 gre_up(struct gre_softc *sc)
2889 {
2890 	NET_ASSERT_LOCKED();
2891 	SET(sc->sc_if.if_flags, IFF_RUNNING);
2892 
2893 	if (sc->sc_ka_state != GRE_KA_NONE)
2894 		gre_keepalive_send(sc);
2895 
2896 	return (0);
2897 }
2898 
2899 static int
2900 gre_down(struct gre_softc *sc)
2901 {
2902 	NET_ASSERT_LOCKED();
2903 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2904 
2905 	if (sc->sc_ka_state != GRE_KA_NONE) {
2906 		timeout_del_barrier(&sc->sc_ka_hold);
2907 		timeout_del_barrier(&sc->sc_ka_send);
2908 
2909 		sc->sc_ka_state = GRE_KA_DOWN;
2910 		gre_link_state(&sc->sc_if, sc->sc_ka_state);
2911 	}
2912 
2913 	return (0);
2914 }
2915 
2916 static void
2917 gre_link_state(struct ifnet *ifp, unsigned int state)
2918 {
2919 	int link_state = LINK_STATE_UNKNOWN;
2920 
2921 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2922 		switch (state) {
2923 		case GRE_KA_NONE:
2924 			/* maybe up? or down? it's unknown, really */
2925 			break;
2926 		case GRE_KA_UP:
2927 			link_state = LINK_STATE_UP;
2928 			break;
2929 		default:
2930 			link_state = LINK_STATE_KALIVE_DOWN;
2931 			break;
2932 		}
2933 	}
2934 
2935 	if (ifp->if_link_state != link_state) {
2936 		ifp->if_link_state = link_state;
2937 		if_link_state_change(ifp);
2938 	}
2939 }
2940 
2941 static void
2942 gre_keepalive_send(void *arg)
2943 {
2944 	struct gre_tunnel t;
2945 	struct gre_softc *sc = arg;
2946 	struct mbuf *m;
2947 	struct gre_keepalive *gk;
2948 	SIPHASH_CTX ctx;
2949 	int linkhdr, len;
2950 	uint16_t proto;
2951 	uint8_t ttl;
2952 	uint8_t tos;
2953 
2954 	/*
2955 	 * re-schedule immediately, so we deal with incomplete configuration
2956 	 * or temporary errors.
2957 	 */
2958 	if (sc->sc_ka_timeo)
2959 		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
2960 
2961 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
2962 	    sc->sc_ka_state == GRE_KA_NONE ||
2963 	    sc->sc_tunnel.t_af == AF_UNSPEC ||
2964 	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
2965 		return;
2966 
2967 	/* this is really conservative */
2968 #ifdef INET6
2969 	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
2970 	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
2971 #else
2972 	linkhdr = max_linkhdr + sizeof(struct ip) +
2973 	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
2974 #endif
2975 	len = linkhdr + sizeof(*gk);
2976 
2977 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2978 	if (m == NULL)
2979 		return;
2980 
2981 	if (len > MHLEN) {
2982 		MCLGETL(m, M_DONTWAIT, len);
2983 		if (!ISSET(m->m_flags, M_EXT)) {
2984 			m_freem(m);
2985 			return;
2986 		}
2987 	}
2988 
2989 	m->m_pkthdr.len = m->m_len = len;
2990 	m_adj(m, linkhdr);
2991 
2992 	/*
2993 	 * build the inside packet
2994 	 */
2995 	gk = mtod(m, struct gre_keepalive *);
2996 	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
2997 	htobem32(&gk->gk_random, arc4random());
2998 
2999 	SipHash24_Init(&ctx, &sc->sc_ka_key);
3000 	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
3001 	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
3002 	SipHash24_Final(gk->gk_digest, &ctx);
3003 
3004 	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;
3005 
3006 	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
3007 	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));
3008 
3009 	t.t_af = sc->sc_tunnel.t_af;
3010 	t.t_df = sc->sc_tunnel.t_df;
3011 	t.t_src = sc->sc_tunnel.t_dst;
3012 	t.t_dst = sc->sc_tunnel.t_src;
3013 	t.t_key = sc->sc_tunnel.t_key;
3014 	t.t_key_mask = sc->sc_tunnel.t_key_mask;
3015 
3016 	m = gre_encap(&t, m, htons(0), ttl, tos);
3017 	if (m == NULL)
3018 		return;
3019 
3020 	switch (sc->sc_tunnel.t_af) {
3021 	case AF_INET: {
3022 		struct ip *ip;
3023 
3024 		ip = mtod(m, struct ip *);
3025 		ip->ip_id = htons(ip_randomid());
3026 		ip->ip_sum = 0;
3027 		ip->ip_sum = in_cksum(m, sizeof(*ip));
3028 
3029 		proto = htons(ETHERTYPE_IP);
3030 		break;
3031 	}
3032 #ifdef INET6
3033 	case AF_INET6:
3034 		proto = htons(ETHERTYPE_IPV6);
3035 		break;
3036 #endif
3037 	default:
3038 		m_freem(m);
3039 		return;
3040 	}
3041 
3042 	/*
3043 	 * put it in the tunnel
3044 	 */
3045 	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
3046 	if (m == NULL)
3047 		return;
3048 
3049 	gre_ip_output(&sc->sc_tunnel, m);
3050 }
3051 
3052 static void
3053 gre_keepalive_hold(void *arg)
3054 {
3055 	struct gre_softc *sc = arg;
3056 	struct ifnet *ifp = &sc->sc_if;
3057 
3058 	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
3059 	    sc->sc_ka_state == GRE_KA_NONE)
3060 		return;
3061 
3062 	NET_LOCK();
3063 	sc->sc_ka_state = GRE_KA_DOWN;
3064 	gre_link_state(ifp, sc->sc_ka_state);
3065 	NET_UNLOCK();
3066 }
3067 
3068 static int
3069 gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
3070 {
3071 	struct sockaddr *src = (struct sockaddr *)&req->addr;
3072 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
3073 	struct sockaddr_in *src4, *dst4;
3074 #ifdef INET6
3075 	struct sockaddr_in6 *src6, *dst6;
3076 	int error;
3077 #endif
3078 
3079 	/* sa_family and sa_len must be equal */
3080 	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
3081 		return (EINVAL);
3082 
3083 	/* validate */
3084 	switch (dst->sa_family) {
3085 	case AF_INET:
3086 		if (dst->sa_len != sizeof(*dst4))
3087 			return (EINVAL);
3088 
3089 		src4 = (struct sockaddr_in *)src;
3090 		if (in_nullhost(src4->sin_addr) ||
3091 		    IN_MULTICAST(src4->sin_addr.s_addr))
3092 			return (EINVAL);
3093 
3094 		dst4 = (struct sockaddr_in *)dst;
3095 		if (in_nullhost(dst4->sin_addr) ||
3096 		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
3097 			return (EINVAL);
3098 
3099 		tunnel->t_src4 = src4->sin_addr;
3100 		tunnel->t_dst4 = dst4->sin_addr;
3101 
3102 		break;
3103 #ifdef INET6
3104 	case AF_INET6:
3105 		if (dst->sa_len != sizeof(*dst6))
3106 			return (EINVAL);
3107 
3108 		src6 = (struct sockaddr_in6 *)src;
3109 		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
3110 		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
3111 			return (EINVAL);
3112 
3113 		dst6 = (struct sockaddr_in6 *)dst;
3114 		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
3115 		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
3116 			return (EINVAL);
3117 
3118 		if (src6->sin6_scope_id != dst6->sin6_scope_id)
3119 			return (EINVAL);
3120 
3121 		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
3122 		if (error != 0)
3123 			return (error);
3124 
3125 		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
3126 		if (error != 0)
3127 			return (error);
3128 
3129 		break;
3130 #endif
3131 	default:
3132 		return (EAFNOSUPPORT);
3133 	}
3134 
3135 	/* commit */
3136 	tunnel->t_af = dst->sa_family;
3137 
3138 	return (0);
3139 }
3140 
3141 static int
3142 gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
3143 {
3144 	struct sockaddr *src = (struct sockaddr *)&req->addr;
3145 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
3146 	struct sockaddr_in *sin;
3147 #ifdef INET6 /* ifconfig already embeds the scopeid */
3148 	struct sockaddr_in6 *sin6;
3149 #endif
3150 
3151 	switch (tunnel->t_af) {
3152 	case AF_UNSPEC:
3153 		return (EADDRNOTAVAIL);
3154 	case AF_INET:
3155 		sin = (struct sockaddr_in *)src;
3156 		memset(sin, 0, sizeof(*sin));
3157 		sin->sin_family = AF_INET;
3158 		sin->sin_len = sizeof(*sin);
3159 		sin->sin_addr = tunnel->t_src4;
3160 
3161 		sin = (struct sockaddr_in *)dst;
3162 		memset(sin, 0, sizeof(*sin));
3163 		sin->sin_family = AF_INET;
3164 		sin->sin_len = sizeof(*sin);
3165 		sin->sin_addr = tunnel->t_dst4;
3166 
3167 		break;
3168 
3169 #ifdef INET6
3170 	case AF_INET6:
3171 		sin6 = (struct sockaddr_in6 *)src;
3172 		memset(sin6, 0, sizeof(*sin6));
3173 		sin6->sin6_family = AF_INET6;
3174 		sin6->sin6_len = sizeof(*sin6);
3175 		in6_recoverscope(sin6, &tunnel->t_src6);
3176 
3177 		sin6 = (struct sockaddr_in6 *)dst;
3178 		memset(sin6, 0, sizeof(*sin6));
3179 		sin6->sin6_family = AF_INET6;
3180 		sin6->sin6_len = sizeof(*sin6);
3181 		in6_recoverscope(sin6, &tunnel->t_dst6);
3182 
3183 		break;
3184 #endif
3185 	default:
3186 		return (EAFNOSUPPORT);
3187 	}
3188 
3189 	return (0);
3190 }
3191 
3192 static int
3193 gre_del_tunnel(struct gre_tunnel *tunnel)
3194 {
3195 	/* commit */
3196 	tunnel->t_af = AF_UNSPEC;
3197 
3198 	return (0);
3199 }
3200 
3201 static int
3202 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3203 {
3204 	uint32_t key;
3205 	uint32_t min = GRE_KEY_MIN;
3206 	uint32_t max = GRE_KEY_MAX;
3207 	unsigned int shift = GRE_KEY_SHIFT;
3208 	uint32_t mask = GRE_KEY_MASK;
3209 
3210 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
3211 		min = GRE_KEY_ENTROPY_MIN;
3212 		max = GRE_KEY_ENTROPY_MAX;
3213 		shift = GRE_KEY_ENTROPY_SHIFT;
3214 		mask = GRE_KEY_ENTROPY;
3215 	}
3216 
3217 	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
3218 		return (EINVAL);
3219 
3220 	key = htonl(ifr->ifr_vnetid << shift);
3221 
3222 	/* commit */
3223 	tunnel->t_key_mask = mask;
3224 	tunnel->t_key = key;
3225 
3226 	return (0);
3227 }
3228 
3229 static int
3230 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3231 {
3232 	int shift;
3233 
3234 	switch (tunnel->t_key_mask) {
3235 	case GRE_KEY_NONE:
3236 		return (EADDRNOTAVAIL);
3237 	case GRE_KEY_ENTROPY:
3238 		shift = GRE_KEY_ENTROPY_SHIFT;
3239 		break;
3240 	case GRE_KEY_MASK:
3241 		shift = GRE_KEY_SHIFT;
3242 		break;
3243 	}
3244 
3245 	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
3246 
3247 	return (0);
3248 }
3249 
3250 static int
3251 gre_del_vnetid(struct gre_tunnel *tunnel)
3252 {
3253 	tunnel->t_key_mask = GRE_KEY_NONE;
3254 
3255 	return (0);
3256 }
3257 
3258 static int
3259 gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3260 {
3261 	uint32_t mask, key;
3262 
3263 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3264 		return (EADDRNOTAVAIL);
3265 
3266 	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
3267 	if (tunnel->t_key_mask == mask) {
3268 		/* nop */
3269 		return (0);
3270 	}
3271 
3272 	key = ntohl(tunnel->t_key);
3273 	if (mask == GRE_KEY_ENTROPY) {
3274 		if (key > GRE_KEY_ENTROPY_MAX)
3275 			return (ERANGE);
3276 
3277 		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
3278 	} else
3279 		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);
3280 
3281 	/* commit */
3282 	tunnel->t_key_mask = mask;
3283 	tunnel->t_key = key;
3284 
3285 	return (0);
3286 }
3287 
3288 static int
3289 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3290 {
3291 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3292 		return (EADDRNOTAVAIL);
3293 
3294 	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;
3295 
3296 	return (0);
3297 }
3298 
3299 static int
3300 mgre_up(struct mgre_softc *sc)
3301 {
3302 	unsigned int hlen;
3303 
3304 	switch (sc->sc_tunnel.t_af) {
3305 	case AF_UNSPEC:
3306 		return (EDESTADDRREQ);
3307 	case AF_INET:
3308 		hlen = sizeof(struct ip);
3309 		break;
3310 #ifdef INET6
3311 	case AF_INET6:
3312 		hlen = sizeof(struct ip6_hdr);
3313 		break;
3314 #endif /* INET6 */
3315 	default:
3316 		unhandled_af(sc->sc_tunnel.t_af);
3317 	}
3318 
3319 	hlen += sizeof(struct gre_header);
3320 	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
3321 		hlen += sizeof(struct gre_h_key);
3322 
3323 	NET_ASSERT_LOCKED();
3324 
3325 	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
3326 		return (EADDRINUSE);
3327 
3328 	sc->sc_if.if_hdrlen = hlen;
3329 	SET(sc->sc_if.if_flags, IFF_RUNNING);
3330 
3331 	return (0);
3332 }
3333 
3334 static int
3335 mgre_down(struct mgre_softc *sc)
3336 {
3337 	NET_ASSERT_LOCKED();
3338 
3339 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3340 	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */
3341 
3342 	RBT_REMOVE(mgre_tree, &mgre_tree, sc);
3343 
3344 	/* barrier? */
3345 
3346 	return (0);
3347 }
3348 
3349 static int
3350 egre_up(struct egre_softc *sc)
3351 {
3352 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3353 		return (EDESTADDRREQ);
3354 
3355 	NET_ASSERT_LOCKED();
3356 
3357 	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
3358 		return (EADDRINUSE);
3359 
3360 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3361 
3362 	return (0);
3363 }
3364 
3365 static int
3366 egre_down(struct egre_softc *sc)
3367 {
3368 	NET_ASSERT_LOCKED();
3369 
3370 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3371 
3372 	RBT_REMOVE(egre_tree, &egre_tree, sc);
3373 
3374 	/* barrier? */
3375 
3376 	return (0);
3377 }
3378 
3379 static int
3380 egre_media_change(struct ifnet *ifp)
3381 {
3382 	return (ENOTTY);
3383 }
3384 
3385 static void
3386 egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
3387 {
3388 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
3389 	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
3390 }
3391 
3392 static int
3393 nvgre_up(struct nvgre_softc *sc)
3394 {
3395 	struct gre_tunnel *tunnel = &sc->sc_tunnel;
3396 	struct ifnet *ifp0;
3397 	void *inm;
3398 	int error;
3399 
3400 	if (tunnel->t_af == AF_UNSPEC)
3401 		return (EDESTADDRREQ);
3402 
3403 	ifp0 = if_get(sc->sc_ifp0);
3404 	if (ifp0 == NULL)
3405 		return (ENXIO);
3406 	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
3407 		error = ENODEV;
3408 		goto put;
3409 	}
3410 
3411 	NET_ASSERT_LOCKED();
3412 
3413 	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
3414 		error = EADDRINUSE;
3415 		goto put;
3416 	}
3417 	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
3418 		error = EADDRINUSE;
3419 		goto remove_mcast;
3420 	}
3421 
3422 	switch (tunnel->t_af) {
3423 	case AF_INET:
3424 		inm = in_addmulti(&tunnel->t_dst4, ifp0);
3425 		if (inm == NULL) {
3426 			error = ECONNABORTED;
3427 			goto remove_ucast;
3428 		}
3429 		break;
3430 #ifdef INET6
3431 	case AF_INET6:
3432 		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
3433 		if (inm == NULL) {
3434 			/* error is already set */
3435 			goto remove_ucast;
3436 		}
3437 		break;
3438 #endif /* INET6 */
3439 	default:
3440 		unhandled_af(tunnel->t_af);
3441 	}
3442 
3443 	if_linkstatehook_add(ifp0, &sc->sc_ltask);
3444 	if_detachhook_add(ifp0, &sc->sc_dtask);
3445 
3446 	if_put(ifp0);
3447 
3448 	sc->sc_inm = inm;
3449 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3450 
3451 	return (0);
3452 
3453 remove_ucast:
3454 	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
3455 remove_mcast:
3456 	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
3457 put:
3458 	if_put(ifp0);
3459 	return (error);
3460 }
3461 
3462 static int
3463 nvgre_down(struct nvgre_softc *sc)
3464 {
3465 	struct gre_tunnel *tunnel = &sc->sc_tunnel;
3466 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3467 	struct taskq *softnet = net_tq(ifp->if_index);
3468 	struct ifnet *ifp0;
3469 
3470 	NET_ASSERT_LOCKED();
3471 
3472 	CLR(ifp->if_flags, IFF_RUNNING);
3473 
3474 	NET_UNLOCK();
3475 	ifq_barrier(&ifp->if_snd);
3476 	if (!task_del(softnet, &sc->sc_send_task))
3477 		taskq_barrier(softnet);
3478 	NET_LOCK();
3479 
3480 	mq_purge(&sc->sc_send_list);
3481 
3482 	ifp0 = if_get(sc->sc_ifp0);
3483 	if (ifp0 != NULL) {
3484 		if_detachhook_del(ifp0, &sc->sc_dtask);
3485 		if_linkstatehook_del(ifp0, &sc->sc_ltask);
3486 	}
3487 	if_put(ifp0);
3488 
3489 	switch (tunnel->t_af) {
3490 	case AF_INET:
3491 		in_delmulti(sc->sc_inm);
3492 		break;
3493 
3494 #ifdef INET6
3495 	case AF_INET6:
3496 		in6_delmulti(sc->sc_inm);
3497 		break;
3498 #endif
3499 	default:
3500 		unhandled_af(tunnel->t_af);
3501 	}
3502 
3503 	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
3504 	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
3505 
3506 	return (0);
3507 }
3508 
3509 static void
3510 nvgre_link_change(void *arg)
3511 {
3512 	/* nop */
3513 }
3514 
3515 static void
3516 nvgre_detach(void *arg)
3517 {
3518 	struct nvgre_softc *sc = arg;
3519 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3520 
3521 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
3522 		nvgre_down(sc);
3523 		if_down(ifp);
3524 	}
3525 
3526 	sc->sc_ifp0 = 0;
3527 }
3528 
3529 static int
3530 nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
3531 {
3532 	struct ifnet *ifp0;
3533 
3534 	ifp0 = if_unit(parent);
3535 	if (ifp0 == NULL)
3536 		return (EINVAL);
3537 
3538 	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
3539 		if_put(ifp0);
3540 		return (EPROTONOSUPPORT);
3541 	}
3542 
3543 	/* commit */
3544 	sc->sc_ifp0 = ifp0->if_index;
3545 	if_put(ifp0);
3546 
3547 	return (0);
3548 }
3549 
3550 static int
3551 nvgre_add_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
3552 {
3553 	struct sockaddr_in *sin;
3554 #ifdef INET6
3555 	struct sockaddr_in6 *sin6;
3556 	struct sockaddr_in6 src6 = {
3557 		.sin6_len = sizeof(src6),
3558 		.sin6_family = AF_UNSPEC,
3559 	};
3560 	int error;
3561 #endif
3562 	union gre_addr endpoint;
3563 	unsigned int type;
3564 
3565 	/* ignore ifba_ifsname */
3566 
3567 	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
3568 		return (EINVAL);
3569 	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
3570 	case IFBAF_DYNAMIC:
3571 		type = EBE_DYNAMIC;
3572 		break;
3573 	case IFBAF_STATIC:
3574 		type = EBE_STATIC;
3575 		break;
3576 	default:
3577 		return (EINVAL);
3578 	}
3579 
3580 	memset(&endpoint, 0, sizeof(endpoint));
3581 
3582 	if (ifba->ifba_dstsa.ss_family != sc->sc_tunnel.t_af)
3583 		return (EAFNOSUPPORT);
3584 	switch (ifba->ifba_dstsa.ss_family) {
3585 	case AF_INET:
3586 		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
3587 		if (in_nullhost(sin->sin_addr) ||
3588 		    IN_MULTICAST(sin->sin_addr.s_addr))
3589 			return (EADDRNOTAVAIL);
3590 
3591 		endpoint.in4 = sin->sin_addr;
3592 		break;
3593 
3594 #ifdef INET6
3595 	case AF_INET6:
3596 		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
3597 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
3598 		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
3599 			return (EADDRNOTAVAIL);
3600 
3601 		in6_recoverscope(&src6, &sc->sc_tunnel.t_src6);
3602 
3603 		if (src6.sin6_scope_id != sin6->sin6_scope_id)
3604 			return (EADDRNOTAVAIL);
3605 
3606 		error = in6_embedscope(&endpoint.in6, sin6, NULL);
3607 		if (error != 0)
3608 			return (error);
3609 
3610 		break;
3611 #endif
3612 	default: /* AF_UNSPEC */
3613 		return (EADDRNOTAVAIL);
3614 	}
3615 
3616 	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
3617 	    &ifba->ifba_dst, type));
3618 }
3619 
3620 static int
3621 nvgre_del_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
3622 {
3623 	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
3624 }
3625 
3626 static void
3627 nvgre_start(struct ifnet *ifp)
3628 {
3629 	struct nvgre_softc *sc = ifp->if_softc;
3630 	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
3631 	union gre_addr gateway;
3632 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
3633 	struct ether_header *eh;
3634 	struct mbuf *m, *m0;
3635 #if NBPFILTER > 0
3636 	caddr_t if_bpf;
3637 #endif
3638 
3639 	if (!gre_allow) {
3640 		ifq_purge(&ifp->if_snd);
3641 		return;
3642 	}
3643 
3644 	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
3645 #if NBPFILTER > 0
3646 		if_bpf = ifp->if_bpf;
3647 		if (if_bpf)
3648 			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
3649 #endif
3650 
3651 		eh = mtod(m0, struct ether_header *);
3652 		if (ETHER_IS_BROADCAST(eh->ether_dhost))
3653 			gateway = tunnel->t_dst;
3654 		else {
3655 			const union gre_addr *endpoint;
3656 
3657 			smr_read_enter();
3658 			endpoint = etherbridge_resolve_ea(&sc->sc_eb,
3659 			    (struct ether_addr *)eh->ether_dhost);
3660 			if (endpoint == NULL) {
3661 				/* "flood" to unknown hosts */
3662 				endpoint = &tunnel->t_dst;
3663 			}
3664 			gateway = *endpoint;
3665 			smr_read_leave();
3666 		}
3667 
3668 		/* force prepend mbuf because of alignment problems */
3669 		m = m_get(M_DONTWAIT, m0->m_type);
3670 		if (m == NULL) {
3671 			m_freem(m0);
3672 			continue;
3673 		}
3674 
3675 		M_MOVE_PKTHDR(m, m0);
3676 		m->m_next = m0;
3677 
3678 		m_align(m, 0);
3679 		m->m_len = 0;
3680 
3681 		m = gre_encap_dst(tunnel, &gateway, m,
3682 		    htons(ETHERTYPE_TRANSETHER),
3683 		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
3684 		if (m == NULL)
3685 			continue;
3686 
3687 		m->m_flags &= ~(M_BCAST|M_MCAST);
3688 		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
3689 
3690 #if NPF > 0
3691 		pf_pkt_addr_changed(m);
3692 #endif
3693 
3694 		ml_enqueue(&ml, m);
3695 	}
3696 
3697 	if (!ml_empty(&ml)) {
3698 		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
3699 			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
3700 		/* else set OACTIVE? */
3701 	}
3702 }
3703 
3704 static uint64_t
3705 nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
3706 {
3707 	struct ip_moptions imo;
3708 	struct mbuf *m;
3709 	uint64_t oerrors = 0;
3710 
3711 	imo.imo_ifidx = sc->sc_ifp0;
3712 	imo.imo_ttl = sc->sc_tunnel.t_ttl;
3713 	imo.imo_loop = 0;
3714 
3715 	NET_LOCK();
3716 	while ((m = ml_dequeue(ml)) != NULL) {
3717 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
3718 			oerrors++;
3719 	}
3720 	NET_UNLOCK();
3721 
3722 	return (oerrors);
3723 }
3724 
3725 #ifdef INET6
3726 static uint64_t
3727 nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
3728 {
3729 	struct ip6_moptions im6o;
3730 	struct mbuf *m;
3731 	uint64_t oerrors = 0;
3732 
3733 	im6o.im6o_ifidx = sc->sc_ifp0;
3734 	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
3735 	im6o.im6o_loop = 0;
3736 
3737 	NET_LOCK();
3738 	while ((m = ml_dequeue(ml)) != NULL) {
3739 		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
3740 			oerrors++;
3741 	}
3742 	NET_UNLOCK();
3743 
3744 	return (oerrors);
3745 }
3746 #endif /* INET6 */
3747 
3748 static void
3749 nvgre_send(void *arg)
3750 {
3751 	struct nvgre_softc *sc = arg;
3752 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3753 	sa_family_t af = sc->sc_tunnel.t_af;
3754 	struct mbuf_list ml;
3755 	uint64_t oerrors;
3756 
3757 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
3758 		return;
3759 
3760 	mq_delist(&sc->sc_send_list, &ml);
3761 	if (ml_empty(&ml))
3762 		return;
3763 
3764 	switch (af) {
3765 	case AF_INET:
3766 		oerrors = nvgre_send4(sc, &ml);
3767 		break;
3768 #ifdef INET6
3769 	case AF_INET6:
3770 		oerrors = nvgre_send6(sc, &ml);
3771 		break;
3772 #endif
3773 	default:
3774 		unhandled_af(af);
3775 		/* NOTREACHED */
3776 	}
3777 
3778 	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
3779 }
3780 
3781 static int
3782 eoip_up(struct eoip_softc *sc)
3783 {
3784 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3785 		return (EDESTADDRREQ);
3786 
3787 	NET_ASSERT_LOCKED();
3788 
3789 	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
3790 		return (EADDRINUSE);
3791 
3792 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3793 
3794 	if (sc->sc_ka_state != GRE_KA_NONE) {
3795 		sc->sc_ka_holdmax = sc->sc_ka_count;
3796 		eoip_keepalive_send(sc);
3797 	}
3798 
3799 	return (0);
3800 }
3801 
3802 static int
3803 eoip_down(struct eoip_softc *sc)
3804 {
3805 	NET_ASSERT_LOCKED();
3806 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3807 
3808 	if (sc->sc_ka_state != GRE_KA_NONE) {
3809 		timeout_del_barrier(&sc->sc_ka_hold);
3810 		timeout_del_barrier(&sc->sc_ka_send);
3811 
3812 		sc->sc_ka_state = GRE_KA_DOWN;
3813 		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
3814 	}
3815 
3816 	RBT_REMOVE(eoip_tree, &eoip_tree, sc);
3817 
3818 	return (0);
3819 }
3820 
3821 static void
3822 eoip_start(struct ifnet *ifp)
3823 {
3824 	struct eoip_softc *sc = ifp->if_softc;
3825 	struct mbuf *m0, *m;
3826 #if NBPFILTER > 0
3827 	caddr_t if_bpf;
3828 #endif
3829 
3830 	if (!gre_allow) {
3831 		ifq_purge(&ifp->if_snd);
3832 		return;
3833 	}
3834 
3835 	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
3836 #if NBPFILTER > 0
3837 		if_bpf = ifp->if_bpf;
3838 		if (if_bpf)
3839 			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
3840 #endif
3841 
3842 		/* force prepend mbuf because of alignment problems */
3843 		m = m_get(M_DONTWAIT, m0->m_type);
3844 		if (m == NULL) {
3845 			m_freem(m0);
3846 			continue;
3847 		}
3848 
3849 		M_MOVE_PKTHDR(m, m0);
3850 		m->m_next = m0;
3851 
3852 		m_align(m, 0);
3853 		m->m_len = 0;
3854 
3855 		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
3856 		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
3857 			ifp->if_oerrors++;
3858 			continue;
3859 		}
3860 	}
3861 }
3862 
3863 static struct mbuf *
3864 eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
3865 {
3866 	struct gre_header *gh;
3867 	struct gre_h_key_eoip *eoiph;
3868 	int len = m->m_pkthdr.len;
3869 
3870 	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
3871 	if (m == NULL)
3872 		return (NULL);
3873 
3874 	gh = mtod(m, struct gre_header *);
3875 	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
3876 	gh->gre_proto = htons(GRE_EOIP);
3877 
3878 	eoiph = (struct gre_h_key_eoip *)(gh + 1);
3879 	htobem16(&eoiph->eoip_len, len);
3880 	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;
3881 
3882 	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
3883 }
3884 
3885 static void
3886 eoip_keepalive_send(void *arg)
3887 {
3888 	struct eoip_softc *sc = arg;
3889 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3890 	struct mbuf *m;
3891 	int linkhdr;
3892 
3893 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
3894 		return;
3895 
3896 	/* this is really conservative */
3897 #ifdef INET6
3898 	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
3899 	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
3900 #else
3901 	linkhdr = max_linkhdr + sizeof(struct ip) +
3902 	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
3903 #endif
3904 	MGETHDR(m, M_DONTWAIT, MT_DATA);
3905 	if (m == NULL)
3906 		return;
3907 
3908 	if (linkhdr > MHLEN) {
3909 		MCLGETL(m, M_DONTWAIT, linkhdr);
3910 		if (!ISSET(m->m_flags, M_EXT)) {
3911 			m_freem(m);
3912 			return;
3913 		}
3914 	}
3915 
3916 	m->m_pkthdr.pf.prio = ifp->if_llprio;
3917 	m->m_pkthdr.len = m->m_len = linkhdr;
3918 	m_adj(m, linkhdr);
3919 
3920 	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
3921 	if (m == NULL)
3922 		return;
3923 
3924 	gre_ip_output(&sc->sc_tunnel, m);
3925 
3926 	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
3927 }
3928 
3929 static void
3930 eoip_keepalive_hold(void *arg)
3931 {
3932 	struct eoip_softc *sc = arg;
3933 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3934 
3935 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
3936 		return;
3937 
3938 	NET_LOCK();
3939 	sc->sc_ka_state = GRE_KA_DOWN;
3940 	gre_link_state(ifp, sc->sc_ka_state);
3941 	NET_UNLOCK();
3942 }
3943 
3944 static void
3945 eoip_keepalive_recv(struct eoip_softc *sc)
3946 {
3947 	switch (sc->sc_ka_state) {
3948 	case GRE_KA_NONE:
3949 		return;
3950 	case GRE_KA_DOWN:
3951 		sc->sc_ka_state = GRE_KA_HOLD;
3952 		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
3953 		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
3954 		    16 * sc->sc_ka_count);
3955 		break;
3956 	case GRE_KA_HOLD:
3957 		if (--sc->sc_ka_holdcnt > 0)
3958 			break;
3959 
3960 		sc->sc_ka_state = GRE_KA_UP;
3961 		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
3962 		break;
3963 
3964 	case GRE_KA_UP:
3965 		sc->sc_ka_holdmax--;
3966 		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
3967 		break;
3968 	}
3969 
3970 	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
3971 }
3972 
3973 static struct mbuf *
3974 eoip_input(struct gre_tunnel *key, struct mbuf *m,
3975     const struct gre_header *gh, uint8_t otos, int iphlen)
3976 {
3977 	struct eoip_softc *sc;
3978 	struct gre_h_key_eoip *eoiph;
3979 	int hlen, len;
3980 	caddr_t buf;
3981 
3982 	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
3983 		goto decline;
3984 
3985 	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
3986 	if (m->m_pkthdr.len < hlen)
3987 		goto decline;
3988 
3989 	m = m_pullup(m, hlen);
3990 	if (m == NULL)
3991 		return (NULL);
3992 
3993 	buf = mtod(m, caddr_t);
3994 	gh = (struct gre_header *)(buf + iphlen);
3995 	eoiph = (struct gre_h_key_eoip *)(gh + 1);
3996 
3997 	key->t_key = eoiph->eoip_tunnel_id;
3998 
3999 	NET_ASSERT_LOCKED();
4000 	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
4001 	if (sc == NULL)
4002 		goto decline;
4003 
4004 	/* it's ours now */
4005 	len = bemtoh16(&eoiph->eoip_len);
4006 	if (len == 0) {
4007 		eoip_keepalive_recv(sc);
4008 		goto drop;
4009 	}
4010 
4011 	m = gre_ether_align(m, hlen);
4012 	if (m == NULL)
4013 		return (NULL);
4014 
4015 	if (m->m_pkthdr.len < len)
4016 		goto drop;
4017 	if (m->m_pkthdr.len != len)
4018 		m_adj(m, len - m->m_pkthdr.len);
4019 
4020 	m->m_flags &= ~(M_MCAST|M_BCAST);
4021 
4022 	gre_l2_prio(&sc->sc_tunnel, m, otos);
4023 
4024 	if_vinput(&sc->sc_ac.ac_if, m);
4025 
4026 	return (NULL);
4027 
4028 decline:
4029 	return (m);
4030 drop:
4031 	m_freem(m);
4032 	return (NULL);
4033 }
4034 
4035 const struct sysctl_bounded_args gre_vars[] = {
4036 	{ GRECTL_ALLOW, &gre_allow, 0, 1 },
4037 	{ GRECTL_WCCP, &gre_wccp, 0, 1 },
4038 };
4039 
4040 int
4041 gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
4042     size_t newlen)
4043 {
4044 	int error;
4045 
4046 	NET_LOCK();
4047 	error = sysctl_bounded_arr(gre_vars, nitems(gre_vars), name,
4048 	    namelen, oldp, oldlenp, newp, newlen);
4049 	NET_UNLOCK();
4050 	return error;
4051 }
4052 
4053 static inline int
4054 gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
4055 {
4056 	switch (af) {
4057 #ifdef INET6
4058 	case AF_INET6:
4059 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
4060 #endif /* INET6 */
4061 	case AF_INET:
4062 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
4063 	default:
4064 		unhandled_af(af);
4065 	}
4066 
4067 	return (0);
4068 }
4069 
4070 static int
4071 gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
4072 {
4073 	uint32_t ka, kb;
4074 	uint32_t mask;
4075 	int rv;
4076 
4077 	/* is K set at all? */
4078 	ka = a->t_key_mask & GRE_KEY_ENTROPY;
4079 	kb = b->t_key_mask & GRE_KEY_ENTROPY;
4080 
4081 	/* sort by whether K is set */
4082 	if (ka > kb)
4083 		return (1);
4084 	if (ka < kb)
4085 		return (-1);
4086 
4087 	/* is K set on both? */
4088 	if (ka != GRE_KEY_NONE) {
4089 		/* get common prefix */
4090 		mask = a->t_key_mask & b->t_key_mask;
4091 
4092 		ka = a->t_key & mask;
4093 		kb = b->t_key & mask;
4094 
4095 		/* sort by common prefix */
4096 		if (ka > kb)
4097 			return (1);
4098 		if (ka < kb)
4099 			return (-1);
4100 	}
4101 
4102 	/* sort by routing table */
4103 	if (a->t_rtableid > b->t_rtableid)
4104 		return (1);
4105 	if (a->t_rtableid < b->t_rtableid)
4106 		return (-1);
4107 
4108 	/* sort by address */
4109 	if (a->t_af > b->t_af)
4110 		return (1);
4111 	if (a->t_af < b->t_af)
4112 		return (-1);
4113 
4114 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4115 	if (rv != 0)
4116 		return (rv);
4117 
4118 	return (0);
4119 }
4120 
4121 static int
4122 gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
4123 {
4124 	int rv;
4125 
4126 	rv = gre_cmp_src(a, b);
4127 	if (rv != 0)
4128 		return (rv);
4129 
4130 	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
4131 }
4132 
4133 static inline int
4134 mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
4135 {
4136 	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
4137 }
4138 
4139 RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
4140 
4141 static inline int
4142 egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
4143 {
4144 	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
4145 }
4146 
4147 RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);
4148 
4149 static int
4150 nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
4151 {
4152 	uint32_t ka, kb;
4153 
4154 	ka = a->t_key & GRE_KEY_ENTROPY;
4155 	kb = b->t_key & GRE_KEY_ENTROPY;
4156 
4157 	/* sort by common prefix */
4158 	if (ka > kb)
4159 		return (1);
4160 	if (ka < kb)
4161 		return (-1);
4162 
4163 	/* sort by routing table */
4164 	if (a->t_rtableid > b->t_rtableid)
4165 		return (1);
4166 	if (a->t_rtableid < b->t_rtableid)
4167 		return (-1);
4168 
4169 	/* sort by address */
4170 	if (a->t_af > b->t_af)
4171 		return (1);
4172 	if (a->t_af < b->t_af)
4173 		return (-1);
4174 
4175 	return (0);
4176 }
4177 
4178 static inline int
4179 nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4180 {
4181 	const struct gre_tunnel *a = &na->sc_tunnel;
4182 	const struct gre_tunnel *b = &nb->sc_tunnel;
4183 	int rv;
4184 
4185 	rv = nvgre_cmp_tunnel(a, b);
4186 	if (rv != 0)
4187 		return (rv);
4188 
4189 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4190 	if (rv != 0)
4191 		return (rv);
4192 
4193 	return (0);
4194 }
4195 
4196 static int
4197 nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
4198     unsigned int if0idxa, const struct gre_tunnel *b,
4199     const union gre_addr *ab,unsigned int if0idxb)
4200 {
4201 	int rv;
4202 
4203 	rv = nvgre_cmp_tunnel(a, b);
4204 	if (rv != 0)
4205 		return (rv);
4206 
4207 	rv = gre_ip_cmp(a->t_af, aa, ab);
4208 	if (rv != 0)
4209 		return (rv);
4210 
4211 	if (if0idxa > if0idxb)
4212 		return (1);
4213 	if (if0idxa < if0idxb)
4214 		return (-1);
4215 
4216 	return (0);
4217 }
4218 
4219 static inline int
4220 nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4221 {
4222 	const struct gre_tunnel *a = &na->sc_tunnel;
4223 	const struct gre_tunnel *b = &nb->sc_tunnel;
4224 
4225 	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
4226 	    b, &b->t_dst, nb->sc_ifp0));
4227 }
4228 
4229 RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
4230 RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
4231 
4232 static inline int
4233 eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
4234 {
4235 	const struct gre_tunnel *a = &ea->sc_tunnel;
4236 	const struct gre_tunnel *b = &eb->sc_tunnel;
4237 	int rv;
4238 
4239 	if (a->t_key > b->t_key)
4240 		return (1);
4241 	if (a->t_key < b->t_key)
4242 		return (-1);
4243 
4244 	/* sort by routing table */
4245 	if (a->t_rtableid > b->t_rtableid)
4246 		return (1);
4247 	if (a->t_rtableid < b->t_rtableid)
4248 		return (-1);
4249 
4250 	/* sort by address */
4251 	if (a->t_af > b->t_af)
4252 		return (1);
4253 	if (a->t_af < b->t_af)
4254 		return (-1);
4255 
4256 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4257 	if (rv != 0)
4258 		return (rv);
4259 
4260 	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
4261 	if (rv != 0)
4262 		return (rv);
4263 
4264 	return (0);
4265 }
4266 
4267 RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
4268 
4269 static int
4270 nvgre_eb_port_eq(void *arg, void *a, void *b)
4271 {
4272 	struct nvgre_softc *sc = arg;
4273 
4274 	return (gre_ip_cmp(sc->sc_tunnel.t_af, a, b) == 0);
4275 }
4276 
4277 static void *
4278 nvgre_eb_port_take(void *arg, void *port)
4279 {
4280 	union gre_addr *ea = port;
4281 	union gre_addr *endpoint;
4282 
4283 	endpoint = pool_get(&nvgre_endpoint_pool, PR_NOWAIT);
4284 	if (endpoint == NULL)
4285 		return (NULL);
4286 
4287 	*endpoint = *ea;
4288 
4289 	return (endpoint);
4290 }
4291 
4292 static void
4293 nvgre_eb_port_rele(void *arg, void *port)
4294 {
4295 	union gre_addr *endpoint = port;
4296 
4297 	pool_put(&nvgre_endpoint_pool, endpoint);
4298 }
4299 
4300 static size_t
4301 nvgre_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
4302 {
4303 	struct nvgre_softc *sc = arg;
4304 
4305 	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
4306 }
4307 
4308 static void
4309 nvgre_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
4310 {
4311 	struct nvgre_softc *sc = arg;
4312 	union gre_addr *endpoint = port;
4313 
4314 	switch (sc->sc_tunnel.t_af) {
4315 	case AF_INET: {
4316 		struct sockaddr_in *sin = (struct sockaddr_in *)ss;
4317 
4318 		sin->sin_len = sizeof(*sin);
4319 		sin->sin_family = AF_INET;
4320 		sin->sin_addr = endpoint->in4;
4321 		break;
4322 	}
4323 #ifdef INET6
4324 	case AF_INET6: {
4325 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
4326 
4327 		sin6->sin6_len = sizeof(*sin6);
4328 		sin6->sin6_family = AF_INET6;
4329 		in6_recoverscope(sin6, &endpoint->in6);
4330 
4331 		break;
4332 	}
4333 #endif /* INET6 */
4334 	default:
4335 		unhandled_af(sc->sc_tunnel.t_af);
4336 	}
4337 }
4338