1 /* $OpenBSD: if_gre.c,v 1.179 2024/12/04 18:20:46 mvs Exp $ */
2 /* $NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3
4 /*
5 * Copyright (c) 1998 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Heiko W.Rupp <hwr@pilhuhn.de>
10 *
11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*
36 * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37 * See gre(4) for more details.
38 * Also supported: Minimal Encapsulation within IP (proto 55) per RFC 2004.
39 */
40
41 #include "bpfilter.h"
42 #include "pf.h"
43
44 #include <sys/param.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/kernel.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/timeout.h>
52 #include <sys/queue.h>
53 #include <sys/tree.h>
54 #include <sys/pool.h>
55 #include <sys/rwlock.h>
56
57 #include <crypto/siphash.h>
58
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/route.h>
64
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_ecn.h>
71
72 #ifdef INET6
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/in6_var.h>
76 #endif
77
78 #ifdef PIPEX
79 #include <net/pipex.h>
80 #endif
81
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif /* MPLS */
85
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89
90 #if NPF > 0
91 #include <net/pfvar.h>
92 #endif
93
94 #include <net/if_gre.h>
95
96 #include <netinet/ip_gre.h>
97 #include <sys/sysctl.h>
98
99 /* for nvgre bridge shizz */
100 #include <net/if_bridge.h>
101 #include <net/if_etherbridge.h>
102
103 /*
104 * Locks used to protect data:
105 * a atomic
106 */
107
108 /*
109 * packet formats
110 */
111 struct gre_header {
112 uint16_t gre_flags;
113 #define GRE_CP 0x8000 /* Checksum Present */
114 #define GRE_KP 0x2000 /* Key Present */
115 #define GRE_SP 0x1000 /* Sequence Present */
116
117 #define GRE_VERS_MASK 0x0007
118 #define GRE_VERS_0 0x0000
119 #define GRE_VERS_1 0x0001
120
121 uint16_t gre_proto;
122 } __packed __aligned(4);
123
124 struct gre_h_cksum {
125 uint16_t gre_cksum;
126 uint16_t gre_reserved1;
127 } __packed __aligned(4);
128
129 struct gre_h_key {
130 uint32_t gre_key;
131 } __packed __aligned(4);
132
133 #define GRE_EOIP 0x6400
134
135 struct gre_h_key_eoip {
136 uint16_t eoip_len; /* network order */
137 uint16_t eoip_tunnel_id; /* little endian */
138 } __packed __aligned(4);
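
/*
 * Sketch, not driver code: for EoIP the GRE key word is
 * reinterpreted as the two fields above, so e.g. tunnel id 42
 * would be written to the wire as htole16(42) while the payload
 * length stays big endian.
 */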
139
140 #define NVGRE_VSID_RES_MIN 0x000000 /* reserved for future use */
141 #define NVGRE_VSID_RES_MAX 0x000fff
142 #define NVGRE_VSID_NVE2NVE 0xffffff /* vendor specific NVE-to-NVE comms */
143
144 struct gre_h_seq {
145 uint32_t gre_seq;
146 } __packed __aligned(4);
147
148 struct gre_h_wccp {
149 uint8_t wccp_flags;
150 uint8_t service_id;
151 uint8_t alt_bucket;
152 uint8_t pri_bucket;
153 } __packed __aligned(4);
154
155 #define GRE_WCCP 0x883e
156
157 #define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
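
/*
 * Worked example of the fixed overhead: GRE_HDRLEN is
 * sizeof(struct ip) (20) + sizeof(struct gre_header) (4) = 24
 * bytes; each optional field present (checksum, key, sequence)
 * costs a further 4 bytes on the wire.
 */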
158
159 /*
160 * GRE tunnel metadata
161 */
162
163 #define GRE_KA_NONE 0
164 #define GRE_KA_DOWN 1
165 #define GRE_KA_HOLD 2
166 #define GRE_KA_UP 3
167
168 union gre_addr {
169 struct in_addr in4;
170 struct in6_addr in6;
171 };
172
173 static inline int
174 gre_ip_cmp(int, const union gre_addr *,
175 const union gre_addr *);
176
177 #define GRE_KEY_MIN 0x00000000U
178 #define GRE_KEY_MAX 0xffffffffU
179 #define GRE_KEY_SHIFT 0
180
181 #define GRE_KEY_ENTROPY_MIN 0x00000000U
182 #define GRE_KEY_ENTROPY_MAX 0x00ffffffU
183 #define GRE_KEY_ENTROPY_SHIFT 8
184
185 struct gre_tunnel {
186 uint32_t t_key_mask;
187 #define GRE_KEY_NONE htonl(0x00000000U)
188 #define GRE_KEY_ENTROPY htonl(0xffffff00U)
189 #define GRE_KEY_MASK htonl(0xffffffffU)
190 uint32_t t_key;
191
192 u_int t_rtableid;
193 union gre_addr t_src;
194 #define t_src4 t_src.in4
195 #define t_src6 t_src.in6
196 union gre_addr t_dst;
197 #define t_dst4 t_dst.in4
198 #define t_dst6 t_dst.in6
199 int t_ttl;
200 int t_txhprio;
201 int t_rxhprio;
202 int t_ecn;
203 uint16_t t_df;
204 sa_family_t t_af;
205 };
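
/*
 * Illustrative sketch only (hypothetical helper, not used by the
 * driver): with GRE_KEY_ENTROPY the wire key carries the 24-bit
 * vnetid in its top bits and 8 bits of per-flow entropy below it,
 * cf. nvgre_clone_create() and gre_encap_dst().
 */
static inline uint32_t
gre_key_wire_example(uint32_t vnetid, uint8_t entropy)
{
	return (htonl((vnetid << GRE_KEY_ENTROPY_SHIFT) | entropy));
}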
206
207 static int
208 gre_cmp_src(const struct gre_tunnel *,
209 const struct gre_tunnel *);
210 static int
211 gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
212
213 static int gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
214 static int gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
215 static int gre_del_tunnel(struct gre_tunnel *);
216
217 static int gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
218 static int gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
219 static int gre_del_vnetid(struct gre_tunnel *);
220
221 static int gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
222 static int gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);
223
224 static struct mbuf *
225 gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
226 struct mbuf *, uint16_t, uint8_t, uint8_t);
227 #define gre_encap(_t, _m, _p, _ttl, _tos) \
228 gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))
229
230 static struct mbuf *
231 gre_encap_dst_ip(const struct gre_tunnel *,
232 const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
233 #define gre_encap_ip(_t, _m, _ttl, _tos) \
234 gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))
235
236 static int
237 gre_ip_output(const struct gre_tunnel *, struct mbuf *);
238
239 static int gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
240 u_long, void *);
241
242 static uint8_t gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
243 static uint8_t gre_l3_tos(const struct gre_tunnel *,
244 const struct mbuf *, uint8_t);
245
246 /*
247 * layer 3 GRE tunnels
248 */
249
250 struct gre_softc {
251 struct gre_tunnel sc_tunnel; /* must be first */
252 TAILQ_ENTRY(gre_softc) sc_entry;
253
254 struct ifnet sc_if;
255
256 struct timeout sc_ka_send;
257 struct timeout sc_ka_hold;
258
259 unsigned int sc_ka_state;
260 unsigned int sc_ka_timeo;
261 unsigned int sc_ka_count;
262
263 unsigned int sc_ka_holdmax;
264 unsigned int sc_ka_holdcnt;
265
266 SIPHASH_KEY sc_ka_key;
267 uint32_t sc_ka_bias;
268 int sc_ka_recvtm;
269 };
270
271 TAILQ_HEAD(gre_list, gre_softc);
272
273 struct gre_keepalive {
274 uint32_t gk_uptime;
275 uint32_t gk_random;
276 uint8_t gk_digest[SIPHASH_DIGEST_LENGTH];
277 } __packed __aligned(4);
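
/*
 * Sketch of how a keepalive digest is computed (it mirrors
 * gre_keepalive_recv() below; the helper itself is hypothetical):
 * SipHash-2-4 over the uptime and random fields, keyed with the
 * per-interface sc_ka_key.
 */
static inline void
gre_keepalive_digest_example(const SIPHASH_KEY *key,
    const struct gre_keepalive *gk,
    uint8_t digest[SIPHASH_DIGEST_LENGTH])
{
	SIPHASH_CTX ctx;

	SipHash24_Init(&ctx, key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);
}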
278
279 static int gre_clone_create(struct if_clone *, int);
280 static int gre_clone_destroy(struct ifnet *);
281
282 struct if_clone gre_cloner =
283 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
284
285 /* protected by NET_LOCK */
286 struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
287
288 static int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
289 struct rtentry *);
290 static void gre_start(struct ifnet *);
291 static int gre_ioctl(struct ifnet *, u_long, caddr_t);
292
293 static int gre_up(struct gre_softc *);
294 static int gre_down(struct gre_softc *);
295 static void gre_link_state(struct ifnet *, unsigned int);
296
297 static int gre_input_key(struct mbuf **, int *, int, int, uint8_t,
298 struct gre_tunnel *);
299
300 static struct mbuf *
301 gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
302 uint8_t *, uint8_t);
303 #ifdef INET6
304 static struct mbuf *
305 gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
306 uint8_t *, uint8_t);
307 #endif
308 #ifdef MPLS
309 static struct mbuf *
310 gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
311 uint8_t *, uint8_t);
312 #endif
313 static void gre_keepalive_send(void *);
314 static void gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
315 static void gre_keepalive_hold(void *);
316
317 static struct mbuf *
318 gre_l3_encap_dst(const struct gre_tunnel *, const void *,
319 struct mbuf *m, sa_family_t);
320
321 #define gre_l3_encap(_t, _m, _af) \
322 gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
323
324 struct mgre_softc {
325 struct gre_tunnel sc_tunnel; /* must be first */
326 RBT_ENTRY(mgre_softc) sc_entry;
327
328 struct ifnet sc_if;
329 };
330
331 RBT_HEAD(mgre_tree, mgre_softc);
332
333 static inline int
334 mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);
335
336 RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
337
338 static int mgre_clone_create(struct if_clone *, int);
339 static int mgre_clone_destroy(struct ifnet *);
340
341 struct if_clone mgre_cloner =
342 IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);
343
344 static void mgre_rtrequest(struct ifnet *, int, struct rtentry *);
345 static int mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
346 struct rtentry *);
347 static void mgre_start(struct ifnet *);
348 static int mgre_ioctl(struct ifnet *, u_long, caddr_t);
349
350 static int mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
351 static int mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
352 static int mgre_up(struct mgre_softc *);
353 static int mgre_down(struct mgre_softc *);
354
355 /* protected by NET_LOCK */
356 struct mgre_tree mgre_tree = RBT_INITIALIZER();
357
358 /*
359 * Ethernet GRE tunnels
360 */
361
362 static struct mbuf *
363 gre_ether_align(struct mbuf *, int);
364
365 struct egre_softc {
366 struct gre_tunnel sc_tunnel; /* must be first */
367 RBT_ENTRY(egre_softc) sc_entry;
368
369 struct arpcom sc_ac;
370 struct ifmedia sc_media;
371 };
372
373 RBT_HEAD(egre_tree, egre_softc);
374
375 static inline int
376 egre_cmp(const struct egre_softc *, const struct egre_softc *);
377
378 RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);
379
380 static int egre_clone_create(struct if_clone *, int);
381 static int egre_clone_destroy(struct ifnet *);
382
383 static void egre_start(struct ifnet *);
384 static int egre_ioctl(struct ifnet *, u_long, caddr_t);
385 static int egre_media_change(struct ifnet *);
386 static void egre_media_status(struct ifnet *, struct ifmediareq *);
387
388 static int egre_up(struct egre_softc *);
389 static int egre_down(struct egre_softc *);
390
391 static int egre_input(const struct gre_tunnel *, struct mbuf *, int,
392 uint8_t);
393 struct if_clone egre_cloner =
394 IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);
395
396 /* protected by NET_LOCK */
397 struct egre_tree egre_tree = RBT_INITIALIZER();
398
399 /*
400 * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
401 */
402
403 struct nvgre_softc {
404 struct gre_tunnel sc_tunnel; /* must be first */
405 unsigned int sc_ifp0;
406 RBT_ENTRY(nvgre_softc) sc_uentry;
407 RBT_ENTRY(nvgre_softc) sc_mentry;
408
409 struct arpcom sc_ac;
410 struct ifmedia sc_media;
411
412 struct mbuf_queue sc_send_list;
413 struct task sc_send_task;
414
415 void *sc_inm;
416 struct task sc_ltask;
417 struct task sc_dtask;
418
419 struct etherbridge sc_eb;
420 };
421
422 RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
423 RBT_HEAD(nvgre_mcast_tree, nvgre_softc);
424
425 static inline int
426 nvgre_cmp_ucast(const struct nvgre_softc *,
427 const struct nvgre_softc *);
428 static int
429 nvgre_cmp_mcast(const struct gre_tunnel *,
430 const union gre_addr *, unsigned int,
431 const struct gre_tunnel *, const union gre_addr *,
432 unsigned int);
433 static inline int
434 nvgre_cmp_mcast_sc(const struct nvgre_softc *,
435 const struct nvgre_softc *);
436
437 RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
438 RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
439
440 static int nvgre_clone_create(struct if_clone *, int);
441 static int nvgre_clone_destroy(struct ifnet *);
442
443 static void nvgre_start(struct ifnet *);
444 static int nvgre_ioctl(struct ifnet *, u_long, caddr_t);
445
446 static int nvgre_up(struct nvgre_softc *);
447 static int nvgre_down(struct nvgre_softc *);
448 static int nvgre_set_parent(struct nvgre_softc *, const char *);
449 static void nvgre_link_change(void *);
450 static void nvgre_detach(void *);
451
452 static int nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
453 uint8_t);
454 static void nvgre_send(void *);
455
456 static int nvgre_add_addr(struct nvgre_softc *, const struct ifbareq *);
457 static int nvgre_del_addr(struct nvgre_softc *, const struct ifbareq *);
458
459 static int nvgre_eb_port_eq(void *, void *, void *);
460 static void *nvgre_eb_port_take(void *, void *);
461 static void nvgre_eb_port_rele(void *, void *);
462 static size_t nvgre_eb_port_ifname(void *, char *, size_t, void *);
463 static void nvgre_eb_port_sa(void *, struct sockaddr_storage *, void *);
464
465 static const struct etherbridge_ops nvgre_etherbridge_ops = {
466 nvgre_eb_port_eq,
467 nvgre_eb_port_take,
468 nvgre_eb_port_rele,
469 nvgre_eb_port_ifname,
470 nvgre_eb_port_sa,
471 };
472
473 struct if_clone nvgre_cloner =
474 IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);
475
476 struct pool nvgre_endpoint_pool;
477
478 /* protected by NET_LOCK */
479 struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
480 struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
481
482 /*
483 * MikroTik Ethernet over IP protocol (eoip)
484 */
485
486 struct eoip_softc {
487 struct gre_tunnel sc_tunnel; /* must be first */
488 uint16_t sc_tunnel_id;
489 RBT_ENTRY(eoip_softc) sc_entry;
490
491 struct arpcom sc_ac;
492 struct ifmedia sc_media;
493
494 struct timeout sc_ka_send;
495 struct timeout sc_ka_hold;
496
497 unsigned int sc_ka_state;
498 unsigned int sc_ka_timeo;
499 unsigned int sc_ka_count;
500
501 unsigned int sc_ka_holdmax;
502 unsigned int sc_ka_holdcnt;
503 };
504
505 RBT_HEAD(eoip_tree, eoip_softc);
506
507 static inline int
508 eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);
509
510 RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
511
512 static int eoip_clone_create(struct if_clone *, int);
513 static int eoip_clone_destroy(struct ifnet *);
514
515 static void eoip_start(struct ifnet *);
516 static int eoip_ioctl(struct ifnet *, u_long, caddr_t);
517
518 static void eoip_keepalive_send(void *);
519 static void eoip_keepalive_recv(struct eoip_softc *);
520 static void eoip_keepalive_hold(void *);
521
522 static int eoip_up(struct eoip_softc *);
523 static int eoip_down(struct eoip_softc *);
524
525 static struct mbuf *
526 eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);
527
528 static struct mbuf *
529 eoip_input(struct gre_tunnel *, struct mbuf *,
530 const struct gre_header *, uint8_t, int);
531 struct if_clone eoip_cloner =
532 IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);
533
534 /* protected by NET_LOCK */
535 struct eoip_tree eoip_tree = RBT_INITIALIZER();
536
537 /*
538 * It is not easy to calculate the right value for a GRE MTU.
539 * We leave this task to the admin and use the same default that
540 * other vendors use.
541 */
542 #define GREMTU 1476
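
/*
 * For the record: 1476 is what plain ethernet leaves after the
 * encapsulation headers, i.e. 1500 - GRE_HDRLEN (20 byte outer IP
 * header + 4 byte GRE header).
 */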
543
544 /*
545 * We can control the acceptance of GRE and MobileIP packets by
546 * altering the sysctl net.inet.gre.allow value. Zero means drop
547 * them, all else is acceptance. We can also control acceptance of
548 * WCCPv1-style GRE packets through the net.inet.gre.wccp value,
549 * but be aware it depends upon normal GRE being allowed as well.
550 */
553 int gre_allow = 0; /* [a] */
554 int gre_wccp = 0; /* [a] */
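
/*
 * Example admin usage (assuming the defaults above):
 *	sysctl net.inet.gre.allow=1	# accept GRE
 *	sysctl net.inet.gre.wccp=1	# additionally accept WCCPv1,
 *					# effective only while
 *					# net.inet.gre.allow is nonzero
 */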
555
556 void
557 greattach(int n)
558 {
559 if_clone_attach(&gre_cloner);
560 if_clone_attach(&mgre_cloner);
561 if_clone_attach(&egre_cloner);
562 if_clone_attach(&nvgre_cloner);
563 if_clone_attach(&eoip_cloner);
564 }
565
566 static int
567 gre_clone_create(struct if_clone *ifc, int unit)
568 {
569 struct gre_softc *sc;
570 struct ifnet *ifp;
571
572 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
573 snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
574 ifc->ifc_name, unit);
575
576 ifp = &sc->sc_if;
577 ifp->if_softc = sc;
578 ifp->if_type = IFT_TUNNEL;
579 ifp->if_hdrlen = GRE_HDRLEN;
580 ifp->if_mtu = GREMTU;
581 ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
582 ifp->if_xflags = IFXF_CLONED;
583 ifp->if_bpf_mtap = p2p_bpf_mtap;
584 ifp->if_input = p2p_input;
585 ifp->if_output = gre_output;
586 ifp->if_start = gre_start;
587 ifp->if_ioctl = gre_ioctl;
588 ifp->if_rtrequest = p2p_rtrequest;
589
590 sc->sc_tunnel.t_ttl = ip_defttl;
591 sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
592 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
593 sc->sc_tunnel.t_df = htons(0);
594 sc->sc_tunnel.t_ecn = ECN_ALLOWED;
595
596 timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
597 timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
598 sc->sc_ka_state = GRE_KA_NONE;
599
600 if_counters_alloc(ifp);
601 if_attach(ifp);
602 if_alloc_sadl(ifp);
603
604 #if NBPFILTER > 0
605 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
606 #endif
607
608 ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);
609
610 NET_LOCK();
611 TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
612 NET_UNLOCK();
613
614 return (0);
615 }
616
617 static int
618 gre_clone_destroy(struct ifnet *ifp)
619 {
620 struct gre_softc *sc = ifp->if_softc;
621
622 NET_LOCK();
623 if (ISSET(ifp->if_flags, IFF_RUNNING))
624 gre_down(sc);
625
626 TAILQ_REMOVE(&gre_list, sc, sc_entry);
627 NET_UNLOCK();
628
629 if_detach(ifp);
630
631 free(sc, M_DEVBUF, sizeof(*sc));
632
633 return (0);
634 }
635
636 static int
637 mgre_clone_create(struct if_clone *ifc, int unit)
638 {
639 struct mgre_softc *sc;
640 struct ifnet *ifp;
641
642 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
643 ifp = &sc->sc_if;
644
645 snprintf(ifp->if_xname, sizeof(ifp->if_xname),
646 "%s%d", ifc->ifc_name, unit);
647
648 ifp->if_softc = sc;
649 ifp->if_type = IFT_L3IPVLAN;
650 ifp->if_hdrlen = GRE_HDRLEN;
651 ifp->if_mtu = GREMTU;
652 ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
653 ifp->if_xflags = IFXF_CLONED;
654 ifp->if_bpf_mtap = p2p_bpf_mtap;
655 ifp->if_input = p2p_input;
656 ifp->if_rtrequest = mgre_rtrequest;
657 ifp->if_output = mgre_output;
658 ifp->if_start = mgre_start;
659 ifp->if_ioctl = mgre_ioctl;
660
661 sc->sc_tunnel.t_ttl = ip_defttl;
662 sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
663 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
664 sc->sc_tunnel.t_df = htons(0);
665 sc->sc_tunnel.t_ecn = ECN_ALLOWED;
666
667 if_counters_alloc(ifp);
668 if_attach(ifp);
669 if_alloc_sadl(ifp);
670
671 #if NBPFILTER > 0
672 bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
673 #endif
674
675 return (0);
676 }
677
678 static int
679 mgre_clone_destroy(struct ifnet *ifp)
680 {
681 struct mgre_softc *sc = ifp->if_softc;
682
683 NET_LOCK();
684 if (ISSET(ifp->if_flags, IFF_RUNNING))
685 mgre_down(sc);
686 NET_UNLOCK();
687
688 if_detach(ifp);
689
690 free(sc, M_DEVBUF, sizeof(*sc));
691
692 return (0);
693 }
694
695 static int
696 egre_clone_create(struct if_clone *ifc, int unit)
697 {
698 struct egre_softc *sc;
699 struct ifnet *ifp;
700
701 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
702 ifp = &sc->sc_ac.ac_if;
703
704 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
705 ifc->ifc_name, unit);
706
707 ifp->if_softc = sc;
708 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
709 ifp->if_ioctl = egre_ioctl;
710 ifp->if_start = egre_start;
711 ifp->if_xflags = IFXF_CLONED;
712 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
713 ether_fakeaddr(ifp);
714
715 sc->sc_tunnel.t_ttl = ip_defttl;
716 sc->sc_tunnel.t_txhprio = 0;
717 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
718 sc->sc_tunnel.t_df = htons(0);
719
720 ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
721 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
722 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
723
724 if_counters_alloc(ifp);
725 if_attach(ifp);
726 ether_ifattach(ifp);
727
728 return (0);
729 }
730
731 static int
732 egre_clone_destroy(struct ifnet *ifp)
733 {
734 struct egre_softc *sc = ifp->if_softc;
735
736 NET_LOCK();
737 if (ISSET(ifp->if_flags, IFF_RUNNING))
738 egre_down(sc);
739 NET_UNLOCK();
740
741 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
742 ether_ifdetach(ifp);
743 if_detach(ifp);
744
745 free(sc, M_DEVBUF, sizeof(*sc));
746
747 return (0);
748 }
749
750 static int
751 nvgre_clone_create(struct if_clone *ifc, int unit)
752 {
753 struct nvgre_softc *sc;
754 struct ifnet *ifp;
755 struct gre_tunnel *tunnel;
756 int error;
757
758 if (nvgre_endpoint_pool.pr_size == 0) {
759 pool_init(&nvgre_endpoint_pool, sizeof(union gre_addr),
760 0, IPL_SOFTNET, 0, "nvgreep", NULL);
761 }
762
763 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
764 ifp = &sc->sc_ac.ac_if;
765
766 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
767 ifc->ifc_name, unit);
768
769 error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
770 &nvgre_etherbridge_ops, sc);
771 if (error != 0) {
772 free(sc, M_DEVBUF, sizeof(*sc));
773 return (error);
774 }
775
776 ifp->if_softc = sc;
777 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
778 ifp->if_ioctl = nvgre_ioctl;
779 ifp->if_start = nvgre_start;
780 ifp->if_xflags = IFXF_CLONED;
781 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
782 ether_fakeaddr(ifp);
783
784 tunnel = &sc->sc_tunnel;
785 tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
786 tunnel->t_txhprio = 0;
787 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
788 tunnel->t_df = htons(IP_DF);
789 tunnel->t_key_mask = GRE_KEY_ENTROPY;
790 tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
791 GRE_KEY_ENTROPY_SHIFT);
792
793 mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
794 task_set(&sc->sc_send_task, nvgre_send, sc);
795 task_set(&sc->sc_ltask, nvgre_link_change, sc);
796 task_set(&sc->sc_dtask, nvgre_detach, sc);
797
798 ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
799 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
800 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
801
802 if_counters_alloc(ifp);
803 if_attach(ifp);
804 ether_ifattach(ifp);
805
806 return (0);
807 }
808
809 static int
810 nvgre_clone_destroy(struct ifnet *ifp)
811 {
812 struct nvgre_softc *sc = ifp->if_softc;
813
814 NET_LOCK();
815 if (ISSET(ifp->if_flags, IFF_RUNNING))
816 nvgre_down(sc);
817 NET_UNLOCK();
818
819 etherbridge_destroy(&sc->sc_eb);
820
821 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
822 ether_ifdetach(ifp);
823 if_detach(ifp);
824
825 free(sc, M_DEVBUF, sizeof(*sc));
826
827 return (0);
828 }
829
830 static int
831 eoip_clone_create(struct if_clone *ifc, int unit)
832 {
833 struct eoip_softc *sc;
834 struct ifnet *ifp;
835
836 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
837 ifp = &sc->sc_ac.ac_if;
838
839 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
840 ifc->ifc_name, unit);
841
842 ifp->if_softc = sc;
843 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
844 ifp->if_ioctl = eoip_ioctl;
845 ifp->if_start = eoip_start;
846 ifp->if_xflags = IFXF_CLONED;
847 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
848 ether_fakeaddr(ifp);
849
850 sc->sc_tunnel.t_ttl = ip_defttl;
851 sc->sc_tunnel.t_txhprio = 0;
852 sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
853 sc->sc_tunnel.t_df = htons(0);
854
855 sc->sc_ka_timeo = 10;
856 sc->sc_ka_count = 10;
857
858 timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
859 timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
860 sc->sc_ka_state = GRE_KA_DOWN;
861
862 ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
863 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
864 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
865
866 if_counters_alloc(ifp);
867 if_attach(ifp);
868 ether_ifattach(ifp);
869
870 return (0);
871 }
872
873 static int
874 eoip_clone_destroy(struct ifnet *ifp)
875 {
876 struct eoip_softc *sc = ifp->if_softc;
877
878 NET_LOCK();
879 if (ISSET(ifp->if_flags, IFF_RUNNING))
880 eoip_down(sc);
881 NET_UNLOCK();
882
883 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
884 ether_ifdetach(ifp);
885 if_detach(ifp);
886
887 free(sc, M_DEVBUF, sizeof(*sc));
888
889 return (0);
890 }
891
892 int
893 gre_input(struct mbuf **mp, int *offp, int type, int af)
894 {
895 struct mbuf *m = *mp;
896 struct gre_tunnel key;
897 struct ip *ip;
898
899 ip = mtod(m, struct ip *);
900
901 /* XXX check if ip_src is sane for nvgre? */
902
903 key.t_af = AF_INET;
904 key.t_src4 = ip->ip_dst;
905 key.t_dst4 = ip->ip_src;
906
907 if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
908 return (rip_input(mp, offp, type, af));
909
910 return (IPPROTO_DONE);
911 }
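
/*
 * Note that the lookup key above swaps the outer addresses (t_src
 * is taken from ip_dst and vice versa): tunnel softcs store their
 * addresses from the local side's point of view, so an inbound
 * packet's destination is our source.
 */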
912
913 #ifdef INET6
914 int
915 gre_input6(struct mbuf **mp, int *offp, int type, int af)
916 {
917 struct mbuf *m = *mp;
918 struct gre_tunnel key;
919 struct ip6_hdr *ip6;
920 uint32_t flow;
921
922 ip6 = mtod(m, struct ip6_hdr *);
923
924 /* XXX check if ip6_src is sane for nvgre? */
925
926 key.t_af = AF_INET6;
927 key.t_src6 = ip6->ip6_dst;
928 key.t_dst6 = ip6->ip6_src;
929
930 flow = bemtoh32(&ip6->ip6_flow);
931
932 if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
933 return (rip6_input(mp, offp, type, af));
934
935 return (IPPROTO_DONE);
936 }
937 #endif /* INET6 */
938
939 static inline struct ifnet *
940 gre_find(const struct gre_tunnel *key)
941 {
942 struct gre_softc *sc;
943
944 TAILQ_FOREACH(sc, &gre_list, sc_entry) {
945 if (gre_cmp(key, &sc->sc_tunnel) != 0)
946 continue;
947
948 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
949 continue;
950
951 return (&sc->sc_if);
952 }
953
954 return (NULL);
955 }
956
957 static inline struct ifnet *
958 mgre_find(const struct gre_tunnel *key)
959 {
960 struct mgre_softc *sc;
961
962 NET_ASSERT_LOCKED();
963 sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
964 if (sc != NULL)
965 return (&sc->sc_if);
966
967 return (NULL);
968 }
969
970 static struct mbuf *
971 gre_input_1(struct gre_tunnel *key, struct mbuf *m,
972 const struct gre_header *gh, uint8_t otos, int iphlen)
973 {
974 switch (gh->gre_proto) {
975 case htons(ETHERTYPE_PPP):
976 #ifdef PIPEX
977 if (pipex_enable) {
978 struct pipex_session *session;
979
980 session = pipex_pptp_lookup_session(m);
981 if (session != NULL) {
982 struct mbuf *m0;
983
984 m0 = pipex_pptp_input(m, session);
985 pipex_rele_session(session);
986
987 if (m0 == NULL)
988 return (NULL);
989 }
990 }
991 #endif
992 break;
993 case htons(GRE_EOIP):
994 return (eoip_input(key, m, gh, otos, iphlen));
995 break;
996 }
997
998 return (m);
999 }
1000
1001 static int
1002 gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
1003 struct gre_tunnel *key)
1004 {
1005 struct mbuf *m = *mp;
1006 int iphlen = *offp, hlen, rxprio;
1007 struct ifnet *ifp;
1008 const struct gre_tunnel *tunnel;
1009 caddr_t buf;
1010 struct gre_header *gh;
1011 struct gre_h_key *gkh;
1012 struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
1013 uint8_t *, uint8_t);
1014 int mcast = 0;
1015 uint8_t itos;
1016
1017 if (!atomic_load_int(&gre_allow))
1018 goto decline;
1019
1020 key->t_rtableid = m->m_pkthdr.ph_rtableid;
1021
1022 hlen = iphlen + sizeof(*gh);
1023 if (m->m_pkthdr.len < hlen)
1024 goto decline;
1025
1026 m = m_pullup(m, hlen);
1027 if (m == NULL)
1028 return (IPPROTO_DONE);
1029
1030 buf = mtod(m, caddr_t);
1031 gh = (struct gre_header *)(buf + iphlen);
1032
1033 /* check the version */
1034 switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
1035 case htons(GRE_VERS_0):
1036 break;
1037
1038 case htons(GRE_VERS_1):
1039 m = gre_input_1(key, m, gh, otos, iphlen);
1040 if (m == NULL)
1041 return (IPPROTO_DONE);
1042 /* FALLTHROUGH */
1043 default:
1044 goto decline;
1045 }
1046
1047 /* the only optional bit in the header is K flag */
1048 if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
1049 goto decline;
1050
1051 if (gh->gre_flags & htons(GRE_KP)) {
1052 hlen += sizeof(*gkh);
1053 if (m->m_pkthdr.len < hlen)
1054 goto decline;
1055
1056 m = m_pullup(m, hlen);
1057 if (m == NULL)
1058 return (IPPROTO_DONE);
1059
1060 buf = mtod(m, caddr_t);
1061 gh = (struct gre_header *)(buf + iphlen);
1062 gkh = (struct gre_h_key *)(gh + 1);
1063
1064 key->t_key_mask = GRE_KEY_MASK;
1065 key->t_key = gkh->gre_key;
1066 } else
1067 key->t_key_mask = GRE_KEY_NONE;
1068
1069 if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
1070 if (egre_input(key, m, hlen, otos) == -1 &&
1071 nvgre_input(key, m, hlen, otos) == -1)
1072 goto decline;
1073
1074 return (IPPROTO_DONE);
1075 }
1076
1077 ifp = gre_find(key);
1078 if (ifp == NULL) {
1079 ifp = mgre_find(key);
1080 if (ifp == NULL)
1081 goto decline;
1082 }
1083
1084 switch (gh->gre_proto) {
1085 case htons(GRE_WCCP): {
1086 struct mbuf *n;
1087 int off;
1088
1089 /* WCCP/GRE:
1090 * So far as I can see (and test) it seems that Cisco's WCCP
1091 * GRE tunnel is precisely an IP-in-GRE tunnel that differs
1092 * only in its protocol number. At least, it works for me.
1093 *
1094 * The Internet Drafts can be found if you look for
1095 * the following:
1096 * draft-forster-wrec-wccp-v1-00.txt
1097 * draft-wilson-wrec-wccp-v2-01.txt
1098 */
1099
1100 if (!atomic_load_int(&gre_wccp) &&
1101 !ISSET(ifp->if_flags, IFF_LINK0))
1102 goto decline;
1103
1104 /*
1105 * If the first nibble of the payload does not look like
1106 * IPv4, assume it is WCCP v2.
1107 */
1108 n = m_getptr(m, hlen, &off);
1109 if (n == NULL)
1110 goto decline;
1111 if (n->m_data[off] >> 4 != IPVERSION)
1112 hlen += 4; /* four-octet Redirect header */
1113
1114 /* FALLTHROUGH */
1115 }
1116 case htons(ETHERTYPE_IP):
1117 m->m_pkthdr.ph_family = AF_INET;
1118 patch = gre_ipv4_patch;
1119 break;
1120 #ifdef INET6
1121 case htons(ETHERTYPE_IPV6):
1122 m->m_pkthdr.ph_family = AF_INET6;
1123 patch = gre_ipv6_patch;
1124 break;
1125 #endif
1126 #ifdef MPLS
1127 case htons(ETHERTYPE_MPLS_MCAST):
1128 mcast = M_MCAST|M_BCAST;
1129 /* fallthrough */
1130 case htons(ETHERTYPE_MPLS):
1131 m->m_pkthdr.ph_family = AF_MPLS;
1132 patch = gre_mpls_patch;
1133 break;
1134 #endif
1135 case htons(0):
1136 if (ifp->if_type != IFT_TUNNEL) {
1137 /* keepalives don't make sense for mgre */
1138 goto decline;
1139 }
1140
1141 m_adj(m, hlen);
1142 gre_keepalive_recv(ifp, m);
1143 return (IPPROTO_DONE);
1144
1145 default:
1146 goto decline;
1147 }
1148
1149 /* it's ours now */
1150
1151 m_adj(m, hlen);
1152
1153 tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */
1154
1155 m = (*patch)(tunnel, m, &itos, otos);
1156 if (m == NULL)
1157 return (IPPROTO_DONE);
1158
1159 if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
1160 SET(m->m_pkthdr.csum_flags, M_FLOWID);
1161 m->m_pkthdr.ph_flowid =
1162 bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1163 }
1164
1165 rxprio = tunnel->t_rxhprio;
1166 switch (rxprio) {
1167 case IF_HDRPRIO_PACKET:
1168 /* nop */
1169 break;
1170 case IF_HDRPRIO_OUTER:
1171 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
1172 break;
1173 case IF_HDRPRIO_PAYLOAD:
1174 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
1175 break;
1176 default:
1177 m->m_pkthdr.pf.prio = rxprio;
1178 break;
1179 }
1180
1181 m->m_flags &= ~(M_MCAST|M_BCAST);
1182 m->m_flags |= mcast;
1183
1184 if_vinput(ifp, m);
1185 return (IPPROTO_DONE);
1186 decline:
1187 *mp = m;
1188 return (-1);
1189 }
1190
1191 static struct mbuf *
1192 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1193 uint8_t *itosp, uint8_t otos)
1194 {
1195 struct ip *ip;
1196 uint8_t itos;
1197
1198 m = m_pullup(m, sizeof(*ip));
1199 if (m == NULL)
1200 return (NULL);
1201
1202 ip = mtod(m, struct ip *);
1203
1204 itos = ip->ip_tos;
1205 if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1206 m_freem(m);
1207 return (NULL);
1208 }
1209 if (itos != ip->ip_tos)
1210 ip_tos_patch(ip, itos);
1211
1212 *itosp = itos;
1213
1214 return (m);
1215 }
1216
1217 #ifdef INET6
1218 static struct mbuf *
1219 gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1220 uint8_t *itosp, uint8_t otos)
1221 {
1222 struct ip6_hdr *ip6;
1223 uint32_t flow;
1224 uint8_t itos;
1225
1226 m = m_pullup(m, sizeof(*ip6));
1227 if (m == NULL)
1228 return (NULL);
1229
1230 ip6 = mtod(m, struct ip6_hdr *);
1231
1232 flow = bemtoh32(&ip6->ip6_flow);
1233 itos = flow >> 20;
1234 if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1235 m_freem(m);
1236 return (NULL);
1237 }
1238
1239 CLR(flow, 0xff << 20);
1240 SET(flow, itos << 20);
1241 htobem32(&ip6->ip6_flow, flow);
1242
1243 *itosp = itos;
1244
1245 return (m);
1246 }
1247 #endif
1248
1249 #ifdef MPLS
1250 static struct mbuf *
1251 gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1252 uint8_t *itosp, uint8_t otos)
1253 {
1254 uint8_t itos;
1255 uint32_t shim;
1256
1257 m = m_pullup(m, sizeof(shim));
1258 if (m == NULL)
1259 return (NULL);
1260
1261 shim = *mtod(m, uint32_t *);
1262 itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;
1263
1264 if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1265 m_freem(m);
1266 return (NULL);
1267 }
1268
1269 *itosp = itos;
1270
1271 return (m);
1272 }
1273 #endif
1274
1275 #define gre_l2_prio(_t, _m, _otos) do { \
1276 int rxprio = (_t)->t_rxhprio; \
1277 switch (rxprio) { \
1278 case IF_HDRPRIO_PACKET: \
1279 /* nop */ \
1280 break; \
1281 case IF_HDRPRIO_OUTER: \
1282 (_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos)); \
1283 break; \
1284 default: \
1285 (_m)->m_pkthdr.pf.prio = rxprio; \
1286 break; \
1287 } \
1288 } while (0)
1289
1290 static int
1291 egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
1292 {
1293 struct egre_softc *sc;
1294
1295 NET_ASSERT_LOCKED();
1296 sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
1297 if (sc == NULL)
1298 return (-1);
1299
1300 /* it's ours now */
1301 m = gre_ether_align(m, hlen);
1302 if (m == NULL)
1303 return (0);
1304
1305 if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
1306 SET(m->m_pkthdr.csum_flags, M_FLOWID);
1307 m->m_pkthdr.ph_flowid =
1308 bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1309 }
1310
1311 m->m_flags &= ~(M_MCAST|M_BCAST);
1312
1313 gre_l2_prio(&sc->sc_tunnel, m, otos);
1314
1315 if_vinput(&sc->sc_ac.ac_if, m);
1316
1317 return (0);
1318 }
1319
1320 static inline struct nvgre_softc *
1321 nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
1322 {
1323 struct nvgre_softc *sc;
1324 int rv;
1325
1326 /*
1327 * building an nvgre_softc to use with RBT_FIND is expensive, and
1328 * would need to swap the src and dst addresses in the key. so do the
1329 * find by hand.
1330 */
1331
1332 NET_ASSERT_LOCKED();
1333 sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
1334 while (sc != NULL) {
1335 rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
1336 &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
1337 if (rv == 0)
1338 return (sc);
1339 if (rv < 0)
1340 sc = RBT_LEFT(nvgre_mcast_tree, sc);
1341 else
1342 sc = RBT_RIGHT(nvgre_mcast_tree, sc);
1343 }
1344
1345 return (NULL);
1346 }
1347
1348 static inline struct nvgre_softc *
1349 nvgre_ucast_find(const struct gre_tunnel *key)
1350 {
1351 NET_ASSERT_LOCKED();
1352 return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
1353 (struct nvgre_softc *)key));
1354 }
1355
1356 static int
1357 nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
1358 uint8_t otos)
1359 {
1360 struct nvgre_softc *sc;
1361 struct ether_header *eh;
1362
1363 if (ISSET(m->m_flags, M_MCAST|M_BCAST))
1364 sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
1365 else
1366 sc = nvgre_ucast_find(key);
1367
1368 if (sc == NULL)
1369 return (-1);
1370
1371 /* it's ours now */
1372 m = gre_ether_align(m, hlen);
1373 if (m == NULL)
1374 return (0);
1375
1376 eh = mtod(m, struct ether_header *);
1377 etherbridge_map_ea(&sc->sc_eb, (void *)&key->t_dst,
1378 (struct ether_addr *)eh->ether_shost);
1379
1380 SET(m->m_pkthdr.csum_flags, M_FLOWID);
1381 m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
1382
1383 m->m_flags &= ~(M_MCAST|M_BCAST);
1384
1385 gre_l2_prio(&sc->sc_tunnel, m, otos);
1386
1387 if_vinput(&sc->sc_ac.ac_if, m);
1388
1389 return (0);
1390 }
1391
1392 static struct mbuf *
1393 gre_ether_align(struct mbuf *m, int hlen)
1394 {
1395 struct mbuf *n;
1396 int off;
1397
1398 m_adj(m, hlen);
1399
1400 if (m->m_pkthdr.len < sizeof(struct ether_header)) {
1401 m_freem(m);
1402 return (NULL);
1403 }
1404
1405 m = m_pullup(m, sizeof(struct ether_header));
1406 if (m == NULL)
1407 return (NULL);
1408
1409 n = m_getptr(m, sizeof(struct ether_header), &off);
1410 if (n == NULL) {
1411 m_freem(m);
1412 return (NULL);
1413 }
1414
1415 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
1416 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
1417 m_freem(m);
1418 if (n == NULL)
1419 return (NULL);
1420 m = n;
1421 }
1422
1423 return (m);
1424 }
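
/*
 * The ETHER_ALIGN (2 byte) offset used above shifts the 14-byte
 * ethernet header so that the payload's IP header lands on a
 * 32-bit boundary, which strict-alignment architectures require.
 */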
1425
1426 static void
1427 gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
1428 {
1429 struct gre_softc *sc = ifp->if_softc;
1430 struct gre_keepalive *gk;
1431 SIPHASH_CTX ctx;
1432 uint8_t digest[SIPHASH_DIGEST_LENGTH];
1433 int uptime, delta;
1434 int tick = ticks;
1435
1436 if (sc->sc_ka_state == GRE_KA_NONE ||
1437 sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
1438 goto drop;
1439
1440 if (m->m_pkthdr.len < sizeof(*gk))
1441 goto drop;
1442 m = m_pullup(m, sizeof(*gk));
1443 if (m == NULL)
1444 return;
1445
1446 gk = mtod(m, struct gre_keepalive *);
1447 uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
1448 delta = tick - uptime;
1449 if (delta < 0)
1450 goto drop;
1451 if (delta > hz * 10) /* magic */
1452 goto drop;
1453
1454 /* avoid too much siphash work */
1455 delta = tick - sc->sc_ka_recvtm;
1456 if (delta > 0 && delta < (hz / 10))
1457 goto drop;
1458
1459 SipHash24_Init(&ctx, &sc->sc_ka_key);
1460 SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
1461 SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
1462 SipHash24_Final(digest, &ctx);
1463
1464 if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
1465 goto drop;
1466
1467 sc->sc_ka_recvtm = tick;
1468
1469 switch (sc->sc_ka_state) {
1470 case GRE_KA_DOWN:
1471 sc->sc_ka_state = GRE_KA_HOLD;
1472 sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
1473 sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
1474 16 * sc->sc_ka_count);
1475 break;
1476 case GRE_KA_HOLD:
1477 if (--sc->sc_ka_holdcnt > 0)
1478 break;
1479
1480 sc->sc_ka_state = GRE_KA_UP;
1481 gre_link_state(&sc->sc_if, sc->sc_ka_state);
1482 break;
1483
1484 case GRE_KA_UP:
1485 sc->sc_ka_holdmax--;
1486 sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
1487 break;
1488 }
1489
1490 timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
1491
1492 drop:
1493 m_freem(m);
1494 }
1495
1496 static int
1497 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1498 struct rtentry *rt)
1499 {
1500 struct m_tag *mtag;
1501 int error = 0;
1502
1503 if (!atomic_load_int(&gre_allow)) {
1504 error = EACCES;
1505 goto drop;
1506 }
1507
1508 if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
1509 error = ENETDOWN;
1510 goto drop;
1511 }
1512
1513 switch (dst->sa_family) {
1514 case AF_INET:
1515 #ifdef INET6
1516 case AF_INET6:
1517 #endif
1518 #ifdef MPLS
1519 case AF_MPLS:
1520 #endif
1521 break;
1522 default:
1523 error = EAFNOSUPPORT;
1524 goto drop;
1525 }
1526
1527 /* Try to limit infinite recursion through misconfiguration. */
1528 for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
1529 mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
1530 if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
1531 sizeof(ifp->if_index)) == 0) {
1532 m_freem(m);
1533 error = EIO;
1534 goto end;
1535 }
1536 }
1537
1538 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
1539 if (mtag == NULL) {
1540 m_freem(m);
1541 error = ENOBUFS;
1542 goto end;
1543 }
1544 memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
1545 m_tag_prepend(m, mtag);
1546
1547 m->m_pkthdr.ph_family = dst->sa_family;
1548
1549 error = if_enqueue(ifp, m);
1550 end:
1551 if (error)
1552 ifp->if_oerrors++;
1553 return (error);
1554
1555 drop:
1556 m_freem(m);
1557 return (error);
1558 }
1559
1560 void
1561 gre_start(struct ifnet *ifp)
1562 {
1563 struct gre_softc *sc = ifp->if_softc;
1564 struct mbuf *m;
1565 int af;
1566 #if NBPFILTER > 0
1567 caddr_t if_bpf;
1568 #endif
1569
1570 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1571 af = m->m_pkthdr.ph_family;
1572
1573 #if NBPFILTER > 0
1574 if_bpf = ifp->if_bpf;
1575 if (if_bpf)
1576 bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
1577 #endif
1578
1579 m = gre_l3_encap(&sc->sc_tunnel, m, af);
1580 if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1581 ifp->if_oerrors++;
1582 continue;
1583 }
1584 }
1585 }
1586
1587 void
1588 mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1589 {
1590 struct ifnet *lo0ifp;
1591 struct ifaddr *ifa, *lo0ifa;
1592
1593 switch (req) {
1594 case RTM_ADD:
1595 if (!ISSET(rt->rt_flags, RTF_LOCAL))
1596 break;
1597
1598 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1599 if (memcmp(rt_key(rt), ifa->ifa_addr,
1600 rt_key(rt)->sa_len) == 0)
1601 break;
1602 }
1603
1604 if (ifa == NULL)
1605 break;
1606
1607 KASSERT(ifa == rt->rt_ifa);
1608
1609 lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1610 KASSERT(lo0ifp != NULL);
1611 TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1612 if (lo0ifa->ifa_addr->sa_family ==
1613 ifa->ifa_addr->sa_family)
1614 break;
1615 }
1616 if_put(lo0ifp);
1617
1618 if (lo0ifa == NULL)
1619 break;
1620
1621 rt->rt_flags &= ~RTF_LLINFO;
1622 break;
1623 case RTM_DELETE:
1624 case RTM_RESOLVE:
1625 default:
1626 break;
1627 }
1628 }
1629
1630 static int
1631 mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
1632 struct rtentry *rt0)
1633 {
1634 struct mgre_softc *sc = ifp->if_softc;
1635 struct sockaddr *gate;
1636 struct rtentry *rt;
1637 struct m_tag *mtag;
1638 int error = 0;
1639 sa_family_t af;
1640 const void *addr;
1641
1642 if (!atomic_load_int(&gre_allow)) {
1643 error = EACCES;
1644 goto drop;
1645 }
1646
1647 if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
1648 error = ENETDOWN;
1649 goto drop;
1650 }
1651
1652 switch (dest->sa_family) {
1653 case AF_INET:
1654 #ifdef INET6
1655 case AF_INET6:
1656 #endif
1657 #ifdef MPLS
1658 case AF_MPLS:
1659 #endif
1660 break;
1661 default:
1662 error = EAFNOSUPPORT;
1663 goto drop;
1664 }
1665
1666 if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
1667 error = ENETUNREACH;
1668 goto drop;
1669 }
1670
1671 rt = rt_getll(rt0);
1672
1673 /* check rt_expire? */
1674 if (ISSET(rt->rt_flags, RTF_REJECT)) {
1675 error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
1676 goto drop;
1677 }
1678 if (!ISSET(rt->rt_flags, RTF_HOST)) {
1679 error = EHOSTUNREACH;
1680 goto drop;
1681 }
1682 if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1683 error = EINVAL;
1684 goto drop;
1685 }
1686
1687 gate = rt->rt_gateway;
1688 af = gate->sa_family;
1689 if (af != sc->sc_tunnel.t_af) {
1690 error = EAGAIN;
1691 goto drop;
1692 }
1693
1694 /* Try to limit infinite recursion through misconfiguration. */
1695 for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
1696 mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
1697 if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
1698 sizeof(ifp->if_index)) == 0) {
1699 error = EIO;
1700 goto drop;
1701 }
1702 }
1703
1704 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
1705 if (mtag == NULL) {
1706 error = ENOBUFS;
1707 goto drop;
1708 }
1709 memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
1710 m_tag_prepend(m, mtag);
1711
1712 switch (af) {
1713 case AF_INET: {
1714 struct sockaddr_in *sin = (struct sockaddr_in *)gate;
1715 addr = &sin->sin_addr;
1716 break;
1717 }
1718 #ifdef INET6
1719 case AF_INET6: {
1720 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
1721 addr = &sin6->sin6_addr;
1722 break;
1723 }
1724 #endif
1725 default:
1726 unhandled_af(af);
1727 /* NOTREACHED */
1728 }
1729
1730 m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
1731 if (m == NULL) {
1732 ifp->if_oerrors++;
1733 return (ENOBUFS);
1734 }
1735
1736 m->m_pkthdr.ph_family = dest->sa_family;
1737
1738 error = if_enqueue(ifp, m);
1739 if (error)
1740 ifp->if_oerrors++;
1741 return (error);
1742
1743 drop:
1744 m_freem(m);
1745 return (error);
1746 }
1747
1748 static void
1749 mgre_start(struct ifnet *ifp)
1750 {
1751 struct mgre_softc *sc = ifp->if_softc;
1752 struct mbuf *m;
1753 #if NBPFILTER > 0
1754 caddr_t if_bpf;
1755 #endif
1756
1757 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
1758 #if NBPFILTER > 0
1759 if_bpf = ifp->if_bpf;
1760 if (if_bpf) {
1761 struct m_hdr mh;
1762 struct mbuf *n;
1763 int off;
1764
1765 n = m_getptr(m, ifp->if_hdrlen, &off);
1766 KASSERT(n != NULL);
1767
1768 mh.mh_flags = 0;
1769 mh.mh_next = n->m_next;
1770 mh.mh_len = n->m_len - off;
1771 mh.mh_data = n->m_data + off;
1772
1773 bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
1774 (struct mbuf *)&mh, BPF_DIRECTION_OUT);
1775 }
1776 #endif
1777
1778 if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
1779 ifp->if_oerrors++;
1780 continue;
1781 }
1782 }
1783 }
1784
1785 static void
1786 egre_start(struct ifnet *ifp)
1787 {
1788 struct egre_softc *sc = ifp->if_softc;
1789 struct mbuf *m0, *m;
1790 #if NBPFILTER > 0
1791 caddr_t if_bpf;
1792 #endif
1793
1794 if (!atomic_load_int(&gre_allow)) {
1795 ifq_purge(&ifp->if_snd);
1796 return;
1797 }
1798
1799 while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
1800 #if NBPFILTER > 0
1801 if_bpf = ifp->if_bpf;
1802 if (if_bpf)
1803 bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
1804 #endif
1805
1806 /* force prepend mbuf because of alignment problems */
1807 m = m_get(M_DONTWAIT, m0->m_type);
1808 if (m == NULL) {
1809 m_freem(m0);
1810 continue;
1811 }
1812
1813 M_MOVE_PKTHDR(m, m0);
1814 m->m_next = m0;
1815
1816 m_align(m, 0);
1817 m->m_len = 0;
1818
1819 m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
1820 sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
1821 if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
1822 ifp->if_oerrors++;
1823 continue;
1824 }
1825 }
1826 }
1827
1828 static struct mbuf *
1829 gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
1830 struct mbuf *m, sa_family_t af)
1831 {
1832 uint16_t proto;
1833 uint8_t ttl, itos, otos;
1834 int tttl = tunnel->t_ttl;
1835 int ttloff;
1836
1837 switch (af) {
1838 case AF_INET: {
1839 struct ip *ip;
1840
1841 m = m_pullup(m, sizeof(*ip));
1842 if (m == NULL)
1843 return (NULL);
1844
1845 ip = mtod(m, struct ip *);
1846 itos = ip->ip_tos;
1847
1848 ttloff = offsetof(struct ip, ip_ttl);
1849 proto = htons(ETHERTYPE_IP);
1850 break;
1851 }
1852 #ifdef INET6
1853 case AF_INET6: {
1854 struct ip6_hdr *ip6;
1855
1856 m = m_pullup(m, sizeof(*ip6));
1857 if (m == NULL)
1858 return (NULL);
1859
1860 ip6 = mtod(m, struct ip6_hdr *);
1861 itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;
1862
1863 ttloff = offsetof(struct ip6_hdr, ip6_hlim);
1864 proto = htons(ETHERTYPE_IPV6);
1865 break;
1866 }
1867 #endif
1868 #ifdef MPLS
1869 case AF_MPLS: {
1870 uint32_t shim;
1871
1872 m = m_pullup(m, sizeof(shim));
1873 if (m == NULL)
1874 return (NULL);
1875
1876 shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
1877 itos = (shim >> MPLS_EXP_OFFSET) << 5;
1878
1879 ttloff = 3;
1880
1881 if (m->m_flags & (M_BCAST | M_MCAST))
1882 proto = htons(ETHERTYPE_MPLS_MCAST);
1883 else
1884 proto = htons(ETHERTYPE_MPLS);
1885 break;
1886 }
1887 #endif
1888 default:
1889 unhandled_af(af);
1890 }
1891
1892 if (tttl == -1) {
1893 KASSERT(m->m_len > ttloff); /* m_pullup has happened */
1894
1895 ttl = *(m->m_data + ttloff);
1896 } else
1897 ttl = tttl;
1898
1899 itos = gre_l3_tos(tunnel, m, itos);
1900 ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);
1901
1902 return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
1903 }
1904
1905 static struct mbuf *
1906 gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
1907 struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
1908 {
1909 struct gre_header *gh;
1910 struct gre_h_key *gkh;
1911 int hlen;
1912
1913 hlen = sizeof(*gh);
1914 if (tunnel->t_key_mask != GRE_KEY_NONE)
1915 hlen += sizeof(*gkh);
1916
1917 m = m_prepend(m, hlen, M_DONTWAIT);
1918 if (m == NULL)
1919 return (NULL);
1920
1921 gh = mtod(m, struct gre_header *);
1922 gh->gre_flags = GRE_VERS_0;
1923 gh->gre_proto = proto;
1924 if (tunnel->t_key_mask != GRE_KEY_NONE) {
1925 gh->gre_flags |= htons(GRE_KP);
1926
1927 gkh = (struct gre_h_key *)(gh + 1);
1928 gkh->gre_key = tunnel->t_key;
1929
1930 if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
1931 ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
1932 gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
1933 m->m_pkthdr.ph_flowid);
1934 }
1935 }
1936
1937 return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
1938 }
1939
1940 static struct mbuf *
1941 gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
1942 struct mbuf *m, uint8_t ttl, uint8_t tos)
1943 {
1944 switch (tunnel->t_af) {
1945 case AF_UNSPEC:
1946 /* packets may arrive before tunnel is set up */
1947 m_freem(m);
1948 return (NULL);
1949 case AF_INET: {
1950 struct ip *ip;
1951
1952 m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
1953 if (m == NULL)
1954 return (NULL);
1955
1956 ip = mtod(m, struct ip *);
1957 ip->ip_v = IPVERSION;
1958 ip->ip_hl = sizeof(*ip) >> 2;
1959 ip->ip_off = tunnel->t_df;
1960 ip->ip_tos = tos;
1961 ip->ip_len = htons(m->m_pkthdr.len);
1962 ip->ip_ttl = ttl;
1963 ip->ip_p = IPPROTO_GRE;
1964 ip->ip_src = tunnel->t_src4;
1965 ip->ip_dst = dst->in4;
1966 break;
1967 }
1968 #ifdef INET6
1969 case AF_INET6: {
1970 struct ip6_hdr *ip6;
1971 int len = m->m_pkthdr.len;
1972
1973 m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
1974 if (m == NULL)
1975 return (NULL);
1976
1977 ip6 = mtod(m, struct ip6_hdr *);
1978 ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
1979 htonl(m->m_pkthdr.ph_flowid) : 0;
1980 ip6->ip6_vfc |= IPV6_VERSION;
1981 ip6->ip6_flow |= htonl((uint32_t)tos << 20);
1982 ip6->ip6_plen = htons(len);
1983 ip6->ip6_nxt = IPPROTO_GRE;
1984 ip6->ip6_hlim = ttl;
1985 ip6->ip6_src = tunnel->t_src6;
1986 ip6->ip6_dst = dst->in6;
1987
1988 if (tunnel->t_df)
1989 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
1990
1991 break;
1992 }
1993 #endif /* INET6 */
1994 default:
1995 unhandled_af(tunnel->t_af);
1996 }
1997
1998 return (m);
1999 }
2000
2001 static int
2002 gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
2003 {
2004 m->m_flags &= ~(M_BCAST|M_MCAST);
2005 m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
2006
2007 #if NPF > 0
2008 pf_pkt_addr_changed(m);
2009 #endif
2010
2011 switch (tunnel->t_af) {
2012 case AF_INET:
2013 ip_send(m);
2014 break;
2015 #ifdef INET6
2016 case AF_INET6:
2017 ip6_send(m);
2018 break;
2019 #endif
2020 default:
2021 unhandled_af(tunnel->t_af);
2022 }
2023
2024 return (0);
2025 }
2026
2027 static int
2028 gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
2029 u_long cmd, void *data)
2030 {
2031 struct ifreq *ifr = (struct ifreq *)data;
2032 int error = 0;
2033
2034 switch(cmd) {
2035 case SIOCSIFMTU:
2036 if (ifr->ifr_mtu < 576) {
2037 error = EINVAL;
2038 break;
2039 }
2040 ifp->if_mtu = ifr->ifr_mtu;
2041 break;
2042 case SIOCADDMULTI:
2043 case SIOCDELMULTI:
2044 break;
2045
2046 case SIOCSVNETID:
2047 error = gre_set_vnetid(tunnel, ifr);
2048 break;
2049
2050 case SIOCGVNETID:
2051 error = gre_get_vnetid(tunnel, ifr);
2052 break;
2053 case SIOCDVNETID:
2054 error = gre_del_vnetid(tunnel);
2055 break;
2056
2057 case SIOCSVNETFLOWID:
2058 error = gre_set_vnetflowid(tunnel, ifr);
2059 break;
2060
2061 case SIOCGVNETFLOWID:
2062 error = gre_get_vnetflowid(tunnel, ifr);
2063 break;
2064
2065 case SIOCSLIFPHYADDR:
2066 error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
2067 break;
2068 case SIOCGLIFPHYADDR:
2069 error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
2070 break;
2071 case SIOCDIFPHYADDR:
2072 error = gre_del_tunnel(tunnel);
2073 break;
2074
2075 case SIOCSLIFPHYRTABLE:
2076 if (ifr->ifr_rdomainid < 0 ||
2077 ifr->ifr_rdomainid > RT_TABLEID_MAX ||
2078 !rtable_exists(ifr->ifr_rdomainid)) {
2079 error = EINVAL;
2080 break;
2081 }
2082 tunnel->t_rtableid = ifr->ifr_rdomainid;
2083 break;
2084 case SIOCGLIFPHYRTABLE:
2085 ifr->ifr_rdomainid = tunnel->t_rtableid;
2086 break;
2087
2088 case SIOCSLIFPHYDF:
2089 /* commit */
2090 tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
2091 break;
2092 case SIOCGLIFPHYDF:
2093 ifr->ifr_df = tunnel->t_df ? 1 : 0;
2094 break;
2095
2096 default:
2097 error = ENOTTY;
2098 break;
2099 }
2100
2101 return (error);
2102 }
2103
2104 static uint8_t
2105 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2106 {
2107 uint8_t prio;
2108
2109 switch (t->t_txhprio) {
2110 case IF_HDRPRIO_PACKET:
2111 prio = m->m_pkthdr.pf.prio;
2112 break;
2113 default:
2114 prio = t->t_txhprio;
2115 break;
2116 }
2117
2118 return (IFQ_PRIO2TOS(prio));
2119 }
2120
2121 static uint8_t
2122 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2123 {
2124 uint8_t prio;
2125
2126 switch (t->t_txhprio) {
2127 case IF_HDRPRIO_PAYLOAD:
2128 return (tos);
2129 case IF_HDRPRIO_PACKET:
2130 prio = m->m_pkthdr.pf.prio;
2131 break;
2132 default:
2133 prio = t->t_txhprio;
2134 break;
2135 }
2136
2137 return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2138 }
2139
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}

static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}

static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	int error;
#endif

	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		addr4 = (struct sockaddr_in *)addr;
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		error = in6_embedscope(&tunnel->t_src6, addr6, NULL, NULL);
		if (error != 0)
			return (error);

		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}

static int
mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_src6);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	return (0);
}

static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = nvgre_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = nvgre_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

static int
gre_up(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE)
		gre_keepalive_send(sc);

	return (0);
}

static int
gre_down(struct gre_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
	}

	return (0);
}

static void
gre_link_state(struct ifnet *ifp, unsigned int state)
{
	int link_state = LINK_STATE_UNKNOWN;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		switch (state) {
		case GRE_KA_NONE:
			/* maybe up? or down? it's unknown, really */
			break;
		case GRE_KA_UP:
			link_state = LINK_STATE_UP;
			break;
		default:
			link_state = LINK_STATE_KALIVE_DOWN;
			break;
		}
	}

	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}

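/*
 * Keepalive probes are built so the peer needs no keepalive support of
 * its own: the inner packet is encapsulated with the tunnel endpoints
 * swapped before being wrapped in the real tunnel header, so simple
 * decapsulation at the far end routes the probe straight back to us.
 */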
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuration
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		MCLGETL(m, M_DONTWAIT, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		in_hdr_cksum_out(m, NULL);

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}

static void
gre_keepalive_hold(void *arg)
{
	struct gre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE)
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}

static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		dst4 = (struct sockaddr_in *)dst;
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		error = in6_embedscope(&tunnel->t_src6, src6, NULL, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}

static int
gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6 /* ifconfig already embeds the scopeid */
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)src;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;

		sin = (struct sockaddr_in *)dst;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_dst4;

		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)src;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_src6);

		sin6 = (struct sockaddr_in6 *)dst;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_dst6);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	return (0);
}

static int
gre_del_tunnel(struct gre_tunnel *tunnel)
{
	/* commit */
	tunnel->t_af = AF_UNSPEC;

	return (0);
}

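/*
 * The vnetid either occupies the whole 32 bit GRE key, or, when a
 * flow id is enabled, only the top of it in the NVGRE style layout
 * (virtual network id in the high bits, per-flow entropy below).
 */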
static int
gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	uint32_t key;
	uint32_t min = GRE_KEY_MIN;
	uint32_t max = GRE_KEY_MAX;
	unsigned int shift = GRE_KEY_SHIFT;
	uint32_t mask = GRE_KEY_MASK;

	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
		min = GRE_KEY_ENTROPY_MIN;
		max = GRE_KEY_ENTROPY_MAX;
		shift = GRE_KEY_ENTROPY_SHIFT;
		mask = GRE_KEY_ENTROPY;
	}

	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
		return (EINVAL);

	key = htonl(ifr->ifr_vnetid << shift);

	/* commit */
	tunnel->t_key_mask = mask;
	tunnel->t_key = key;

	return (0);
}

static int
gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	int shift;

	switch (tunnel->t_key_mask) {
	case GRE_KEY_NONE:
		return (EADDRNOTAVAIL);
	case GRE_KEY_ENTROPY:
		shift = GRE_KEY_ENTROPY_SHIFT;
		break;
	case GRE_KEY_MASK:
		shift = GRE_KEY_SHIFT;
		break;
	}

	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;

	return (0);
}

static int
gre_del_vnetid(struct gre_tunnel *tunnel)
{
	tunnel->t_key_mask = GRE_KEY_NONE;

	return (0);
}

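/*
 * Toggling the flow id converts the stored key in place between the
 * plain and the shifted-with-entropy layouts, so an existing vnetid
 * survives the change as long as it fits in the narrower field.
 */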
static int
gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	uint32_t mask, key;

	if (tunnel->t_key_mask == GRE_KEY_NONE)
		return (EADDRNOTAVAIL);

	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
	if (tunnel->t_key_mask == mask) {
		/* nop */
		return (0);
	}

	key = ntohl(tunnel->t_key);
	if (mask == GRE_KEY_ENTROPY) {
		if (key > GRE_KEY_ENTROPY_MAX)
			return (ERANGE);

		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
	} else
		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);

	/* commit */
	tunnel->t_key_mask = mask;
	tunnel->t_key = key;

	return (0);
}

static int
gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
{
	if (tunnel->t_key_mask == GRE_KEY_NONE)
		return (EADDRNOTAVAIL);

	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;

	return (0);
}

static int
mgre_up(struct mgre_softc *sc)
{
	unsigned int hlen;

	switch (sc->sc_tunnel.t_af) {
	case AF_UNSPEC:
		return (EDESTADDRREQ);
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}

	hlen += sizeof(struct gre_header);
	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(struct gre_h_key);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
		return (EADDRINUSE);

	sc->sc_if.if_hdrlen = hlen;
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	return (0);
}

static int
mgre_down(struct mgre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_if.if_flags, IFF_RUNNING);
	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */

	RBT_REMOVE(mgre_tree, &mgre_tree, sc);

	/* barrier? */

	return (0);
}

static int
egre_up(struct egre_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);
}

static int
egre_down(struct egre_softc *sc)
{
	NET_ASSERT_LOCKED();

	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	RBT_REMOVE(egre_tree, &egre_tree, sc);

	/* barrier? */

	return (0);
}

static int
egre_media_change(struct ifnet *ifp)
{
	return (ENOTTY);
}

static void
egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
}

static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	if_linkstatehook_add(ifp0, &sc->sc_ltask);
	if_detachhook_add(ifp0, &sc->sc_dtask);

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);

remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}

static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	NET_UNLOCK();
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	mq_purge(&sc->sc_send_list);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		if_detachhook_del(ifp0, &sc->sc_dtask);
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
	}
	if_put(ifp0);

	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}

static void
nvgre_link_change(void *arg)
{
	/* nop */
}

static void
nvgre_detach(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		nvgre_down(sc);
		if_down(ifp);
	}

	sc->sc_ifp0 = 0;
}

static int
nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
{
	struct ifnet *ifp0;

	ifp0 = if_unit(parent);
	if (ifp0 == NULL)
		return (EINVAL);

	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		if_put(ifp0);
		return (EPROTONOSUPPORT);
	}

	ifsetlro(ifp0, 0);

	/* commit */
	sc->sc_ifp0 = ifp0->if_index;
	if_put(ifp0);

	return (0);
}

static int
nvgre_add_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union gre_addr endpoint;
	unsigned int type;

	/* ignore ifba_ifsname */

	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_tunnel.t_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		in6_recoverscope(&src6, &sc->sc_tunnel.t_src6);

		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}

static int
nvgre_del_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}

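/*
 * nvgre transmit: the destination MAC is looked up in the learning
 * bridge to find the remote endpoint; broadcast frames and unknown
 * unicast destinations are flooded via the multicast group instead.
 */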
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!atomic_load_int(&gre_allow)) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		eh = mtod(m0, struct ether_header *);
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			const union gre_addr *endpoint;

			smr_read_enter();
			endpoint = etherbridge_resolve_ea(&sc->sc_eb,
			    (struct ether_addr *)eh->ether_dhost);
			if (endpoint == NULL) {
				/* "flood" to unknown hosts */
				endpoint = &tunnel->t_dst;
			}
			gateway = *endpoint;
			smr_read_leave();
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	if (!ml_empty(&ml)) {
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}

static uint64_t
nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	imo.imo_ifidx = sc->sc_ifp0;
	imo.imo_ttl = sc->sc_tunnel.t_ttl;
	imo.imo_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}

#ifdef INET6
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
#endif /* INET6 */

static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	switch (af) {
	case AF_INET:
		oerrors = nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		oerrors = nvgre_send6(sc, &ml);
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
}

static int
eoip_up(struct eoip_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		sc->sc_ka_holdmax = sc->sc_ka_count;
		eoip_keepalive_send(sc);
	}

	return (0);
}

static int
eoip_down(struct eoip_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
	}

	RBT_REMOVE(eoip_tree, &eoip_tree, sc);

	return (0);
}

static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!atomic_load_int(&gre_allow)) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}

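/*
 * EoIP framing: a GRE version 1 header with the key flag set,
 * followed by a 16 bit big endian payload length and the 16 bit
 * tunnel id (kept in little endian byte order, as configured above).
 */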
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len;

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	htobem16(&eoiph->eoip_len, len);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}

static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETL(m, M_DONTWAIT, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}

static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}

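/*
 * Keepalive reception drives a small state machine: a number of
 * consecutive probes must arrive before the link is declared up, and
 * that number backs off exponentially (capped at 16 times the
 * configured count) while the link stays down, so a flapping peer
 * settles slowly.
 */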
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	switch (sc->sc_ka_state) {
	case GRE_KA_NONE:
		return;
	case GRE_KA_DOWN:
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}

static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	if (m->m_pkthdr.len < len)
		goto drop;
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len);

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}

const struct sysctl_bounded_args gre_vars[] = {
	{ GRECTL_ALLOW, &gre_allow, 0, 1 },
	{ GRECTL_WCCP, &gre_wccp, 0, 1 },
};

int
gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	return sysctl_bounded_arr(gre_vars, nitems(gre_vars), name,
	    namelen, oldp, oldlenp, newp, newlen);
}

static inline int
gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
{
	switch (af) {
#ifdef INET6
	case AF_INET6:
		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
#endif /* INET6 */
	case AF_INET:
		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
	default:
		unhandled_af(af);
	}

	return (0);
}

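/*
 * Comparison functions for the red-black trees of configured tunnels.
 * Tunnels sort by key (and whether one is set at all), routing table,
 * address family and finally the endpoint addresses, so the input
 * path can find the owning interface with a single tree lookup.
 */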
static int
gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;
	uint32_t mask;
	int rv;

	/* is K set at all? */
	ka = a->t_key_mask & GRE_KEY_ENTROPY;
	kb = b->t_key_mask & GRE_KEY_ENTROPY;

	/* sort by whether K is set */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* is K set on both? */
	if (ka != GRE_KEY_NONE) {
		/* get common prefix */
		mask = a->t_key_mask & b->t_key_mask;

		ka = a->t_key & mask;
		kb = b->t_key & mask;

		/* sort by common prefix */
		if (ka > kb)
			return (1);
		if (ka < kb)
			return (-1);
	}

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	return (0);
}

static int
gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	int rv;

	rv = gre_cmp_src(a, b);
	if (rv != 0)
		return (rv);

	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
}

static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);

static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);

static int
nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;

	ka = a->t_key & GRE_KEY_ENTROPY;
	kb = b->t_key & GRE_KEY_ENTROPY;

	/* sort by common prefix */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	return (0);
}

static inline int
nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;
	int rv;

	rv = nvgre_cmp_tunnel(a, b);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	return (0);
}

static int
nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
    unsigned int if0idxa, const struct gre_tunnel *b,
    const union gre_addr *ab, unsigned int if0idxb)
{
	int rv;

	rv = nvgre_cmp_tunnel(a, b);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, aa, ab);
	if (rv != 0)
		return (rv);

	if (if0idxa > if0idxb)
		return (1);
	if (if0idxa < if0idxb)
		return (-1);

	return (0);
}

static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;

	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
	    b, &b->t_dst, nb->sc_ifp0));
}

RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);

static inline int
eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
{
	const struct gre_tunnel *a = &ea->sc_tunnel;
	const struct gre_tunnel *b = &eb->sc_tunnel;
	int rv;

	if (a->t_key > b->t_key)
		return (1);
	if (a->t_key < b->t_key)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
	if (rv != 0)
		return (rv);

	return (0);
}

RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);

static int
nvgre_eb_port_eq(void *arg, void *a, void *b)
{
	struct nvgre_softc *sc = arg;

	return (gre_ip_cmp(sc->sc_tunnel.t_af, a, b) == 0);
}

static void *
nvgre_eb_port_take(void *arg, void *port)
{
	union gre_addr *ea = port;
	union gre_addr *endpoint;

	endpoint = pool_get(&nvgre_endpoint_pool, PR_NOWAIT);
	if (endpoint == NULL)
		return (NULL);

	*endpoint = *ea;

	return (endpoint);
}

static void
nvgre_eb_port_rele(void *arg, void *port)
{
	union gre_addr *endpoint = port;

	pool_put(&nvgre_endpoint_pool, endpoint);
}

static size_t
nvgre_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct nvgre_softc *sc = arg;

	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
}

static void
nvgre_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct nvgre_softc *sc = arg;
	union gre_addr *endpoint = port;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &endpoint->in6);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}
}