/*	$OpenBSD: if_tun.c,v 1.250 2024/12/30 02:46:00 guenther Exp $	*/
/*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/

/*
 * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
 * Nottingham University 1987.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with.  This driver has its
 * roots in a similar driver written by Phil Cockcroft (formerly) at
 * UCL.  This driver is based much more on read/write/select mode of
 * operation though.
 */
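
/*
 * A rough sketch of the userland side (hypothetical device name and
 * buffer sizing; error handling elided).  Opening the character device
 * creates the matching interface if needed, and every read() or
 * write() carries exactly one packet.  On layer 3 tun(4) devices each
 * packet is prefixed with a 4-byte address family word in network
 * byte order, which tun_output() prepends on the way to userland and
 * tun_input() strips on the way back into the stack:
 *
 *	int fd = open("/dev/tun0", O_RDWR);
 *	char buf[TUNMRU + sizeof(uint32_t)];
 *	ssize_t n = read(fd, buf, sizeof(buf));
 *	write(fd, buf, n);
 */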

/* #define TUN_DEBUG	9 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/sigio.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/time.h>
#include <sys/device.h>
#include <sys/vnode.h>
#include <sys/signalvar.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/mutex.h>
#include <sys/smr.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/netisr.h>
#include <net/rtable.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "bpfilter.h"
#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#ifdef MPLS
#include <netmpls/mpls.h>
#endif /* MPLS */

#include <net/if_tun.h>

struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct mutex		sc_mtx;
	struct klist		sc_rklist;	/* knotes for read */
	struct klist		sc_wklist;	/* knotes for write (unused) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)
#define TUN_HDR				(1 << 17)

	dev_t			sc_dev;
	struct refcnt		sc_refs;
	unsigned int		sc_reading;
};
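
/*
 * tun_softc lifetime: instances live on tun_devs_list and are looked
 * up either under the kernel lock (tun_name_lookup) or inside an SMR
 * read section (tun_get).  Device entry points hold a reference via
 * sc_refs; tun_clone_destroy() unlinks the softc, waits out SMR
 * readers with smr_barrier(), and then sleeps in refcnt_finalize()
 * until the last reference is released before freeing.
 */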

#ifdef	TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Pretend that these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)

#define TUN_IF_CAPS ( \
	IFCAP_CSUM_IPv4 | \
	IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4|IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6 | \
	IFCAP_VLAN_MTU|IFCAP_VLAN_HWTAGGING|IFCAP_VLAN_HWOFFLOAD | \
	IFCAP_TSOv4|IFCAP_TSOv6|IFCAP_LRO \
)

void	tunattach(int);

int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
int	tun_dev_close(dev_t, struct proc *);
int	tun_dev_ioctl(dev_t, u_long, void *);
int	tun_dev_read(dev_t, struct uio *, int);
int	tun_dev_write(dev_t, struct uio *, int, int);
int	tun_dev_kqfilter(dev_t, struct knote *);

int	tun_ioctl(struct ifnet *, u_long, caddr_t);
void	tun_input(struct ifnet *, struct mbuf *);
int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	tun_enqueue(struct ifnet *, struct mbuf *);
int	tun_clone_create(struct if_clone *, int);
int	tap_clone_create(struct if_clone *, int);
int	tun_create(struct if_clone *, int, int);
int	tun_clone_destroy(struct ifnet *);
void	tun_wakeup(struct tun_softc *);
void	tun_start(struct ifnet *);
int	filt_tunread(struct knote *, long);
int	filt_tunwrite(struct knote *, long);
int	filt_tunmodify(struct kevent *, struct knote *);
int	filt_tunprocess(struct knote *, struct kevent *);
void	filt_tunrdetach(struct knote *);
void	filt_tunwdetach(struct knote *);
void	tun_link_state(struct ifnet *, int);

const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};

const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};

SMR_LIST_HEAD(tun_list, tun_softc);

struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

struct if_clone tap_cloner =
    IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);

void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}

int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}

int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}

struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);

struct tun_softc *
tun_name_lookup(const char *name)
{
	struct tun_softc *sc;

	KERNEL_ASSERT_LOCKED();

	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
		if (strcmp(sc->sc_if.if_xname, name) == 0)
			return (sc);
	}

	return (NULL);
}

int
tun_insert(struct tun_softc *sc)
{
	int error = 0;

	/* check for a race */
	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
		error = EEXIST;
	else {
		/* tun_name_lookup checks for the right lock already */
		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
	}

	return (error);
}

int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc *sc;
	struct ifnet *ifp;

	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	refcnt_init(&sc->sc_refs);

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	mtx_init(&sc->sc_mtx, IPL_NET);
	klist_init_mutex(&sc->sc_rklist, &sc->sc_mtx);
	klist_init_mutex(&sc->sc_wklist, &sc->sc_mtx);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
#if NBPFILTER > 0
		ifp->if_bpf_mtap = bpf_mtap;
#endif
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}

int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc *sc = ifp->if_softc;
	dev_t dev;

	KERNEL_ASSERT_LOCKED();

	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	klist_invalidate(&sc->sc_rklist);
	klist_invalidate(&sc->sc_wklist);

	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof(*sc));
	return (0);
}

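/*
 * Look up the softc owning a character device and take a reference.
 * The walk runs under smr_read_enter() so a concurrent
 * tun_clone_destroy() cannot free the softc out from under us; the
 * caller must drop the reference with tun_put() when done.
 */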
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}

static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}

int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}

int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}

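/*
 * Opening /dev/tunN (or /dev/tapN) finds or clones the matching
 * interface, waits for it to finish initialising, claims it by
 * recording the device in sc_dev, and marks it up and running.
 * Only one open per device is allowed; a second open fails with
 * EBUSY.
 */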
int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;
	struct vnode *vp;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	/*
	 * Find the vnode associated with this open before we sleep
	 * and let something else revoke it. Our caller has a reference
	 * to it so we don't need to account for it.
	 */
	if (!vfinddev(dev, VCHR, &vp))
		panic("%s vfinddev failed", __func__);

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((sc = tun_name_lookup(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	refcnt_take(&sc->sc_refs);

	/* wait for it to be fully constructed before we use it */
	for (;;) {
		if (ISSET(sc->sc_flags, TUN_DEAD)) {
			error = ENXIO;
			goto done;
		}

		if (ISSET(sc->sc_flags, TUN_INITED))
			break;

		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			goto done;
		}
	}

	/* Has tun_clone_destroy torn the rug out under us? */
	if (vp->v_type == VBAD) {
		error = ENXIO;
		goto done;
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		error = EBUSY;
		goto done;
	}
	/* it's ours now */
	sc->sc_dev = dev;
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	ifp = &sc->sc_if;
	NET_LOCK();
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
	error = 0;

done:
	tun_put(sc);
	return (error);
}

/*
 * tunclose - close the device; if closing the real device, flush pending
 * output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

int
tun_dev_close(dev_t dev, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error = 0;
	char name[IFNAMSIZ];
	int destroy = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * junk all pending output
	 */
	NET_LOCK();
	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
	CLR(ifp->if_capabilities, TUN_IF_CAPS);
	NET_UNLOCK();
	ifq_purge(&ifp->if_snd);

	CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR);
	sigio_free(&sc->sc_sigio);

	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
		/* we can't hold a reference to sc before we start a dtor */
		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
			destroy = 1;
			strlcpy(name, ifp->if_xname, sizeof(name));
		} else {
			tun_link_state(ifp, LINK_STATE_DOWN);
		}
	}

	sc->sc_dev = 0;

	tun_put(sc);

	if (destroy)
		if_clone_destroy(name);

	return (error);
}

/*
 * Process an ioctl request.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc *sc = (struct tun_softc *)(ifp->if_softc);
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		SET(ifp->if_flags, IFF_UP);
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP))
			SET(ifp->if_flags, IFF_RUNNING);
		else
			CLR(ifp->if_flags, IFF_RUNNING);
		break;

	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		if (sc->sc_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}

/*
 * tun_output - queue packets from higher level ready to put out.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	u_int32_t *af;

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	return (if_enqueue(ifp, m0));
}

int
tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
{
	struct tun_softc *sc = ifp->if_softc;
	int error;

	error = ifq_enqueue(&ifp->if_snd, m0);
	if (error != 0)
		return (error);

	tun_wakeup(sc);

	return (0);
}

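/*
 * Notify userland that a packet is waiting: wake a reader sleeping
 * in ifq_deq_sleep(), fire the read knotes for kqueue/poll, and
 * deliver SIGIO if async I/O was requested with FIOASYNC.
 */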
void
tun_wakeup(struct tun_softc *sc)
{
	if (sc->sc_reading)
		wakeup(&sc->sc_if.if_snd);

	knote(&sc->sc_rklist, 0);

	if (sc->sc_flags & TUN_ASYNC)
		pgsigio(&sc->sc_sigio, SIGIO, 0);
}

/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

static int
tun_set_capabilities(struct tun_softc *sc, const struct tun_capabilities *cap)
{
	if (ISSET(cap->tun_if_capabilities, ~TUN_IF_CAPS))
		return (EINVAL);

	KERNEL_ASSERT_LOCKED();
	SET(sc->sc_flags, TUN_HDR);

	NET_LOCK();
	CLR(sc->sc_if.if_capabilities, TUN_IF_CAPS);
	SET(sc->sc_if.if_capabilities, cap->tun_if_capabilities);
	NET_UNLOCK();
	return (0);
}

static int
tun_get_capabilities(struct tun_softc *sc, struct tun_capabilities *cap)
{
	int error = 0;

	NET_LOCK_SHARED();
	if (ISSET(sc->sc_flags, TUN_HDR)) {
		cap->tun_if_capabilities =
		    (sc->sc_if.if_capabilities & TUN_IF_CAPS);
	} else
		error = ENODEV;
	NET_UNLOCK_SHARED();

	return (error);
}

static int
tun_del_capabilities(struct tun_softc *sc)
{
	NET_LOCK();
	CLR(sc->sc_if.if_capabilities, TUN_IF_CAPS);
	NET_UNLOCK();

	KERNEL_ASSERT_LOCKED();
	CLR(sc->sc_flags, TUN_HDR);

	return (0);
}

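/*
 * Byte count reported to FIONREAD and EVFILT_READ: the length of the
 * next queued packet, plus sizeof(struct tun_hdr) when the TUNSCAP
 * ioctl has enabled offload metadata on this device.
 */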
static int
tun_hdatalen(struct tun_softc *sc)
{
	struct ifnet *ifp = &sc->sc_if;
	int len;

	len = ifq_hdatalen(&ifp->if_snd);
	if (len > 0 && ISSET(sc->sc_flags, TUN_HDR))
		len += sizeof(struct tun_hdr);

	return (len);
}

int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc *sc;
	struct tuninfo *tunp;
	int error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		if (tunp->flags != (sc->sc_if.if_flags & TUN_IFF_FLAGS)) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags & TUN_IFF_FLAGS;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		if (*(int *)data != (sc->sc_if.if_flags & TUN_IFF_FLAGS)) {
			error = EINVAL;
			break;
		}
		break;

	case TUNSCAP:
		error = tun_set_capabilities(sc,
		    (const struct tun_capabilities *)data);
		break;
	case TUNGCAP:
		error = tun_get_capabilities(sc,
		    (struct tun_capabilities *)data);
		break;
	case TUNDCAP:
		error = tun_del_capabilities(sc);
		break;

	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		*(int *)data = tun_hdatalen(sc);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		error = sigio_setown(&sc->sc_sigio, cmd, data);
		break;
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}

/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m, *m0;
	size_t len;
	int error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

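	/*
	 * When offloads are enabled, prepend a tun_hdr carrying the
	 * mbuf's checksum, VLAN tag, and TSO metadata so userland can
	 * see what the stack deferred to "hardware".
	 */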
	if (ISSET(sc->sc_flags, TUN_HDR)) {
		struct tun_hdr th;

		KASSERT(ISSET(m0->m_flags, M_PKTHDR));

		th.th_flags = 0;
		if (ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT))
			SET(th.th_flags, TUN_H_IPV4_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT))
			SET(th.th_flags, TUN_H_TCP_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_OUT))
			SET(th.th_flags, TUN_H_UDP_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_ICMP_CSUM_OUT))
			SET(th.th_flags, TUN_H_ICMP_CSUM);

		th.th_pad = 0;

		th.th_vtag = 0;
		if (ISSET(m0->m_flags, M_VLANTAG)) {
			SET(th.th_flags, TUN_H_VTAG);
			th.th_vtag = m0->m_pkthdr.ether_vtag;
		}

		th.th_mss = 0;
		if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
			SET(th.th_flags, TUN_H_TCP_MSS);
			th.th_mss = m0->m_pkthdr.ph_mss;
		}

		len = ulmin(uio->uio_resid, sizeof(th));
		if (len > 0) {
			error = uiomove(&th, len, uio);
			if (error != 0)
				goto free;
		}
	}

	m = m0;
	while (uio->uio_resid > 0) {
		len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

free:
	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}

/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}

int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}

int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m0, *m, *n;
	int error = 0;
	size_t len, alen, mlen;
	size_t hlen;
	struct tun_hdr th;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	hlen = ifp->if_hdrlen;
	if (ISSET(sc->sc_flags, TUN_HDR))
		hlen += sizeof(th);
	if (uio->uio_resid < hlen ||
	    uio->uio_resid > (hlen + MAXMCLBYTES)) {
		error = EMSGSIZE;
		goto put;
	}

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}

	if (ISSET(sc->sc_flags, TUN_HDR)) {
		error = uiomove(&th, sizeof(th), uio);
		if (error != 0)
			goto drop;

		if (ISSET(th.th_flags, TUN_H_IPV4_CSUM)) {
			SET(m0->m_pkthdr.csum_flags,
			    M_IPV4_CSUM_OUT | M_IPV4_CSUM_IN_OK);
		}

		switch (th.th_flags &
		    (TUN_H_TCP_CSUM|TUN_H_UDP_CSUM|TUN_H_ICMP_CSUM)) {
		case 0:
			break;
		case TUN_H_TCP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_TCP_CSUM_OUT | M_TCP_CSUM_IN_OK);
			break;
		case TUN_H_UDP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_UDP_CSUM_OUT | M_UDP_CSUM_IN_OK);
			break;
		case TUN_H_ICMP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_ICMP_CSUM_OUT | M_ICMP_CSUM_IN_OK);
			break;
		default:
			error = EINVAL;
			goto drop;
		}

		if (ISSET(th.th_flags, TUN_H_VTAG)) {
			if (!ISSET(sc->sc_flags, TUN_LAYER2)) {
				error = EINVAL;
				goto drop;
			}
			SET(m0->m_flags, M_VLANTAG);
			m0->m_pkthdr.ether_vtag = th.th_vtag;
		}

		if (ISSET(th.th_flags, TUN_H_TCP_MSS)) {
			SET(m0->m_pkthdr.csum_flags, M_TCP_TSO);
			m0->m_pkthdr.ph_mss = th.th_mss;
		}
	}

	align += roundup(max_linkhdr, sizeof(long));
	mlen = MHLEN; /* how much space in the mbuf */

	len = uio->uio_resid;
	m0->m_pkthdr.len = len;

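	/*
	 * Build the mbuf chain: try to fit the packet (plus alignment
	 * for the link header) into the first mbuf, upgrading to a
	 * cluster when it doesn't fit, then append plain mbufs for any
	 * remainder until the uio is drained.
	 */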
	m = m0;
	for (;;) {
		alen = align + len; /* what we want to put in this mbuf */
		if (alen > mlen) {
			if (alen > MAXMCLBYTES)
				alen = MAXMCLBYTES;
			m_clget(m, M_DONTWAIT, alen);
			if (!ISSET(m->m_flags, M_EXT)) {
				/* free the chain built so far */
				error = ENOMEM;
				goto drop;
			}
		}

		m->m_len = alen;
		if (align > 0) {
			/* avoid m_adj to protect m0->m_pkthdr.len */
			m->m_data += align;
			m->m_len -= align;
		}

		error = uiomove(mtod(m, void *), m->m_len, uio);
		if (error != 0)
			goto drop;

		len = uio->uio_resid;
		if (len == 0)
			break;

		n = m_get(M_DONTWAIT, MT_DATA);
		if (n == NULL) {
			/* free the chain built so far */
			error = ENOMEM;
			goto drop;
		}

		align = 0;
		mlen = MLEN;

		m->m_next = n;
		m = n;
	}

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}

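/*
 * Input from the userland write path for layer 3 tun(4) interfaces:
 * the leading 4-byte address family word selects the protocol input
 * routine, mirroring the header tun_output() prepends on the way out.
 */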
void
tun_input(struct ifnet *ifp, struct mbuf *m0)
{
	uint32_t af;

	KASSERT(m0->m_len >= sizeof(af));

	af = *mtod(m0, uint32_t *);
	/* strip the tunnel header */
	m_adj(m0, sizeof(af));

	switch (ntohl(af)) {
	case AF_INET:
		ipv4_input(ifp, m0);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m0);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m0);
		break;
#endif
	default:
		m_freem(m0);
		break;
	}
}

int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

int
tun_dev_kqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc *sc;
	struct klist *klist;
	int error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &sc->sc_rklist;
		kn->kn_fop = &tunread_filtops;
		break;
	case EVFILT_WRITE:
		klist = &sc->sc_wklist;
		kn->kn_fop = &tunwrite_filtops;
		break;
	default:
		error = EINVAL;
		goto put;
	}

	kn->kn_hook = sc;

	klist_insert(klist, kn);

put:
	tun_put(sc);
	return (error);
}

void
filt_tunrdetach(struct knote *kn)
{
	struct tun_softc *sc = kn->kn_hook;

	klist_remove(&sc->sc_rklist, kn);
}

int
filt_tunread(struct knote *kn, long hint)
{
	struct tun_softc *sc = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);

	kn->kn_data = tun_hdatalen(sc);

	return (kn->kn_data > 0);
}

void
filt_tunwdetach(struct knote *kn)
{
	struct tun_softc *sc = kn->kn_hook;

	klist_remove(&sc->sc_wklist, kn);
}

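/*
 * Writes to the device never block, so the write filter always
 * reports ready and advertises the largest packet the interface
 * can take.
 */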
int
filt_tunwrite(struct knote *kn, long hint)
{
	struct tun_softc *sc = kn->kn_hook;
	struct ifnet *ifp = &sc->sc_if;

	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);

	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;

	return (1);
}

int
filt_tunmodify(struct kevent *kev, struct knote *kn)
{
	struct tun_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	active = knote_modify(kev, kn);
	mtx_leave(&sc->sc_mtx);

	return (active);
}

int
filt_tunprocess(struct knote *kn, struct kevent *kev)
{
	struct tun_softc *sc = kn->kn_hook;
	int active;

	mtx_enter(&sc->sc_mtx);
	active = knote_process(kn, kev);
	mtx_leave(&sc->sc_mtx);

	return (active);
}

void
tun_start(struct ifnet *ifp)
{
	struct tun_softc *sc = ifp->if_softc;

	splassert(IPL_NET);

	if (ifq_len(&ifp->if_snd))
		tun_wakeup(sc);
}

void
tun_link_state(struct ifnet *ifp, int link_state)
{
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}