/*	$OpenBSD: if_tun.c,v 1.250 2024/12/30 02:46:00 guenther Exp $	*/
/*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/

/*
 * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
 * Nottingham University 1987.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
 * roots in a similar driver written by Phil Cockcroft (formerly) at
 * UCL. This driver is based much more on read/write/select mode of
 * operation though.
 */

/* #define	TUN_DEBUG	9 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/sigio.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/time.h>
#include <sys/device.h>
#include <sys/vnode.h>
#include <sys/signalvar.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/mutex.h>
#include <sys/smr.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/netisr.h>
#include <net/rtable.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "bpfilter.h"
#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#ifdef MPLS
#include <netmpls/mpls.h>
#endif /* MPLS */

#include <net/if_tun.h>

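/*
 * Per-clone state shared by tun(4) and tap(4). sc_mtx serializes
 * the knote lists; sc_refs lets tun_clone_destroy() wait for the
 * cdevsw entrypoints to finish before the softc is freed.
 */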
struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct mutex		sc_mtx;
	struct klist		sc_rklist;	/* knotes for read */
	struct klist		sc_wklist;	/* knotes for write (unused) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)
#define TUN_HDR				(1 << 17)

	dev_t			sc_dev;
	struct refcnt		sc_refs;
	unsigned int		sc_reading;
};

#ifdef	TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Pretend that these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)

#define TUN_IF_CAPS ( \
	IFCAP_CSUM_IPv4 | \
	IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4|IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6 | \
	IFCAP_VLAN_MTU|IFCAP_VLAN_HWTAGGING|IFCAP_VLAN_HWOFFLOAD | \
	IFCAP_TSOv4|IFCAP_TSOv6|IFCAP_LRO \
)

void	tunattach(int);

int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
int	tun_dev_close(dev_t, struct proc *);
int	tun_dev_ioctl(dev_t, u_long, void *);
int	tun_dev_read(dev_t, struct uio *, int);
int	tun_dev_write(dev_t, struct uio *, int, int);
int	tun_dev_kqfilter(dev_t, struct knote *);

int	tun_ioctl(struct ifnet *, u_long, caddr_t);
void	tun_input(struct ifnet *, struct mbuf *);
int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	tun_enqueue(struct ifnet *, struct mbuf *);
int	tun_clone_create(struct if_clone *, int);
int	tap_clone_create(struct if_clone *, int);
int	tun_create(struct if_clone *, int, int);
int	tun_clone_destroy(struct ifnet *);
void	tun_wakeup(struct tun_softc *);
void	tun_start(struct ifnet *);
int	filt_tunread(struct knote *, long);
int	filt_tunwrite(struct knote *, long);
int	filt_tunmodify(struct kevent *, struct knote *);
int	filt_tunprocess(struct knote *, struct kevent *);
void	filt_tunrdetach(struct knote *);
void	filt_tunwdetach(struct knote *);
void	tun_link_state(struct ifnet *, int);

const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};

const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
	.f_modify	= filt_tunmodify,
	.f_process	= filt_tunprocess,
};

SMR_LIST_HEAD(tun_list, tun_softc);

struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

struct if_clone tap_cloner =
    IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);

void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}

int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}

int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}

struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);

struct tun_softc *
tun_name_lookup(const char *name)
{
	struct tun_softc *sc;

	KERNEL_ASSERT_LOCKED();

	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
		if (strcmp(sc->sc_if.if_xname, name) == 0)
			return (sc);
	}

	return (NULL);
}

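/*
 * Publish a new softc on the global list; the caller holds the
 * kernel lock, which tun_name_lookup() asserts for us.
 */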
int
tun_insert(struct tun_softc *sc)
{
	int error = 0;

	/* check for a race */
	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
		error = EEXIST;
	else {
		/* tun_name_lookup checks for the right lock already */
		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
	}

	return (error);
}

int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;

	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	refcnt_init(&sc->sc_refs);

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	mtx_init(&sc->sc_mtx, IPL_NET);
	klist_init_mutex(&sc->sc_rklist, &sc->sc_mtx);
	klist_init_mutex(&sc->sc_wklist, &sc->sc_mtx);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
#if NBPFILTER > 0
		ifp->if_bpf_mtap = bpf_mtap;
#endif
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}

int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;
	dev_t			 dev;

	KERNEL_ASSERT_LOCKED();

	if (ISSET(sc->sc_flags, TUN_DEAD))
		return (ENXIO);
	SET(sc->sc_flags, TUN_DEAD);

	/* kick userland off the device */
	dev = sc->sc_dev;
	if (dev) {
		struct vnode *vp;

		if (vfinddev(dev, VCHR, &vp))
			VOP_REVOKE(vp, REVOKEALL);

		KASSERT(sc->sc_dev == 0);
	}

	/* prevent userland from getting to the device again */
	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
	smr_barrier();

	/* help read() give up */
	if (sc->sc_reading)
		wakeup(&ifp->if_snd);

	/* wait for device entrypoints to finish */
	refcnt_finalize(&sc->sc_refs, "tundtor");

	klist_invalidate(&sc->sc_rklist);
	klist_invalidate(&sc->sc_wklist);

	klist_free(&sc->sc_rklist);
	klist_free(&sc->sc_wklist);

	if (ISSET(sc->sc_flags, TUN_LAYER2))
		ether_ifdetach(ifp);

	if_detach(ifp);
	sigio_free(&sc->sc_sigio);

	free(sc, M_DEVBUF, sizeof *sc);
	return (0);
}

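/*
 * Map a device node to its softc via the SMR list and take a
 * reference; callers release it with tun_put().
 */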
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}

static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}

int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}

int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}

int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;
	struct vnode *vp;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	/*
	 * Find the vnode associated with this open before we sleep
	 * and let something else revoke it. Our caller has a reference
	 * to it so we don't need to account for it.
	 */
	if (!vfinddev(dev, VCHR, &vp))
		panic("%s vfinddev failed", __func__);

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((sc = tun_name_lookup(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	refcnt_take(&sc->sc_refs);

	/* wait for it to be fully constructed before we use it */
	for (;;) {
		if (ISSET(sc->sc_flags, TUN_DEAD)) {
			error = ENXIO;
			goto done;
		}

		if (ISSET(sc->sc_flags, TUN_INITED))
			break;

		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			goto done;
		}
	}

	/* Has tun_clone_destroy torn the rug out under us? */
	if (vp->v_type == VBAD) {
		error = ENXIO;
		goto done;
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		error = EBUSY;
		goto done;
	}
	/* it's ours now */
	sc->sc_dev = dev;
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	ifp = &sc->sc_if;
	NET_LOCK();
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	NET_UNLOCK();
	tun_link_state(ifp, LINK_STATE_FULL_DUPLEX);
	error = 0;

done:
	tun_put(sc);
	return (error);
}

/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}

int
tun_dev_close(dev_t dev, struct proc *p)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	int			 error = 0;
	char			 name[IFNAMSIZ];
	int			 destroy = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/*
	 * junk all pending output
	 */
	NET_LOCK();
	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
	CLR(ifp->if_capabilities, TUN_IF_CAPS);
	NET_UNLOCK();
	ifq_purge(&ifp->if_snd);

	CLR(sc->sc_flags, TUN_ASYNC|TUN_HDR);
	sigio_free(&sc->sc_sigio);

	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
		/* we can't hold a reference to sc before we start a dtor */
		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
			destroy = 1;
			strlcpy(name, ifp->if_xname, sizeof(name));
		} else {
			tun_link_state(ifp, LINK_STATE_DOWN);
		}
	}

	sc->sc_dev = 0;

	tun_put(sc);

	if (destroy)
		if_clone_destroy(name);

	return (error);
}

/*
 * Process an ioctl request.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*sc = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		SET(ifp->if_flags, IFF_UP);
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP))
			SET(ifp->if_flags, IFF_RUNNING);
		else
			CLR(ifp->if_flags, IFF_RUNNING);
		break;

	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		if (sc->sc_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}

/*
 * tun_output - queue packets from higher level ready to put out.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	u_int32_t		*af;

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	return (if_enqueue(ifp, m0));
}

int
tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
{
	struct tun_softc	*sc = ifp->if_softc;
	int			 error;

	error = ifq_enqueue(&ifp->if_snd, m0);
	if (error != 0)
		return (error);

	tun_wakeup(sc);

	return (0);
}

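/*
 * A packet is ready for userland: wake a sleeping read(2), fire
 * the read knotes, and post SIGIO if async I/O was requested.
 */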
void
tun_wakeup(struct tun_softc *sc)
{
	if (sc->sc_reading)
		wakeup(&sc->sc_if.if_snd);

	knote(&sc->sc_rklist, 0);

	if (sc->sc_flags & TUN_ASYNC)
		pgsigio(&sc->sc_sigio, SIGIO, 0);
}

/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}

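/*
 * TUNSCAP handler: enable the tun_hdr encapsulation and advertise
 * the requested subset of TUN_IF_CAPS offloads on the interface.
 */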
static int
tun_set_capabilities(struct tun_softc *sc, const struct tun_capabilities *cap)
{
	if (ISSET(cap->tun_if_capabilities, ~TUN_IF_CAPS))
		return (EINVAL);

	KERNEL_ASSERT_LOCKED();
	SET(sc->sc_flags, TUN_HDR);

	NET_LOCK();
	CLR(sc->sc_if.if_capabilities, TUN_IF_CAPS);
	SET(sc->sc_if.if_capabilities, cap->tun_if_capabilities);
	NET_UNLOCK();
	return (0);
}

static int
tun_get_capabilities(struct tun_softc *sc, struct tun_capabilities *cap)
{
	int error = 0;

	NET_LOCK_SHARED();
	if (ISSET(sc->sc_flags, TUN_HDR)) {
		cap->tun_if_capabilities =
		    (sc->sc_if.if_capabilities & TUN_IF_CAPS);
	} else
		error = ENODEV;
	NET_UNLOCK_SHARED();

	return (error);
}

static int
tun_del_capabilities(struct tun_softc *sc)
{
	NET_LOCK();
	CLR(sc->sc_if.if_capabilities, TUN_IF_CAPS);
	NET_UNLOCK();

	KERNEL_ASSERT_LOCKED();
	CLR(sc->sc_flags, TUN_HDR);

	return (0);
}

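/*
 * Byte count the next read(2) will see: the head of the send
 * queue plus the optional tun_hdr. Backs FIONREAD and the read
 * filter.
 */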
static int
tun_hdatalen(struct tun_softc *sc)
{
	struct ifnet		*ifp = &sc->sc_if;
	int			 len;

	len = ifq_hdatalen(&ifp->if_snd);
	if (len > 0 && ISSET(sc->sc_flags, TUN_HDR))
		len += sizeof(struct tun_hdr);

	return (len);
}

int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc	*sc;
	struct tuninfo		*tunp;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		if (tunp->flags != (sc->sc_if.if_flags & TUN_IFF_FLAGS)) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags & TUN_IFF_FLAGS;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		if (*(int *)data != (sc->sc_if.if_flags & TUN_IFF_FLAGS)) {
			error = EINVAL;
			break;
		}
		break;

	case TUNSCAP:
		error = tun_set_capabilities(sc,
		    (const struct tun_capabilities *)data);
		break;
	case TUNGCAP:
		error = tun_get_capabilities(sc,
		    (struct tun_capabilities *)data);
		break;
	case TUNDCAP:
		error = tun_del_capabilities(sc);
		break;

	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		*(int *)data = tun_hdatalen(sc);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		error = sigio_setown(&sc->sc_sigio, cmd, data);
		break;
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}

/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}

int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	size_t			 len;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	if (ISSET(sc->sc_flags, TUN_HDR)) {
		struct tun_hdr th;

		KASSERT(ISSET(m0->m_flags, M_PKTHDR));

		th.th_flags = 0;
		if (ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT))
			SET(th.th_flags, TUN_H_IPV4_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT))
			SET(th.th_flags, TUN_H_TCP_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_OUT))
			SET(th.th_flags, TUN_H_UDP_CSUM);
		if (ISSET(m0->m_pkthdr.csum_flags, M_ICMP_CSUM_OUT))
			SET(th.th_flags, TUN_H_ICMP_CSUM);

		th.th_pad = 0;

		th.th_vtag = 0;
		if (ISSET(m0->m_flags, M_VLANTAG)) {
			SET(th.th_flags, TUN_H_VTAG);
			th.th_vtag = m0->m_pkthdr.ether_vtag;
		}

		th.th_mss = 0;
		if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
			SET(th.th_flags, TUN_H_TCP_MSS);
			th.th_mss = m0->m_pkthdr.ph_mss;
		}

		len = ulmin(uio->uio_resid, sizeof(th));
		if (len > 0) {
			error = uiomove(&th, len, uio);
			if (error != 0)
				goto free;
		}
	}

	m = m0;
	while (uio->uio_resid > 0) {
		len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

free:
	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}

/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}

int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}

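/*
 * Build a packet from a single write(2): consume the optional
 * tun_hdr first, then copy the payload into an mbuf chain and
 * hand it to the stack with if_vinput().
 */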
int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m0, *m, *n;
	int			error = 0;
	size_t			len, alen, mlen;
	size_t			hlen;
	struct tun_hdr		th;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	hlen = ifp->if_hdrlen;
	if (ISSET(sc->sc_flags, TUN_HDR))
		hlen += sizeof(th);
	if (uio->uio_resid < hlen ||
	    uio->uio_resid > (hlen + MAXMCLBYTES)) {
		error = EMSGSIZE;
		goto put;
	}

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}

	if (ISSET(sc->sc_flags, TUN_HDR)) {
		error = uiomove(&th, sizeof(th), uio);
		if (error != 0)
			goto drop;

		if (ISSET(th.th_flags, TUN_H_IPV4_CSUM)) {
			SET(m0->m_pkthdr.csum_flags,
			    M_IPV4_CSUM_OUT | M_IPV4_CSUM_IN_OK);
		}

		switch (th.th_flags &
		    (TUN_H_TCP_CSUM|TUN_H_UDP_CSUM|TUN_H_ICMP_CSUM)) {
		case 0:
			break;
		case TUN_H_TCP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_TCP_CSUM_OUT | M_TCP_CSUM_IN_OK);
			break;
		case TUN_H_UDP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_UDP_CSUM_OUT | M_UDP_CSUM_IN_OK);
			break;
		case TUN_H_ICMP_CSUM:
			SET(m0->m_pkthdr.csum_flags,
			    M_ICMP_CSUM_OUT | M_ICMP_CSUM_IN_OK);
			break;
		default:
			error = EINVAL;
			goto drop;
		}

		if (ISSET(th.th_flags, TUN_H_VTAG)) {
			if (!ISSET(sc->sc_flags, TUN_LAYER2)) {
				error = EINVAL;
				goto drop;
			}
			SET(m0->m_flags, M_VLANTAG);
			m0->m_pkthdr.ether_vtag = th.th_vtag;
		}

		if (ISSET(th.th_flags, TUN_H_TCP_MSS)) {
			SET(m0->m_pkthdr.csum_flags, M_TCP_TSO);
			m0->m_pkthdr.ph_mss = th.th_mss;
		}
	}

	align += roundup(max_linkhdr, sizeof(long));
	mlen = MHLEN; /* how much space in the mbuf */

	len = uio->uio_resid;
	m0->m_pkthdr.len = len;

	m = m0;
	for (;;) {
		alen = align + len; /* what we want to put in this mbuf */
		if (alen > mlen) {
			if (alen > MAXMCLBYTES)
				alen = MAXMCLBYTES;
			m_clget(m, M_DONTWAIT, alen);
			if (!ISSET(m->m_flags, M_EXT)) {
				error = ENOMEM;
				/* drop, not put, so the chain is freed */
				goto drop;
			}
		}

		m->m_len = alen;
		if (align > 0) {
			/* avoid m_adj to protect m0->m_pkthdr.len */
			m->m_data += align;
			m->m_len -= align;
		}

		error = uiomove(mtod(m, void *), m->m_len, uio);
		if (error != 0)
			goto drop;

		len = uio->uio_resid;
		if (len == 0)
			break;

		n = m_get(M_DONTWAIT, MT_DATA);
		if (n == NULL) {
			error = ENOMEM;
			/* drop, not put, so the chain built so far is freed */
			goto drop;
		}

		align = 0;
		mlen = MLEN;

		m->m_next = n;
		m = n;
	}

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}

void
tun_input(struct ifnet *ifp, struct mbuf *m0)
{
	uint32_t		af;

	KASSERT(m0->m_len >= sizeof(af));

	af = *mtod(m0, uint32_t *);
	/* strip the tunnel header */
	m_adj(m0, sizeof(af));

	switch (ntohl(af)) {
	case AF_INET:
		ipv4_input(ifp, m0);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m0);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m0);
		break;
#endif
	default:
		m_freem(m0);
		break;
	}
}

int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}

int
tun_dev_kqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc	*sc;
	struct klist		*klist;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &sc->sc_rklist;
		kn->kn_fop = &tunread_filtops;
		break;
	case EVFILT_WRITE:
		klist = &sc->sc_wklist;
		kn->kn_fop = &tunwrite_filtops;
		break;
	default:
		error = EINVAL;
		goto put;
	}

	kn->kn_hook = sc;

	klist_insert(klist, kn);

put:
	tun_put(sc);
	return (error);
}

void
filt_tunrdetach(struct knote *kn)
{
	struct tun_softc	*sc = kn->kn_hook;

	klist_remove(&sc->sc_rklist, kn);
}

int
filt_tunread(struct knote *kn, long hint)
{
	struct tun_softc	*sc = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);

	kn->kn_data = tun_hdatalen(sc);

	return (kn->kn_data > 0);
}

void
filt_tunwdetach(struct knote *kn)
{
	struct tun_softc	*sc = kn->kn_hook;

	klist_remove(&sc->sc_wklist, kn);
}

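/* the device is always writable; report the largest usable frame */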
int
filt_tunwrite(struct knote *kn, long hint)
{
	struct tun_softc	*sc = kn->kn_hook;
	struct ifnet		*ifp = &sc->sc_if;

	MUTEX_ASSERT_LOCKED(&sc->sc_mtx);

	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;

	return (1);
}

int
filt_tunmodify(struct kevent *kev, struct knote *kn)
{
	struct tun_softc	*sc = kn->kn_hook;
	int			 active;

	mtx_enter(&sc->sc_mtx);
	active = knote_modify(kev, kn);
	mtx_leave(&sc->sc_mtx);

	return (active);
}

int
filt_tunprocess(struct knote *kn, struct kevent *kev)
{
	struct tun_softc	*sc = kn->kn_hook;
	int			 active;

	mtx_enter(&sc->sc_mtx);
	active = knote_process(kn, kev);
	mtx_leave(&sc->sc_mtx);

	return (active);
}

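/*
 * if_start handler: the queue is drained by read(2), so just
 * nudge any waiting reader.
 */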
void
tun_start(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;

	splassert(IPL_NET);

	if (ifq_len(&ifp->if_snd))
		tun_wakeup(sc);
}

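/* update the link state and tell the stack only if it changed */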
void
tun_link_state(struct ifnet *ifp, int link_state)
{
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}