xref: /openbsd/sys/net/if_tun.c (revision 3cab2bb3)
1 /*	$OpenBSD: if_tun.c,v 1.225 2020/07/22 02:16:02 dlg Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/sigio.h>
47 #include <sys/socket.h>
48 #include <sys/ioctl.h>
49 #include <sys/errno.h>
50 #include <sys/syslog.h>
51 #include <sys/selinfo.h>
52 #include <sys/fcntl.h>
53 #include <sys/time.h>
54 #include <sys/device.h>
55 #include <sys/vnode.h>
56 #include <sys/signalvar.h>
57 #include <sys/poll.h>
58 #include <sys/conf.h>
59 #include <sys/smr.h>
60 
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/netisr.h>
64 #include <net/rtable.h>
65 
66 #include <netinet/in.h>
67 #include <netinet/if_ether.h>
68 
69 #include "bpfilter.h"
70 #if NBPFILTER > 0
71 #include <net/bpf.h>
72 #endif
73 
74 #ifdef MPLS
75 #include <netmpls/mpls.h>
76 #endif /* MPLS */
77 
78 #include <net/if_tun.h>
79 
struct tun_softc {
	struct arpcom		sc_ac;		/* ethernet common data */
#define sc_if			sc_ac.ac_if
	struct selinfo		sc_rsel;	/* read select */
	struct selinfo		sc_wsel;	/* write select (not used) */
	SMR_LIST_ENTRY(tun_softc)
				sc_entry;	/* all tunnel interfaces */
	int			sc_unit;
	struct sigio_ref	sc_sigio;	/* async I/O registration */
	unsigned int		sc_flags;	/* misc flags */
#define TUN_DEAD			(1 << 16)	/* dying; reject new device use */

	dev_t			sc_dev;		/* non-zero while the cdev is open */
	struct refcnt		sc_refs;	/* held across device entrypoints */
	unsigned int		sc_reading;	/* reader asleep on &sc_if.if_snd */
};
96 
97 #ifdef	TUN_DEBUG
98 int	tundebug = TUN_DEBUG;
99 #define TUNDEBUG(a)	(tundebug? printf a : 0)
100 #else
101 #define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
102 #endif
103 
104 /* Only these IFF flags are changeable by TUNSIFINFO */
105 #define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)
106 
107 void	tunattach(int);
108 
109 int	tun_dev_open(dev_t, const struct if_clone *, int, struct proc *);
110 int	tun_dev_close(dev_t, struct proc *);
111 int	tun_dev_ioctl(dev_t, u_long, void *);
112 int	tun_dev_read(dev_t, struct uio *, int);
113 int	tun_dev_write(dev_t, struct uio *, int, int);
114 int	tun_dev_poll(dev_t, int, struct proc *);
115 int	tun_dev_kqfilter(dev_t, struct knote *);
116 
117 int	tun_ioctl(struct ifnet *, u_long, caddr_t);
118 void	tun_input(struct ifnet *, struct mbuf *);
119 int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
120 	    struct rtentry *);
121 int	tun_enqueue(struct ifnet *, struct mbuf *);
122 int	tun_clone_create(struct if_clone *, int);
123 int	tap_clone_create(struct if_clone *, int);
124 int	tun_create(struct if_clone *, int, int);
125 int	tun_clone_destroy(struct ifnet *);
126 void	tun_wakeup(struct tun_softc *);
127 int	tun_init(struct tun_softc *);
128 void	tun_start(struct ifnet *);
129 int	filt_tunread(struct knote *, long);
130 int	filt_tunwrite(struct knote *, long);
131 void	filt_tunrdetach(struct knote *);
132 void	filt_tunwdetach(struct knote *);
133 void	tun_link_state(struct tun_softc *, int);
134 
/* kqueue EVFILT_READ ops: fires when the send queue holds data to read. */
const struct filterops tunread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunrdetach,
	.f_event	= filt_tunread,
};
141 
/* kqueue EVFILT_WRITE ops: writes never block, so this is always ready. */
const struct filterops tunwrite_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_tunwdetach,
	.f_event	= filt_tunwrite,
};
148 
149 SMR_LIST_HEAD(tun_list, tun_softc);
150 
/* tun(4) and tap(4) share the destroy path; only creation differs. */
struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);

struct if_clone tap_cloner =
    IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy);
156 
/*
 * tunattach - boot-time registration of the tun and tap cloners.
 * The unit-count argument is unused; interfaces are created on demand.
 */
void
tunattach(int n)
{
	if_clone_attach(&tun_cloner);
	if_clone_attach(&tap_cloner);
}
163 
/* Create a layer 3 (point-to-point) tun interface. */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, 0));
}
169 
/* Create a layer 2 (ethernet-like) tap interface. */
int
tap_clone_create(struct if_clone *ifc, int unit)
{
	return (tun_create(ifc, unit, TUN_LAYER2));
}
175 
176 struct tun_list tun_devs_list = SMR_LIST_HEAD_INITIALIZER(tun_list);
177 
178 struct tun_softc *
179 tun_name_lookup(const char *name)
180 {
181 	struct tun_softc *sc;
182 
183 	KERNEL_ASSERT_LOCKED();
184 
185 	SMR_LIST_FOREACH_LOCKED(sc, &tun_devs_list, sc_entry) {
186 		if (strcmp(sc->sc_if.if_xname, name) == 0)
187 			return (sc);
188 	}
189 
190 	return (NULL);
191 }
192 
193 int
194 tun_insert(struct tun_softc *sc)
195 {
196 	int error = 0;
197 
198 	/* check for a race */
199 	if (tun_name_lookup(sc->sc_if.if_xname) != NULL)
200 		error = EEXIST;
201 	else {
202 		/* tun_name_lookup checks for the right lock already */
203 		SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry);
204 	}
205 
206 	return (error);
207 }
208 
/*
 * tun_create - allocate and attach a tun (layer 3) or tap (layer 2)
 * interface.  `flags` is 0 or TUN_LAYER2.  Runs under the kernel lock
 * via the cloner framework.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;

	if (unit > minor(~0U))
		return (ENXIO);

	KERNEL_ASSERT_LOCKED();

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);
	ifp->if_softc = sc;

	/* this is enough state for tun_dev_open to work with */

	if (tun_insert(sc) != 0)
		goto exists;

	/* build the interface */

	ifp->if_ioctl = tun_ioctl;
	ifp->if_enqueue = tun_enqueue;
	ifp->if_start = tun_start;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);

	if_counters_alloc(ifp);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point, AF-prefixed packets */
		ifp->if_input = tun_input;
		ifp->if_output = tun_output;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST);
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);
		ifp->if_rtrequest = p2p_rtrequest;

		if_attach(ifp);
		if_alloc_sadl(ifp);

#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: looks like an ethernet interface */
		sc->sc_flags |= TUN_LAYER2;
		ether_fakeaddr(ifp);
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);

		if_attach(ifp);
		ether_ifattach(ifp);
	}

	sigio_init(&sc->sc_sigio);
	refcnt_init(&sc->sc_refs);

	/* tell tun_dev_open we're initialised */

	sc->sc_flags |= TUN_INITED|TUN_STAYUP;
	wakeup(sc);

	return (0);

exists:
	free(sc, M_DEVBUF, sizeof(*sc));
	return (EEXIST);
}
281 
282 int
283 tun_clone_destroy(struct ifnet *ifp)
284 {
285 	struct tun_softc	*sc = ifp->if_softc;
286 	dev_t			 dev;
287 	int			 s;
288 
289 	KERNEL_ASSERT_LOCKED();
290 
291 	if (ISSET(sc->sc_flags, TUN_DEAD))
292 		return (ENXIO);
293 	SET(sc->sc_flags, TUN_DEAD);
294 
295 	/* kick userland off the device */
296 	dev = sc->sc_dev;
297 	if (dev) {
298 		struct vnode *vp;
299 
300 		if (vfinddev(dev, VCHR, &vp))
301                         VOP_REVOKE(vp, REVOKEALL);
302 
303 		KASSERT(sc->sc_dev == 0);
304 	}
305 
306 	/* prevent userland from getting to the device again */
307 	SMR_LIST_REMOVE_LOCKED(sc, sc_entry);
308 	smr_barrier();
309 
310 	/* help read() give up */
311 	if (sc->sc_reading)
312 		wakeup(&ifp->if_snd);
313 
314 	/* wait for device entrypoints to finish */
315 	refcnt_finalize(&sc->sc_refs, "tundtor");
316 
317 	s = splhigh();
318 	klist_invalidate(&sc->sc_rsel.si_note);
319 	klist_invalidate(&sc->sc_wsel.si_note);
320 	splx(s);
321 
322 	if (ISSET(sc->sc_flags, TUN_LAYER2))
323 		ether_ifdetach(ifp);
324 
325 	if_detach(ifp);
326 	sigio_free(&sc->sc_sigio);
327 
328 	free(sc, M_DEVBUF, sizeof *sc);
329 	return (0);
330 }
331 
/*
 * tun_get - look up the softc bound to a character device and take a
 * reference on it.  Runs under an SMR read section, so it may race
 * tun_clone_destroy(); the destructor's smr_barrier() plus
 * refcnt_finalize() keep the softc valid until tun_put() releases it.
 * Returns NULL if no open interface owns `dev`.
 */
static struct tun_softc *
tun_get(dev_t dev)
{
	struct tun_softc *sc;

	smr_read_enter();
	SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) {
		if (sc->sc_dev == dev) {
			refcnt_take(&sc->sc_refs);
			break;
		}
	}
	smr_read_leave();

	return (sc);
}
348 
/* Drop a tun_get() reference; wakes a waiting refcnt_finalize(). */
static inline void
tun_put(struct tun_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
354 
/* cdev open entrypoint for tun(4). */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tun_cloner, mode, p));
}
360 
/* cdev open entrypoint for tap(4). */
int
tapopen(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_open(dev, &tap_cloner, mode, p));
}
366 
/*
 * tun_dev_open - common open routine for the tun and tap cdevs.  Finds
 * the interface named after the minor, cloning it in the caller's
 * rdomain if it does not exist yet, waits until it is fully
 * constructed, then claims it for this device and brings it up.
 * Returns EBUSY if another open already owns the interface.
 */
int
tun_dev_open(dev_t dev, const struct if_clone *ifc, int mode, struct proc *p)
{
	struct tun_softc *sc;
	struct ifnet *ifp;
	int error;
	u_short stayup = 0;

	char name[IFNAMSIZ];
	unsigned int rdomain;

	snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
	rdomain = rtable_l2(p->p_p->ps_rtableid);

	/* let's find or make an interface to work with */
	while ((ifp = ifunit(name)) == NULL) {
		error = if_clone_create(name, rdomain);
		switch (error) {
		case 0: /* it's probably ours */
			stayup = TUN_STAYUP;
			/* FALLTHROUGH */
		case EEXIST: /* we may have lost a race with someone else */
			break;
		default:
			return (error);
		}
	}

	sc = ifp->if_softc;
	/* wait for it to be fully constructed before we use it */
	while (!ISSET(sc->sc_flags, TUN_INITED)) {
		error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP);
		if (error != 0) {
			/* XXX if_clone_destroy if stayup? */
			return (error);
		}
	}

	if (sc->sc_dev != 0) {
		/* aww, we lost */
		return (EBUSY);
	}
	/* it's ours now; since we created it, close may destroy it */
	sc->sc_dev = dev;
	CLR(sc->sc_flags, stayup);

	/* automatically mark the interface running on open */
	SET(ifp->if_flags, IFF_UP | IFF_RUNNING);
	tun_link_state(sc, LINK_STATE_FULL_DUPLEX);

	return (0);
}
419 
/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
429 
/* cdev close entrypoint for tap(4); shares the tun close path. */
int
tapclose(dev_t dev, int flag, int mode, struct proc *p)
{
	return (tun_dev_close(dev, p));
}
435 
436 int
437 tun_dev_close(dev_t dev, struct proc *p)
438 {
439 	struct tun_softc	*sc;
440 	struct ifnet		*ifp;
441 	int			 error = 0;
442 	char			 name[IFNAMSIZ];
443 	int			 destroy = 0;
444 
445 	sc = tun_get(dev);
446 	if (sc == NULL)
447 		return (ENXIO);
448 
449 	ifp = &sc->sc_if;
450 
451 	/*
452 	 * junk all pending output
453 	 */
454 	CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
455 	ifq_purge(&ifp->if_snd);
456 
457 	CLR(sc->sc_flags, TUN_ASYNC);
458 	selwakeup(&sc->sc_rsel);
459 	sigio_free(&sc->sc_sigio);
460 
461 	if (!ISSET(sc->sc_flags, TUN_DEAD)) {
462 		/* we can't hold a reference to sc before we start a dtor */
463 		if (!ISSET(sc->sc_flags, TUN_STAYUP)) {
464 			destroy = 1;
465 			strlcpy(name, ifp->if_xname, sizeof(name));
466 		} else {
467 			CLR(ifp->if_flags, IFF_UP | IFF_RUNNING);
468 			tun_link_state(sc, LINK_STATE_DOWN);
469 		}
470 	}
471 
472 	sc->sc_dev = 0;
473 
474 	tun_put(sc);
475 
476 	if (destroy)
477 		if_clone_destroy(name);
478 
479 	return (error);
480 }
481 
/*
 * tun_init - mark the interface up/running and recompute the
 * address-derived flag bits (TUN_IASET, TUN_DSTADDR, TUN_BRDADDR) from
 * the interface's current address list.  Called from tun_ioctl() when
 * an address is (re)assigned.
 */
int
tun_init(struct tun_softc *sc)
{
	struct ifnet	*ifp = &sc->sc_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tun_init\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;

	/* rebuild the flags from scratch on every pass */
	sc->sc_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					sc->sc_flags |= TUN_BRDADDR;
			} else
				sc->sc_flags &= ~TUN_BRDADDR;
		}
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = satosin6(ifa->ifa_addr);
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sc->sc_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin6 = satosin6(ifa->ifa_dstaddr);
				if (sin6 &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
					sc->sc_flags |= TUN_DSTADDR;
			} else
				sc->sc_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
536 
/*
 * tun_ioctl - network-stack side ioctl handler (SIOC*), as opposed to
 * tun_dev_ioctl which handles the character device side.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*sc = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		tun_init(sc);
		break;
	case SIOCSIFFLAGS:
		/* keep IFF_RUNNING in sync with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP))
			SET(ifp->if_flags, IFF_RUNNING);
		else
			CLR(ifp->if_flags, IFF_RUNNING);
		break;

	case SIOCSIFDSTADDR:
		tun_init(sc);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		/* tap interfaces also take the generic ethernet ioctls */
		if (sc->sc_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		else
			error = ENOTTY;
	}

	return (error);
}
580 
/*
 * tun_output - queue packets from higher level ready to put out.
 * Prepends the 4-byte network-order address family header that the
 * layer 3 read side (and DLT_LOOP bpf) expects.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	u_int32_t		*af;

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	return (if_enqueue(ifp, m0));
}
603 
604 int
605 tun_enqueue(struct ifnet *ifp, struct mbuf *m0)
606 {
607 	struct tun_softc	*sc = ifp->if_softc;
608 	int			 error;
609 
610 	error = ifq_enqueue(&ifp->if_snd, m0);
611 	if (error != 0)
612 		return (error);
613 
614 	tun_wakeup(sc);
615 
616 	return (0);
617 }
618 
619 void
620 tun_wakeup(struct tun_softc *sc)
621 {
622 	if (sc->sc_reading)
623 		wakeup(&sc->sc_if.if_snd);
624 
625 	selwakeup(&sc->sc_rsel);
626 	if (sc->sc_flags & TUN_ASYNC)
627 		pgsigio(&sc->sc_sigio, SIGIO, 0);
628 }
629 
/*
 * the cdevsw interface is now pretty minimal.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
638 
/* cdev ioctl entrypoint for tap(4). */
int
tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	return (tun_dev_ioctl(dev, cmd, data));
}
644 
/*
 * tun_dev_ioctl - ioctls on the character device side.  Holds a softc
 * reference for the duration of the call; unknown commands yield
 * ENOTTY.
 */
int
tun_dev_ioctl(dev_t dev, u_long cmd, void *data)
{
	struct tun_softc	*sc;
	struct tuninfo		*tunp;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			error = EINVAL;
			break;
		}
		/* the interface type (tun vs tap) cannot be changed */
		if (tunp->type != sc->sc_if.if_type) {
			error = EINVAL;
			break;
		}
		sc->sc_if.if_mtu = tunp->mtu;
		/* only the TUN_IFF_FLAGS bits may be altered by userland */
		sc->sc_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (sc->sc_if.if_flags & ~TUN_IFF_FLAGS);
		sc->sc_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = sc->sc_if.if_mtu;
		tunp->type = sc->sc_if.if_type;
		tunp->flags = sc->sc_if.if_flags;
		tunp->baudrate = sc->sc_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of point-to-point or broadcast mode */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			sc->sc_if.if_flags &= ~TUN_IFF_FLAGS;
			sc->sc_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;

	case FIONBIO:
		break;
	case FIOASYNC:
		if (*(int *)data)
			sc->sc_flags |= TUN_ASYNC;
		else
			sc->sc_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		*(int *)data = ifq_hdatalen(&sc->sc_if.if_snd);
		break;
	case FIOSETOWN:
	case TIOCSPGRP:
		return (sigio_setown(&sc->sc_sigio, cmd, data));
	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&sc->sc_sigio, cmd, data);
		break;
	case SIOCGIFADDR:
		/* link-level address get/set only makes sense for tap */
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(sc->sc_ac.ac_enaddr, data,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(sc->sc_flags & TUN_LAYER2)) {
			error = EINVAL;
			break;
		}
		bcopy(data, sc->sc_ac.ac_enaddr,
		    sizeof(sc->sc_ac.ac_enaddr));
		break;
	default:
		error = ENOTTY;
		break;
	}

	tun_put(sc);
	return (error);
}
744 
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
754 
/* cdev read entrypoint for tap(4). */
int
tapread(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_read(dev, uio, ioflag));
}
760 
/*
 * tun_dev_read - dequeue one packet from the interface send queue and
 * copy as much of it as fits into the uio.  Each read consumes exactly
 * one packet; any tail that does not fit is discarded.
 */
int
tun_dev_read(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/* may sleep unless IO_NDELAY; sc_reading lets the dtor wake us */
	error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY),
	    (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev);
	if (error != 0)
		goto put;

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif

	/* copy out mbuf by mbuf until the chain or the uio runs out */
	m = m0;
	while (uio->uio_resid > 0) {
		size_t len = ulmin(uio->uio_resid, m->m_len);
		if (len > 0) {
			error = uiomove(mtod(m, void *), len, uio);
			if (error != 0)
				break;
		}

		m = m->m_next;
		if (m == NULL)
			break;
	}

	m_freem(m0);

put:
	tun_put(sc);
	return (error);
}
805 
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, 0));
}
814 
/* cdev write entrypoint for tap(4); ETHER_ALIGN keeps the payload aligned. */
int
tapwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (tun_dev_write(dev, uio, ioflag, ETHER_ALIGN));
}
820 
/*
 * tun_dev_write - turn one write(2) into one packet: size-check the
 * uio, build an mbuf with the requested payload alignment, copy the
 * data in, and inject it into the network stack as received input.
 */
int
tun_dev_write(dev_t dev, struct uio *uio, int ioflag, int align)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct mbuf		*m0;
	int			error = 0;
	size_t			mlen;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	/* a write must be a whole packet within the header+MTU bounds */
	if (uio->uio_resid < ifp->if_hdrlen ||
	    uio->uio_resid > (ifp->if_hdrlen + ifp->if_hardmtu)) {
		error = EMSGSIZE;
		goto put;
	}

	/* leave room for link headers to be prepended later */
	align += max_linkhdr;
	mlen = align + uio->uio_resid;

	m0 = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m0 == NULL) {
		error = ENOMEM;
		goto put;
	}
	if (mlen > MHLEN) {
		m_clget(m0, M_DONTWAIT, mlen);
		if (!ISSET(m0->m_flags, M_EXT)) {
			error = ENOMEM;
			goto drop;
		}
	}

	/* size to mlen, then trim the alignment slack off the front */
	m_align(m0, mlen);
	m0->m_pkthdr.len = m0->m_len = mlen;
	m_adj(m0, align);

	error = uiomove(mtod(m0, void *), m0->m_len, uio);
	if (error != 0)
		goto drop;

	NET_LOCK();
	if_vinput(ifp, m0);
	NET_UNLOCK();

	tun_put(sc);
	return (0);

drop:
	m_freem(m0);
put:
	tun_put(sc);
	return (error);
}
879 
/*
 * tun_input - layer 3 input handler.  Strips the 4-byte network-order
 * address family header written by userland (see tun_output for the
 * outbound counterpart) and dispatches to the matching protocol input;
 * unknown families are dropped.
 */
void
tun_input(struct ifnet *ifp, struct mbuf *m0)
{
	uint32_t		af;

	KASSERT(m0->m_len >= sizeof(af));

	af = *mtod(m0, uint32_t *);
	/* strip the tunnel header */
	m_adj(m0, sizeof(af));

	switch (ntohl(af)) {
	case AF_INET:
		ipv4_input(ifp, m0);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m0);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m0);
		break;
#endif
	default:
		m_freem(m0);
		break;
	}
}
910 
/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
int
tunpoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}
921 
/* cdev poll entrypoint for tap(4). */
int
tappoll(dev_t dev, int events, struct proc *p)
{
	return (tun_dev_poll(dev, events, p));
}
927 
/*
 * tun_dev_poll - readable when the send queue is non-empty, always
 * writable.  Records the selector when there is nothing to read.
 */
int
tun_dev_poll(dev_t dev, int events, struct proc *p)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	int			 revents;

	sc = tun_get(dev);
	if (sc == NULL)
		return (POLLERR);

	ifp = &sc->sc_if;
	revents = 0;

	if (events & (POLLIN | POLLRDNORM)) {
		if (!ifq_empty(&ifp->if_snd))
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(p, &sc->sc_rsel);
	}
	/* writes never block */
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);

	tun_put(sc);
	return (revents);
}
954 
/* cdev kqfilter entrypoint for tun(4). */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
960 
/* cdev kqfilter entrypoint for tap(4). */
int
tapkqfilter(dev_t dev, struct knote *kn)
{
	return (tun_dev_kqfilter(dev, kn));
}
966 
/*
 * tun_dev_kqfilter - attach a read or write knote to the device.  The
 * knote hook carries a bare softc pointer; stale knotes are cleaned up
 * by klist_invalidate() in tun_clone_destroy().
 */
int
tun_dev_kqfilter(dev_t dev, struct knote *kn)
{
	struct tun_softc	*sc;
	struct ifnet		*ifp;
	struct klist		*klist;
	int			 error = 0;
	int			 s;

	sc = tun_get(dev);
	if (sc == NULL)
		return (ENXIO);

	ifp = &sc->sc_if;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &sc->sc_rsel.si_note;
		kn->kn_fop = &tunread_filtops;
		break;
	case EVFILT_WRITE:
		klist = &sc->sc_wsel.si_note;
		kn->kn_fop = &tunwrite_filtops;
		break;
	default:
		error = EINVAL;
		goto put;
	}

	kn->kn_hook = (caddr_t)sc; /* XXX give the sc_ref to the hook? */

	s = splhigh();
	klist_insert(klist, kn);
	splx(s);

put:
	tun_put(sc);
	return (error);
}
1006 
/* Detach a read knote from the softc's read klist. */
void
filt_tunrdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*sc = kn->kn_hook;

	s = splhigh();
	klist_remove(&sc->sc_rsel.si_note, kn);
	splx(s);
}
1017 
1018 int
1019 filt_tunread(struct knote *kn, long hint)
1020 {
1021 	struct tun_softc	*sc = kn->kn_hook;
1022 	struct ifnet		*ifp = &sc->sc_if;
1023 
1024 	kn->kn_data = ifq_hdatalen(&ifp->if_snd);
1025 
1026 	return (kn->kn_data > 0);
1027 }
1028 
/* Detach a write knote from the softc's write klist. */
void
filt_tunwdetach(struct knote *kn)
{
	int			 s;
	struct tun_softc	*sc = kn->kn_hook;

	s = splhigh();
	klist_remove(&sc->sc_wsel.si_note, kn);
	splx(s);
}
1039 
1040 int
1041 filt_tunwrite(struct knote *kn, long hint)
1042 {
1043 	struct tun_softc	*sc = kn->kn_hook;
1044 	struct ifnet		*ifp = &sc->sc_if;
1045 
1046 	kn->kn_data = ifp->if_hdrlen + ifp->if_hardmtu;
1047 
1048 	return (1);
1049 }
1050 
/*
 * tun_start - ifnet start routine; there is no hardware to kick, so
 * just wake userland readers if packets are queued.
 */
void
tun_start(struct ifnet *ifp)
{
	struct tun_softc	*sc = ifp->if_softc;

	splassert(IPL_NET);

	if (ifq_len(&ifp->if_snd))
		tun_wakeup(sc);
}
1061 
1062 void
1063 tun_link_state(struct tun_softc *sc, int link_state)
1064 {
1065 	struct ifnet *ifp = &sc->sc_if;
1066 
1067 	if (ifp->if_link_state != link_state) {
1068 		ifp->if_link_state = link_state;
1069 		if_link_state_change(ifp);
1070 	}
1071 }
1072