xref: /openbsd/sys/net/if_tun.c (revision 8932bfb7)
1 /*	$OpenBSD: if_tun.c,v 1.112 2011/07/09 00:47:18 henning Exp $	*/
2 /*	$NetBSD: if_tun.c,v 1.24 1996/05/07 02:40:48 thorpej Exp $	*/
3 
4 /*
5  * Copyright (c) 1988, Julian Onions <Julian.Onions@nexor.co.uk>
6  * Nottingham University 1987.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * This driver takes packets off the IP i/f and hands them up to a
32  * user process to have its wicked way with. This driver has its
33  * roots in a similar driver written by Phil Cockcroft (formerly) at
34  * UCL. This driver is based much more on read/write/select mode of
35  * operation though.
36  */
37 
38 /* #define	TUN_DEBUG	9 */
39 
40 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/ioctl.h>
48 #include <sys/errno.h>
49 #include <sys/syslog.h>
50 #include <sys/selinfo.h>
51 #include <sys/file.h>
52 #include <sys/time.h>
53 #include <sys/device.h>
54 #include <sys/vnode.h>
55 #include <sys/signalvar.h>
56 #include <sys/poll.h>
57 #include <sys/conf.h>
58 
59 #include <machine/cpu.h>
60 
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/netisr.h>
64 #include <net/route.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_systm.h>
69 #include <netinet/in_var.h>
70 #include <netinet/ip.h>
71 #include <netinet/if_ether.h>
72 #endif
73 
74 #ifdef PIPEX
75 #include <net/pipex.h>
76 #endif
77 
78 #include "bpfilter.h"
79 #if NBPFILTER > 0
80 #include <net/bpf.h>
81 #endif
82 
83 #include <net/if_tun.h>
84 
/*
 * Per-instance state for one tun(4) device.  Allocated in tun_create()
 * and linked onto tun_softc_list; looked up by unit via tun_lookup().
 */
struct tun_softc {
	struct arpcom	arpcom;		/* ethernet common data */
	struct selinfo	tun_rsel;	/* read select */
	struct selinfo	tun_wsel;	/* write select (not used) */
	LIST_ENTRY(tun_softc) tun_list;	/* all tunnel interfaces */
	int		tun_unit;	/* minor number / clone unit */
	uid_t		tun_siguid;	/* uid for process that set tun_pgid */
	uid_t		tun_sigeuid;	/* euid for process that set tun_pgid */
	pid_t		tun_pgid;	/* the process group - if any */
	u_short		tun_flags;	/* misc flags (TUN_* in if_tun.h) */
#define tun_if	arpcom.ac_if
#ifdef PIPEX
	struct pipex_iface_context pipex_iface; /* pipex context */
#endif
};
100 
#ifdef	TUN_DEBUG
int	tundebug = TUN_DEBUG;
#define TUNDEBUG(a)	(tundebug? printf a : 0)
#else
#define TUNDEBUG(a)	/* (tundebug? printf a : 0) */
#endif

/* Only these IFF flags are changeable by TUNSIFINFO */
#define TUN_IFF_FLAGS (IFF_UP|IFF_POINTOPOINT|IFF_MULTICAST|IFF_BROADCAST)

void	tunattach(int);
int	tunopen(dev_t, int, int, struct proc *);
int	tunclose(dev_t, int, int, struct proc *);
int	tun_ioctl(struct ifnet *, u_long, caddr_t);
int	tun_output(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	tunioctl(dev_t, u_long, caddr_t, int, struct proc *);
int	tunread(dev_t, struct uio *, int);
int	tunwrite(dev_t, struct uio *, int);
int	tunpoll(dev_t, int, struct proc *);
int	tunkqfilter(dev_t, struct knote *);
int	tun_clone_create(struct if_clone *, int);
int	tun_create(struct if_clone *, int, int);
int	tun_clone_destroy(struct ifnet *);
struct	tun_softc *tun_lookup(int);
void	tun_wakeup(struct tun_softc *);
int	tun_switch(struct tun_softc *, int);

int	tuninit(struct tun_softc *);
int	filt_tunread(struct knote *, long);
int	filt_tunwrite(struct knote *, long);
void	filt_tunrdetach(struct knote *);
void	filt_tunwdetach(struct knote *);
void	tunstart(struct ifnet *);
void	tun_link_state(struct tun_softc *);

/* kqueue filter ops; leading 1 marks these as file-descriptor filters */
struct filterops tunread_filtops =
	{ 1, NULL, filt_tunrdetach, filt_tunread};

struct filterops tunwrite_filtops =
	{ 1, NULL, filt_tunwdetach, filt_tunwrite};

/* list of all tun interfaces, protected by splnet */
LIST_HEAD(, tun_softc) tun_softc_list;

struct if_clone tun_cloner =
    IF_CLONE_INITIALIZER("tun", tun_clone_create, tun_clone_destroy);
147 
/*
 * Driver attach, called once at boot: initialize the softc list,
 * register the "tun" cloner, and set up pipex if configured.
 * The argument `n' (number of preallocated devices) is unused.
 */
void
tunattach(int n)
{
	LIST_INIT(&tun_softc_list);
	if_clone_attach(&tun_cloner);
#ifdef PIPEX
	pipex_init();
#endif
}
157 
/*
 * Cloner entry point for "ifconfig tunN create": build a new tun
 * interface in the default layer 3 (point-to-point) mode.
 */
int
tun_clone_create(struct if_clone *ifc, int unit)
{
	int	layer_flags = 0;	/* no TUN_LAYER2: plain layer 3 tun */

	return (tun_create(ifc, unit, layer_flags));
}
163 
/*
 * Common interface creation for tun_clone_create() and tun_switch().
 * `flags' selects the mode: TUN_LAYER2 for an ethernet-like interface,
 * 0 for the classic layer 3 point-to-point tunnel.
 * Returns 0 on success or ENOMEM.
 */
int
tun_create(struct if_clone *ifc, int unit, int flags)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	int			 s;

	tp = malloc(sizeof(*tp), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!tp)
		return (ENOMEM);

	tp->tun_unit = unit;
	/* STAYUP: interfaces created via ifconfig survive device close */
	tp->tun_flags = TUN_INITED|TUN_STAYUP;

	ifp = &tp->tun_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
	    unit);
	/* generate a random ethernet address (used in layer 2 mode) */
	ether_fakeaddr(ifp);

	ifp->if_softc = tp;
	ifp->if_ioctl = tun_ioctl;
	ifp->if_output = tun_output;
	ifp->if_start = tunstart;
	ifp->if_hardmtu = TUNMRU;
	ifp->if_link_state = LINK_STATE_DOWN;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	IFQ_SET_READY(&ifp->if_snd);

	if ((flags & TUN_LAYER2) == 0) {
		/* layer 3: point-to-point tunnel with a 4-byte AF header */
		tp->tun_flags &= ~TUN_LAYER2;
		ifp->if_mtu = ETHERMTU;
		ifp->if_flags = IFF_POINTOPOINT;
		ifp->if_type = IFT_TUNNEL;
		ifp->if_hdrlen = sizeof(u_int32_t);

		if_attach(ifp);
		if_alloc_sadl(ifp);
#if NBPFILTER > 0
		bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
#endif
	} else {
		/* layer 2: behave like an ethernet interface */
		tp->tun_flags |= TUN_LAYER2;
		ifp->if_flags =
		    (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST|IFF_LINK0);
		ifp->if_capabilities = IFCAP_VLAN_MTU;

		if_attach(ifp);
		ether_ifattach(ifp);
	}
	/* force output function to our function */
	ifp->if_output = tun_output;

	s = splnet();
	LIST_INSERT_HEAD(&tun_softc_list, tp, tun_list);
	splx(s);
#ifdef PIPEX
	pipex_iface_init(&tp->pipex_iface, ifp);
#endif

	return (0);
}
225 
/*
 * Cloner destroy entry point: tear down a tun interface, waking any
 * sleepers and invalidating kqueue notes first, then detach and free
 * the softc.  Always succeeds.
 */
int
tun_clone_destroy(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s;

#ifdef PIPEX
	pipex_iface_stop(&tp->pipex_iface);
#endif
	/* kick any reader sleeping in tunread()/poll() before teardown */
	tun_wakeup(tp);

	/* detach outstanding knotes so kqueue users see EOF, not a UAF */
	s = splhigh();
	klist_invalidate(&tp->tun_rsel.si_note);
	klist_invalidate(&tp->tun_wsel.si_note);
	splx(s);

	s = splnet();
	LIST_REMOVE(tp, tun_list);
	splx(s);

	if (tp->tun_flags & TUN_LAYER2)
		ether_ifdetach(ifp);

	if_detach(ifp);

	free(tp, M_DEVBUF);
	return (0);
}
254 
255 struct tun_softc *
256 tun_lookup(int unit)
257 {
258 	struct tun_softc *tp;
259 
260 	LIST_FOREACH(tp, &tun_softc_list, tun_list)
261 		if (tp->tun_unit == unit)
262 			return (tp);
263 	return (NULL);
264 }
265 
/*
 * Switch an existing interface between layer 2 and layer 3 mode.
 * There is no in-place conversion: the old interface is destroyed and
 * a fresh one created with the same unit, after saving the open/async
 * state and the interface group memberships so they can be restored.
 * NOTE(review): on success `tp' points at the NEW softc; the caller's
 * original pointer is freed by tun_clone_destroy().
 */
int
tun_switch(struct tun_softc *tp, int flags)
{
	struct ifnet		*ifp = &tp->tun_if;
	int			 unit, open, r, s;
	struct ifg_list		*ifgl;
	u_int			ifgr_len;
	char			*ifgrpnames, *p;

	/* nothing to do if already in the requested mode */
	if ((tp->tun_flags & TUN_LAYER2) == (flags & TUN_LAYER2))
		return (0);

	/* tp will be removed so store unit number */
	unit = tp->tun_unit;
	open = tp->tun_flags & (TUN_OPEN|TUN_NBIO|TUN_ASYNC);
	TUNDEBUG(("%s: switching to layer %d\n", ifp->if_xname,
		    flags & TUN_LAYER2 ? 2 : 3));

	/* remember joined groups */
	ifgr_len = 0;
	ifgrpnames = NULL;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		ifgr_len += IFNAMSIZ;
	if (ifgr_len)
		ifgrpnames = malloc(ifgr_len + 1, M_TEMP, M_NOWAIT|M_ZERO);
	if (ifgrpnames) {
		/* pack group names back-to-back, IFNAMSIZ bytes each */
		p = ifgrpnames;
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
			strlcpy(p, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
			p += IFNAMSIZ;
		}
	}

	/* remove old device and ... */
	tun_clone_destroy(ifp);
	/* attach new interface */
	r = tun_create(&tun_cloner, unit, flags);

	if (r == 0) {
		if ((tp = tun_lookup(unit)) == NULL) {
			/* this should never fail */
			r = ENXIO;
			goto abort;
		}

		/* rejoin groups */
		ifp = &tp->tun_if;
		for (p = ifgrpnames; p && *p; p += IFNAMSIZ)
			if_addgroup(ifp, p);
	}
	if (open && r == 0) {
		/* already opened before ifconfig tunX link0 */
		s = splnet();
		tp->tun_flags |= open;
		tun_link_state(tp);
		splx(s);
		TUNDEBUG(("%s: already open\n", tp->tun_if.if_xname));
	}
 abort:
	if (ifgrpnames)
		free(ifgrpnames, M_TEMP);
	return (r);
}
329 
330 /*
331  * tunnel open - must be superuser & the device must be
332  * configured in
333  */
/*
 * tunnel open - must be superuser & the device must be
 * configured in
 */
int
tunopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	int			 error, s;

	if ((tp = tun_lookup(minor(dev))) == NULL) {	/* create on demand */
		char	xname[IFNAMSIZ];

		snprintf(xname, sizeof(xname), "%s%d", "tun", minor(dev));
		if ((error = if_clone_create(xname)) != 0)
			return (error);

		if ((tp = tun_lookup(minor(dev))) == NULL)
			return (ENXIO);
		/* created by open, not ifconfig: destroy again on close */
		tp->tun_flags &= ~TUN_STAYUP;
	}

	/* only one opener at a time */
	if (tp->tun_flags & TUN_OPEN)
		return (EBUSY);

	ifp = &tp->tun_if;
	tp->tun_flags |= TUN_OPEN;

	/* automatically mark the interface running on open */
	s = splnet();
	ifp->if_flags |= IFF_RUNNING;
	tun_link_state(tp);
	splx(s);

	TUNDEBUG(("%s: open\n", ifp->if_xname));
	return (0);
}
368 
369 /*
370  * tunclose - close the device; if closing the real device, flush pending
371  *  output and unless STAYUP bring down and destroy the interface.
372  */
/*
 * tunclose - close the device; if closing the real device, flush pending
 *  output and unless STAYUP bring down and destroy the interface.
 */
int
tunclose(dev_t dev, int flag, int mode, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	tp->tun_flags &= ~(TUN_OPEN|TUN_NBIO|TUN_ASYNC);

	/*
	 * junk all pending output
	 */
	s = splnet();
	ifp->if_flags &= ~IFF_RUNNING;
	tun_link_state(tp);
	IFQ_PURGE(&ifp->if_snd);
	splx(s);

	TUNDEBUG(("%s: closed\n", ifp->if_xname));

	if (!(tp->tun_flags & TUN_STAYUP))
		/* device was auto-created by open: destroy it again */
		return (if_clone_destroy(ifp->if_xname));
	else {
		/* keep the interface; drop signal target, wake pollers */
		tp->tun_pgid = 0;
		selwakeup(&tp->tun_rsel);
	}

	return (0);
}
406 
/*
 * Recompute the TUN_IASET/TUN_DSTADDR/TUN_BRDADDR address-state flags
 * from the interface's current address list and mark it up/running.
 * Called from tun_ioctl() whenever an address is (re)configured.
 * Always returns 0.
 */
int
tuninit(struct tun_softc *tp)
{
	struct ifnet	*ifp = &tp->tun_if;
	struct ifaddr	*ifa;

	TUNDEBUG(("%s: tuninit\n", ifp->if_xname));

	ifp->if_flags |= IFF_UP | IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE; /* we are never active */

	/* start from scratch and re-derive the flags below */
	tp->tun_flags &= ~(TUN_IASET|TUN_DSTADDR|TUN_BRDADDR);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = satosin(ifa->ifa_addr);
			if (sin && sin->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = satosin(ifa->ifa_dstaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;

			if (ifp->if_flags & IFF_BROADCAST) {
				sin = satosin(ifa->ifa_broadaddr);
				if (sin && sin->sin_addr.s_addr)
					tp->tun_flags |= TUN_BRDADDR;
			} else
				tp->tun_flags &= ~TUN_BRDADDR;
		}
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin;

			sin = (struct sockaddr_in6 *)ifa->ifa_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
				tp->tun_flags |= TUN_IASET;

			if (ifp->if_flags & IFF_POINTOPOINT) {
				sin = (struct sockaddr_in6 *)ifa->ifa_dstaddr;
				if (sin &&
				    !IN6_IS_ADDR_UNSPECIFIED(&sin->sin6_addr))
					tp->tun_flags |= TUN_DSTADDR;
			} else
				tp->tun_flags &= ~TUN_DSTADDR;
		}
#endif /* INET6 */
	}

	return (0);
}
464 
465 /*
466  * Process an ioctl request.
467  */
/*
 * Process an ioctl request.
 *
 * Network-stack side ioctl handler (ifp->if_ioctl), run at splnet.
 * In layer 2 mode unknown requests fall through to ether_ioctl();
 * in layer 3 mode they fail with ENOTTY.
 */
int
tun_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct tun_softc	*tp = (struct tun_softc *)(ifp->if_softc);
	struct ifreq		*ifr = (struct ifreq *)data;
	int			 error = 0, s;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		tuninit(tp);
		TUNDEBUG(("%s: address set\n", ifp->if_xname));
		if (tp->tun_flags & TUN_LAYER2)
			switch (((struct ifaddr *)data)->ifa_addr->sa_family) {
#ifdef INET
			case AF_INET:
				arp_ifinit(&tp->arpcom, (struct ifaddr *)data);
				break;
#endif
			default:
				break;
			}
		break;
	case SIOCSIFDSTADDR:
		tuninit(tp);
		TUNDEBUG(("%s: destination address set\n", ifp->if_xname));
		break;
	case SIOCSIFBRDADDR:
		tuninit(tp);
		TUNDEBUG(("%s: broadcast address set\n", ifp->if_xname));
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TUNMRU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI: {
		if (ifr == 0) {
			error = EAFNOSUPPORT;	   /* XXX */
			break;
		}

		if (tp->tun_flags & TUN_LAYER2) {
			error = (cmd == SIOCADDMULTI) ?
			    ether_addmulti(ifr, &tp->arpcom) :
			    ether_delmulti(ifr, &tp->arpcom);
			if (error == ENETRESET) {
				/*
				 * Multicast list has changed; set the hardware
				 * filter accordingly. The good thing is we do
				 * not have a hardware filter (:
				 */
				error = 0;
			}
			break;
		}

		/* layer 3: just validate the address family */
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:
			break;
#endif
#ifdef INET6
		case AF_INET6:
			break;
#endif
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* IFF_LINK0 toggles between layer 2 and layer 3 mode */
		error = tun_switch(tp,
		    ifp->if_flags & IFF_LINK0 ? TUN_LAYER2 : 0);
		break;
	default:
		if (tp->tun_flags & TUN_LAYER2)
			error = ether_ioctl(ifp, &tp->arpcom, cmd, data);
		else
			error = ENOTTY;
	}

	splx(s);
	return (error);
}
558 
559 /*
560  * tun_output - queue packets from higher level ready to put out.
561  */
/*
 * tun_output - queue packets from higher level ready to put out.
 *
 * In layer 2 mode this defers to ether_output() (which ends up in
 * tunstart()).  In layer 3 mode a 4-byte address-family header is
 * prepended and the packet is enqueued on if_snd for a reader.
 */
int
tun_output(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct tun_softc	*tp = ifp->if_softc;
	int			 s, len, error;
	u_int32_t		*af;

	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) {
		m_freem(m0);
		return (EHOSTDOWN);
	}

	TUNDEBUG(("%s: tun_output\n", ifp->if_xname));

	/* no point queueing when nobody has the device open */
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname,
		     tp->tun_flags));
		m_freem(m0);
		return (EHOSTDOWN);
	}

	if (tp->tun_flags & TUN_LAYER2)
		/* call ether_output and that will call tunstart at the end */
		return (ether_output(ifp, m0, dst, rt));

	/* prepend the address family so the reader can demultiplex */
	M_PREPEND(m0, sizeof(*af), M_DONTWAIT);
	if (m0 == NULL)
		return (ENOBUFS);
	af = mtod(m0, u_int32_t *);
	*af = htonl(dst->sa_family);

	s = splnet();

#if NBPFILTER > 0
	if (ifp->if_bpf)
		bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
#endif
#ifdef PIPEX
	/* pipex may consume the packet entirely */
	if ((m0 = pipex_output(m0, dst->sa_family, sizeof(u_int32_t),
	    &tp->pipex_iface)) == NULL) {
		splx(s);
		return (0);
	}
#endif

	len = m0->m_pkthdr.len;
	IFQ_ENQUEUE(&ifp->if_snd, m0, NULL, error);
	if (error) {
		splx(s);
		ifp->if_collisions++;
		return (error);
	}
	splx(s);
	ifp->if_opackets++;
	ifp->if_obytes += len;

	/* tell any reader (sleep/SIGIO/poll) that data is available */
	tun_wakeup(tp);
	return (0);
}
622 
623 void
624 tun_wakeup(struct tun_softc *tp)
625 {
626 	if (tp->tun_flags & TUN_RWAIT) {
627 		tp->tun_flags &= ~TUN_RWAIT;
628 		wakeup((caddr_t)tp);
629 	}
630 	if (tp->tun_flags & TUN_ASYNC && tp->tun_pgid)
631 		csignal(tp->tun_pgid, SIGIO,
632 		    tp->tun_siguid, tp->tun_sigeuid);
633 	selwakeup(&tp->tun_rsel);
634 }
635 
636 /*
637  * the cdevsw interface is now pretty minimal.
638  */
/*
 * the cdevsw interface is now pretty minimal.
 *
 * Character-device ioctl handler: tuninfo get/set, debug knobs,
 * blocking/async mode, SIGIO process group, and (layer 2 only) the
 * link-level address.  Unknown requests go to pipex if configured,
 * otherwise fail with ENOTTY.  Runs at splnet throughout.
 */
int
tunioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int			 s;
	struct tun_softc	*tp;
	struct tuninfo		*tunp;
	struct mbuf		*m;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	s = splnet();
	switch (cmd) {
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (tunp->mtu < ETHERMIN || tunp->mtu > TUNMRU) {
			splx(s);
			return (EINVAL);
		}
		tp->tun_if.if_mtu = tunp->mtu;
		tp->tun_if.if_type = tunp->type;
		/* only TUN_IFF_FLAGS bits may be changed this way */
		tp->tun_if.if_flags =
		    (tunp->flags & TUN_IFF_FLAGS) |
		    (tp->tun_if.if_flags & ~TUN_IFF_FLAGS);
		tp->tun_if.if_baudrate = tunp->baudrate;
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		tunp->mtu = tp->tun_if.if_mtu;
		tunp->type = tp->tun_if.if_type;
		tunp->flags = tp->tun_if.if_flags;
		tunp->baudrate = tp->tun_if.if_baudrate;
		break;
#ifdef TUN_DEBUG
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
#endif
	case TUNSIFMODE:
		/* exactly one of p2p or broadcast must be requested */
		switch (*(int *)data & (IFF_POINTOPOINT|IFF_BROADCAST)) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			tp->tun_if.if_flags &= ~TUN_IFF_FLAGS;
			tp->tun_if.if_flags |= *(int *)data & TUN_IFF_FLAGS;
			break;
		default:
			splx(s);
			return (EINVAL);
		}
		break;

	case FIONBIO:
		if (*(int *)data)
			tp->tun_flags |= TUN_NBIO;
		else
			tp->tun_flags &= ~TUN_NBIO;
		break;
	case FIOASYNC:
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		break;
	case FIONREAD:
		/* report the size of the next packet, without dequeueing */
		IFQ_POLL(&tp->tun_if.if_snd, m);
		if (m != NULL)
			*(int *)data = m->m_pkthdr.len;
		else
			*(int *)data = 0;
		break;
	case TIOCSPGRP:
		/* remember who to SIGIO; credentials checked in csignal() */
		tp->tun_pgid = *(int *)data;
		tp->tun_siguid = p->p_cred->p_ruid;
		tp->tun_sigeuid = p->p_ucred->cr_uid;
		break;
	case TIOCGPGRP:
		*(int *)data = tp->tun_pgid;
		break;
	case OSIOCGIFADDR:
	case SIOCGIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(tp->arpcom.ac_enaddr, data,
		    sizeof(tp->arpcom.ac_enaddr));
		break;

	case SIOCSIFADDR:
		if (!(tp->tun_flags & TUN_LAYER2)) {
			splx(s);
			return (EINVAL);
		}
		bcopy(data, tp->arpcom.ac_enaddr,
		    sizeof(tp->arpcom.ac_enaddr));
		break;
	default:
#ifdef PIPEX
	    {
		int ret;
		ret = pipex_ioctl(&tp->pipex_iface, cmd, data);
		splx(s);
		return (ret);
	    }
#else
		splx(s);
		return (ENOTTY);
#endif
	}
	splx(s);
	return (0);
}
754 
755 /*
756  * The cdevsw read interface - reads a packet at a time, or at
757  * least as much of a packet as can be read.
758  */
/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 *
 * Blocks (unless TUN_NBIO/IO_NDELAY) until a packet is queued on
 * if_snd, then copies it out to userland; any tail that does not fit
 * in the caller's buffer is dropped.
 */
int
tunread(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m, *m0;
	int			 error = 0, len, s;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: read\n", ifp->if_xname));
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname, tp->tun_flags));
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	s = splnet();
	do {
		/* interface may lose readiness while we slept; re-check */
		while ((tp->tun_flags & TUN_READY) != TUN_READY)
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			if (tp->tun_flags & TUN_NBIO && ioflag & IO_NDELAY) {
				splx(s);
				return (EWOULDBLOCK);
			}
			/* sleep until tun_wakeup() signals a new packet */
			tp->tun_flags |= TUN_RWAIT;
			if ((error = tsleep((caddr_t)tp,
			    (PZERO + 1)|PCATCH, "tunread", 0)) != 0) {
				splx(s);
				return (error);
			}
		}
	} while (m0 == NULL);
	splx(s);

	/* copy the chain out mbuf by mbuf, freeing as we go */
	while (m0 != NULL && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m0->m_len);
		if (len != 0)
			error = uiomove(mtod(m0, caddr_t), len, uio);
		MFREE(m0, m);
		m0 = m;
	}

	if (m0 != NULL) {
		/* caller's buffer was too small; rest of packet is lost */
		TUNDEBUG(("Dropping mbuf\n"));
		m_freem(m0);
	}
	if (error)
		ifp->if_oerrors++;

	return (error);
}
820 
821 /*
822  * the cdevsw write interface - an atomic write is a packet - or else!
823  */
/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 *
 * Builds an mbuf chain from the user's buffer and injects it into the
 * stack: via ether_input_mbuf() in layer 2 mode, or onto the protocol
 * input queue selected by the leading 4-byte address-family header in
 * layer 3 mode.
 */
int
tunwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct ifqueue		*ifq;
	u_int32_t		*th;
	struct mbuf		*top, **mp, *m;
	int			 isr;
	int			 error=0, s, tlen, mlen;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;
	TUNDEBUG(("%s: tunwrite\n", ifp->if_xname));

	/* one write is one packet: it must fit the MTU plus header */
	if (uio->uio_resid == 0 || uio->uio_resid > ifp->if_mtu +
	    (tp->tun_flags & TUN_LAYER2 ? ETHER_HDR_LEN : sizeof(*th))) {
		TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid));
		return (EMSGSIZE);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;
	if (uio->uio_resid >= MINCLSIZE) {
		MCLGET(m, M_DONTWAIT);
		if (!(m->m_flags & M_EXT)) {
			m_free(m);
			return (ENOBUFS);
		}
		mlen = MCLBYTES;
	}

	top = NULL;
	mp = &top;
	if (tp->tun_flags & TUN_LAYER2) {
		/*
		 * Pad so that IP header is correctly aligned
		 * this is necessary for all strict aligned architectures.
		 */
		mlen -= ETHER_ALIGN;
		m->m_data += ETHER_ALIGN;
	}
	/* fill the first mbuf, then keep appending until resid is 0 */
	while (error == 0 && uio->uio_resid > 0) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod (m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (error == 0 && uio->uio_resid > 0) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
			if (uio->uio_resid >= MINCLSIZE) {
				MCLGET(m, M_DONTWAIT);
				if (!(m->m_flags & M_EXT)) {
					error = ENOBUFS;
					m_free(m);
					break;
				}
				mlen = MCLBYTES;
			}
		}
	}
	if (error) {
		if (top != NULL)
			m_freem(top);
		ifp->if_ierrors++;
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		s = splnet();
		bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN);
		splx(s);
	}
#endif

	if (tp->tun_flags & TUN_LAYER2) {
		/* quirk to not add randomness from a virtual device */
		atomic_setbits_int(&netisr, (1 << NETISR_RND_DONE));

		s = splnet();
		ether_input_mbuf(ifp, top);
		splx(s);

		ifp->if_ipackets++; /* ibytes are counted in ether_input */

		return (0);
	}

	th = mtod(top, u_int32_t *);
	/* strip the tunnel header */
	top->m_data += sizeof(*th);
	top->m_len  -= sizeof(*th);
	top->m_pkthdr.len -= sizeof(*th);
	top->m_pkthdr.rdomain = ifp->if_rdomain;

	/* pick the input queue from the address-family header */
	switch (ntohl(*th)) {
#ifdef INET
	case AF_INET:
		ifq = &ipintrq;
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		ifq = &ip6intrq;
		isr = NETISR_IPV6;
		break;
#endif
	default:
		m_freem(top);
		return (EAFNOSUPPORT);
	}

	s = splnet();
	if (IF_QFULL(ifq)) {
		IF_DROP(ifq);
		splx(s);
		ifp->if_collisions++;
		m_freem(top);
		if (!ifq->ifq_congestion)
			if_congestion(ifq);
		return (ENOBUFS);
	}
	IF_ENQUEUE(ifq, top);
	schednetisr(isr);
	ifp->if_ipackets++;
	ifp->if_ibytes += top->m_pkthdr.len;
	splx(s);
	return (error);
}
968 
969 /*
970  * tunpoll - the poll interface, this is only useful on reads
971  * really. The write detect always returns true, write never blocks
972  * anyway, it either accepts the packet or drops it.
973  */
974 int
975 tunpoll(dev_t dev, int events, struct proc *p)
976 {
977 	int			 revents, s;
978 	struct tun_softc	*tp;
979 	struct ifnet		*ifp;
980 	struct mbuf		*m;
981 
982 	if ((tp = tun_lookup(minor(dev))) == NULL)
983 		return (POLLERR);
984 
985 	ifp = &tp->tun_if;
986 	revents = 0;
987 	s = splnet();
988 	TUNDEBUG(("%s: tunpoll\n", ifp->if_xname));
989 
990 	if (events & (POLLIN | POLLRDNORM)) {
991 		IFQ_POLL(&ifp->if_snd, m);
992 		if (m != NULL) {
993 			TUNDEBUG(("%s: tunselect q=%d\n", ifp->if_xname,
994 			    IFQ_LEN(ifp->if_snd)));
995 			revents |= events & (POLLIN | POLLRDNORM);
996 		} else {
997 			TUNDEBUG(("%s: tunpoll waiting\n", ifp->if_xname));
998 			selrecord(p, &tp->tun_rsel);
999 		}
1000 	}
1001 	if (events & (POLLOUT | POLLWRNORM))
1002 		revents |= events & (POLLOUT | POLLWRNORM);
1003 	splx(s);
1004 	return (revents);
1005 }
1006 
1007 /*
1008  * kqueue(2) support.
1009  *
1010  * The tun driver uses an array of tun_softc's based on the minor number
1011  * of the device.  kn->kn_hook gets set to the specific tun_softc.
1012  *
1013  * filt_tunread() sets kn->kn_data to the iface qsize
1014  * filt_tunwrite() sets kn->kn_data to the MTU size
1015  */
/*
 * kqueue(2) support.
 *
 * The tun driver uses an array of tun_softc's based on the minor number
 * of the device.  kn->kn_hook gets set to the specific tun_softc.
 *
 * filt_tunread() sets kn->kn_data to the iface qsize
 * filt_tunwrite() sets kn->kn_data to the MTU size
 */
int
tunkqfilter(dev_t dev, struct knote *kn)
{
	int			 s;
	struct klist		*klist;
	struct tun_softc	*tp;
	struct ifnet		*ifp;

	if ((tp = tun_lookup(minor(dev))) == NULL)
		return (ENXIO);

	ifp = &tp->tun_if;

	s = splnet();
	TUNDEBUG(("%s: tunkqfilter\n", ifp->if_xname));
	splx(s);

	switch (kn->kn_filter) {
		case EVFILT_READ:
			klist = &tp->tun_rsel.si_note;
			kn->kn_fop = &tunread_filtops;
			break;
		case EVFILT_WRITE:
			klist = &tp->tun_wsel.si_note;
			kn->kn_fop = &tunwrite_filtops;
			break;
		default:
			return (EINVAL);
	}

	kn->kn_hook = (caddr_t)tp;

	/* splhigh: list is also walked from klist_invalidate() */
	s = splhigh();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);

	return (0);
}
1054 
1055 void
1056 filt_tunrdetach(struct knote *kn)
1057 {
1058 	int			 s;
1059 	struct tun_softc	*tp;
1060 
1061 	tp = (struct tun_softc *)kn->kn_hook;
1062 	s = splhigh();
1063 	if (!(kn->kn_status & KN_DETACHED))
1064 		SLIST_REMOVE(&tp->tun_rsel.si_note, kn, knote, kn_selnext);
1065 	splx(s);
1066 }
1067 
/*
 * kqueue read filter: event is ready when a packet is waiting on
 * if_snd; kn_data reports the queue length.  A detached knote always
 * reads as ready with kn_data 0 (EOF-like).
 */
int
filt_tunread(struct knote *kn, long hint)
{
	int			 s;
	struct tun_softc	*tp;
	struct ifnet		*ifp;
	struct mbuf		*m;

	if (kn->kn_status & KN_DETACHED) {
		kn->kn_data = 0;
		return (1);
	}

	tp = (struct tun_softc *)kn->kn_hook;
	ifp = &tp->tun_if;

	s = splnet();
	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		splx(s);
		kn->kn_data = IFQ_LEN(&ifp->if_snd);

		TUNDEBUG(("%s: tunkqread q=%d\n", ifp->if_xname,
		    IFQ_LEN(&ifp->if_snd)));
		return (1);
	}
	splx(s);
	TUNDEBUG(("%s: tunkqread waiting\n", ifp->if_xname));
	return (0);
}
1098 
1099 void
1100 filt_tunwdetach(struct knote *kn)
1101 {
1102 	int			 s;
1103 	struct tun_softc	*tp;
1104 
1105 	tp = (struct tun_softc *)kn->kn_hook;
1106 	s = splhigh();
1107 	if (!(kn->kn_status & KN_DETACHED))
1108 		SLIST_REMOVE(&tp->tun_wsel.si_note, kn, knote, kn_selnext);
1109 	splx(s);
1110 }
1111 
1112 int
1113 filt_tunwrite(struct knote *kn, long hint)
1114 {
1115 	struct tun_softc	*tp;
1116 	struct ifnet		*ifp;
1117 
1118 	if (kn->kn_status & KN_DETACHED) {
1119 		kn->kn_data = 0;
1120 		return (1);
1121 	}
1122 
1123 	tp = (struct tun_softc *)kn->kn_hook;
1124 	ifp = &tp->tun_if;
1125 
1126 	kn->kn_data = ifp->if_mtu;
1127 
1128 	return (1);
1129 }
1130 
1131 /*
1132  * Start packet transmission on the interface.
1133  * when the interface queue is rate-limited by ALTQ or TBR,
1134  * if_start is needed to drain packets from the queue in order
1135  * to notify readers when outgoing packets become ready.
1136  * In layer 2 mode this function is called from ether_output.
1137  */
/*
 * Start packet transmission on the interface.
 * when the interface queue is rate-limited by ALTQ or TBR,
 * if_start is needed to drain packets from the queue in order
 * to notify readers when outgoing packets become ready.
 * In layer 2 mode this function is called from ether_output.
 */
void
tunstart(struct ifnet *ifp)
{
	struct tun_softc	*tp = ifp->if_softc;
	struct mbuf		*m;

	splassert(IPL_NET);

	/* layer 3 without ALTQ/TBR: tun_output already woke readers */
	if (!(tp->tun_flags & TUN_LAYER2) &&
	    !ALTQ_IS_ENABLED(&ifp->if_snd) &&
	    !TBR_IS_ENABLED(&ifp->if_snd))
		return;

	IFQ_POLL(&ifp->if_snd, m);
	if (m != NULL) {
		if (tp->tun_flags & TUN_LAYER2) {
#if NBPFILTER > 0
			if (ifp->if_bpf)
				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
			ifp->if_opackets++;
		}
		/* packet stays queued; readers pick it up via tunread() */
		tun_wakeup(tp);
	}
}
1163 
1164 void
1165 tun_link_state(struct tun_softc *tp)
1166 {
1167 	struct ifnet *ifp = &tp->tun_if;
1168 	int link_state = LINK_STATE_DOWN;
1169 
1170 	if (tp->tun_flags & TUN_OPEN) {
1171 		if (tp->tun_flags & TUN_LAYER2)
1172 			link_state = LINK_STATE_FULL_DUPLEX;
1173 		else
1174 			link_state = LINK_STATE_UP;
1175 	}
1176 	if (ifp->if_link_state != link_state) {
1177 		ifp->if_link_state = link_state;
1178 		if_link_state_change(ifp);
1179 	}
1180 }
1181