/* xref: /dragonfly/sys/net/tap/if_tap.c (revision ce0e08e2) */
/*
 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * BASED ON:
 * -------------------------------------------------------------------------
 *
 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
 * Nottingham University 1987.
 */

/*
 * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
 * $DragonFly: src/sys/net/tap/if_tap.c,v 1.41 2008/09/05 17:03:15 dillon Exp $
 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/thread2.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/serialize.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/ifq_var.h>
#include <net/if_arp.h>
#include <net/route.h>

#include <netinet/in.h>

#include "if_tapvar.h"
#include "if_tap.h"

#define CDEV_NAME	"tap"
#define CDEV_MAJOR	149
#define TAPDEBUG	if (tapdebug) if_printf

#define TAP		"tap"
#define VMNET		"vmnet"
#define VMNET_DEV_MASK	0x00010000

/* module */
static int 		tapmodevent	(module_t, int, void *);

/* device */
static void		tapcreate	(cdev_t);

/* network interface */
static void		tapifstart	(struct ifnet *);
static int		tapifioctl	(struct ifnet *, u_long, caddr_t,
					 struct ucred *);
static void		tapifinit	(void *);
static void		tapifstop(struct tap_softc *, int);

/* character device */
static d_open_t		tapopen;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;
static d_kqfilter_t	tapkqfilter;

/*
 * Character-device switch for the /dev/tap and /dev/vmnet nodes; the
 * entry points are implemented below in this file.
 */
static struct dev_ops	tap_ops = {
	{ CDEV_NAME, CDEV_MAJOR, 0 },
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_kqfilter =	tapkqfilter
};

static int		taprefcnt = 0;		/* module ref. counter   */
static int		taplastunit = -1;	/* max. open unit number */
static int		tapdebug = 0;		/* debug flag            */

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
DEV_MODULE(if_tap, tapmodevent, NULL);

/*
 * tapmodevent
 *
 * module event handler
 */
static int
tapmodevent(module_t mod, int type, void *data)
{
	static int		 attached = 0;
	struct ifnet		*ifp = NULL;
	int			 unit;

	switch (type) {
	case MOD_LOAD:
		if (attached)
			return (EEXIST);

		/* register the character-device entry points */
		dev_ops_add(&tap_ops, 0, 0);
		attached = 1;
		break;

	case MOD_UNLOAD:
		/* refuse to unload while any tap device is open */
		if (taprefcnt > 0)
			return (EBUSY);

		dev_ops_remove(&tap_ops, 0, 0);

		/* XXX: maintain tap ifs in a local list */
		unit = 0;
		while (unit <= taplastunit) {
			/*
			 * Scan the global interface list for a tap/vmnet
			 * interface with this unit number.  TAILQ_FOREACH
			 * leaves ifp == NULL when no match is found.
			 */
			TAILQ_FOREACH(ifp, &ifnet, if_link) {
				if ((strcmp(ifp->if_dname, TAP) == 0) ||
				    (strcmp(ifp->if_dname, VMNET) == 0)) {
					if (ifp->if_dunit == unit)
						break;
				}
			}

			if (ifp != NULL) {
				struct tap_softc	*tp = ifp->if_softc;

				TAPDEBUG(ifp, "detached. minor = %#x, " \
					"taplastunit = %d\n",
					minor(tp->tap_dev), taplastunit);

				/* stop the interface under its serializer */
				lwkt_serialize_enter(ifp->if_serializer);
				tapifstop(tp, 1);
				lwkt_serialize_exit(ifp->if_serializer);

				ether_ifdetach(ifp);
				destroy_dev(tp->tap_dev);
				kfree(tp, M_TAP);
				/*
				 * Stay on the same unit number: a "tap" and
				 * a "vmnet" interface may share it, so more
				 * matches can remain.
				 */
			} else {
				unit++;
			}
		}
		attached = 0;
		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
} /* tapmodevent */

/*
 * tapcreate
 *
 * to create interface
 */
static void
tapcreate(cdev_t dev)
{
	struct ifnet		*ifp = NULL;
	struct tap_softc	*tp = NULL;
	uint8_t			ether_addr[ETHER_ADDR_LEN];
	int			 unit;
	char			*name = NULL;

	/* allocate driver storage and create device */
	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);

	/*
	 * select device: tap or vmnet; the VMNET_DEV_MASK bit in the
	 * minor number distinguishes vmnet nodes from plain tap nodes
	 */
	if (minor(dev) & VMNET_DEV_MASK) {
		name = VMNET;
		unit = lminor(dev) & 0xff;
		tp->tap_flags |= TAP_VMNET;
	}
	else {
		name = TAP;
		unit = lminor(dev);
	}

	tp->tap_dev = make_dev(&tap_ops, minor(dev), UID_ROOT, GID_WHEEL,
						0600, "%s%d", name, unit);
	/* cross-link the softc from both the cloned and the new device */
	tp->tap_dev->si_drv1 = dev->si_drv1 = tp;
	reference_dev(tp->tap_dev);	/* so we can destroy it later */

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	ether_addr[0] = 0x00;
	ether_addr[1] = 0xbd;
	/* middle three bytes are taken from the current tick counter */
	bcopy(&ticks, &ether_addr[2], 3);
	ether_addr[5] = (u_char)unit;

	/* fill the rest and attach interface */
	ifp = &tp->tap_if;
	ifp->if_softc = tp;

	if_initname(ifp, name, unit);
	/* remember the highest unit ever created; used at module unload */
	if (unit > taplastunit)
		taplastunit = unit;

	ifp->if_init = tapifinit;
	ifp->if_start = tapifstart;
	ifp->if_ioctl = tapifioctl;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
	ifq_set_ready(&ifp->if_snd);

	ether_ifattach(ifp, ether_addr, NULL);

	tp->tap_flags |= TAP_INITED;
	tp->tap_devq.ifq_maxlen = ifqmaxlen;

	TAPDEBUG(ifp, "created. minor = %#x\n", minor(tp->tap_dev));
} /* tapcreate */

256 /*
257  * tapopen
258  *
259  * to open tunnel. must be superuser
260  */
261 static int
262 tapopen(struct dev_open_args *ap)
263 {
264 	cdev_t dev = ap->a_head.a_dev;
265 	struct tap_softc *tp = NULL;
266 	struct ifnet *ifp = NULL;
267 	int error;
268 
269 	if ((error = suser_cred(ap->a_cred, 0)) != 0)
270 		return (error);
271 
272 	get_mplock();
273 	tp = dev->si_drv1;
274 	if (tp == NULL) {
275 		tapcreate(dev);
276 		tp = dev->si_drv1;
277 		ifp = &tp->arpcom.ac_if;
278 	} else {
279 		if (tp->tap_flags & TAP_OPEN) {
280 			rel_mplock();
281 			return (EBUSY);
282 		}
283 
284 		ifp = &tp->arpcom.ac_if;
285 
286                 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
287 
288 		/* Announce the return of the interface. */
289 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
290 	}
291 
292 	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
293 
294 	if (curthread->td_proc)
295 		fsetown(curthread->td_proc->p_pid, &tp->tap_sigtd);
296 	tp->tap_flags |= TAP_OPEN;
297 	taprefcnt ++;
298 
299 	TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
300 		 minor(tp->tap_dev), taprefcnt, taplastunit);
301 
302 	rel_mplock();
303 	return (0);
304 }
305 
306 
/*
 * tapclose
 *
 * close the device - mark i/f down & delete routing info
 */
static int
tapclose(struct dev_close_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	int clear_flags = 1;

	/* junk all pending output */

	get_mplock();
	ifq_purge(&ifp->if_snd);

	/*
	 * do not bring the interface down, and do not anything with
	 * interface, if we are in VMnet mode. just close the device.
	 *
	 * NOTE(review): clear_flags stays 1 (clear IFF_RUNNING/OACTIVE)
	 * only in VMnet mode and becomes 0 otherwise, which reads as
	 * inverted at first glance -- confirm this polarity is intended.
	 */

	if ((tp->tap_flags & TAP_VMNET) == 0) {
		if (ifp->if_flags & IFF_UP)
			if_down(ifp);
		clear_flags = 0;
	}
	lwkt_serialize_enter(ifp->if_serializer);
	tapifstop(tp, clear_flags);
	lwkt_serialize_exit(ifp->if_serializer);

	/* drop all addresses except the link-level one */
	if_purgeaddrs_nolink(ifp);

	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

	/* Announce the departure of the interface. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

	/* clear async-I/O ownership and wake anyone selecting on us */
	funsetown(tp->tap_sigio);
	tp->tap_sigio = NULL;
	selwakeup(&tp->tap_rsel);

	tp->tap_flags &= ~TAP_OPEN;
	funsetown(tp->tap_sigtd);
	tp->tap_sigtd = NULL;

	taprefcnt --;
	if (taprefcnt < 0) {
		taprefcnt = 0;
		if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
			"set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
	}

	TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
		 minor(tp->tap_dev), taprefcnt, taplastunit);

	rel_mplock();
	return (0);
}

369 /*
370  * tapifinit
371  *
372  * Network interface initialization function (called with if serializer held)
373  *
374  * MPSAFE
375  */
376 static void
377 tapifinit(void *xtp)
378 {
379 	struct tap_softc *tp = xtp;
380 	struct ifnet *ifp = &tp->tap_if;
381 
382 	TAPDEBUG(ifp, "initializing, minor = %#x\n", minor(tp->tap_dev));
383 
384 	ASSERT_SERIALIZED(ifp->if_serializer);
385 
386 	tapifstop(tp, 1);
387 
388 	ifp->if_flags |= IFF_RUNNING;
389 	ifp->if_flags &= ~IFF_OACTIVE;
390 
391 	/* attempt to start output */
392 	tapifstart(ifp);
393 }
394 
395 
396 /*
397  * tapifioctl
398  *
399  * Process an ioctl request on network interface (called with if serializer
400  * held).
401  *
402  * MPSAFE
403  */
404 static int
405 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
406 {
407 	struct tap_softc 	*tp = (struct tap_softc *)(ifp->if_softc);
408 	struct ifstat		*ifs = NULL;
409 	int			 dummy;
410 
411 	switch (cmd) {
412 		case SIOCSIFADDR:
413 		case SIOCGIFADDR:
414 		case SIOCSIFMTU:
415 			dummy = ether_ioctl(ifp, cmd, data);
416 			return (dummy);
417 
418 		case SIOCSIFFLAGS:
419 			if ((tp->tap_flags & TAP_VMNET) == 0) {
420 				/*
421 				 * Only for non-vmnet tap(4)
422 				 */
423 				if (ifp->if_flags & IFF_UP) {
424 					if ((ifp->if_flags & IFF_RUNNING) == 0)
425 						tapifinit(tp);
426 				} else {
427 					tapifstop(tp, 1);
428 				}
429 			} else {
430 				/* XXX */
431 			}
432 			break;
433 		case SIOCADDMULTI: /* XXX -- just like vmnet does */
434 		case SIOCDELMULTI:
435 			break;
436 
437 		case SIOCGIFSTATUS:
438 			ifs = (struct ifstat *)data;
439 			dummy = strlen(ifs->ascii);
440 			if ((tp->tap_flags & TAP_OPEN) &&
441 			    dummy < sizeof(ifs->ascii)) {
442 				if (tp->tap_sigtd && tp->tap_sigtd->sio_proc) {
443 				    ksnprintf(ifs->ascii + dummy,
444 					sizeof(ifs->ascii) - dummy,
445 					"\tOpened by pid %d\n",
446 					(int)tp->tap_sigtd->sio_proc->p_pid);
447 				} else {
448 				    ksnprintf(ifs->ascii + dummy,
449 					sizeof(ifs->ascii) - dummy,
450 					"\tOpened by <unknown>\n");
451 				}
452 			}
453 			break;
454 
455 		default:
456 			return (EINVAL);
457 	}
458 
459 	return (0);
460 }
461 
462 
/*
 * tapifstart
 *
 * Queue packets from higher level ready to put out (called with if serializer
 * held)
 *
 * MPSAFE
 */
static void
tapifstart(struct ifnet *ifp)
{
	struct tap_softc *tp = ifp->if_softc;
	struct ifqueue *ifq;
	struct mbuf *m;
	int has_data = 0;

	TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));

	/*
	 * do not junk pending output if we are in VMnet mode.
	 * XXX: can this do any harm because of queue overflow?
	 */

	if (((tp->tap_flags & TAP_VMNET) == 0) &&
	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
			 minor(tp->tap_dev), tp->tap_flags);
		ifq_purge(&ifp->if_snd);
		return;
	}

	ifp->if_flags |= IFF_OACTIVE;

	/*
	 * Move every packet from the interface send queue to tap_devq,
	 * where the userland reader picks them up; drop on overflow.
	 */
	ifq = &tp->tap_devq;
	while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
		if (IF_QFULL(ifq)) {
			IF_DROP(ifq);
			ifp->if_oerrors++;
			m_freem(m);
		} else {
			IF_ENQUEUE(ifq, m);
			ifp->if_opackets++;
			has_data = 1;
		}
	}

	if (has_data) {
		/* wake a reader blocked in tapread() */
		if (tp->tap_flags & TAP_RWAIT) {
			tp->tap_flags &= ~TAP_RWAIT;
			wakeup((caddr_t)tp);
		}

		/* KNOTE needs the MP lock here */
		get_mplock();
		KNOTE(&tp->tap_rsel.si_note, 0);
		rel_mplock();

		/* deliver SIGIO if async I/O was requested via FIOASYNC */
		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
			get_mplock();
			pgsigio(tp->tap_sigio, SIGIO, 0);
			rel_mplock();
		}

		/*
		 * selwakeup is not MPSAFE.  tapifstart is.
		 */
		get_mplock();
		selwakeup(&tp->tap_rsel);
		rel_mplock();
	}

	ifp->if_flags &= ~IFF_OACTIVE;
}

/*
 * tapioctl
 *
 * The ops interface is now pretty minimal.  Called via fileops with nothing
 * held.
 *
 * MPSAFE
 */
static int
tapioctl(struct dev_ioctl_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	caddr_t data = ap->a_data;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct tapinfo		*tapp = NULL;
	struct mbuf *mb;
	short f;
	int error;

	lwkt_serialize_enter(ifp->if_serializer);
	error = 0;

	switch (ap->a_cmd) {
	case TAPSIFINFO:
		/* set interface parameters: mtu, type, baudrate */
		tapp = (struct tapinfo *)data;
		ifp->if_mtu = tapp->mtu;
		ifp->if_type = tapp->type;
		ifp->if_baudrate = tapp->baudrate;
		break;

	case TAPGIFINFO:
		/* report current interface parameters */
		tapp = (struct tapinfo *)data;
		tapp->mtu = ifp->if_mtu;
		tapp->type = ifp->if_type;
		tapp->baudrate = ifp->if_baudrate;
		break;

	case TAPSDEBUG:
		tapdebug = *(int *)data;
		break;

	case TAPGDEBUG:
		*(int *)data = tapdebug;
		break;

	case FIOASYNC:
		if (*(int *)data)
			tp->tap_flags |= TAP_ASYNC;
		else
			tp->tap_flags &= ~TAP_ASYNC;
		break;

	case FIONREAD:
		/*
		 * Report the byte length of the next pending packet:
		 * look at the device queue first, then at the interface
		 * send queue, and sum the mbuf chain lengths.
		 */
		*(int *)data = 0;

		/* Take a look at devq first */
		IF_POLL(&tp->tap_devq, mb);
		if (mb == NULL)
			mb = ifq_poll(&ifp->if_snd);

		if (mb != NULL) {
			for(; mb != NULL; mb = mb->m_next)
				*(int *)data += mb->m_len;
		}
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)data, &tp->tap_sigio);
		break;

	case FIOGETOWN:
		*(int *)data = fgetown(tp->tap_sigio);
		break;

	/* this is deprecated, FIOSETOWN should be used instead */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)data), &tp->tap_sigio);
		break;

	/* this is deprecated, FIOGETOWN should be used instead */
	case TIOCGPGRP:
		*(int *)data = -fgetown(tp->tap_sigio);
		break;

	/* VMware/VMnet port ioctl's */

	case SIOCGIFFLAGS:	/* get ifnet flags */
		bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
		break;

	case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
		/* caller may not touch the IFF_CANTCHANGE bits; force UP */
		f = *(short *)data;
		f &= 0x0fff;
		f &= ~IFF_CANTCHANGE;
		f |= IFF_UP;
		ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
		break;

	case OSIOCGIFADDR:	/* get MAC address of the remote side */
	case SIOCGIFADDR:
		bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
		break;

	case SIOCSIFADDR:	/* set MAC address of the remote side */
		bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
		break;

	default:
		error = ENOTTY;
		break;
	}
	lwkt_serialize_exit(ifp->if_serializer);
	return (error);
}

/*
 * tapread
 *
 * The ops read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 *
 * Called from the fileops interface with nothing held.
 *
 * MPSAFE
 */
static int
tapread(struct dev_read_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct uio *uio = ap->a_uio;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*m0 = NULL;
	int			 error = 0, len;

	TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));

	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
			 minor(tp->tap_dev), tp->tap_flags);

		return (EHOSTDOWN);
	}

	tp->tap_flags &= ~TAP_RWAIT;

	/* sleep until we get a packet */
	do {
		lwkt_serialize_enter(ifp->if_serializer);
		IF_DEQUEUE(&tp->tap_devq, m0);
		if (m0 == NULL) {
			if (ap->a_ioflag & IO_NDELAY) {
				lwkt_serialize_exit(ifp->if_serializer);
				return (EWOULDBLOCK);
			}
			/*
			 * Interlock the sleep before dropping the
			 * serializer so a wakeup from tapifstart()
			 * issued in between is not lost.
			 */
			tp->tap_flags |= TAP_RWAIT;
			crit_enter();
			tsleep_interlock(tp);
			lwkt_serialize_exit(ifp->if_serializer);
			error = tsleep(tp, PCATCH, "taprd", 0);
			crit_exit();
			if (error)
				return (error);
		} else {
			lwkt_serialize_exit(ifp->if_serializer);
		}
	} while (m0 == NULL);

	/* feed the packet to any attached BPF listeners */
	BPF_MTAP(ifp, m0);

	/* xfer packet to user space */
	while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
		len = min(uio->uio_resid, m0->m_len);
		if (len == 0)
			break;

		error = uiomove(mtod(m0, caddr_t), len, uio);
		m0 = m_free(m0);
	}

	/* whatever did not fit into the user buffer is dropped */
	if (m0 != NULL) {
		TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
			 minor(tp->tap_dev));
		m_freem(m0);
	}

	return (error);
}

/*
 * tapwrite
 *
 * The ops write interface - an atomic write is a packet - or else!
 *
 * Called from the fileops interface with nothing held.
 *
 * MPSAFE
 */
static int
tapwrite(struct dev_write_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct uio *uio = ap->a_uio;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
	int		 	 error = 0, tlen, mlen;

	TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));

	if (uio->uio_resid == 0)
		return (0);

	/* a single write must carry exactly one packet of sane size */
	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
		TAPDEBUG(ifp, "invalid packet len = %d, minor = %#x\n",
			 uio->uio_resid, minor(tp->tap_dev));

		return (EIO);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, MB_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;

	/* copy the user data into an mbuf chain, allocating as we go */
	top = 0;
	mp = &top;
	while ((error == 0) && (uio->uio_resid > 0)) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (uio->uio_resid > 0) {
			MGET(m, MB_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
		}
	}
	/* on any failure, release the partial chain built so far */
	if (error) {
		ifp->if_ierrors ++;
		if (top)
			m_freem(top);
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

	/*
	 * Ethernet bridge and bpf are handled in ether_input
	 *
	 * adjust mbuf and give packet to the ether_input
	 */
	lwkt_serialize_enter(ifp->if_serializer);
	ifp->if_input(ifp, top);
	ifp->if_ipackets ++; /* ibytes are counted in ether_input */
	lwkt_serialize_exit(ifp->if_serializer);

	return (0);
}

805 /*
806  * tappoll
807  *
808  * The poll interface, this is only useful on reads really. The write
809  * detect always returns true, write never blocks anyway, it either
810  * accepts the packet or drops it
811  *
812  * Called from the fileops interface with nothing held.
813  *
814  * MPSAFE
815  */
816 static int
817 tappoll(struct dev_poll_args *ap)
818 {
819 	cdev_t dev = ap->a_head.a_dev;
820 	struct tap_softc	*tp = dev->si_drv1;
821 	struct ifnet		*ifp = &tp->tap_if;
822 	int		 	 revents = 0;
823 
824 	TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
825 
826 	if (ap->a_events & (POLLIN | POLLRDNORM)) {
827 		if (!IF_QEMPTY(&tp->tap_devq)) {
828 			TAPDEBUG(ifp,
829 				 "has data in queue. minor = %#x\n",
830 				 minor(tp->tap_dev));
831 
832 			revents |= (ap->a_events & (POLLIN | POLLRDNORM));
833 		} else {
834 			TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
835 				 minor(tp->tap_dev));
836 
837 			get_mplock();
838 			selrecord(curthread, &tp->tap_rsel);
839 			rel_mplock();
840 		}
841 	}
842 
843 	if (ap->a_events & (POLLOUT | POLLWRNORM))
844 		revents |= (ap->a_events & (POLLOUT | POLLWRNORM));
845 	ap->a_events = revents;
846 	return(0);
847 }
848 
/*
 * tapkqfilter - called from the fileops interface with nothing held
 *
 * MPSAFE
 */
static int filt_tapread(struct knote *kn, long hint);
static void filt_tapdetach(struct knote *kn);
/* read filter: detach hook plus readiness predicate (no attach hook) */
static struct filterops tapread_filtops =
	{ 1, NULL, filt_tapdetach, filt_tapread };

859 static int
860 tapkqfilter(struct dev_kqfilter_args *ap)
861 {
862 	cdev_t dev = ap->a_head.a_dev;
863 	struct knote *kn = ap->a_kn;
864 	struct tap_softc *tp;
865 	struct klist *list;
866 	struct ifnet *ifp;
867 
868 	get_mplock();
869 	tp = dev->si_drv1;
870 	ifp = &tp->tap_if;
871 	ap->a_result =0;
872 
873 	switch(kn->kn_filter) {
874 	case EVFILT_READ:
875 		list = &tp->tap_rsel.si_note;
876 		kn->kn_fop = &tapread_filtops;
877 		kn->kn_hook = (void *)tp;
878 		break;
879 	case EVFILT_WRITE:
880 		/* fall through */
881 	default:
882 		ap->a_result = 1;
883 		rel_mplock();
884 		return(0);
885 	}
886 	crit_enter();
887 	SLIST_INSERT_HEAD(list, kn, kn_selnext);
888 	crit_exit();
889 	rel_mplock();
890 	return(0);
891 }
892 
893 static int
894 filt_tapread(struct knote *kn, long hint)
895 {
896 	struct tap_softc *tp = (void *)kn->kn_hook;
897 
898 	if (IF_QEMPTY(&tp->tap_devq) == 0)	/* XXX serializer */
899 		return(1);
900 	else
901 		return(0);
902 }
903 
/*
 * filt_tapdetach - unhook the knote from the tap's read-select list
 */
static void
filt_tapdetach(struct knote *kn)
{
	struct tap_softc *tp = (void *)kn->kn_hook;

	SLIST_REMOVE(&tp->tap_rsel.si_note, kn, knote, kn_selnext);
}

/*
 * tapifstop - drain all packets queued for the reader and, when
 * clear_flags is set, mark the interface as no longer running.
 * Called with the interface serializer held.
 */
static void
tapifstop(struct tap_softc *tp, int clear_flags)
{
	struct ifnet *ifp = &tp->tap_if;

	ASSERT_SERIALIZED(ifp->if_serializer);
	IF_DRAIN(&tp->tap_devq);
	if (clear_flags)
		ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
}
922