xref: /dragonfly/sys/net/tap/if_tap.c (revision ad7a2457)
1 /*
2  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * BASED ON:
27  * -------------------------------------------------------------------------
28  *
29  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30  * Nottingham University 1987.
31  */
32 
33 /*
34  * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35  * $DragonFly: src/sys/net/tap/if_tap.c,v 1.41 2008/09/05 17:03:15 dillon Exp $
36  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
37  */
38 
39 #include "opt_inet.h"
40 
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/poll.h>
50 #include <sys/proc.h>
51 #include <sys/priv.h>
52 #include <sys/signalvar.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57 #include <sys/thread2.h>
58 #include <sys/ttycom.h>
59 #include <sys/uio.h>
60 #include <sys/vnode.h>
61 #include <sys/serialize.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/ifq_var.h>
67 #include <net/if_arp.h>
68 #include <net/route.h>
69 
70 #include <netinet/in.h>
71 
72 #include "if_tapvar.h"
73 #include "if_tap.h"
74 
75 
76 #define CDEV_NAME	"tap"
77 #define CDEV_MAJOR	149
78 #define TAPDEBUG	if (tapdebug) if_printf
79 
80 #define TAP		"tap"
81 #define VMNET		"vmnet"
82 #define VMNET_DEV_MASK	0x00010000
83 
84 /* module */
85 static int 		tapmodevent	(module_t, int, void *);
86 
87 /* device */
88 static void		tapcreate	(cdev_t);
89 
90 /* network interface */
91 static void		tapifstart	(struct ifnet *);
92 static int		tapifioctl	(struct ifnet *, u_long, caddr_t,
93 					 struct ucred *);
94 static void		tapifinit	(void *);
95 static void		tapifstop(struct tap_softc *, int);
96 
97 /* character device */
98 static d_open_t		tapopen;
99 static d_close_t	tapclose;
100 static d_read_t		tapread;
101 static d_write_t	tapwrite;
102 static d_ioctl_t	tapioctl;
103 static d_poll_t		tappoll;
104 static d_kqfilter_t	tapkqfilter;
105 
106 static struct dev_ops	tap_ops = {
107 	{ CDEV_NAME, CDEV_MAJOR, 0 },
108 	.d_open =	tapopen,
109 	.d_close =	tapclose,
110 	.d_read =	tapread,
111 	.d_write =	tapwrite,
112 	.d_ioctl =	tapioctl,
113 	.d_poll =	tappoll,
114 	.d_kqfilter =	tapkqfilter
115 };
116 
117 static int		taprefcnt = 0;		/* module ref. counter   */
118 static int		taplastunit = -1;	/* max. open unit number */
119 static int		tapdebug = 0;		/* debug flag            */
120 
121 MALLOC_DECLARE(M_TAP);
122 MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
123 SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
124 DEV_MODULE(if_tap, tapmodevent, NULL);
125 
126 /*
127  * tapmodevent
128  *
129  * module event handler
130  */
static int
tapmodevent(module_t mod, int type, void *data)
{
	static int		 attached = 0;	/* guards against double MOD_LOAD */
	struct ifnet		*ifp = NULL;
	int			 unit;

	switch (type) {
	case MOD_LOAD:
		if (attached)
			return (EEXIST);

		/* register the character device ops */
		dev_ops_add(&tap_ops, 0, 0);
		attached = 1;
		break;

	case MOD_UNLOAD:
		/* refuse to unload while any tap device is still open */
		if (taprefcnt > 0)
			return (EBUSY);

		dev_ops_remove(&tap_ops, 0, 0);

		/* XXX: maintain tap ifs in a local list */
		/*
		 * Tear down every tap/vmnet interface for unit numbers
		 * 0..taplastunit.  TAILQ_FOREACH leaves ifp == NULL when
		 * no interface with the current unit exists, in which
		 * case we advance to the next unit.
		 */
		unit = 0;
		while (unit <= taplastunit) {
			TAILQ_FOREACH(ifp, &ifnet, if_link) {
				if ((strcmp(ifp->if_dname, TAP) == 0) ||
				    (strcmp(ifp->if_dname, VMNET) == 0)) {
					if (ifp->if_dunit == unit)
						break;
				}
			}

			if (ifp != NULL) {
				struct tap_softc	*tp = ifp->if_softc;

				TAPDEBUG(ifp, "detached. minor = %#x, " \
					"taplastunit = %d\n",
					minor(tp->tap_dev), taplastunit);

				/* stop must run under the if serializer */
				lwkt_serialize_enter(ifp->if_serializer);
				tapifstop(tp, 1);
				lwkt_serialize_exit(ifp->if_serializer);

				/* detach from the network stack, then free */
				ether_ifdetach(ifp);
				destroy_dev(tp->tap_dev);
				kfree(tp, M_TAP);
			} else {
				unit++;
			}
		}
		attached = 0;
		break;

	default:
		return (EOPNOTSUPP);
	}

	return (0);
} /* tapmodevent */
191 
192 
193 /*
194  * tapcreate
195  *
196  * to create interface
197  */
static void
tapcreate(cdev_t dev)
{
	struct ifnet		*ifp = NULL;
	struct tap_softc	*tp = NULL;
	uint8_t			ether_addr[ETHER_ADDR_LEN];
	int			 unit;
	char			*name = NULL;

	/* allocate driver storage and create device */
	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);

	/* select device: tap or vmnet (vmnet minors have VMNET_DEV_MASK set) */
	if (minor(dev) & VMNET_DEV_MASK) {
		name = VMNET;
		unit = lminor(dev) & 0xff;
		tp->tap_flags |= TAP_VMNET;
	}
	else {
		name = TAP;
		unit = lminor(dev);
	}

	tp->tap_dev = make_dev(&tap_ops, minor(dev), UID_ROOT, GID_WHEEL,
						0600, "%s%d", name, unit);
	tp->tap_dev->si_drv1 = dev->si_drv1 = tp;
	reference_dev(tp->tap_dev);	/* so we can destroy it later */

	/* generate fake MAC address: 00 bd xx xx xx unit_no */
	ether_addr[0] = 0x00;
	ether_addr[1] = 0xbd;
	bcopy(&ticks, &ether_addr[2], 3);	/* quasi-random middle bytes */
	ether_addr[5] = (u_char)unit;

	/* fill the rest and attach interface */
	ifp = &tp->tap_if;
	ifp->if_softc = tp;

	if_initname(ifp, name, unit);
	/* remember highest unit so MOD_UNLOAD knows how far to scan */
	if (unit > taplastunit)
		taplastunit = unit;

	ifp->if_init = tapifinit;
	ifp->if_start = tapifstart;
	ifp->if_ioctl = tapifioctl;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
	ifq_set_ready(&ifp->if_snd);

	ether_ifattach(ifp, ether_addr, NULL);

	tp->tap_flags |= TAP_INITED;
	tp->tap_devq.ifq_maxlen = ifqmaxlen;	/* queue toward the reader */

	TAPDEBUG(ifp, "created. minor = %#x\n", minor(tp->tap_dev));
} /* tapcreate */
255 
256 
257 /*
258  * tapopen
259  *
260  * to open tunnel. must be superuser
261  */
static int
tapopen(struct dev_open_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct tap_softc *tp = NULL;
	struct ifnet *ifp = NULL;
	int error;

	/* opening the tunnel device requires root privileges */
	if ((error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)) != 0)
		return (error);

	get_mplock();
	tp = dev->si_drv1;
	if (tp == NULL) {
		/* first open of this minor: create softc and interface */
		tapcreate(dev);
		tp = dev->si_drv1;
		ifp = &tp->arpcom.ac_if;
	} else {
		/* only one opener at a time per device */
		if (tp->tap_flags & TAP_OPEN) {
			rel_mplock();
			return (EBUSY);
		}

		ifp = &tp->arpcom.ac_if;

                EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);

		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
	}

	/* snapshot the current MAC for the SIOCGIFADDR device ioctl */
	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));

	if (curthread->td_proc)
		fsetown(curthread->td_proc->p_pid, &tp->tap_sigtd);
	tp->tap_flags |= TAP_OPEN;
	taprefcnt ++;	/* module-wide open count; blocks MOD_UNLOAD */

	TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
		 minor(tp->tap_dev), taprefcnt, taplastunit);

	rel_mplock();
	return (0);
}
306 
307 
308 /*
309  * tapclose
310  *
311  * close the device - mark i/f down & delete routing info
312  */
313 static int
314 tapclose(struct dev_close_args *ap)
315 {
316 	cdev_t dev = ap->a_head.a_dev;
317 	struct tap_softc	*tp = dev->si_drv1;
318 	struct ifnet		*ifp = &tp->tap_if;
319 	int clear_flags = 1;
320 
321 	/* junk all pending output */
322 
323 	get_mplock();
324 	ifq_purge(&ifp->if_snd);
325 
326 	/*
327 	 * do not bring the interface down, and do not anything with
328 	 * interface, if we are in VMnet mode. just close the device.
329 	 */
330 
331 	if ((tp->tap_flags & TAP_VMNET) == 0) {
332 		if (ifp->if_flags & IFF_UP)
333 			if_down(ifp);
334 		clear_flags = 0;
335 	}
336 	lwkt_serialize_enter(ifp->if_serializer);
337 	tapifstop(tp, clear_flags);
338 	lwkt_serialize_exit(ifp->if_serializer);
339 
340 	if_purgeaddrs_nolink(ifp);
341 
342 	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
343 
344 	/* Announce the departure of the interface. */
345 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
346 
347 	funsetown(tp->tap_sigio);
348 	tp->tap_sigio = NULL;
349 	selwakeup(&tp->tap_rsel);
350 
351 	tp->tap_flags &= ~TAP_OPEN;
352 	funsetown(tp->tap_sigtd);
353 	tp->tap_sigtd = NULL;
354 
355 	taprefcnt --;
356 	if (taprefcnt < 0) {
357 		taprefcnt = 0;
358 		if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
359 			"set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
360 	}
361 
362 	TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
363 		 minor(tp->tap_dev), taprefcnt, taplastunit);
364 
365 	rel_mplock();
366 	return (0);
367 }
368 
369 
370 /*
371  * tapifinit
372  *
373  * Network interface initialization function (called with if serializer held)
374  *
375  * MPSAFE
376  */
377 static void
378 tapifinit(void *xtp)
379 {
380 	struct tap_softc *tp = xtp;
381 	struct ifnet *ifp = &tp->tap_if;
382 
383 	TAPDEBUG(ifp, "initializing, minor = %#x\n", minor(tp->tap_dev));
384 
385 	ASSERT_SERIALIZED(ifp->if_serializer);
386 
387 	tapifstop(tp, 1);
388 
389 	ifp->if_flags |= IFF_RUNNING;
390 	ifp->if_flags &= ~IFF_OACTIVE;
391 
392 	/* attempt to start output */
393 	tapifstart(ifp);
394 }
395 
396 
397 /*
398  * tapifioctl
399  *
400  * Process an ioctl request on network interface (called with if serializer
401  * held).
402  *
403  * MPSAFE
404  */
static int
tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct tap_softc 	*tp = (struct tap_softc *)(ifp->if_softc);
	struct ifstat		*ifs = NULL;
	int			 dummy;

	switch (cmd) {
		case SIOCSIFADDR:
		case SIOCGIFADDR:
		case SIOCSIFMTU:
			/* standard ethernet handling for address/MTU */
			dummy = ether_ioctl(ifp, cmd, data);
			return (dummy);

		case SIOCSIFFLAGS:
			if ((tp->tap_flags & TAP_VMNET) == 0) {
				/*
				 * Only for non-vmnet tap(4)
				 */
				/* bring the interface in sync with IFF_UP */
				if (ifp->if_flags & IFF_UP) {
					if ((ifp->if_flags & IFF_RUNNING) == 0)
						tapifinit(tp);
				} else {
					tapifstop(tp, 1);
				}
			} else {
				/* XXX */
			}
			break;
		case SIOCADDMULTI: /* XXX -- just like vmnet does */
		case SIOCDELMULTI:
			break;

		case SIOCGIFSTATUS:
			/* append the opener's pid (if known) to the status */
			ifs = (struct ifstat *)data;
			dummy = strlen(ifs->ascii);
			if ((tp->tap_flags & TAP_OPEN) &&
			    dummy < sizeof(ifs->ascii)) {
				if (tp->tap_sigtd && tp->tap_sigtd->sio_proc) {
				    ksnprintf(ifs->ascii + dummy,
					sizeof(ifs->ascii) - dummy,
					"\tOpened by pid %d\n",
					(int)tp->tap_sigtd->sio_proc->p_pid);
				} else {
				    ksnprintf(ifs->ascii + dummy,
					sizeof(ifs->ascii) - dummy,
					"\tOpened by <unknown>\n");
				}
			}
			break;

		default:
			return (EINVAL);
	}

	return (0);
}
462 
463 
464 /*
465  * tapifstart
466  *
467  * Queue packets from higher level ready to put out (called with if serializer
468  * held)
469  *
470  * MPSAFE
471  */
static void
tapifstart(struct ifnet *ifp)
{
	struct tap_softc *tp = ifp->if_softc;
	struct ifqueue *ifq;
	struct mbuf *m;
	int has_data = 0;

	TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));

	/*
	 * do not junk pending output if we are in VMnet mode.
	 * XXX: can this do any harm because of queue overflow?
	 */

	if (((tp->tap_flags & TAP_VMNET) == 0) &&
	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
			 minor(tp->tap_dev), tp->tap_flags);
		ifq_purge(&ifp->if_snd);
		return;
	}

	ifp->if_flags |= IFF_OACTIVE;

	/* move everything from the send queue onto the device queue */
	ifq = &tp->tap_devq;
	while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
		if (IF_QFULL(ifq)) {
			/* userland reader is not keeping up: drop packet */
			IF_DROP(ifq);
			ifp->if_oerrors++;
			m_freem(m);
		} else {
			IF_ENQUEUE(ifq, m);
			ifp->if_opackets++;
			has_data = 1;
		}
	}

	if (has_data) {
		/* wake a reader blocked in tapread() */
		if (tp->tap_flags & TAP_RWAIT) {
			tp->tap_flags &= ~TAP_RWAIT;
			wakeup((caddr_t)tp);
		}

		get_mplock();
		KNOTE(&tp->tap_rsel.si_note, 0);
		rel_mplock();

		/* deliver SIGIO when async mode was requested via FIOASYNC */
		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
			get_mplock();
			pgsigio(tp->tap_sigio, SIGIO, 0);
			rel_mplock();
		}

		/*
		 * selwakeup is not MPSAFE.  tapifstart is.
		 */
		get_mplock();
		selwakeup(&tp->tap_rsel);
		rel_mplock();
	}

	ifp->if_flags &= ~IFF_OACTIVE;
}
536 
537 
538 /*
539  * tapioctl
540  *
541  * The ops interface is now pretty minimal.  Called via fileops with nothing
542  * held.
543  *
544  * MPSAFE
545  */
static int
tapioctl(struct dev_ioctl_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	caddr_t data = ap->a_data;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct tapinfo		*tapp = NULL;
	struct mbuf *mb;
	short f;
	int error;

	/* all device ioctls run under the interface serializer */
	lwkt_serialize_enter(ifp->if_serializer);
	error = 0;

	switch (ap->a_cmd) {
	case TAPSIFINFO:
		/* set interface mtu/type/baudrate from userland */
		tapp = (struct tapinfo *)data;
		ifp->if_mtu = tapp->mtu;
		ifp->if_type = tapp->type;
		ifp->if_baudrate = tapp->baudrate;
		break;

	case TAPGIFINFO:
		/* report interface mtu/type/baudrate to userland */
		tapp = (struct tapinfo *)data;
		tapp->mtu = ifp->if_mtu;
		tapp->type = ifp->if_type;
		tapp->baudrate = ifp->if_baudrate;
		break;

	case TAPSDEBUG:
		tapdebug = *(int *)data;
		break;

	case TAPGDEBUG:
		*(int *)data = tapdebug;
		break;

	case FIOASYNC:
		/* toggle SIGIO-on-data delivery */
		if (*(int *)data)
			tp->tap_flags |= TAP_ASYNC;
		else
			tp->tap_flags &= ~TAP_ASYNC;
		break;

	case FIONREAD:
		/* byte count of the next readable packet, 0 when empty */
		*(int *)data = 0;

		/* Take a look at devq first */
		IF_POLL(&tp->tap_devq, mb);
		if (mb == NULL)
			mb = ifq_poll(&ifp->if_snd);

		if (mb != NULL) {
			for(; mb != NULL; mb = mb->m_next)
				*(int *)data += mb->m_len;
		}
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)data, &tp->tap_sigio);
		break;

	case FIOGETOWN:
		*(int *)data = fgetown(tp->tap_sigio);
		break;

	/* this is deprecated, FIOSETOWN should be used instead */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)data), &tp->tap_sigio);
		break;

	/* this is deprecated, FIOGETOWN should be used instead */
	case TIOCGPGRP:
		*(int *)data = -fgetown(tp->tap_sigio);
		break;

	/* VMware/VMnet port ioctl's */

	case SIOCGIFFLAGS:	/* get ifnet flags */
		bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
		break;

	case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
		/* accept only changeable flag bits and force IFF_UP */
		f = *(short *)data;
		f &= 0x0fff;
		f &= ~IFF_CANTCHANGE;
		f |= IFF_UP;
		ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
		break;

	case OSIOCGIFADDR:	/* get MAC address of the remote side */
	case SIOCGIFADDR:
		bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
		break;

	case SIOCSIFADDR:	/* set MAC address of the remote side */
		bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
		break;

	default:
		error = ENOTTY;
		break;
	}
	lwkt_serialize_exit(ifp->if_serializer);
	return (error);
}
653 
654 
655 /*
656  * tapread
657  *
658  * The ops read interface - reads a packet at a time, or at
659  * least as much of a packet as can be read.
660  *
661  * Called from the fileops interface with nothing held.
662  *
663  * MPSAFE
664  */
static int
tapread(struct dev_read_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct uio *uio = ap->a_uio;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*m0 = NULL;
	int			 error = 0, len;

	TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));

	/* device must be open and the interface up and running */
	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
			 minor(tp->tap_dev), tp->tap_flags);

		return (EHOSTDOWN);
	}

	tp->tap_flags &= ~TAP_RWAIT;

	/* sleep until we get a packet */
	do {
		lwkt_serialize_enter(ifp->if_serializer);
		IF_DEQUEUE(&tp->tap_devq, m0);
		if (m0 == NULL) {
			/* non-blocking read: bail out immediately */
			if (ap->a_ioflag & IO_NDELAY) {
				lwkt_serialize_exit(ifp->if_serializer);
				return (EWOULDBLOCK);
			}
			/*
			 * Interlock the sleep before dropping the
			 * serializer so a wakeup from tapifstart()
			 * cannot be lost in the window between them.
			 */
			tp->tap_flags |= TAP_RWAIT;
			crit_enter();
			tsleep_interlock(tp);
			lwkt_serialize_exit(ifp->if_serializer);
			error = tsleep(tp, PCATCH, "taprd", 0);
			crit_exit();
			if (error)
				return (error);
		} else {
			lwkt_serialize_exit(ifp->if_serializer);
		}
	} while (m0 == NULL);

	/* feed a copy of the packet to any attached bpf listeners */
	BPF_MTAP(ifp, m0);

	/* xfer packet to user space */
	while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
		len = min(uio->uio_resid, m0->m_len);
		if (len == 0)
			break;

		error = uiomove(mtod(m0, caddr_t), len, uio);
		m0 = m_free(m0);
	}

	/* anything left over did not fit into the user buffer: drop it */
	if (m0 != NULL) {
		TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
			 minor(tp->tap_dev));
		m_freem(m0);
	}

	return (error);
}
728 
729 /*
730  * tapwrite
731  *
732  * The ops write interface - an atomic write is a packet - or else!
733  *
734  * Called from the fileops interface with nothing held.
735  *
736  * MPSAFE
737  */
static int
tapwrite(struct dev_write_args *ap)
{
	cdev_t dev = ap->a_head.a_dev;
	struct uio *uio = ap->a_uio;
	struct tap_softc	*tp = dev->si_drv1;
	struct ifnet		*ifp = &tp->tap_if;
	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
	int		 	 error = 0, tlen, mlen;

	TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));

	if (uio->uio_resid == 0)
		return (0);

	/* one write is one packet; refuse anything larger than TAPMRU */
	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
		TAPDEBUG(ifp, "invalid packet len = %d, minor = %#x\n",
			 uio->uio_resid, minor(tp->tap_dev));

		return (EIO);
	}
	tlen = uio->uio_resid;

	/* get a header mbuf */
	MGETHDR(m, MB_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (ENOBUFS);
	mlen = MHLEN;

	/* copy the user buffer into a chain of mbufs */
	top = 0;
	mp = &top;
	while ((error == 0) && (uio->uio_resid > 0)) {
		m->m_len = min(mlen, uio->uio_resid);
		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
		*mp = m;
		mp = &m->m_next;
		if (uio->uio_resid > 0) {
			MGET(m, MB_DONTWAIT, MT_DATA);
			if (m == NULL) {
				error = ENOBUFS;
				break;
			}
			mlen = MLEN;
		}
	}
	if (error) {
		ifp->if_ierrors ++;
		if (top)
			m_freem(top);
		return (error);
	}

	top->m_pkthdr.len = tlen;
	top->m_pkthdr.rcvif = ifp;

	/*
	 * Ethernet bridge and bpf are handled in ether_input
	 *
	 * adjust mbuf and give packet to the ether_input
	 */
	lwkt_serialize_enter(ifp->if_serializer);
	ifp->if_input(ifp, top);
	ifp->if_ipackets ++; /* ibytes are counted in ether_input */
	lwkt_serialize_exit(ifp->if_serializer);

	return (0);
}
805 
806 /*
807  * tappoll
808  *
809  * The poll interface, this is only useful on reads really. The write
810  * detect always returns true, write never blocks anyway, it either
811  * accepts the packet or drops it
812  *
813  * Called from the fileops interface with nothing held.
814  *
815  * MPSAFE
816  */
817 static int
818 tappoll(struct dev_poll_args *ap)
819 {
820 	cdev_t dev = ap->a_head.a_dev;
821 	struct tap_softc	*tp = dev->si_drv1;
822 	struct ifnet		*ifp = &tp->tap_if;
823 	int		 	 revents = 0;
824 
825 	TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
826 
827 	if (ap->a_events & (POLLIN | POLLRDNORM)) {
828 		if (!IF_QEMPTY(&tp->tap_devq)) {
829 			TAPDEBUG(ifp,
830 				 "has data in queue. minor = %#x\n",
831 				 minor(tp->tap_dev));
832 
833 			revents |= (ap->a_events & (POLLIN | POLLRDNORM));
834 		} else {
835 			TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
836 				 minor(tp->tap_dev));
837 
838 			get_mplock();
839 			selrecord(curthread, &tp->tap_rsel);
840 			rel_mplock();
841 		}
842 	}
843 
844 	if (ap->a_events & (POLLOUT | POLLWRNORM))
845 		revents |= (ap->a_events & (POLLOUT | POLLWRNORM));
846 	ap->a_events = revents;
847 	return(0);
848 }
849 
850 /*
851  * tapkqfilter - called from the fileops interface with nothing held
852  *
853  * MPSAFE
854  */
/* kqueue EVFILT_READ support for the tap character device */
static int filt_tapread(struct knote *kn, long hint);
static void filt_tapdetach(struct knote *kn);
static struct filterops tapread_filtops =
	{ 1, NULL, filt_tapdetach, filt_tapread };
859 
860 static int
861 tapkqfilter(struct dev_kqfilter_args *ap)
862 {
863 	cdev_t dev = ap->a_head.a_dev;
864 	struct knote *kn = ap->a_kn;
865 	struct tap_softc *tp;
866 	struct klist *list;
867 	struct ifnet *ifp;
868 
869 	get_mplock();
870 	tp = dev->si_drv1;
871 	ifp = &tp->tap_if;
872 	ap->a_result =0;
873 
874 	switch(kn->kn_filter) {
875 	case EVFILT_READ:
876 		list = &tp->tap_rsel.si_note;
877 		kn->kn_fop = &tapread_filtops;
878 		kn->kn_hook = (void *)tp;
879 		break;
880 	case EVFILT_WRITE:
881 		/* fall through */
882 	default:
883 		ap->a_result = 1;
884 		rel_mplock();
885 		return(0);
886 	}
887 	crit_enter();
888 	SLIST_INSERT_HEAD(list, kn, kn_selnext);
889 	crit_exit();
890 	rel_mplock();
891 	return(0);
892 }
893 
894 static int
895 filt_tapread(struct knote *kn, long hint)
896 {
897 	struct tap_softc *tp = (void *)kn->kn_hook;
898 
899 	if (IF_QEMPTY(&tp->tap_devq) == 0)	/* XXX serializer */
900 		return(1);
901 	else
902 		return(0);
903 }
904 
905 static void
906 filt_tapdetach(struct knote *kn)
907 {
908 	struct tap_softc *tp = (void *)kn->kn_hook;
909 
910 	SLIST_REMOVE(&tp->tap_rsel.si_note, kn, knote, kn_selnext);
911 }
912 
static void
tapifstop(struct tap_softc *tp, int clear_flags)
{
	struct ifnet *ifp = &tp->tap_if;

	/* caller must hold the interface serializer */
	ASSERT_SERIALIZED(ifp->if_serializer);
	/* discard every packet queued toward the userland reader */
	IF_DRAIN(&tp->tap_devq);
	/* optionally mark the interface as no longer running */
	if (clear_flags)
		ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
}
923