xref: /dragonfly/sys/net/tap/if_tap.c (revision c03f08f3)
1 /*
2  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * BASED ON:
27  * -------------------------------------------------------------------------
28  *
29  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30  * Nottingham University 1987.
31  */
32 
33 /*
34  * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35  * $DragonFly: src/sys/net/tap/if_tap.c,v 1.36 2007/07/03 17:40:51 dillon Exp $
36  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
37  */
38 
39 #include "opt_inet.h"
40 
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/poll.h>
50 #include <sys/proc.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <sys/systm.h>
56 #include <sys/thread2.h>
57 #include <sys/ttycom.h>
58 #include <sys/uio.h>
59 #include <sys/vnode.h>
60 #include <sys/serialize.h>
61 
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/ifq_var.h>
66 #include <net/if_arp.h>
67 #include <net/route.h>
68 
69 #include <netinet/in.h>
70 
71 #include "if_tapvar.h"
72 #include "if_tap.h"
73 
74 
/*
 * Driver-wide names and constants.
 *
 * CDEV_NAME / CDEV_MAJOR identify the character device in tap_ops.
 * TAP and VMNET are the interface/device name prefixes chosen in tapcreate();
 * VMNET_DEV_MASK is the minor-number bit that selects the vmnet flavour.
 */
#define CDEV_NAME	"tap"
#define CDEV_MAJOR	149

/*
 * Debug printf, emitted only while the tapdebug flag is non-zero.
 * Arguments are passed straight to if_printf().
 *
 * Wrapped in do { } while (0) so the macro is a single statement: a bare
 * "if (tapdebug) if_printf" would capture the "else" of an enclosing
 * unbraced if/else.
 */
#define TAPDEBUG(...)	do {						\
	if (tapdebug)							\
		if_printf(__VA_ARGS__);					\
} while (0)

#define TAP		"tap"
#define VMNET		"vmnet"
#define VMNET_DEV_MASK	0x00010000
83 /* module */
84 static int 		tapmodevent	(module_t, int, void *);
85 
86 /* device */
87 static void		tapcreate	(cdev_t);
88 
89 /* network interface */
90 static void		tapifstart	(struct ifnet *);
91 static int		tapifioctl	(struct ifnet *, u_long, caddr_t,
92 					 struct ucred *);
93 static void		tapifinit	(void *);
94 
95 /* character device */
96 static d_open_t		tapopen;
97 static d_close_t	tapclose;
98 static d_read_t		tapread;
99 static d_write_t	tapwrite;
100 static d_ioctl_t	tapioctl;
101 static d_poll_t		tappoll;
102 static d_kqfilter_t	tapkqfilter;
103 
104 static struct dev_ops	tap_ops = {
105 	{ CDEV_NAME, CDEV_MAJOR, 0 },
106 	.d_open =	tapopen,
107 	.d_close =	tapclose,
108 	.d_read =	tapread,
109 	.d_write =	tapwrite,
110 	.d_ioctl =	tapioctl,
111 	.d_poll =	tappoll,
112 	.d_kqfilter =	tapkqfilter
113 };
114 
115 static int		taprefcnt = 0;		/* module ref. counter   */
116 static int		taplastunit = -1;	/* max. open unit number */
117 static int		tapdebug = 0;		/* debug flag            */
118 
119 MALLOC_DECLARE(M_TAP);
120 MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
121 SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
122 DEV_MODULE(if_tap, tapmodevent, NULL);
123 
124 /*
125  * tapmodevent
126  *
127  * module event handler
128  */
129 static int
130 tapmodevent(module_t mod, int type, void *data)
131 {
132 	static int		 attached = 0;
133 	struct ifnet		*ifp = NULL;
134 	int			 unit;
135 
136 	switch (type) {
137 	case MOD_LOAD:
138 		if (attached)
139 			return (EEXIST);
140 
141 		dev_ops_add(&tap_ops, 0, 0);
142 		attached = 1;
143 	break;
144 
145 	case MOD_UNLOAD:
146 		if (taprefcnt > 0)
147 			return (EBUSY);
148 
149 		dev_ops_remove(&tap_ops, 0, 0);
150 
151 		/* XXX: maintain tap ifs in a local list */
152 		unit = 0;
153 		while (unit <= taplastunit) {
154 			TAILQ_FOREACH(ifp, &ifnet, if_link) {
155 				if ((strcmp(ifp->if_dname, TAP) == 0) ||
156 				    (strcmp(ifp->if_dname, VMNET) == 0)) {
157 					if (ifp->if_dunit == unit)
158 						break;
159 				}
160 			}
161 
162 			if (ifp != NULL) {
163 				struct tap_softc	*tp = ifp->if_softc;
164 
165 				TAPDEBUG(ifp, "detached. minor = %#x, " \
166 					"taplastunit = %d\n",
167 					minor(tp->tap_dev), taplastunit);
168 
169 				ether_ifdetach(ifp);
170 				destroy_dev(tp->tap_dev);
171 				kfree(tp, M_TAP);
172 			}
173 			else
174 				unit ++;
175 		}
176 
177 		attached = 0;
178 	break;
179 
180 	default:
181 		return (EOPNOTSUPP);
182 	}
183 
184 	return (0);
185 } /* tapmodevent */
186 
187 
188 /*
189  * tapcreate
190  *
191  * to create interface
192  */
193 static void
194 tapcreate(cdev_t dev)
195 {
196 	struct ifnet		*ifp = NULL;
197 	struct tap_softc	*tp = NULL;
198 	uint8_t			ether_addr[ETHER_ADDR_LEN];
199 	int			 unit;
200 	char			*name = NULL;
201 
202 	/* allocate driver storage and create device */
203 	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK);
204 	bzero(tp, sizeof(*tp));
205 
206 	/* select device: tap or vmnet */
207 	if (minor(dev) & VMNET_DEV_MASK) {
208 		name = VMNET;
209 		unit = lminor(dev) & 0xff;
210 		tp->tap_flags |= TAP_VMNET;
211 	}
212 	else {
213 		name = TAP;
214 		unit = lminor(dev);
215 	}
216 
217 	tp->tap_dev = make_dev(&tap_ops, minor(dev), UID_ROOT, GID_WHEEL,
218 						0600, "%s%d", name, unit);
219 	tp->tap_dev->si_drv1 = dev->si_drv1 = tp;
220 	reference_dev(tp->tap_dev);	/* so we can destroy it later */
221 
222 	/* generate fake MAC address: 00 bd xx xx xx unit_no */
223 	ether_addr[0] = 0x00;
224 	ether_addr[1] = 0xbd;
225 	bcopy(&ticks, &ether_addr[2], 3);
226 	ether_addr[5] = (u_char)unit;
227 
228 	/* fill the rest and attach interface */
229 	ifp = &tp->tap_if;
230 	ifp->if_softc = tp;
231 
232 	if_initname(ifp, name, unit);
233 	if (unit > taplastunit)
234 		taplastunit = unit;
235 
236 	ifp->if_init = tapifinit;
237 	ifp->if_start = tapifstart;
238 	ifp->if_ioctl = tapifioctl;
239 	ifp->if_mtu = ETHERMTU;
240 	ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
241 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
242 	ifq_set_ready(&ifp->if_snd);
243 
244 	ether_ifattach(ifp, ether_addr, NULL);
245 
246 	tp->tap_flags |= TAP_INITED;
247 
248 	TAPDEBUG(ifp, "created. minor = %#x\n", minor(tp->tap_dev));
249 } /* tapcreate */
250 
251 
252 /*
253  * tapopen
254  *
255  * to open tunnel. must be superuser
256  */
257 static int
258 tapopen(struct dev_open_args *ap)
259 {
260 	cdev_t dev = ap->a_head.a_dev;
261 	struct tap_softc *tp = NULL;
262 	struct ifnet *ifp = NULL;
263 	int error;
264 
265 	if ((error = suser_cred(ap->a_cred, 0)) != 0)
266 		return (error);
267 
268 	get_mplock();
269 	tp = dev->si_drv1;
270 	if (tp == NULL) {
271 		tapcreate(dev);
272 		tp = dev->si_drv1;
273 		ifp = &tp->arpcom.ac_if;
274 	} else {
275 		ifp = &tp->arpcom.ac_if;
276 
277                 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
278 
279 		/* Announce the return of the interface. */
280 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
281 	}
282 
283 	if (tp->tap_flags & TAP_OPEN) {
284 		rel_mplock();
285 		return (EBUSY);
286 	}
287 
288 	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
289 
290 	tp->tap_td = curthread;
291 	tp->tap_flags |= TAP_OPEN;
292 	taprefcnt ++;
293 
294 	TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
295 		 minor(tp->tap_dev), taprefcnt, taplastunit);
296 
297 	rel_mplock();
298 	return (0);
299 }
300 
301 
302 /*
303  * tapclose
304  *
305  * close the device - mark i/f down & delete routing info
306  */
307 static int
308 tapclose(struct dev_close_args *ap)
309 {
310 	cdev_t dev = ap->a_head.a_dev;
311 	struct tap_softc	*tp = dev->si_drv1;
312 	struct ifnet		*ifp = &tp->tap_if;
313 
314 	/* junk all pending output */
315 
316 	get_mplock();
317 	lwkt_serialize_enter(ifp->if_serializer);
318 	ifq_purge(&ifp->if_snd);
319 	lwkt_serialize_exit(ifp->if_serializer);
320 
321 	/*
322 	 * do not bring the interface down, and do not anything with
323 	 * interface, if we are in VMnet mode. just close the device.
324 	 */
325 
326 	if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
327 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
328 
329 		/* Announce the departure of the interface. */
330 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
331 
332 		if_down(ifp);
333 		lwkt_serialize_enter(ifp->if_serializer);
334 		if (ifp->if_flags & IFF_RUNNING) {
335 			/* find internet addresses and delete routes */
336 			struct ifaddr	*ifa = NULL;
337 
338 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
339 				if (ifa->ifa_addr->sa_family == AF_INET) {
340 					rtinit(ifa, (int)RTM_DELETE, 0);
341 
342 					/* remove address from interface */
343 					bzero(ifa->ifa_addr,
344 						   sizeof(*(ifa->ifa_addr)));
345 					bzero(ifa->ifa_dstaddr,
346 						   sizeof(*(ifa->ifa_dstaddr)));
347 					bzero(ifa->ifa_netmask,
348 						   sizeof(*(ifa->ifa_netmask)));
349 				}
350 			}
351 
352 			ifp->if_flags &= ~IFF_RUNNING;
353 		}
354 		lwkt_serialize_exit(ifp->if_serializer);
355 	}
356 
357 	funsetown(tp->tap_sigio);
358 	selwakeup(&tp->tap_rsel);
359 
360 	tp->tap_flags &= ~TAP_OPEN;
361 	tp->tap_td = NULL;
362 
363 	taprefcnt --;
364 	if (taprefcnt < 0) {
365 		taprefcnt = 0;
366 		if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
367 			"set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
368 	}
369 
370 	TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
371 		 minor(tp->tap_dev), taprefcnt, taplastunit);
372 
373 	rel_mplock();
374 	return (0);
375 }
376 
377 
378 /*
379  * tapifinit
380  *
381  * Network interface initialization function (called with if serializer held)
382  *
383  * MPSAFE
384  */
385 static void
386 tapifinit(void *xtp)
387 {
388 	struct tap_softc	*tp = (struct tap_softc *)xtp;
389 	struct ifnet		*ifp = &tp->tap_if;
390 
391 	TAPDEBUG(ifp, "initializing, minor = %#x\n", minor(tp->tap_dev));
392 
393 	ifp->if_flags |= IFF_RUNNING;
394 	ifp->if_flags &= ~IFF_OACTIVE;
395 
396 	/* attempt to start output */
397 	tapifstart(ifp);
398 }
399 
400 
401 /*
402  * tapifioctl
403  *
404  * Process an ioctl request on network interface (called with if serializer
405  * held).
406  *
407  * MPSAFE
408  */
409 int
410 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
411 {
412 	struct tap_softc 	*tp = (struct tap_softc *)(ifp->if_softc);
413 	struct ifstat		*ifs = NULL;
414 	int			 dummy;
415 
416 	switch (cmd) {
417 		case SIOCSIFADDR:
418 		case SIOCGIFADDR:
419 		case SIOCSIFMTU:
420 			dummy = ether_ioctl(ifp, cmd, data);
421 			return (dummy);
422 
423 		case SIOCSIFFLAGS:
424 			if ((tp->tap_flags & TAP_VMNET) == 0) {
425 				/*
426 				 * Only for non-vmnet tap(4)
427 				 */
428 				if (ifp->if_flags & IFF_UP) {
429 					if ((ifp->if_flags & IFF_RUNNING) == 0)
430 						tapifinit(tp);
431 				}
432 			}
433 			break;
434 		case SIOCADDMULTI: /* XXX -- just like vmnet does */
435 		case SIOCDELMULTI:
436 			break;
437 
438 		case SIOCGIFSTATUS:
439 			ifs = (struct ifstat *)data;
440 			dummy = strlen(ifs->ascii);
441 			if (tp->tap_td != NULL && dummy < sizeof(ifs->ascii)) {
442 				if (tp->tap_td->td_proc) {
443 				    ksnprintf(ifs->ascii + dummy,
444 					sizeof(ifs->ascii) - dummy,
445 					"\tOpened by pid %d\n",
446 					(int)tp->tap_td->td_proc->p_pid);
447 				} else {
448 				    ksnprintf(ifs->ascii + dummy,
449 					sizeof(ifs->ascii) - dummy,
450 					"\tOpened by td %p\n", tp->tap_td);
451 				}
452 			}
453 			break;
454 
455 		default:
456 			return (EINVAL);
457 	}
458 
459 	return (0);
460 }
461 
462 
463 /*
464  * tapifstart
465  *
466  * Queue packets from higher level ready to put out (called with if serializer
467  * held)
468  *
469  * MPSAFE
470  */
471 static void
472 tapifstart(struct ifnet *ifp)
473 {
474 	struct tap_softc	*tp = ifp->if_softc;
475 
476 	TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));
477 
478 	/*
479 	 * do not junk pending output if we are in VMnet mode.
480 	 * XXX: can this do any harm because of queue overflow?
481 	 */
482 
483 	if (((tp->tap_flags & TAP_VMNET) == 0) &&
484 	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
485 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
486 			 minor(tp->tap_dev), tp->tap_flags);
487 
488 		ifq_purge(&ifp->if_snd);
489 		return;
490 	}
491 
492 	ifp->if_flags |= IFF_OACTIVE;
493 
494 	if (!ifq_is_empty(&ifp->if_snd)) {
495 		if (tp->tap_flags & TAP_RWAIT) {
496 			tp->tap_flags &= ~TAP_RWAIT;
497 			wakeup((caddr_t)tp);
498 		}
499 		KNOTE(&tp->tap_rsel.si_note, 0);
500 
501 		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
502 			get_mplock();
503 			pgsigio(tp->tap_sigio, SIGIO, 0);
504 			rel_mplock();
505 		}
506 
507 		/*
508 		 * selwakeup is not MPSAFE.  tapifstart is.
509 		 */
510 		get_mplock();
511 		selwakeup(&tp->tap_rsel);
512 		rel_mplock();
513 		ifp->if_opackets ++; /* obytes are counted in ether_output */
514 	}
515 
516 	ifp->if_flags &= ~IFF_OACTIVE;
517 }
518 
519 
520 /*
521  * tapioctl
522  *
523  * The ops interface is now pretty minimal.  Called via fileops with nothing
524  * held.
525  *
526  * MPSAFE
527  */
528 static int
529 tapioctl(struct dev_ioctl_args *ap)
530 {
531 	cdev_t dev = ap->a_head.a_dev;
532 	caddr_t data = ap->a_data;
533 	struct tap_softc	*tp = dev->si_drv1;
534 	struct ifnet		*ifp = &tp->tap_if;
535  	struct tapinfo		*tapp = NULL;
536 	struct mbuf *mb;
537 	short f;
538 	int error;
539 
540 	lwkt_serialize_enter(ifp->if_serializer);
541 	error = 0;
542 
543 	switch (ap->a_cmd) {
544 	case TAPSIFINFO:
545 		tapp = (struct tapinfo *)data;
546 		ifp->if_mtu = tapp->mtu;
547 		ifp->if_type = tapp->type;
548 		ifp->if_baudrate = tapp->baudrate;
549 		break;
550 
551 	case TAPGIFINFO:
552 		tapp = (struct tapinfo *)data;
553 		tapp->mtu = ifp->if_mtu;
554 		tapp->type = ifp->if_type;
555 		tapp->baudrate = ifp->if_baudrate;
556 		break;
557 
558 	case TAPSDEBUG:
559 		tapdebug = *(int *)data;
560 		break;
561 
562 	case TAPGDEBUG:
563 		*(int *)data = tapdebug;
564 		break;
565 
566 	case FIOASYNC:
567 		if (*(int *)data)
568 			tp->tap_flags |= TAP_ASYNC;
569 		else
570 			tp->tap_flags &= ~TAP_ASYNC;
571 		break;
572 
573 	case FIONREAD:
574 		*(int *)data = 0;
575 		if ((mb = ifq_poll(&ifp->if_snd)) != NULL) {
576 			for(; mb != NULL; mb = mb->m_next)
577 				*(int *)data += mb->m_len;
578 		}
579 		break;
580 
581 	case FIOSETOWN:
582 		error = fsetown(*(int *)data, &tp->tap_sigio);
583 		break;
584 
585 	case FIOGETOWN:
586 		*(int *)data = fgetown(tp->tap_sigio);
587 		break;
588 
589 	/* this is deprecated, FIOSETOWN should be used instead */
590 	case TIOCSPGRP:
591 		error = fsetown(-(*(int *)data), &tp->tap_sigio);
592 		break;
593 
594 	/* this is deprecated, FIOGETOWN should be used instead */
595 	case TIOCGPGRP:
596 		*(int *)data = -fgetown(tp->tap_sigio);
597 		break;
598 
599 	/* VMware/VMnet port ioctl's */
600 
601 	case SIOCGIFFLAGS:	/* get ifnet flags */
602 		bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
603 		break;
604 
605 	case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
606 		f = *(short *)data;
607 		f &= 0x0fff;
608 		f &= ~IFF_CANTCHANGE;
609 		f |= IFF_UP;
610 		ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
611 		break;
612 
613 	case OSIOCGIFADDR:	/* get MAC address of the remote side */
614 	case SIOCGIFADDR:
615 		bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
616 		break;
617 
618 	case SIOCSIFADDR:	/* set MAC address of the remote side */
619 		bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
620 		break;
621 
622 	default:
623 		error = ENOTTY;
624 		break;
625 	}
626 	lwkt_serialize_exit(ifp->if_serializer);
627 	return (error);
628 }
629 
630 
631 /*
632  * tapread
633  *
634  * The ops read interface - reads a packet at a time, or at
635  * least as much of a packet as can be read.
636  *
637  * Called from the fileops interface with nothing held.
638  *
639  * MPSAFE
640  */
641 static int
642 tapread(struct dev_read_args *ap)
643 {
644 	cdev_t dev = ap->a_head.a_dev;
645 	struct uio *uio = ap->a_uio;
646 	struct tap_softc	*tp = dev->si_drv1;
647 	struct ifnet		*ifp = &tp->tap_if;
648 	struct mbuf		*m0 = NULL;
649 	int			 error = 0, len;
650 
651 	TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));
652 
653 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
654 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
655 			 minor(tp->tap_dev), tp->tap_flags);
656 
657 		return (EHOSTDOWN);
658 	}
659 
660 	tp->tap_flags &= ~TAP_RWAIT;
661 
662 	/* sleep until we get a packet */
663 	do {
664 		lwkt_serialize_enter(ifp->if_serializer);
665 		m0 = ifq_dequeue(&ifp->if_snd, NULL);
666 		if (m0 == NULL) {
667 			if (ap->a_ioflag & IO_NDELAY) {
668 				lwkt_serialize_exit(ifp->if_serializer);
669 				return (EWOULDBLOCK);
670 			}
671 			tp->tap_flags |= TAP_RWAIT;
672 			crit_enter();
673 			tsleep_interlock(tp);
674 			lwkt_serialize_exit(ifp->if_serializer);
675 			error = tsleep(tp, PCATCH, "taprd", 0);
676 			crit_exit();
677 			if (error)
678 				return (error);
679 		} else {
680 			lwkt_serialize_exit(ifp->if_serializer);
681 		}
682 	} while (m0 == NULL);
683 
684 	BPF_MTAP(ifp, m0);
685 
686 	/* xfer packet to user space */
687 	while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
688 		len = min(uio->uio_resid, m0->m_len);
689 		if (len == 0)
690 			break;
691 
692 		error = uiomove(mtod(m0, caddr_t), len, uio);
693 		m0 = m_free(m0);
694 	}
695 
696 	if (m0 != NULL) {
697 		TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
698 			 minor(tp->tap_dev));
699 		m_freem(m0);
700 	}
701 
702 	return (error);
703 }
704 
705 /*
706  * tapwrite
707  *
708  * The ops write interface - an atomic write is a packet - or else!
709  *
710  * Called from the fileops interface with nothing held.
711  *
712  * MPSAFE
713  */
714 static int
715 tapwrite(struct dev_write_args *ap)
716 {
717 	cdev_t dev = ap->a_head.a_dev;
718 	struct uio *uio = ap->a_uio;
719 	struct tap_softc	*tp = dev->si_drv1;
720 	struct ifnet		*ifp = &tp->tap_if;
721 	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
722 	int		 	 error = 0, tlen, mlen;
723 
724 	TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));
725 
726 	if (uio->uio_resid == 0)
727 		return (0);
728 
729 	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
730 		TAPDEBUG(ifp, "invalid packet len = %d, minor = %#x\n",
731 			 uio->uio_resid, minor(tp->tap_dev));
732 
733 		return (EIO);
734 	}
735 	tlen = uio->uio_resid;
736 
737 	/* get a header mbuf */
738 	MGETHDR(m, MB_DONTWAIT, MT_DATA);
739 	if (m == NULL)
740 		return (ENOBUFS);
741 	mlen = MHLEN;
742 
743 	top = 0;
744 	mp = &top;
745 	while ((error == 0) && (uio->uio_resid > 0)) {
746 		m->m_len = min(mlen, uio->uio_resid);
747 		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
748 		*mp = m;
749 		mp = &m->m_next;
750 		if (uio->uio_resid > 0) {
751 			MGET(m, MB_DONTWAIT, MT_DATA);
752 			if (m == NULL) {
753 				error = ENOBUFS;
754 				break;
755 			}
756 			mlen = MLEN;
757 		}
758 	}
759 	if (error) {
760 		ifp->if_ierrors ++;
761 		if (top)
762 			m_freem(top);
763 		return (error);
764 	}
765 
766 	top->m_pkthdr.len = tlen;
767 	top->m_pkthdr.rcvif = ifp;
768 
769 	/*
770 	 * Ethernet bridge and bpf are handled in ether_input
771 	 *
772 	 * adjust mbuf and give packet to the ether_input
773 	 */
774 	lwkt_serialize_enter(ifp->if_serializer);
775 	ifp->if_input(ifp, top);
776 	ifp->if_ipackets ++; /* ibytes are counted in ether_input */
777 	lwkt_serialize_exit(ifp->if_serializer);
778 
779 	return (0);
780 }
781 
782 /*
783  * tappoll
784  *
785  * The poll interface, this is only useful on reads really. The write
786  * detect always returns true, write never blocks anyway, it either
787  * accepts the packet or drops it
788  *
789  * Called from the fileops interface with nothing held.
790  *
791  * MPSAFE
792  */
793 static int
794 tappoll(struct dev_poll_args *ap)
795 {
796 	cdev_t dev = ap->a_head.a_dev;
797 	struct tap_softc	*tp = dev->si_drv1;
798 	struct ifnet		*ifp = &tp->tap_if;
799 	int		 	 revents = 0;
800 
801 	TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
802 
803 	lwkt_serialize_enter(ifp->if_serializer);
804 	if (ap->a_events & (POLLIN | POLLRDNORM)) {
805 		if (!ifq_is_empty(&ifp->if_snd)) {
806 			TAPDEBUG(ifp,
807 				 "has data in queue. minor = %#x\n",
808 				 minor(tp->tap_dev));
809 
810 			revents |= (ap->a_events & (POLLIN | POLLRDNORM));
811 		} else {
812 			TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
813 				 minor(tp->tap_dev));
814 
815 			get_mplock();
816 			selrecord(curthread, &tp->tap_rsel);
817 			rel_mplock();
818 		}
819 	}
820 	lwkt_serialize_exit(ifp->if_serializer);
821 
822 	if (ap->a_events & (POLLOUT | POLLWRNORM))
823 		revents |= (ap->a_events & (POLLOUT | POLLWRNORM));
824 	ap->a_events = revents;
825 	return(0);
826 }
827 
828 /*
829  * tapkqfilter - called from the fileops interface with nothing held
830  *
831  * MPSAFE
832  */
833 static int filt_tapread(struct knote *kn, long hint);
834 static void filt_tapdetach(struct knote *kn);
835 static struct filterops tapread_filtops =
836 	{ 1, NULL, filt_tapdetach, filt_tapread };
837 
838 int
839 tapkqfilter(struct dev_kqfilter_args *ap)
840 {
841 	cdev_t dev = ap->a_head.a_dev;
842 	struct knote *kn = ap->a_kn;
843 	struct tap_softc *tp;
844 	struct klist *list;
845 	struct ifnet *ifp;
846 
847 	get_mplock();
848 	tp = dev->si_drv1;
849 	ifp = &tp->tap_if;
850 	ap->a_result =0;
851 
852 	switch(kn->kn_filter) {
853 	case EVFILT_READ:
854 		list = &tp->tap_rsel.si_note;
855 		kn->kn_fop = &tapread_filtops;
856 		kn->kn_hook = (void *)tp;
857 		break;
858 	case EVFILT_WRITE:
859 		/* fall through */
860 	default:
861 		ap->a_result = 1;
862 		rel_mplock();
863 		return(0);
864 	}
865 	crit_enter();
866 	SLIST_INSERT_HEAD(list, kn, kn_selnext);
867 	crit_exit();
868 	rel_mplock();
869 	return(0);
870 }
871 
872 static int
873 filt_tapread(struct knote *kn, long hint)
874 {
875 	struct tap_softc *tp = (void *)kn->kn_hook;
876 	struct ifnet *ifp = &tp->tap_if;
877 
878 	if (ifq_is_empty(&ifp->if_snd) == 0) {
879 		return(1);
880 	} else {
881 		return(0);
882 	}
883 }
884 
885 static void
886 filt_tapdetach(struct knote *kn)
887 {
888 	struct tap_softc *tp = (void *)kn->kn_hook;
889 
890 	SLIST_REMOVE(&tp->tap_rsel.si_note, kn, knote, kn_selnext);
891 }
892