xref: /dragonfly/sys/net/tap/if_tap.c (revision c39dd9c0)
1 /*
2  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * BASED ON:
27  * -------------------------------------------------------------------------
28  *
29  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30  * Nottingham University 1987.
31  */
32 
33 /*
34  * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35  * $DragonFly: src/sys/net/tap/if_tap.c,v 1.41 2008/09/05 17:03:15 dillon Exp $
36  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
37  */
38 
39 #include "opt_inet.h"
40 
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/poll.h>
50 #include <sys/proc.h>
51 #include <sys/priv.h>
52 #include <sys/signalvar.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57 #include <sys/thread2.h>
58 #include <sys/ttycom.h>
59 #include <sys/uio.h>
60 #include <sys/vnode.h>
61 #include <sys/serialize.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/ifq_var.h>
67 #include <net/if_arp.h>
68 #include <net/if_clone.h>
69 #include <net/route.h>
70 
71 #include <netinet/in.h>
72 
73 #include "if_tapvar.h"
74 #include "if_tap.h"
75 
/* Interface flags every tap/vmnet interface is created with. */
#define TAP_IFFLAGS	(IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST)

#define CDEV_NAME	"tap"
#define CDEV_MAJOR	149

/*
 * Debug output, emitted only while the tapdebug flag is non-zero.
 * Arguments are forwarded unchanged to if_printf().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; a bare "if (tapdebug) if_printf" would silently
 * capture an "else" that follows a TAPDEBUG() call.
 */
#define TAPDEBUG(...)	do {						\
	if (tapdebug)							\
		if_printf(__VA_ARGS__);					\
} while (0)

/* interface name prefixes */
#define TAP		"tap"
#define VMNET		"vmnet"
/* minor number bit that selects a vmnet device instead of a tap device */
#define VMNET_DEV_MASK	0x00010000
85 
86 /* module */
87 static int 		tapmodevent	(module_t, int, void *);
88 
89 /* device */
90 static void		tapcreate	(cdev_t);
91 static void		tapdestroy(struct tap_softc *);
92 
93 /* clone */
94 static int		tap_clone_create(struct if_clone *, int);
95 static void		tap_clone_destroy(struct ifnet *);
96 
97 
98 /* network interface */
99 static void		tapifstart	(struct ifnet *);
100 static int		tapifioctl	(struct ifnet *, u_long, caddr_t,
101 					 struct ucred *);
102 static void		tapifinit	(void *);
103 static void		tapifstop(struct tap_softc *, int);
104 static void		tapifflags(struct tap_softc *);
105 
106 /* character device */
107 static d_open_t		tapopen;
108 static d_close_t	tapclose;
109 static d_read_t		tapread;
110 static d_write_t	tapwrite;
111 static d_ioctl_t	tapioctl;
112 static d_poll_t		tappoll;
113 static d_kqfilter_t	tapkqfilter;
114 
115 static struct dev_ops	tap_ops = {
116 	{ CDEV_NAME, CDEV_MAJOR, 0 },
117 	.d_open =	tapopen,
118 	.d_close =	tapclose,
119 	.d_read =	tapread,
120 	.d_write =	tapwrite,
121 	.d_ioctl =	tapioctl,
122 	.d_poll =	tappoll,
123 	.d_kqfilter =	tapkqfilter
124 };
125 
126 static int		taprefcnt = 0;		/* module ref. counter   */
127 static int		taplastunit = -1;	/* max. open unit number */
128 static int		tapdebug = 0;		/* debug flag            */
129 static int		tapuopen = 0;		/* all user open()       */
130 static int		tapuponopen = 0;	/* IFF_UP       */
131 
132 MALLOC_DECLARE(M_TAP);
133 MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
134 struct if_clone tap_cloner = IF_CLONE_INITIALIZER("tap",
135 			     tap_clone_create, tap_clone_destroy,
136 			     0, IF_MAXUNIT);
137 static SLIST_HEAD(,tap_softc) tap_listhead =
138 	SLIST_HEAD_INITIALIZER(&tap_listhead);
139 
140 SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
141 SYSCTL_DECL(_net_link);
142 SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
143 	    "Ethernet tunnel software network interface");
144 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
145 	   "Allow user to open /dev/tap (based on node permissions)");
146 SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
147 	   "Bring interface up when /dev/tap is opened");
148 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
149 
150 DEV_MODULE(if_tap, tapmodevent, NULL);
151 
152 /*
153  * tapmodevent
154  *
155  * module event handler
156  */
157 static int
158 tapmodevent(module_t mod, int type, void *data)
159 {
160 	static int attached = 0;
161 	struct tap_softc *tp, *ntp;
162 
163 	switch (type) {
164 	case MOD_LOAD:
165 		if (attached)
166 			return (EEXIST);
167 
168 		dev_ops_add(&tap_ops, 0, 0);
169 		SLIST_INIT(&tap_listhead);
170 		if_clone_attach(&tap_cloner);
171 
172 		attached = 1;
173 		break;
174 
175 	case MOD_UNLOAD:
176 		if (taprefcnt > 0)
177 			return (EBUSY);
178 
179 		if_clone_detach(&tap_cloner);
180 		dev_ops_remove(&tap_ops, 0, 0);
181 
182 		/* Maintain tap ifs in a local list */
183 		SLIST_FOREACH_MUTABLE(tp, &tap_listhead, tap_link, ntp)
184 			tapdestroy(tp);
185 
186 		attached = 0;
187 		break;
188 
189 	default:
190 		return (EOPNOTSUPP);
191 	}
192 
193 	return (0);
194 } /* tapmodevent */
195 
196 
197 /*
198  * tapcreate
199  *
200  * to create interface
201  */
202 static void
203 tapcreate(cdev_t dev)
204 {
205 	struct ifnet		*ifp = NULL;
206 	struct tap_softc	*tp = NULL;
207 	uint8_t			ether_addr[ETHER_ADDR_LEN];
208 	int			 unit;
209 	char			*name = NULL;
210 
211 	/* allocate driver storage and create device */
212 	MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
213 
214 	/* select device: tap or vmnet */
215 	if (minor(dev) & VMNET_DEV_MASK) {
216 		name = VMNET;
217 		unit = lminor(dev) & 0xff;
218 		tp->tap_flags |= TAP_VMNET;
219 	}
220 	else {
221 		name = TAP;
222 		unit = lminor(dev);
223 	}
224 
225 	tp->tap_dev = make_dev(&tap_ops, minor(dev), UID_ROOT, GID_WHEEL,
226 						0600, "%s%d", name, unit);
227 	tp->tap_dev->si_drv1 = dev->si_drv1 = tp;
228 	reference_dev(tp->tap_dev);	/* so we can destroy it later */
229 
230 	/* generate fake MAC address: 00 bd xx xx xx unit_no */
231 	ether_addr[0] = 0x00;
232 	ether_addr[1] = 0xbd;
233 	bcopy(&ticks, &ether_addr[2], 3);
234 	ether_addr[5] = (u_char)unit;
235 
236 	/* fill the rest and attach interface */
237 	ifp = &tp->tap_if;
238 	ifp->if_softc = tp;
239 
240 	if_initname(ifp, name, unit);
241 	if (unit > taplastunit)
242 		taplastunit = unit;
243 
244 	ifp->if_init = tapifinit;
245 	ifp->if_start = tapifstart;
246 	ifp->if_ioctl = tapifioctl;
247 	ifp->if_mtu = ETHERMTU;
248 	ifp->if_flags = TAP_IFFLAGS;
249 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
250 	ifq_set_ready(&ifp->if_snd);
251 
252 	ether_ifattach(ifp, ether_addr, NULL);
253 
254 	tp->tap_flags |= TAP_INITED;
255 	tp->tap_devq.ifq_maxlen = ifqmaxlen;
256 
257 	SLIST_INSERT_HEAD(&tap_listhead, tp, tap_link);
258 
259 	TAPDEBUG(ifp, "created. minor = %#x\n", minor(tp->tap_dev));
260 } /* tapcreate */
261 
262 /*
263  * tap_clone_create:
264  *
265  *	Create a new tap instance.
266  */
267 static int
268 tap_clone_create(struct if_clone *ifc __unused, int unit)
269 {
270 	struct tap_softc *tp = NULL;
271 	cdev_t dev;
272 
273 	dev = get_dev(CDEV_MAJOR, unit);
274 	tapcreate(dev);
275 
276 	tp = dev->si_drv1;
277 	tp->tap_flags |= TAP_CLONE;
278 	TAPDEBUG(&tp->tap_if, "clone created. minor = %#x tap_flags = 0x%x\n",
279 		 minor(tp->tap_dev), tp->tap_flags);
280 
281 	return (0);
282 }
283 
284 /*
285  * tapopen
286  *
287  * to open tunnel. must be superuser
288  */
289 static int
290 tapopen(struct dev_open_args *ap)
291 {
292 	cdev_t dev = NULL;
293 	struct tap_softc *tp = NULL;
294 	struct ifnet *ifp = NULL;
295 	int error;
296 
297 	if (tapuopen == 0 &&
298 	    (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)) != 0)
299 		return (error);
300 
301 	get_mplock();
302 	dev = ap->a_head.a_dev;
303 	tp = dev->si_drv1;
304 	if (tp == NULL) {
305 		tapcreate(dev);
306 		tp = dev->si_drv1;
307 		ifp = &tp->arpcom.ac_if;
308 	} else {
309 		if (tp->tap_flags & TAP_OPEN) {
310 			rel_mplock();
311 			return (EBUSY);
312 		}
313 		ifp = &tp->arpcom.ac_if;
314 
315 		if ((tp->tap_flags & TAP_CLONE) == 0) {
316 			EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
317 
318 			/* Announce the return of the interface. */
319 			rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
320 		}
321 	}
322 
323 	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
324 
325 	if (curthread->td_proc)
326 		fsetown(curthread->td_proc->p_pid, &tp->tap_sigtd);
327 	tp->tap_flags |= TAP_OPEN;
328 	taprefcnt ++;
329 
330 	if (tapuponopen && (ifp->if_flags & IFF_UP) == 0) {
331 		crit_enter();
332 		if_up(ifp);
333 		crit_exit();
334 
335 		ifnet_serialize_all(ifp);
336 		tapifflags(tp);
337 		ifnet_deserialize_all(ifp);
338 
339 		tp->tap_flags |= TAP_CLOSEDOWN;
340 	}
341 
342 	TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
343 		 minor(tp->tap_dev), taprefcnt, taplastunit);
344 
345 	rel_mplock();
346 	return (0);
347 }
348 
349 
350 /*
351  * tapclose
352  *
353  * close the device - mark i/f down & delete routing info
354  */
355 static int
356 tapclose(struct dev_close_args *ap)
357 {
358 	cdev_t dev = ap->a_head.a_dev;
359 	struct tap_softc *tp = dev->si_drv1;
360 	struct ifnet *ifp = &tp->tap_if;
361 	int clear_flags = 0;
362 
363 	get_mplock();
364 
365 	/* Junk all pending output */
366 	ifq_purge(&ifp->if_snd);
367 
368 	/*
369 	 * Do not bring the interface down, and do not anything with
370 	 * interface, if we are in VMnet mode. just close the device.
371 	 *
372 	 * If the interface is not cloned, we always bring it down.
373 	 *
374 	 * If the interface is cloned, then we bring it down during
375 	 * closing only if it was brought up during opening.
376 	 */
377 	if ((tp->tap_flags & TAP_VMNET) == 0 &&
378 	    ((tp->tap_flags & TAP_CLONE) == 0 ||
379 	     (tp->tap_flags & TAP_CLOSEDOWN))) {
380 		if (ifp->if_flags & IFF_UP)
381 			if_down(ifp);
382 		clear_flags = 1;
383 	}
384 	ifnet_serialize_all(ifp);
385 	tapifstop(tp, clear_flags);
386 	ifnet_deserialize_all(ifp);
387 
388 	if ((tp->tap_flags & TAP_CLONE) == 0) {
389 		if_purgeaddrs_nolink(ifp);
390 
391 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
392 
393 		/* Announce the departure of the interface. */
394 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
395 	}
396 
397 	funsetown(tp->tap_sigio);
398 	tp->tap_sigio = NULL;
399 	selwakeup(&tp->tap_rsel);
400 
401 	tp->tap_flags &= ~TAP_OPEN;
402 	funsetown(tp->tap_sigtd);
403 	tp->tap_sigtd = NULL;
404 
405 	taprefcnt --;
406 	if (taprefcnt < 0) {
407 		taprefcnt = 0;
408 		if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
409 			"set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
410 	}
411 
412 	TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
413 		 minor(tp->tap_dev), taprefcnt, taplastunit);
414 
415 #ifdef foo
416 	if ((tp->tap_flags & TAP_CLONE) == 0)
417 		tapdestroy(tp);
418 #endif
419 
420 	rel_mplock();
421 	return (0);
422 }
423 
424 /*
425  * tapdestroy:
426  *
427  *	Destroy a tap instance.
428  */
429 static void
430 tapdestroy(struct tap_softc *tp)
431 {
432 	struct ifnet *ifp = &tp->arpcom.ac_if;
433 
434 	TAPDEBUG(ifp, "destroyed. minor = %#x, refcnt = %d, taplastunit = %d\n",
435 		 minor(tp->tap_dev), taprefcnt, taplastunit);
436 
437 	ifnet_serialize_all(ifp);
438 	tapifstop(tp, 1);
439 	ifnet_deserialize_all(ifp);
440 
441 	ether_ifdetach(ifp);
442 	SLIST_REMOVE(&tap_listhead, tp, tap_softc, tap_link);
443 
444 	destroy_dev(tp->tap_dev);
445 	kfree(tp, M_TAP);
446 
447 	taplastunit--;
448 }
449 
450 /*
451  * tap_clone_destroy:
452  *
453  *	Destroy a tap instance.
454  */
455 static void
456 tap_clone_destroy(struct ifnet *ifp)
457 {
458 	struct tap_softc *tp = ifp->if_softc;
459 
460 	TAPDEBUG(&tp->tap_if, "clone destroyed. minor = %#x tap_flags = 0x%x\n",
461 		 minor(tp->tap_dev), tp->tap_flags);
462 	if (tp->tap_flags & TAP_CLONE)
463 		tapdestroy(tp);
464 }
465 
466 /*
467  * tapifinit
468  *
469  * Network interface initialization function (called with if serializer held)
470  *
471  * MPSAFE
472  */
473 static void
474 tapifinit(void *xtp)
475 {
476 	struct tap_softc *tp = xtp;
477 	struct ifnet *ifp = &tp->tap_if;
478 
479 	TAPDEBUG(ifp, "initializing, minor = %#x tap_flags = 0x%x\n",
480 		 minor(tp->tap_dev), tp->tap_flags);
481 
482 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
483 
484 	tapifstop(tp, 1);
485 
486 	ifp->if_flags |= IFF_RUNNING;
487 	ifp->if_flags &= ~IFF_OACTIVE;
488 
489 	/* attempt to start output */
490 	tapifstart(ifp);
491 }
492 
493 
494 /*
495  * tapifioctl
496  *
497  * Process an ioctl request on network interface (called with if serializer
498  * held).
499  *
500  * MPSAFE
501  */
502 static int
503 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
504 {
505 	struct tap_softc 	*tp = (struct tap_softc *)(ifp->if_softc);
506 	struct ifstat		*ifs = NULL;
507 	int			 dummy;
508 
509 	switch (cmd) {
510 		case SIOCSIFADDR:
511 		case SIOCGIFADDR:
512 		case SIOCSIFMTU:
513 			dummy = ether_ioctl(ifp, cmd, data);
514 			return (dummy);
515 
516 		case SIOCSIFFLAGS:
517 			tapifflags(tp);
518 			break;
519 
520 		case SIOCADDMULTI: /* XXX -- just like vmnet does */
521 		case SIOCDELMULTI:
522 			break;
523 
524 		case SIOCGIFSTATUS:
525 			ifs = (struct ifstat *)data;
526 			dummy = strlen(ifs->ascii);
527 			if ((tp->tap_flags & TAP_OPEN) &&
528 			    dummy < sizeof(ifs->ascii)) {
529 				if (tp->tap_sigtd && tp->tap_sigtd->sio_proc) {
530 				    ksnprintf(ifs->ascii + dummy,
531 					sizeof(ifs->ascii) - dummy,
532 					"\tOpened by pid %d\n",
533 					(int)tp->tap_sigtd->sio_proc->p_pid);
534 				} else {
535 				    ksnprintf(ifs->ascii + dummy,
536 					sizeof(ifs->ascii) - dummy,
537 					"\tOpened by <unknown>\n");
538 				}
539 			}
540 			break;
541 
542 		default:
543 			return (EINVAL);
544 	}
545 
546 	return (0);
547 }
548 
549 
550 /*
551  * tapifstart
552  *
553  * Queue packets from higher level ready to put out (called with if serializer
554  * held)
555  *
556  * MPSAFE
557  */
558 static void
559 tapifstart(struct ifnet *ifp)
560 {
561 	struct tap_softc *tp = ifp->if_softc;
562 	struct ifqueue *ifq;
563 	struct mbuf *m;
564 	int has_data = 0;
565 
566 	TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));
567 
568 	/*
569 	 * do not junk pending output if we are in VMnet mode.
570 	 * XXX: can this do any harm because of queue overflow?
571 	 */
572 
573 	if (((tp->tap_flags & TAP_VMNET) == 0) &&
574 	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
575 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
576 			 minor(tp->tap_dev), tp->tap_flags);
577 		ifq_purge(&ifp->if_snd);
578 		return;
579 	}
580 
581 	ifp->if_flags |= IFF_OACTIVE;
582 
583 	ifq = &tp->tap_devq;
584 	while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
585 		if (IF_QFULL(ifq)) {
586 			IF_DROP(ifq);
587 			ifp->if_oerrors++;
588 			m_freem(m);
589 		} else {
590 			IF_ENQUEUE(ifq, m);
591 			ifp->if_opackets++;
592 			has_data = 1;
593 		}
594 	}
595 
596 	if (has_data) {
597 		if (tp->tap_flags & TAP_RWAIT) {
598 			tp->tap_flags &= ~TAP_RWAIT;
599 			wakeup((caddr_t)tp);
600 		}
601 
602 		get_mplock();
603 		KNOTE(&tp->tap_rsel.si_note, 0);
604 		rel_mplock();
605 
606 		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
607 			get_mplock();
608 			pgsigio(tp->tap_sigio, SIGIO, 0);
609 			rel_mplock();
610 		}
611 
612 		/*
613 		 * selwakeup is not MPSAFE.  tapifstart is.
614 		 */
615 		get_mplock();
616 		selwakeup(&tp->tap_rsel);
617 		rel_mplock();
618 	}
619 
620 	ifp->if_flags &= ~IFF_OACTIVE;
621 }
622 
623 
624 /*
625  * tapioctl
626  *
627  * The ops interface is now pretty minimal.  Called via fileops with nothing
628  * held.
629  *
630  * MPSAFE
631  */
632 static int
633 tapioctl(struct dev_ioctl_args *ap)
634 {
635 	cdev_t dev = ap->a_head.a_dev;
636 	caddr_t data = ap->a_data;
637 	struct tap_softc	*tp = dev->si_drv1;
638 	struct ifnet		*ifp = &tp->tap_if;
639  	struct tapinfo		*tapp = NULL;
640 	struct mbuf *mb;
641 	short f;
642 	int error;
643 
644 	ifnet_serialize_all(ifp);
645 	error = 0;
646 
647 	switch (ap->a_cmd) {
648 	case TAPSIFINFO:
649 		tapp = (struct tapinfo *)data;
650 		ifp->if_mtu = tapp->mtu;
651 		ifp->if_type = tapp->type;
652 		ifp->if_baudrate = tapp->baudrate;
653 		break;
654 
655 	case TAPGIFINFO:
656 		tapp = (struct tapinfo *)data;
657 		tapp->mtu = ifp->if_mtu;
658 		tapp->type = ifp->if_type;
659 		tapp->baudrate = ifp->if_baudrate;
660 		break;
661 
662 	case TAPSDEBUG:
663 		tapdebug = *(int *)data;
664 		break;
665 
666 	case TAPGDEBUG:
667 		*(int *)data = tapdebug;
668 		break;
669 
670 	case FIOASYNC:
671 		if (*(int *)data)
672 			tp->tap_flags |= TAP_ASYNC;
673 		else
674 			tp->tap_flags &= ~TAP_ASYNC;
675 		break;
676 
677 	case FIONREAD:
678 		*(int *)data = 0;
679 
680 		/* Take a look at devq first */
681 		IF_POLL(&tp->tap_devq, mb);
682 		if (mb == NULL)
683 			mb = ifq_poll(&ifp->if_snd);
684 
685 		if (mb != NULL) {
686 			for(; mb != NULL; mb = mb->m_next)
687 				*(int *)data += mb->m_len;
688 		}
689 		break;
690 
691 	case FIOSETOWN:
692 		error = fsetown(*(int *)data, &tp->tap_sigio);
693 		break;
694 
695 	case FIOGETOWN:
696 		*(int *)data = fgetown(tp->tap_sigio);
697 		break;
698 
699 	/* this is deprecated, FIOSETOWN should be used instead */
700 	case TIOCSPGRP:
701 		error = fsetown(-(*(int *)data), &tp->tap_sigio);
702 		break;
703 
704 	/* this is deprecated, FIOGETOWN should be used instead */
705 	case TIOCGPGRP:
706 		*(int *)data = -fgetown(tp->tap_sigio);
707 		break;
708 
709 	/* VMware/VMnet port ioctl's */
710 
711 	case SIOCGIFFLAGS:	/* get ifnet flags */
712 		bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
713 		break;
714 
715 	case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
716 		f = *(short *)data;
717 		f &= 0x0fff;
718 		f &= ~IFF_CANTCHANGE;
719 		f |= IFF_UP;
720 		ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
721 		break;
722 
723 	case OSIOCGIFADDR:	/* get MAC address of the remote side */
724 	case SIOCGIFADDR:
725 		bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
726 		break;
727 
728 	case SIOCSIFADDR:	/* set MAC address of the remote side */
729 		bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
730 		break;
731 
732 	default:
733 		error = ENOTTY;
734 		break;
735 	}
736 	ifnet_deserialize_all(ifp);
737 	return (error);
738 }
739 
740 
741 /*
742  * tapread
743  *
744  * The ops read interface - reads a packet at a time, or at
745  * least as much of a packet as can be read.
746  *
747  * Called from the fileops interface with nothing held.
748  *
749  * MPSAFE
750  */
751 static int
752 tapread(struct dev_read_args *ap)
753 {
754 	cdev_t dev = ap->a_head.a_dev;
755 	struct uio *uio = ap->a_uio;
756 	struct tap_softc	*tp = dev->si_drv1;
757 	struct ifnet		*ifp = &tp->tap_if;
758 	struct mbuf		*m0 = NULL;
759 	int			 error = 0, len;
760 
761 	TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));
762 
763 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
764 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
765 			 minor(tp->tap_dev), tp->tap_flags);
766 
767 		return (EHOSTDOWN);
768 	}
769 
770 	tp->tap_flags &= ~TAP_RWAIT;
771 
772 	/* sleep until we get a packet */
773 	do {
774 		ifnet_serialize_all(ifp);
775 		IF_DEQUEUE(&tp->tap_devq, m0);
776 		if (m0 == NULL) {
777 			if (ap->a_ioflag & IO_NDELAY) {
778 				ifnet_deserialize_all(ifp);
779 				return (EWOULDBLOCK);
780 			}
781 			tp->tap_flags |= TAP_RWAIT;
782 			crit_enter();
783 			tsleep_interlock(tp);
784 			ifnet_deserialize_all(ifp);
785 			error = tsleep(tp, PCATCH, "taprd", 0);
786 			crit_exit();
787 			if (error)
788 				return (error);
789 		} else {
790 			ifnet_deserialize_all(ifp);
791 		}
792 	} while (m0 == NULL);
793 
794 	BPF_MTAP(ifp, m0);
795 
796 	/* xfer packet to user space */
797 	while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
798 		len = min(uio->uio_resid, m0->m_len);
799 		if (len == 0)
800 			break;
801 
802 		error = uiomove(mtod(m0, caddr_t), len, uio);
803 		m0 = m_free(m0);
804 	}
805 
806 	if (m0 != NULL) {
807 		TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
808 			 minor(tp->tap_dev));
809 		m_freem(m0);
810 	}
811 
812 	return (error);
813 }
814 
815 /*
816  * tapwrite
817  *
818  * The ops write interface - an atomic write is a packet - or else!
819  *
820  * Called from the fileops interface with nothing held.
821  *
822  * MPSAFE
823  */
824 static int
825 tapwrite(struct dev_write_args *ap)
826 {
827 	cdev_t dev = ap->a_head.a_dev;
828 	struct uio *uio = ap->a_uio;
829 	struct tap_softc	*tp = dev->si_drv1;
830 	struct ifnet		*ifp = &tp->tap_if;
831 	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
832 	int		 	 error = 0, tlen, mlen;
833 
834 	TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));
835 
836 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
837 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
838 			 minor(tp->tap_dev), tp->tap_flags);
839 		return (EHOSTDOWN);
840 	}
841 
842 	if (uio->uio_resid == 0)
843 		return (0);
844 
845 	if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
846 		TAPDEBUG(ifp, "invalid packet len = %d, minor = %#x\n",
847 			 uio->uio_resid, minor(tp->tap_dev));
848 
849 		return (EIO);
850 	}
851 	tlen = uio->uio_resid;
852 
853 	/* get a header mbuf */
854 	MGETHDR(m, MB_DONTWAIT, MT_DATA);
855 	if (m == NULL)
856 		return (ENOBUFS);
857 	mlen = MHLEN;
858 
859 	top = 0;
860 	mp = &top;
861 	while ((error == 0) && (uio->uio_resid > 0)) {
862 		m->m_len = min(mlen, uio->uio_resid);
863 		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
864 		*mp = m;
865 		mp = &m->m_next;
866 		if (uio->uio_resid > 0) {
867 			MGET(m, MB_DONTWAIT, MT_DATA);
868 			if (m == NULL) {
869 				error = ENOBUFS;
870 				break;
871 			}
872 			mlen = MLEN;
873 		}
874 	}
875 	if (error) {
876 		ifp->if_ierrors ++;
877 		if (top)
878 			m_freem(top);
879 		return (error);
880 	}
881 
882 	top->m_pkthdr.len = tlen;
883 	top->m_pkthdr.rcvif = ifp;
884 
885 	/*
886 	 * Ethernet bridge and bpf are handled in ether_input
887 	 *
888 	 * adjust mbuf and give packet to the ether_input
889 	 */
890 	ifnet_serialize_all(ifp);
891 	ifp->if_input(ifp, top);
892 	ifp->if_ipackets ++; /* ibytes are counted in ether_input */
893 	ifnet_deserialize_all(ifp);
894 
895 	return (0);
896 }
897 
898 /*
899  * tappoll
900  *
901  * The poll interface, this is only useful on reads really. The write
902  * detect always returns true, write never blocks anyway, it either
903  * accepts the packet or drops it
904  *
905  * Called from the fileops interface with nothing held.
906  *
907  * MPSAFE
908  */
909 static int
910 tappoll(struct dev_poll_args *ap)
911 {
912 	cdev_t dev = ap->a_head.a_dev;
913 	struct tap_softc	*tp = dev->si_drv1;
914 	struct ifnet		*ifp = &tp->tap_if;
915 	int		 	 revents = 0;
916 
917 	TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
918 
919 	if (ap->a_events & (POLLIN | POLLRDNORM)) {
920 		if (!IF_QEMPTY(&tp->tap_devq)) {
921 			TAPDEBUG(ifp,
922 				 "has data in queue. minor = %#x\n",
923 				 minor(tp->tap_dev));
924 
925 			revents |= (ap->a_events & (POLLIN | POLLRDNORM));
926 		} else {
927 			TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
928 				 minor(tp->tap_dev));
929 
930 			get_mplock();
931 			selrecord(curthread, &tp->tap_rsel);
932 			rel_mplock();
933 		}
934 	}
935 
936 	if (ap->a_events & (POLLOUT | POLLWRNORM))
937 		revents |= (ap->a_events & (POLLOUT | POLLWRNORM));
938 	ap->a_events = revents;
939 	return (0);
940 }
941 
942 /*
943  * tapkqfilter - called from the fileops interface with nothing held
944  *
945  * MPSAFE
946  */
947 static int filt_tapread(struct knote *kn, long hint);
948 static void filt_tapdetach(struct knote *kn);
949 static struct filterops tapread_filtops =
950 	{ 1, NULL, filt_tapdetach, filt_tapread };
951 
952 static int
953 tapkqfilter(struct dev_kqfilter_args *ap)
954 {
955 	cdev_t dev = ap->a_head.a_dev;
956 	struct knote *kn = ap->a_kn;
957 	struct tap_softc *tp;
958 	struct klist *list;
959 	struct ifnet *ifp;
960 
961 	get_mplock();
962 	tp = dev->si_drv1;
963 	ifp = &tp->tap_if;
964 	ap->a_result =0;
965 
966 	switch(kn->kn_filter) {
967 	case EVFILT_READ:
968 		list = &tp->tap_rsel.si_note;
969 		kn->kn_fop = &tapread_filtops;
970 		kn->kn_hook = (void *)tp;
971 		break;
972 	case EVFILT_WRITE:
973 		/* fall through */
974 	default:
975 		ap->a_result = 1;
976 		rel_mplock();
977 		return(0);
978 	}
979 	crit_enter();
980 	SLIST_INSERT_HEAD(list, kn, kn_selnext);
981 	crit_exit();
982 	rel_mplock();
983 	return(0);
984 }
985 
986 static int
987 filt_tapread(struct knote *kn, long hint)
988 {
989 	struct tap_softc *tp = (void *)kn->kn_hook;
990 
991 	if (IF_QEMPTY(&tp->tap_devq) == 0)	/* XXX serializer */
992 		return(1);
993 	else
994 		return(0);
995 }
996 
997 static void
998 filt_tapdetach(struct knote *kn)
999 {
1000 	struct tap_softc *tp = (void *)kn->kn_hook;
1001 
1002 	SLIST_REMOVE(&tp->tap_rsel.si_note, kn, knote, kn_selnext);
1003 }
1004 
1005 static void
1006 tapifstop(struct tap_softc *tp, int clear_flags)
1007 {
1008 	struct ifnet *ifp = &tp->tap_if;
1009 
1010 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1011 	IF_DRAIN(&tp->tap_devq);
1012 	tp->tap_flags &= ~TAP_CLOSEDOWN;
1013 	if (clear_flags)
1014 		ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1015 }
1016 
1017 static void
1018 tapifflags(struct tap_softc *tp)
1019 {
1020 	struct ifnet *ifp = &tp->arpcom.ac_if;
1021 
1022 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1023 	if ((tp->tap_flags & TAP_VMNET) == 0) {
1024 		/*
1025 		 * Only for non-vmnet tap(4)
1026 		 */
1027 		if (ifp->if_flags & IFF_UP) {
1028 			if ((ifp->if_flags & IFF_RUNNING) == 0)
1029 				tapifinit(tp);
1030 		} else {
1031 			tapifstop(tp, 1);
1032 		}
1033 	} else {
1034 		/* XXX */
1035 	}
1036 }
1037