xref: /dragonfly/sys/net/tap/if_tap.c (revision 65d793b5)
1 /*
2  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * BASED ON:
27  * -------------------------------------------------------------------------
28  *
29  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30  * Nottingham University 1987.
31  */
32 
33 /*
34  * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
36  */
37 
38 #include "opt_inet.h"
39 #include "use_tap.h"
40 
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/proc.h>
50 #include <sys/priv.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <sys/systm.h>
56 #include <sys/ttycom.h>
57 #include <sys/uio.h>
58 #include <sys/vnode.h>
59 #include <sys/serialize.h>
60 
61 #include <sys/thread2.h>
62 #include <sys/mplock2.h>
63 
64 #include <net/bpf.h>
65 #include <net/ethernet.h>
66 #include <net/if.h>
67 #include <net/ifq_var.h>
68 #include <net/if_arp.h>
69 #include <net/if_clone.h>
70 #include <net/if_media.h>
71 #include <net/route.h>
72 #include <sys/devfs.h>
73 
74 #include <netinet/in.h>
75 
76 #include "if_tapvar.h"
77 #include "if_tap.h"
78 
#define TAP_IFFLAGS	(IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST)

/*
 * Number of tapN device nodes created up front at module load.  Units at
 * or above this count have their device node destroyed together with the
 * interface.
 */
#if NTAP <= 1
#define TAP_PREALLOCATED_UNITS	4
#else
#define TAP_PREALLOCATED_UNITS	NTAP
#endif

#define CDEV_NAME	"tap"

/*
 * Debug printf, active only while the tapdebug knob is non-zero.  Takes
 * the same arguments as if_printf().  Wrapped in do/while(0) so that the
 * expansion is a single statement and cannot capture an `else' that
 * follows the macro in the caller.
 */
#define TAPDEBUG(...)	do {						\
	if (tapdebug)							\
		if_printf(__VA_ARGS__);					\
} while (0)

#define TAP		"tap"
#define VMNET		"vmnet"
#define VMNET_DEV_MASK	0x00010000
93 
94 DEVFS_DECLARE_CLONE_BITMAP(tap);
95 
96 /* module */
97 static int 		tapmodevent	(module_t, int, void *);
98 
99 /* device */
100 static struct tap_softc *tapcreate(int, cdev_t, int);
101 static void		tapdestroy(struct tap_softc *);
102 
103 /* clone */
104 static int		tap_clone_create(struct if_clone *, int, caddr_t);
105 static int		tap_clone_destroy(struct ifnet *);
106 
107 
108 /* network interface */
109 static void		tapifstart	(struct ifnet *,
110 					 struct ifaltq_subque *);
111 static int		tapifioctl	(struct ifnet *, u_long, caddr_t,
112 					 struct ucred *);
113 static void		tapifinit	(void *);
114 static void		tapifstop(struct tap_softc *, int);
115 static void		tapifflags(struct tap_softc *);
116 
117 
118 /* character device */
119 static d_open_t		tapopen;
120 static d_clone_t	tapclone;
121 static d_close_t	tapclose;
122 static d_read_t		tapread;
123 static d_write_t	tapwrite;
124 static d_ioctl_t	tapioctl;
125 static d_kqfilter_t	tapkqfilter;
126 
/*
 * Character device switch shared by every tap device node.  The clone
 * handler (tapclone) is not part of this table; it is passed to
 * make_autoclone_dev() at module load.
 */
static struct dev_ops	tap_ops = {
	{ CDEV_NAME, 0, 0 },
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_kqfilter =	tapkqfilter
};
136 
/*
 * Driver-wide state.  taprefcnt is raised in tapopen() and dropped in
 * tapclose(); a positive value makes MOD_UNLOAD fail with EBUSY.
 * taplastunit is raised in tapcreate(), decremented in tapdestroy() and
 * otherwise only shows up in debug output.
 */
static int		taprefcnt = 0;		/* count of open devices */
static int		taplastunit = -1;	/* highest unit created  */
static int		tapdebug = 0;		/* debug flag            */
static int		tapuopen = 0;		/* skip root check on open() */
static int		tapuponopen = 0;	/* mark IFF_UP on open() */

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
/* Cloner behind "ifconfig tapN create/destroy". */
struct if_clone tap_cloner = IF_CLONE_INITIALIZER("tap",
			     tap_clone_create, tap_clone_destroy,
			     0, IF_MAXUNIT);
/* Every softc built by tapcreate() is linked here until tapdestroy(). */
static SLIST_HEAD(,tap_softc) tap_listhead =
	SLIST_HEAD_INITIALIZER(&tap_listhead);

/* tapdebug is exported twice: debug.if_tap_debug and net.link.tap.debug. */
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
	    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
	   "Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
	   "Bring interface up when /dev/tap is opened");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");

DEV_MODULE(if_tap, tapmodevent, NULL);
162 
163 /*
164  * tapmodevent
165  *
166  * module event handler
167  */
168 static int
169 tapmodevent(module_t mod, int type, void *data)
170 {
171 	static int attached = 0;
172 	struct tap_softc *tp, *ntp;
173 	int i;
174 
175 	switch (type) {
176 	case MOD_LOAD:
177 		if (attached)
178 			return (EEXIST);
179 
180 		make_autoclone_dev(&tap_ops, &DEVFS_CLONE_BITMAP(tap), tapclone,
181 				   UID_ROOT, GID_WHEEL, 0600, "tap");
182 		SLIST_INIT(&tap_listhead);
183 		if_clone_attach(&tap_cloner);
184 
185 		for (i = 0; i < TAP_PREALLOCATED_UNITS; ++i) {
186 			make_dev(&tap_ops, i, UID_ROOT, GID_WHEEL,
187 				 0600, "tap%d", i);
188 			devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(tap), i);
189 		}
190 
191 		attached = 1;
192 		break;
193 
194 	case MOD_UNLOAD:
195 		if (taprefcnt > 0)
196 			return (EBUSY);
197 
198 		if_clone_detach(&tap_cloner);
199 
200 		/* Maintain tap ifs in a local list */
201 		SLIST_FOREACH_MUTABLE(tp, &tap_listhead, tap_link, ntp)
202 			tapdestroy(tp);
203 
204 		attached = 0;
205 
206 		devfs_clone_handler_del("tap");
207 		dev_ops_remove_all(&tap_ops);
208 		devfs_clone_bitmap_uninit(&DEVFS_CLONE_BITMAP(tap));
209 		break;
210 
211 	default:
212 		return (EOPNOTSUPP);
213 	}
214 
215 	return (0);
216 } /* tapmodevent */
217 
218 
219 /*
220  * tapcreate - create or clone an interface
221  */
222 static struct tap_softc *
223 tapcreate(int unit, cdev_t dev, int flags)
224 {
225 	const char	*name = TAP;
226 	struct ifnet	*ifp;
227 	struct tap_softc *tp;
228 	uint8_t		ether_addr[ETHER_ADDR_LEN];
229 
230 	tp = kmalloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
231 	dev->si_drv1 = tp;
232 	tp->tap_dev = dev;
233 	tp->tap_unit = unit;
234 	tp->tap_flags |= flags;
235 
236 	reference_dev(dev);	/* tp association */
237 
238 	/* generate fake MAC address: 00 bd xx xx xx unit_no */
239 	ether_addr[0] = 0x00;
240 	ether_addr[1] = 0xbd;
241 	bcopy(&ticks, &ether_addr[2], 3);
242 	ether_addr[5] = (u_char)unit;
243 
244 	/* fill the rest and attach interface */
245 	ifp = &tp->tap_if;
246 	ifp->if_softc = tp;
247 
248 	if_initname(ifp, name, unit);
249 	if (unit > taplastunit)
250 		taplastunit = unit;
251 
252 	ifp->if_init = tapifinit;
253 	ifp->if_start = tapifstart;
254 	ifp->if_ioctl = tapifioctl;
255 	ifp->if_mtu = ETHERMTU;
256 	ifp->if_flags = TAP_IFFLAGS;
257 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
258 	ifq_set_ready(&ifp->if_snd);
259 
260 	ether_ifattach(ifp, ether_addr, NULL);
261 
262 	tp->tap_flags |= TAP_INITED;
263 	tp->tap_devq.ifq_maxlen = ifqmaxlen;
264 
265 	SLIST_INSERT_HEAD(&tap_listhead, tp, tap_link);
266 
267 	TAPDEBUG(ifp, "created. minor = %#x\n", minor(dev));
268 	return (tp);
269 }
270 
271 static
272 struct tap_softc *
273 tapfind(int unit)
274 {
275 	struct tap_softc *tp;
276 
277 	SLIST_FOREACH(tp, &tap_listhead, tap_link) {
278 		if (tp->tap_unit == unit)
279 			return(tp);
280 	}
281 	return (NULL);
282 }
283 
284 /*
285  * tap_clone_create:
286  *
287  * Create a new tap instance via ifconfig.
288  */
289 static int
290 tap_clone_create(struct if_clone *ifc __unused, int unit,
291     caddr_t param __unused)
292 {
293 	struct tap_softc *tp;
294 	cdev_t dev;
295 
296 	tp = tapfind(unit);
297 	if (tp == NULL) {
298 		if (!devfs_clone_bitmap_chk(&DEVFS_CLONE_BITMAP(tap), unit)) {
299 			devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(tap), unit);
300 			dev = make_dev(&tap_ops, unit, UID_ROOT, GID_WHEEL,
301 					   0600, "%s%d", TAP, unit);
302 		} else {
303 			dev = devfs_find_device_by_name("%s%d", TAP, unit);
304 		}
305 
306 		KKASSERT(dev != NULL);
307 		tp = tapcreate(unit, dev, TAP_MANUALMAKE);
308 	}
309 	tp->tap_flags |= TAP_CLONE;
310 	TAPDEBUG(&tp->tap_if, "clone created. minor = %#x tap_flags = 0x%x\n",
311 		 minor(tp->tap_dev), tp->tap_flags);
312 
313 	return (0);
314 }
315 
316 /*
317  * tapopen
318  *
319  * to open tunnel. must be superuser
320  */
321 static int
322 tapopen(struct dev_open_args *ap)
323 {
324 	cdev_t dev = NULL;
325 	struct tap_softc *tp = NULL;
326 	struct ifnet *ifp = NULL;
327 	int error;
328 
329 	if (tapuopen == 0 &&
330 	    (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)) != 0)
331 		return (error);
332 
333 	get_mplock();
334 	dev = ap->a_head.a_dev;
335 	tp = dev->si_drv1;
336 	if (tp == NULL)
337 		tp = tapcreate(minor(dev), dev, TAP_MANUALMAKE);
338 	if (tp->tap_flags & TAP_OPEN) {
339 		rel_mplock();
340 		return (EBUSY);
341 	}
342 	ifp = &tp->arpcom.ac_if;
343 
344 	if ((tp->tap_flags & TAP_CLONE) == 0) {
345 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
346 
347 		/* Announce the return of the interface. */
348 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
349 	}
350 
351 	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
352 
353 	if (curthread->td_proc)
354 		fsetown(curthread->td_proc->p_pid, &tp->tap_sigtd);
355 	tp->tap_flags |= TAP_OPEN;
356 	taprefcnt ++;
357 
358 	if (tapuponopen && (ifp->if_flags & IFF_UP) == 0) {
359 		crit_enter();
360 		if_up(ifp);
361 		crit_exit();
362 
363 		ifnet_serialize_all(ifp);
364 		tapifflags(tp);
365 		ifnet_deserialize_all(ifp);
366 
367 		tp->tap_flags |= TAP_CLOSEDOWN;
368 	}
369 
370 	TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
371 		 minor(tp->tap_dev), taprefcnt, taplastunit);
372 
373 	rel_mplock();
374 	return (0);
375 }
376 
377 static int
378 tapclone(struct dev_clone_args *ap)
379 {
380 	int unit;
381 
382 	unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(tap), 0);
383 	ap->a_dev = make_only_dev(&tap_ops, unit, UID_ROOT, GID_WHEEL,
384 				  0600, "%s%d", TAP, unit);
385 	tapcreate(unit, ap->a_dev, 0);
386 	return (0);
387 }
388 
389 /*
390  * tapclose
391  *
392  * close the device - mark i/f down & delete routing info
393  */
394 static int
395 tapclose(struct dev_close_args *ap)
396 {
397 	cdev_t dev = ap->a_head.a_dev;
398 	struct tap_softc *tp = dev->si_drv1;
399 	struct ifnet *ifp = &tp->tap_if;
400 	int clear_flags = 0;
401 
402 	get_mplock();
403 
404 	/* Junk all pending output */
405 	ifq_purge_all(&ifp->if_snd);
406 
407 	/*
408 	 * Do not bring the interface down, and do not anything with
409 	 * interface, if we are in VMnet mode. just close the device.
410 	 *
411 	 * If the interface is not cloned, we always bring it down.
412 	 *
413 	 * If the interface is cloned, then we bring it down during
414 	 * closing only if it was brought up during opening.
415 	 */
416 	if ((tp->tap_flags & TAP_VMNET) == 0 &&
417 	    ((tp->tap_flags & TAP_CLONE) == 0 ||
418 	     (tp->tap_flags & TAP_CLOSEDOWN))) {
419 		if (ifp->if_flags & IFF_UP)
420 			if_down(ifp);
421 		clear_flags = 1;
422 	}
423 	ifnet_serialize_all(ifp);
424 	tapifstop(tp, clear_flags);
425 	ifnet_deserialize_all(ifp);
426 
427 	if ((tp->tap_flags & TAP_CLONE) == 0) {
428 		if_purgeaddrs_nolink(ifp);
429 
430 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
431 
432 		/* Announce the departure of the interface. */
433 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
434 	}
435 
436 	funsetown(&tp->tap_sigio);
437 	tp->tap_sigio = NULL;
438 	KNOTE(&tp->tap_rkq.ki_note, 0);
439 
440 	tp->tap_flags &= ~TAP_OPEN;
441 	funsetown(&tp->tap_sigtd);
442 	tp->tap_sigtd = NULL;
443 
444 	taprefcnt --;
445 	if (taprefcnt < 0) {
446 		taprefcnt = 0;
447 		if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
448 			"set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
449 	}
450 
451 	TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
452 		 minor(tp->tap_dev), taprefcnt, taplastunit);
453 
454 	/*
455 	 * Only auto-destroy if the interface was not manually
456 	 * created.
457 	 */
458 	if ((tp->tap_flags & TAP_MANUALMAKE) == 0 &&
459 	    tp->tap_unit >= TAP_PREALLOCATED_UNITS) {
460 		tapdestroy(tp);
461 	}
462 
463 	rel_mplock();
464 	return (0);
465 }
466 
467 /*
468  * tapdestroy:
469  *
470  *	Destroy a tap instance.
471  */
472 static void
473 tapdestroy(struct tap_softc *tp)
474 {
475 	struct ifnet *ifp = &tp->arpcom.ac_if;
476 	cdev_t dev;
477 
478 	TAPDEBUG(ifp, "destroyed. minor = %#x, refcnt = %d, taplastunit = %d\n",
479 		 minor(tp->tap_dev), taprefcnt, taplastunit);
480 
481 	ifnet_serialize_all(ifp);
482 	tapifstop(tp, 1);
483 	ifnet_deserialize_all(ifp);
484 
485 	ether_ifdetach(ifp);
486 	SLIST_REMOVE(&tap_listhead, tp, tap_softc, tap_link);
487 
488 	dev = tp->tap_dev;
489 	tp->tap_dev = NULL;
490 	dev->si_drv1 = NULL;
491 
492 	release_dev(dev);	/* tp association */
493 
494 	/*
495 	 * Also destroy the cloned device
496 	 */
497 	if (tp->tap_unit >= TAP_PREALLOCATED_UNITS) {
498 		destroy_dev(dev);
499 		devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(tap), tp->tap_unit);
500 	}
501 
502 	kfree(tp, M_TAP);
503 
504 	taplastunit--;
505 }
506 
507 /*
508  * tap_clone_destroy:
509  *
510  *	Destroy a tap instance.
511  */
512 static int
513 tap_clone_destroy(struct ifnet *ifp)
514 {
515 	struct tap_softc *tp = ifp->if_softc;
516 
517 	if ((tp->tap_flags & TAP_CLONE) == 0)
518 		return ENXIO;
519 
520 	TAPDEBUG(&tp->tap_if, "clone destroyed. minor = %#x tap_flags = 0x%x\n",
521 		 minor(tp->tap_dev), tp->tap_flags);
522 	tapdestroy(tp);
523 
524 	return 0;
525 }
526 
527 /*
528  * tapifinit
529  *
530  * Network interface initialization function (called with if serializer held)
531  *
532  * MPSAFE
533  */
534 static void
535 tapifinit(void *xtp)
536 {
537 	struct tap_softc *tp = xtp;
538 	struct ifnet *ifp = &tp->tap_if;
539 	struct ifaltq_subque *ifsq = ifq_get_subq_default(&ifp->if_snd);
540 
541 	TAPDEBUG(ifp, "initializing, minor = %#x tap_flags = 0x%x\n",
542 		 minor(tp->tap_dev), tp->tap_flags);
543 
544 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
545 
546 	tapifstop(tp, 1);
547 
548 	ifp->if_flags |= IFF_RUNNING;
549 	ifsq_clr_oactive(ifsq);
550 
551 	/* attempt to start output */
552 	tapifstart(ifp, ifsq);
553 }
554 
555 
556 /*
557  * tapifioctl
558  *
559  * Process an ioctl request on network interface (called with if serializer
560  * held).
561  *
562  * MPSAFE
563  */
564 static int
565 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
566 {
567 	struct tap_softc 	*tp = (struct tap_softc *)(ifp->if_softc);
568 	struct ifstat		*ifs = NULL;
569 	struct ifmediareq	*ifmr = NULL;
570 	int			error = 0;
571 	int			dummy;
572 
573 	switch (cmd) {
574 		case SIOCSIFADDR:
575 		case SIOCGIFADDR:
576 		case SIOCSIFMTU:
577 			error = ether_ioctl(ifp, cmd, data);
578 			break;
579 
580 		case SIOCSIFFLAGS:
581 			tapifflags(tp);
582 			break;
583 
584 		case SIOCADDMULTI: /* XXX -- just like vmnet does */
585 		case SIOCDELMULTI:
586 			break;
587 
588 		case SIOCGIFMEDIA:
589 			/*
590 			 * The bridge code needs this when running the
591 			 * spanning tree protocol.
592 			 */
593 			ifmr = (struct ifmediareq *)data;
594 			dummy = ifmr->ifm_count;
595 			ifmr->ifm_count = 1;
596 			ifmr->ifm_status = IFM_AVALID;
597 			ifmr->ifm_active = IFM_ETHER;
598 			if (tp->tap_flags & TAP_OPEN)
599 				ifmr->ifm_status |= IFM_ACTIVE;
600 			ifmr->ifm_current = ifmr->ifm_active;
601 			if (dummy >= 1) {
602 				int media = IFM_ETHER;
603 				error = copyout(&media,
604 						ifmr->ifm_ulist,
605 						sizeof(int));
606 			}
607 			break;
608 
609 		case SIOCGIFSTATUS:
610 			ifs = (struct ifstat *)data;
611 			dummy = strlen(ifs->ascii);
612 			if ((tp->tap_flags & TAP_OPEN) &&
613 			    dummy < sizeof(ifs->ascii)) {
614 				if (tp->tap_sigtd && tp->tap_sigtd->sio_proc) {
615 				    ksnprintf(ifs->ascii + dummy,
616 					sizeof(ifs->ascii) - dummy,
617 					"\tOpened by pid %d\n",
618 					(int)tp->tap_sigtd->sio_proc->p_pid);
619 				} else {
620 				    ksnprintf(ifs->ascii + dummy,
621 					sizeof(ifs->ascii) - dummy,
622 					"\tOpened by <unknown>\n");
623 				}
624 			}
625 			break;
626 
627 		default:
628 			error = EINVAL;
629 			break;
630 	}
631 
632 	return (error);
633 }
634 
635 
636 /*
637  * tapifstart
638  *
639  * Queue packets from higher level ready to put out (called with if serializer
640  * held)
641  *
642  * MPSAFE
643  */
644 static void
645 tapifstart(struct ifnet *ifp, struct ifaltq_subque *ifsq)
646 {
647 	struct tap_softc *tp = ifp->if_softc;
648 	struct ifqueue *ifq;
649 	struct mbuf *m;
650 	int has_data = 0;
651 
652 	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
653 	TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));
654 
655 	/*
656 	 * do not junk pending output if we are in VMnet mode.
657 	 * XXX: can this do any harm because of queue overflow?
658 	 */
659 
660 	if (((tp->tap_flags & TAP_VMNET) == 0) &&
661 	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
662 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
663 			 minor(tp->tap_dev), tp->tap_flags);
664 		ifsq_purge(ifsq);
665 		return;
666 	}
667 
668 	ifsq_set_oactive(ifsq);
669 
670 	ifq = &tp->tap_devq;
671 	while ((m = ifsq_dequeue(ifsq, NULL)) != NULL) {
672 		if (IF_QFULL(ifq)) {
673 			IF_DROP(ifq);
674 			IFNET_STAT_INC(ifp, oerrors, 1);
675 			m_freem(m);
676 		} else {
677 			IF_ENQUEUE(ifq, m);
678 			IFNET_STAT_INC(ifp, opackets, 1);
679 			has_data = 1;
680 		}
681 	}
682 
683 	if (has_data) {
684 		if (tp->tap_flags & TAP_RWAIT) {
685 			tp->tap_flags &= ~TAP_RWAIT;
686 			wakeup((caddr_t)tp);
687 		}
688 
689 		KNOTE(&tp->tap_rkq.ki_note, 0);
690 
691 		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
692 			get_mplock();
693 			pgsigio(tp->tap_sigio, SIGIO, 0);
694 			rel_mplock();
695 		}
696 	}
697 
698 	ifsq_clr_oactive(ifsq);
699 }
700 
701 
702 /*
703  * tapioctl
704  *
705  * The ops interface is now pretty minimal.  Called via fileops with nothing
706  * held.
707  *
708  * MPSAFE
709  */
710 static int
711 tapioctl(struct dev_ioctl_args *ap)
712 {
713 	cdev_t dev = ap->a_head.a_dev;
714 	caddr_t data = ap->a_data;
715 	struct tap_softc	*tp = dev->si_drv1;
716 	struct ifnet		*ifp = &tp->tap_if;
717  	struct tapinfo		*tapp = NULL;
718 	struct mbuf *mb;
719 	short f;
720 	int error;
721 
722 	ifnet_serialize_all(ifp);
723 	error = 0;
724 
725 	switch (ap->a_cmd) {
726 	case TAPSIFINFO:
727 		tapp = (struct tapinfo *)data;
728 		ifp->if_mtu = tapp->mtu;
729 		ifp->if_type = tapp->type;
730 		ifp->if_baudrate = tapp->baudrate;
731 		break;
732 
733 	case TAPGIFINFO:
734 		tapp = (struct tapinfo *)data;
735 		tapp->mtu = ifp->if_mtu;
736 		tapp->type = ifp->if_type;
737 		tapp->baudrate = ifp->if_baudrate;
738 		break;
739 
740 	case TAPSDEBUG:
741 		tapdebug = *(int *)data;
742 		break;
743 
744 	case TAPGDEBUG:
745 		*(int *)data = tapdebug;
746 		break;
747 
748 	case FIOASYNC:
749 		if (*(int *)data)
750 			tp->tap_flags |= TAP_ASYNC;
751 		else
752 			tp->tap_flags &= ~TAP_ASYNC;
753 		break;
754 
755 	case FIONREAD:
756 		*(int *)data = 0;
757 
758 		/* Take a look at devq first */
759 		IF_POLL(&tp->tap_devq, mb);
760 		if (mb == NULL)
761 			mb = ifsq_poll(ifq_get_subq_default(&ifp->if_snd));
762 
763 		if (mb != NULL) {
764 			for(; mb != NULL; mb = mb->m_next)
765 				*(int *)data += mb->m_len;
766 		}
767 		break;
768 
769 	case FIOSETOWN:
770 		error = fsetown(*(int *)data, &tp->tap_sigio);
771 		break;
772 
773 	case FIOGETOWN:
774 		*(int *)data = fgetown(&tp->tap_sigio);
775 		break;
776 
777 	/* this is deprecated, FIOSETOWN should be used instead */
778 	case TIOCSPGRP:
779 		error = fsetown(-(*(int *)data), &tp->tap_sigio);
780 		break;
781 
782 	/* this is deprecated, FIOGETOWN should be used instead */
783 	case TIOCGPGRP:
784 		*(int *)data = -fgetown(&tp->tap_sigio);
785 		break;
786 
787 	/* VMware/VMnet port ioctl's */
788 
789 	case SIOCGIFFLAGS:	/* get ifnet flags */
790 		bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
791 		break;
792 
793 	case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
794 		f = *(short *)data;
795 		f &= 0x0fff;
796 		f &= ~IFF_CANTCHANGE;
797 		f |= IFF_UP;
798 		ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
799 		break;
800 
801 	case OSIOCGIFADDR:	/* get MAC address of the remote side */
802 	case SIOCGIFADDR:
803 		bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
804 		break;
805 
806 	case SIOCSIFADDR:	/* set MAC address of the remote side */
807 		bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
808 		break;
809 
810 	default:
811 		error = ENOTTY;
812 		break;
813 	}
814 	ifnet_deserialize_all(ifp);
815 	return (error);
816 }
817 
818 
819 /*
820  * tapread
821  *
822  * The ops read interface - reads a packet at a time, or at
823  * least as much of a packet as can be read.
824  *
825  * Called from the fileops interface with nothing held.
826  *
827  * MPSAFE
828  */
829 static int
830 tapread(struct dev_read_args *ap)
831 {
832 	cdev_t dev = ap->a_head.a_dev;
833 	struct uio *uio = ap->a_uio;
834 	struct tap_softc	*tp = dev->si_drv1;
835 	struct ifnet		*ifp = &tp->tap_if;
836 	struct mbuf		*m0 = NULL;
837 	int			 error = 0, len;
838 
839 	TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));
840 
841 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
842 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
843 			 minor(tp->tap_dev), tp->tap_flags);
844 
845 		return (EHOSTDOWN);
846 	}
847 
848 	tp->tap_flags &= ~TAP_RWAIT;
849 
850 	/* sleep until we get a packet */
851 	do {
852 		ifnet_serialize_all(ifp);
853 		IF_DEQUEUE(&tp->tap_devq, m0);
854 		if (m0 == NULL) {
855 			if (ap->a_ioflag & IO_NDELAY) {
856 				ifnet_deserialize_all(ifp);
857 				return (EWOULDBLOCK);
858 			}
859 			tp->tap_flags |= TAP_RWAIT;
860 			tsleep_interlock(tp, PCATCH);
861 			ifnet_deserialize_all(ifp);
862 			error = tsleep(tp, PCATCH | PINTERLOCKED, "taprd", 0);
863 			if (error)
864 				return (error);
865 		} else {
866 			ifnet_deserialize_all(ifp);
867 		}
868 	} while (m0 == NULL);
869 
870 	BPF_MTAP(ifp, m0);
871 
872 	/* xfer packet to user space */
873 	while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
874 		len = (int)szmin(uio->uio_resid, m0->m_len);
875 		if (len == 0)
876 			break;
877 
878 		error = uiomove(mtod(m0, caddr_t), (size_t)len, uio);
879 		m0 = m_free(m0);
880 	}
881 
882 	if (m0 != NULL) {
883 		TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
884 			 minor(tp->tap_dev));
885 		m_freem(m0);
886 	}
887 
888 	return (error);
889 }
890 
891 /*
892  * tapwrite
893  *
894  * The ops write interface - an atomic write is a packet - or else!
895  *
896  * Called from the fileops interface with nothing held.
897  *
898  * MPSAFE
899  */
900 static int
901 tapwrite(struct dev_write_args *ap)
902 {
903 	cdev_t dev = ap->a_head.a_dev;
904 	struct uio *uio = ap->a_uio;
905 	struct tap_softc	*tp = dev->si_drv1;
906 	struct ifnet		*ifp = &tp->tap_if;
907 	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
908 	int			error;
909 	size_t			tlen, mlen;
910 
911 	TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));
912 
913 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
914 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
915 			 minor(tp->tap_dev), tp->tap_flags);
916 		return (EHOSTDOWN);
917 	}
918 
919 	if (uio->uio_resid == 0)
920 		return (0);
921 
922 	if (uio->uio_resid > TAPMRU) {
923 		TAPDEBUG(ifp, "invalid packet len = %zu, minor = %#x\n",
924 			 uio->uio_resid, minor(tp->tap_dev));
925 
926 		return (EIO);
927 	}
928 	tlen = uio->uio_resid;
929 
930 	/* get a header mbuf */
931 	MGETHDR(m, MB_WAIT, MT_DATA);
932 	if (m == NULL)
933 		return (ENOBUFS);
934 	mlen = MHLEN;
935 
936 	top = NULL;
937 	mp = &top;
938 	error = 0;
939 
940 	while (error == 0 && uio->uio_resid > 0) {
941 		m->m_len = (int)szmin(mlen, uio->uio_resid);
942 		error = uiomove(mtod(m, caddr_t), (size_t)m->m_len, uio);
943 		*mp = m;
944 		mp = &m->m_next;
945 		if (uio->uio_resid > 0) {
946 			MGET(m, MB_WAIT, MT_DATA);
947 			if (m == NULL) {
948 				error = ENOBUFS;
949 				break;
950 			}
951 			mlen = MLEN;
952 		}
953 	}
954 	if (error) {
955 		IFNET_STAT_INC(ifp, ierrors, 1);
956 		if (top)
957 			m_freem(top);
958 		return (error);
959 	}
960 
961 	top->m_pkthdr.len = (int)tlen;
962 	top->m_pkthdr.rcvif = ifp;
963 
964 	/*
965 	 * Ethernet bridge and bpf are handled in ether_input
966 	 *
967 	 * adjust mbuf and give packet to the ether_input
968 	 */
969 	ifnet_serialize_all(ifp);
970 	ifp->if_input(ifp, top);
971 	IFNET_STAT_INC(ifp, ipackets, 1);/* ibytes are counted in ether_input */
972 	ifnet_deserialize_all(ifp);
973 
974 	return (0);
975 }
976 
977 /*
978  * tapkqfilter - called from the fileops interface with nothing held
979  *
980  * MPSAFE
981  */
982 static int filt_tapread(struct knote *kn, long hint);
983 static int filt_tapwrite(struct knote *kn, long hint);
984 static void filt_tapdetach(struct knote *kn);
985 static struct filterops tapread_filtops =
986 	{ FILTEROP_ISFD, NULL, filt_tapdetach, filt_tapread };
987 static struct filterops tapwrite_filtops =
988 	{ FILTEROP_ISFD, NULL, filt_tapdetach, filt_tapwrite };
989 
990 static int
991 tapkqfilter(struct dev_kqfilter_args *ap)
992 {
993 	cdev_t dev = ap->a_head.a_dev;
994 	struct knote *kn = ap->a_kn;
995 	struct tap_softc *tp;
996 	struct klist *list;
997 
998 	tp = dev->si_drv1;
999 	list = &tp->tap_rkq.ki_note;
1000 	ap->a_result =0;
1001 
1002 	switch(kn->kn_filter) {
1003 	case EVFILT_READ:
1004 		kn->kn_fop = &tapread_filtops;
1005 		kn->kn_hook = (void *)tp;
1006 		break;
1007 	case EVFILT_WRITE:
1008 		kn->kn_fop = &tapwrite_filtops;
1009 		kn->kn_hook = (void *)tp;
1010 		break;
1011 	default:
1012 		ap->a_result = EOPNOTSUPP;
1013 		return(0);
1014 	}
1015 
1016 	knote_insert(list, kn);
1017 	return(0);
1018 }
1019 
1020 static int
1021 filt_tapread(struct knote *kn, long hint)
1022 {
1023 	struct tap_softc *tp = (void *)kn->kn_hook;
1024 
1025 	if (IF_QEMPTY(&tp->tap_devq) == 0)	/* XXX serializer */
1026 		return(1);
1027 	else
1028 		return(0);
1029 }
1030 
/*
 * filt_tapwrite - write filter: the device never blocks writers, so the
 * event is reported as ready unconditionally.
 */
static int
filt_tapwrite(struct knote *kn, long hint)
{
	(void)kn;
	(void)hint;

	return (1);
}
1037 
1038 static void
1039 filt_tapdetach(struct knote *kn)
1040 {
1041 	struct tap_softc *tp = (void *)kn->kn_hook;
1042 
1043 	knote_remove(&tp->tap_rkq.ki_note, kn);
1044 }
1045 
1046 static void
1047 tapifstop(struct tap_softc *tp, int clear_flags)
1048 {
1049 	struct ifnet *ifp = &tp->tap_if;
1050 
1051 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1052 	IF_DRAIN(&tp->tap_devq);
1053 	tp->tap_flags &= ~TAP_CLOSEDOWN;
1054 	if (clear_flags) {
1055 		ifp->if_flags &= ~IFF_RUNNING;
1056 		ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));
1057 	}
1058 }
1059 
1060 static void
1061 tapifflags(struct tap_softc *tp)
1062 {
1063 	struct ifnet *ifp = &tp->arpcom.ac_if;
1064 
1065 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1066 	if ((tp->tap_flags & TAP_VMNET) == 0) {
1067 		/*
1068 		 * Only for non-vmnet tap(4)
1069 		 */
1070 		if (ifp->if_flags & IFF_UP) {
1071 			if ((ifp->if_flags & IFF_RUNNING) == 0)
1072 				tapifinit(tp);
1073 		} else {
1074 			tapifstop(tp, 1);
1075 		}
1076 	} else {
1077 		/* XXX */
1078 	}
1079 }
1080