xref: /dragonfly/sys/net/tap/if_tap.c (revision dca3c15d)
1 /*
2  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * BASED ON:
27  * -------------------------------------------------------------------------
28  *
29  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30  * Nottingham University 1987.
31  */
32 
33 /*
34  * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35  * $DragonFly: src/sys/net/tap/if_tap.c,v 1.41 2008/09/05 17:03:15 dillon Exp $
36  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
37  */
38 
39 #include "opt_inet.h"
40 
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/poll.h>
50 #include <sys/proc.h>
51 #include <sys/priv.h>
52 #include <sys/signalvar.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57 #include <sys/thread2.h>
58 #include <sys/ttycom.h>
59 #include <sys/uio.h>
60 #include <sys/vnode.h>
61 #include <sys/serialize.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/ifq_var.h>
67 #include <net/if_arp.h>
68 #include <net/if_clone.h>
69 #include <net/route.h>
70 #include <sys/devfs.h>
71 
72 #include <netinet/in.h>
73 
74 #include "if_tapvar.h"
75 #include "if_tap.h"
76 
/* Interface flags assigned to every tap interface by tapcreate(). */
#define TAP_IFFLAGS	(IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST)

/*
 * Units 0 .. TAP_PREALLOCATED_UNITS-1 get their device nodes created at
 * module load; tapclose()/tapdestroy() only tear down the device node for
 * units at or above this value.
 */
#define TAP_PREALLOCATED_UNITS	4

#define CDEV_NAME	"tap"
#define CDEV_MAJOR	149
/*
 * Debug printf.  Expands to an unbraced `if (tapdebug) if_printf', so the
 * message is emitted only while tapdebug is non-zero.  Always use it as a
 * complete statement: an `else' written right after it would bind to the
 * hidden `if'.
 */
#define TAPDEBUG	if (tapdebug) if_printf

/* Interface / device name prefix used for if_initname() and make_dev(). */
#define TAP		"tap"
/* NOTE(review): VMNET and VMNET_DEV_MASK are not referenced in this file. */
#define VMNET		"vmnet"
#define VMNET_DEV_MASK	0x00010000

/* Unit-number bitmap shared by the devfs autoclone handler and if_clone. */
DEVFS_DECLARE_CLONE_BITMAP(tap);

/* module */
static int 		tapmodevent	(module_t, int, void *);

/* device */
static struct tap_softc *tapcreate(int, cdev_t);
static void		tapdestroy(struct tap_softc *);

/* clone */
static int		tap_clone_create(struct if_clone *, int);
static void		tap_clone_destroy(struct ifnet *);


/* network interface */
static void		tapifstart	(struct ifnet *);
static int		tapifioctl	(struct ifnet *, u_long, caddr_t,
					 struct ucred *);
static void		tapifinit	(void *);
static void		tapifstop(struct tap_softc *, int);
static void		tapifflags(struct tap_softc *);

/* character device */
static d_open_t		tapopen;
static d_clone_t	tapclone;
static d_close_t	tapclose;
static d_read_t		tapread;
static d_write_t	tapwrite;
static d_ioctl_t	tapioctl;
static d_poll_t		tappoll;
static d_kqfilter_t	tapkqfilter;

/* Character device entry points for /dev/tapN. */
static struct dev_ops	tap_ops = {
	{ CDEV_NAME, CDEV_MAJOR, 0 },
	.d_open =	tapopen,
	.d_close =	tapclose,
	.d_read =	tapread,
	.d_write =	tapwrite,
	.d_ioctl =	tapioctl,
	.d_poll =	tappoll,
	.d_kqfilter =	tapkqfilter
};

static int		taprefcnt = 0;		/* open device count; MOD_UNLOAD refuses while > 0 */
static int		taplastunit = -1;	/* max. unit number; raised in tapcreate(), adjusted in tapdestroy() */
static int		tapdebug = 0;		/* debug flag; gates TAPDEBUG output */
static int		tapuopen = 0;		/* non-zero: tapopen() skips the PRIV_ROOT check */
static int		tapuponopen = 0;	/* non-zero: tapopen() brings the interface IFF_UP */

MALLOC_DECLARE(M_TAP);
MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
/* if_clone hooks, registered in tapmodevent() ("Create a new tap instance via ifconfig"). */
struct if_clone tap_cloner = IF_CLONE_INITIALIZER("tap",
			     tap_clone_create, tap_clone_destroy,
			     0, IF_MAXUNIT);
/* Every softc created by tapcreate() is linked here until tapdestroy(). */
static SLIST_HEAD(,tap_softc) tap_listhead =
	SLIST_HEAD_INITIALIZER(&tap_listhead);

/* tapdebug is exported twice: debug.if_tap_debug and net.link.tap.debug. */
SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
	    "Ethernet tunnel software network interface");
SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
	   "Allow user to open /dev/tap (based on node permissions)");
SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
	   "Bring interface up when /dev/tap is opened");
SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");

DEV_MODULE(if_tap, tapmodevent, NULL);
157 
158 /*
159  * tapmodevent
160  *
161  * module event handler
162  */
163 static int
164 tapmodevent(module_t mod, int type, void *data)
165 {
166 	static int attached = 0;
167 	struct tap_softc *tp, *ntp;
168 	int i;
169 
170 	switch (type) {
171 	case MOD_LOAD:
172 		if (attached)
173 			return (EEXIST);
174 
175 		make_autoclone_dev(&tap_ops, &DEVFS_CLONE_BITMAP(tap), tapclone,
176 				   UID_ROOT, GID_WHEEL, 0600, "tap");
177 		SLIST_INIT(&tap_listhead);
178 		if_clone_attach(&tap_cloner);
179 
180 		for (i = 0; i < TAP_PREALLOCATED_UNITS; ++i) {
181 			make_dev(&tap_ops, i, UID_ROOT, GID_WHEEL,
182 				 0600, "tap%d", i);
183 			devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(tap), i);
184 		}
185 
186 		attached = 1;
187 		break;
188 
189 	case MOD_UNLOAD:
190 		if (taprefcnt > 0)
191 			return (EBUSY);
192 
193 		if_clone_detach(&tap_cloner);
194 
195 		/* Maintain tap ifs in a local list */
196 		SLIST_FOREACH_MUTABLE(tp, &tap_listhead, tap_link, ntp)
197 			tapdestroy(tp);
198 
199 		attached = 0;
200 
201 		devfs_clone_handler_del("tap");
202 		dev_ops_remove_all(&tap_ops);
203 		devfs_clone_bitmap_uninit(&DEVFS_CLONE_BITMAP(tap));
204 		break;
205 
206 	default:
207 		return (EOPNOTSUPP);
208 	}
209 
210 	return (0);
211 } /* tapmodevent */
212 
213 
214 /*
215  * tapcreate - create or clone an interface
216  */
217 static struct tap_softc *
218 tapcreate(int unit, cdev_t dev)
219 {
220 	const char	*name = TAP;
221 	struct ifnet	*ifp;
222 	struct tap_softc *tp;
223 	uint8_t		ether_addr[ETHER_ADDR_LEN];
224 
225 	tp = kmalloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
226 	dev->si_drv1 = tp;
227 	tp->tap_dev = dev;
228 	tp->tap_unit = unit;
229 
230 	reference_dev(dev);	/* tp association */
231 
232 	/* generate fake MAC address: 00 bd xx xx xx unit_no */
233 	ether_addr[0] = 0x00;
234 	ether_addr[1] = 0xbd;
235 	bcopy(&ticks, &ether_addr[2], 3);
236 	ether_addr[5] = (u_char)unit;
237 
238 	/* fill the rest and attach interface */
239 	ifp = &tp->tap_if;
240 	ifp->if_softc = tp;
241 
242 	if_initname(ifp, name, unit);
243 	if (unit > taplastunit)
244 		taplastunit = unit;
245 
246 	ifp->if_init = tapifinit;
247 	ifp->if_start = tapifstart;
248 	ifp->if_ioctl = tapifioctl;
249 	ifp->if_mtu = ETHERMTU;
250 	ifp->if_flags = TAP_IFFLAGS;
251 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
252 	ifq_set_ready(&ifp->if_snd);
253 
254 	ether_ifattach(ifp, ether_addr, NULL);
255 
256 	tp->tap_flags |= TAP_INITED;
257 	tp->tap_devq.ifq_maxlen = ifqmaxlen;
258 
259 	SLIST_INSERT_HEAD(&tap_listhead, tp, tap_link);
260 
261 	TAPDEBUG(ifp, "created. minor = %#x\n", minor(dev));
262 	return (tp);
263 }
264 
265 static
266 struct tap_softc *
267 tapfind(int unit)
268 {
269 	struct tap_softc *tp;
270 
271 	SLIST_FOREACH(tp, &tap_listhead, tap_link) {
272 		if (tp->tap_unit == unit)
273 			return(tp);
274 	}
275 	return (NULL);
276 }
277 
278 /*
279  * tap_clone_create:
280  *
281  * Create a new tap instance via ifconfig.
282  */
283 static int
284 tap_clone_create(struct if_clone *ifc __unused, int unit)
285 {
286 	struct tap_softc *tp;
287 	cdev_t dev;
288 
289 	tp = tapfind(unit);
290 	if (tp == NULL) {
291 		if (!devfs_clone_bitmap_chk(&DEVFS_CLONE_BITMAP(tap), unit)) {
292 			devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(tap), unit);
293 			dev = make_dev(&tap_ops, unit, UID_ROOT, GID_WHEEL,
294 					   0600, "%s%d", TAP, unit);
295 		} else {
296 			dev = devfs_find_device_by_name("%s%d", TAP, unit);
297 		}
298 
299 		KKASSERT(dev != NULL);
300 		tp = tapcreate(unit, dev);
301 	}
302 	tp->tap_flags |= TAP_CLONE;
303 	TAPDEBUG(&tp->tap_if, "clone created. minor = %#x tap_flags = 0x%x\n",
304 		 minor(tp->tap_dev), tp->tap_flags);
305 
306 	return (0);
307 }
308 
309 /*
310  * tapopen
311  *
312  * to open tunnel. must be superuser
313  */
314 static int
315 tapopen(struct dev_open_args *ap)
316 {
317 	cdev_t dev = NULL;
318 	struct tap_softc *tp = NULL;
319 	struct ifnet *ifp = NULL;
320 	int error;
321 
322 	if (tapuopen == 0 &&
323 	    (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)) != 0)
324 		return (error);
325 
326 	get_mplock();
327 	dev = ap->a_head.a_dev;
328 	tp = dev->si_drv1;
329 	if (tp == NULL)
330 		tp = tapcreate(minor(dev), dev);
331 	if (tp->tap_flags & TAP_OPEN) {
332 		rel_mplock();
333 		return (EBUSY);
334 	}
335 	ifp = &tp->arpcom.ac_if;
336 
337 	if ((tp->tap_flags & TAP_CLONE) == 0) {
338 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
339 
340 		/* Announce the return of the interface. */
341 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
342 	}
343 
344 	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
345 
346 	if (curthread->td_proc)
347 		fsetown(curthread->td_proc->p_pid, &tp->tap_sigtd);
348 	tp->tap_flags |= TAP_OPEN;
349 	taprefcnt ++;
350 
351 	if (tapuponopen && (ifp->if_flags & IFF_UP) == 0) {
352 		crit_enter();
353 		if_up(ifp);
354 		crit_exit();
355 
356 		ifnet_serialize_all(ifp);
357 		tapifflags(tp);
358 		ifnet_deserialize_all(ifp);
359 
360 		tp->tap_flags |= TAP_CLOSEDOWN;
361 	}
362 
363 	TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
364 		 minor(tp->tap_dev), taprefcnt, taplastunit);
365 
366 	rel_mplock();
367 	return (0);
368 }
369 
370 static int
371 tapclone(struct dev_clone_args *ap)
372 {
373 	int unit;
374 
375 	unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(tap), 0);
376 	ap->a_dev = make_only_dev(&tap_ops, unit, UID_ROOT, GID_WHEEL,
377 				  0600, "%s%d", TAP, unit);
378 	tapcreate(unit, ap->a_dev);
379 	return (0);
380 }
381 
382 /*
383  * tapclose
384  *
385  * close the device - mark i/f down & delete routing info
386  */
387 static int
388 tapclose(struct dev_close_args *ap)
389 {
390 	cdev_t dev = ap->a_head.a_dev;
391 	struct tap_softc *tp = dev->si_drv1;
392 	struct ifnet *ifp = &tp->tap_if;
393 	int clear_flags = 0;
394 
395 	get_mplock();
396 
397 	/* Junk all pending output */
398 	ifq_purge(&ifp->if_snd);
399 
400 	/*
401 	 * Do not bring the interface down, and do not anything with
402 	 * interface, if we are in VMnet mode. just close the device.
403 	 *
404 	 * If the interface is not cloned, we always bring it down.
405 	 *
406 	 * If the interface is cloned, then we bring it down during
407 	 * closing only if it was brought up during opening.
408 	 */
409 	if ((tp->tap_flags & TAP_VMNET) == 0 &&
410 	    ((tp->tap_flags & TAP_CLONE) == 0 ||
411 	     (tp->tap_flags & TAP_CLOSEDOWN))) {
412 		if (ifp->if_flags & IFF_UP)
413 			if_down(ifp);
414 		clear_flags = 1;
415 	}
416 	ifnet_serialize_all(ifp);
417 	tapifstop(tp, clear_flags);
418 	ifnet_deserialize_all(ifp);
419 
420 	if ((tp->tap_flags & TAP_CLONE) == 0) {
421 		if_purgeaddrs_nolink(ifp);
422 
423 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
424 
425 		/* Announce the departure of the interface. */
426 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
427 	}
428 
429 	funsetown(tp->tap_sigio);
430 	tp->tap_sigio = NULL;
431 	selwakeup(&tp->tap_rsel);
432 
433 	tp->tap_flags &= ~TAP_OPEN;
434 	funsetown(tp->tap_sigtd);
435 	tp->tap_sigtd = NULL;
436 
437 	taprefcnt --;
438 	if (taprefcnt < 0) {
439 		taprefcnt = 0;
440 		if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
441 			"set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
442 	}
443 
444 	TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
445 		 minor(tp->tap_dev), taprefcnt, taplastunit);
446 
447 	if (tp->tap_unit >= TAP_PREALLOCATED_UNITS)
448 		tapdestroy(tp);
449 
450 	rel_mplock();
451 	return (0);
452 }
453 
454 /*
455  * tapdestroy:
456  *
457  *	Destroy a tap instance.
458  */
459 static void
460 tapdestroy(struct tap_softc *tp)
461 {
462 	struct ifnet *ifp = &tp->arpcom.ac_if;
463 	cdev_t dev;
464 
465 	TAPDEBUG(ifp, "destroyed. minor = %#x, refcnt = %d, taplastunit = %d\n",
466 		 minor(tp->tap_dev), taprefcnt, taplastunit);
467 
468 	ifnet_serialize_all(ifp);
469 	tapifstop(tp, 1);
470 	ifnet_deserialize_all(ifp);
471 
472 	ether_ifdetach(ifp);
473 	SLIST_REMOVE(&tap_listhead, tp, tap_softc, tap_link);
474 
475 	dev = tp->tap_dev;
476 	tp->tap_dev = NULL;
477 	dev->si_drv1 = NULL;
478 
479 	release_dev(dev);	/* tp association */
480 
481 	/*
482 	 * Also destroy the cloned device
483 	 */
484 	if (tp->tap_unit >= TAP_PREALLOCATED_UNITS) {
485 		destroy_dev(dev);
486 		devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(tap), tp->tap_unit);
487 	}
488 
489 	kfree(tp, M_TAP);
490 
491 	taplastunit--;
492 }
493 
494 /*
495  * tap_clone_destroy:
496  *
497  *	Destroy a tap instance.
498  */
499 static void
500 tap_clone_destroy(struct ifnet *ifp)
501 {
502 	struct tap_softc *tp = ifp->if_softc;
503 
504 	TAPDEBUG(&tp->tap_if, "clone destroyed. minor = %#x tap_flags = 0x%x\n",
505 		 minor(tp->tap_dev), tp->tap_flags);
506 	if (tp->tap_flags & TAP_CLONE)
507 		tapdestroy(tp);
508 }
509 
510 /*
511  * tapifinit
512  *
513  * Network interface initialization function (called with if serializer held)
514  *
515  * MPSAFE
516  */
517 static void
518 tapifinit(void *xtp)
519 {
520 	struct tap_softc *tp = xtp;
521 	struct ifnet *ifp = &tp->tap_if;
522 
523 	TAPDEBUG(ifp, "initializing, minor = %#x tap_flags = 0x%x\n",
524 		 minor(tp->tap_dev), tp->tap_flags);
525 
526 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
527 
528 	tapifstop(tp, 1);
529 
530 	ifp->if_flags |= IFF_RUNNING;
531 	ifp->if_flags &= ~IFF_OACTIVE;
532 
533 	/* attempt to start output */
534 	tapifstart(ifp);
535 }
536 
537 
538 /*
539  * tapifioctl
540  *
541  * Process an ioctl request on network interface (called with if serializer
542  * held).
543  *
544  * MPSAFE
545  */
546 static int
547 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
548 {
549 	struct tap_softc 	*tp = (struct tap_softc *)(ifp->if_softc);
550 	struct ifstat		*ifs = NULL;
551 	int			 dummy;
552 
553 	switch (cmd) {
554 		case SIOCSIFADDR:
555 		case SIOCGIFADDR:
556 		case SIOCSIFMTU:
557 			dummy = ether_ioctl(ifp, cmd, data);
558 			return (dummy);
559 
560 		case SIOCSIFFLAGS:
561 			tapifflags(tp);
562 			break;
563 
564 		case SIOCADDMULTI: /* XXX -- just like vmnet does */
565 		case SIOCDELMULTI:
566 			break;
567 
568 		case SIOCGIFSTATUS:
569 			ifs = (struct ifstat *)data;
570 			dummy = strlen(ifs->ascii);
571 			if ((tp->tap_flags & TAP_OPEN) &&
572 			    dummy < sizeof(ifs->ascii)) {
573 				if (tp->tap_sigtd && tp->tap_sigtd->sio_proc) {
574 				    ksnprintf(ifs->ascii + dummy,
575 					sizeof(ifs->ascii) - dummy,
576 					"\tOpened by pid %d\n",
577 					(int)tp->tap_sigtd->sio_proc->p_pid);
578 				} else {
579 				    ksnprintf(ifs->ascii + dummy,
580 					sizeof(ifs->ascii) - dummy,
581 					"\tOpened by <unknown>\n");
582 				}
583 			}
584 			break;
585 
586 		default:
587 			return (EINVAL);
588 	}
589 
590 	return (0);
591 }
592 
593 
594 /*
595  * tapifstart
596  *
597  * Queue packets from higher level ready to put out (called with if serializer
598  * held)
599  *
600  * MPSAFE
601  */
602 static void
603 tapifstart(struct ifnet *ifp)
604 {
605 	struct tap_softc *tp = ifp->if_softc;
606 	struct ifqueue *ifq;
607 	struct mbuf *m;
608 	int has_data = 0;
609 
610 	TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));
611 
612 	/*
613 	 * do not junk pending output if we are in VMnet mode.
614 	 * XXX: can this do any harm because of queue overflow?
615 	 */
616 
617 	if (((tp->tap_flags & TAP_VMNET) == 0) &&
618 	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
619 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
620 			 minor(tp->tap_dev), tp->tap_flags);
621 		ifq_purge(&ifp->if_snd);
622 		return;
623 	}
624 
625 	ifp->if_flags |= IFF_OACTIVE;
626 
627 	ifq = &tp->tap_devq;
628 	while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
629 		if (IF_QFULL(ifq)) {
630 			IF_DROP(ifq);
631 			ifp->if_oerrors++;
632 			m_freem(m);
633 		} else {
634 			IF_ENQUEUE(ifq, m);
635 			ifp->if_opackets++;
636 			has_data = 1;
637 		}
638 	}
639 
640 	if (has_data) {
641 		if (tp->tap_flags & TAP_RWAIT) {
642 			tp->tap_flags &= ~TAP_RWAIT;
643 			wakeup((caddr_t)tp);
644 		}
645 
646 		get_mplock();
647 		KNOTE(&tp->tap_rsel.si_note, 0);
648 		rel_mplock();
649 
650 		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
651 			get_mplock();
652 			pgsigio(tp->tap_sigio, SIGIO, 0);
653 			rel_mplock();
654 		}
655 
656 		/*
657 		 * selwakeup is not MPSAFE.  tapifstart is.
658 		 */
659 		get_mplock();
660 		selwakeup(&tp->tap_rsel);
661 		rel_mplock();
662 	}
663 
664 	ifp->if_flags &= ~IFF_OACTIVE;
665 }
666 
667 
668 /*
669  * tapioctl
670  *
671  * The ops interface is now pretty minimal.  Called via fileops with nothing
672  * held.
673  *
674  * MPSAFE
675  */
676 static int
677 tapioctl(struct dev_ioctl_args *ap)
678 {
679 	cdev_t dev = ap->a_head.a_dev;
680 	caddr_t data = ap->a_data;
681 	struct tap_softc	*tp = dev->si_drv1;
682 	struct ifnet		*ifp = &tp->tap_if;
683  	struct tapinfo		*tapp = NULL;
684 	struct mbuf *mb;
685 	short f;
686 	int error;
687 
688 	ifnet_serialize_all(ifp);
689 	error = 0;
690 
691 	switch (ap->a_cmd) {
692 	case TAPSIFINFO:
693 		tapp = (struct tapinfo *)data;
694 		ifp->if_mtu = tapp->mtu;
695 		ifp->if_type = tapp->type;
696 		ifp->if_baudrate = tapp->baudrate;
697 		break;
698 
699 	case TAPGIFINFO:
700 		tapp = (struct tapinfo *)data;
701 		tapp->mtu = ifp->if_mtu;
702 		tapp->type = ifp->if_type;
703 		tapp->baudrate = ifp->if_baudrate;
704 		break;
705 
706 	case TAPSDEBUG:
707 		tapdebug = *(int *)data;
708 		break;
709 
710 	case TAPGDEBUG:
711 		*(int *)data = tapdebug;
712 		break;
713 
714 	case FIOASYNC:
715 		if (*(int *)data)
716 			tp->tap_flags |= TAP_ASYNC;
717 		else
718 			tp->tap_flags &= ~TAP_ASYNC;
719 		break;
720 
721 	case FIONREAD:
722 		*(int *)data = 0;
723 
724 		/* Take a look at devq first */
725 		IF_POLL(&tp->tap_devq, mb);
726 		if (mb == NULL)
727 			mb = ifq_poll(&ifp->if_snd);
728 
729 		if (mb != NULL) {
730 			for(; mb != NULL; mb = mb->m_next)
731 				*(int *)data += mb->m_len;
732 		}
733 		break;
734 
735 	case FIOSETOWN:
736 		error = fsetown(*(int *)data, &tp->tap_sigio);
737 		break;
738 
739 	case FIOGETOWN:
740 		*(int *)data = fgetown(tp->tap_sigio);
741 		break;
742 
743 	/* this is deprecated, FIOSETOWN should be used instead */
744 	case TIOCSPGRP:
745 		error = fsetown(-(*(int *)data), &tp->tap_sigio);
746 		break;
747 
748 	/* this is deprecated, FIOGETOWN should be used instead */
749 	case TIOCGPGRP:
750 		*(int *)data = -fgetown(tp->tap_sigio);
751 		break;
752 
753 	/* VMware/VMnet port ioctl's */
754 
755 	case SIOCGIFFLAGS:	/* get ifnet flags */
756 		bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
757 		break;
758 
759 	case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
760 		f = *(short *)data;
761 		f &= 0x0fff;
762 		f &= ~IFF_CANTCHANGE;
763 		f |= IFF_UP;
764 		ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
765 		break;
766 
767 	case OSIOCGIFADDR:	/* get MAC address of the remote side */
768 	case SIOCGIFADDR:
769 		bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
770 		break;
771 
772 	case SIOCSIFADDR:	/* set MAC address of the remote side */
773 		bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
774 		break;
775 
776 	default:
777 		error = ENOTTY;
778 		break;
779 	}
780 	ifnet_deserialize_all(ifp);
781 	return (error);
782 }
783 
784 
785 /*
786  * tapread
787  *
788  * The ops read interface - reads a packet at a time, or at
789  * least as much of a packet as can be read.
790  *
791  * Called from the fileops interface with nothing held.
792  *
793  * MPSAFE
794  */
795 static int
796 tapread(struct dev_read_args *ap)
797 {
798 	cdev_t dev = ap->a_head.a_dev;
799 	struct uio *uio = ap->a_uio;
800 	struct tap_softc	*tp = dev->si_drv1;
801 	struct ifnet		*ifp = &tp->tap_if;
802 	struct mbuf		*m0 = NULL;
803 	int			 error = 0, len;
804 
805 	TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));
806 
807 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
808 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
809 			 minor(tp->tap_dev), tp->tap_flags);
810 
811 		return (EHOSTDOWN);
812 	}
813 
814 	tp->tap_flags &= ~TAP_RWAIT;
815 
816 	/* sleep until we get a packet */
817 	do {
818 		ifnet_serialize_all(ifp);
819 		IF_DEQUEUE(&tp->tap_devq, m0);
820 		if (m0 == NULL) {
821 			if (ap->a_ioflag & IO_NDELAY) {
822 				ifnet_deserialize_all(ifp);
823 				return (EWOULDBLOCK);
824 			}
825 			tp->tap_flags |= TAP_RWAIT;
826 			tsleep_interlock(tp, PCATCH);
827 			ifnet_deserialize_all(ifp);
828 			error = tsleep(tp, PCATCH | PINTERLOCKED, "taprd", 0);
829 			if (error)
830 				return (error);
831 		} else {
832 			ifnet_deserialize_all(ifp);
833 		}
834 	} while (m0 == NULL);
835 
836 	BPF_MTAP(ifp, m0);
837 
838 	/* xfer packet to user space */
839 	while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
840 		len = (int)szmin(uio->uio_resid, m0->m_len);
841 		if (len == 0)
842 			break;
843 
844 		error = uiomove(mtod(m0, caddr_t), (size_t)len, uio);
845 		m0 = m_free(m0);
846 	}
847 
848 	if (m0 != NULL) {
849 		TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
850 			 minor(tp->tap_dev));
851 		m_freem(m0);
852 	}
853 
854 	return (error);
855 }
856 
857 /*
858  * tapwrite
859  *
860  * The ops write interface - an atomic write is a packet - or else!
861  *
862  * Called from the fileops interface with nothing held.
863  *
864  * MPSAFE
865  */
866 static int
867 tapwrite(struct dev_write_args *ap)
868 {
869 	cdev_t dev = ap->a_head.a_dev;
870 	struct uio *uio = ap->a_uio;
871 	struct tap_softc	*tp = dev->si_drv1;
872 	struct ifnet		*ifp = &tp->tap_if;
873 	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
874 	int			error = 0;
875 	size_t			tlen, mlen;
876 
877 	TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));
878 
879 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
880 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
881 			 minor(tp->tap_dev), tp->tap_flags);
882 		return (EHOSTDOWN);
883 	}
884 
885 	if (uio->uio_resid == 0)
886 		return (0);
887 
888 	if (uio->uio_resid > TAPMRU) {
889 		TAPDEBUG(ifp, "invalid packet len = %zu, minor = %#x\n",
890 			 uio->uio_resid, minor(tp->tap_dev));
891 
892 		return (EIO);
893 	}
894 	tlen = uio->uio_resid;
895 
896 	/* get a header mbuf */
897 	MGETHDR(m, MB_DONTWAIT, MT_DATA);
898 	if (m == NULL)
899 		return (ENOBUFS);
900 	mlen = MHLEN;
901 
902 	top = 0;
903 	mp = &top;
904 	while ((error == 0) && (uio->uio_resid > 0)) {
905 		m->m_len = (int)szmin(mlen, uio->uio_resid);
906 		error = uiomove(mtod(m, caddr_t), (size_t)m->m_len, uio);
907 		*mp = m;
908 		mp = &m->m_next;
909 		if (uio->uio_resid > 0) {
910 			MGET(m, MB_DONTWAIT, MT_DATA);
911 			if (m == NULL) {
912 				error = ENOBUFS;
913 				break;
914 			}
915 			mlen = MLEN;
916 		}
917 	}
918 	if (error) {
919 		ifp->if_ierrors ++;
920 		if (top)
921 			m_freem(top);
922 		return (error);
923 	}
924 
925 	top->m_pkthdr.len = (int)tlen;
926 	top->m_pkthdr.rcvif = ifp;
927 
928 	/*
929 	 * Ethernet bridge and bpf are handled in ether_input
930 	 *
931 	 * adjust mbuf and give packet to the ether_input
932 	 */
933 	ifnet_serialize_all(ifp);
934 	ifp->if_input(ifp, top);
935 	ifp->if_ipackets ++; /* ibytes are counted in ether_input */
936 	ifnet_deserialize_all(ifp);
937 
938 	return (0);
939 }
940 
941 /*
942  * tappoll
943  *
944  * The poll interface, this is only useful on reads really. The write
945  * detect always returns true, write never blocks anyway, it either
946  * accepts the packet or drops it
947  *
948  * Called from the fileops interface with nothing held.
949  *
950  * MPSAFE
951  */
952 static int
953 tappoll(struct dev_poll_args *ap)
954 {
955 	cdev_t dev = ap->a_head.a_dev;
956 	struct tap_softc	*tp = dev->si_drv1;
957 	struct ifnet		*ifp = &tp->tap_if;
958 	int		 	 revents = 0;
959 
960 	TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
961 
962 	if (ap->a_events & (POLLIN | POLLRDNORM)) {
963 		if (!IF_QEMPTY(&tp->tap_devq)) {
964 			TAPDEBUG(ifp,
965 				 "has data in queue. minor = %#x\n",
966 				 minor(tp->tap_dev));
967 
968 			revents |= (ap->a_events & (POLLIN | POLLRDNORM));
969 		} else {
970 			TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
971 				 minor(tp->tap_dev));
972 
973 			get_mplock();
974 			selrecord(curthread, &tp->tap_rsel);
975 			rel_mplock();
976 		}
977 	}
978 
979 	if (ap->a_events & (POLLOUT | POLLWRNORM))
980 		revents |= (ap->a_events & (POLLOUT | POLLWRNORM));
981 	ap->a_events = revents;
982 	return (0);
983 }
984 
985 /*
986  * tapkqfilter - called from the fileops interface with nothing held
987  *
988  * MPSAFE
989  */
990 static int filt_tapread(struct knote *kn, long hint);
991 static void filt_tapdetach(struct knote *kn);
992 static struct filterops tapread_filtops =
993 	{ 1, NULL, filt_tapdetach, filt_tapread };
994 
995 static int
996 tapkqfilter(struct dev_kqfilter_args *ap)
997 {
998 	cdev_t dev = ap->a_head.a_dev;
999 	struct knote *kn = ap->a_kn;
1000 	struct tap_softc *tp;
1001 	struct klist *list;
1002 	struct ifnet *ifp;
1003 
1004 	get_mplock();
1005 	tp = dev->si_drv1;
1006 	ifp = &tp->tap_if;
1007 	ap->a_result =0;
1008 
1009 	switch(kn->kn_filter) {
1010 	case EVFILT_READ:
1011 		list = &tp->tap_rsel.si_note;
1012 		kn->kn_fop = &tapread_filtops;
1013 		kn->kn_hook = (void *)tp;
1014 		break;
1015 	case EVFILT_WRITE:
1016 		/* fall through */
1017 	default:
1018 		ap->a_result = 1;
1019 		rel_mplock();
1020 		return(0);
1021 	}
1022 	crit_enter();
1023 	SLIST_INSERT_HEAD(list, kn, kn_selnext);
1024 	crit_exit();
1025 	rel_mplock();
1026 	return(0);
1027 }
1028 
1029 static int
1030 filt_tapread(struct knote *kn, long hint)
1031 {
1032 	struct tap_softc *tp = (void *)kn->kn_hook;
1033 
1034 	if (IF_QEMPTY(&tp->tap_devq) == 0)	/* XXX serializer */
1035 		return(1);
1036 	else
1037 		return(0);
1038 }
1039 
1040 static void
1041 filt_tapdetach(struct knote *kn)
1042 {
1043 	struct tap_softc *tp = (void *)kn->kn_hook;
1044 
1045 	SLIST_REMOVE(&tp->tap_rsel.si_note, kn, knote, kn_selnext);
1046 }
1047 
1048 static void
1049 tapifstop(struct tap_softc *tp, int clear_flags)
1050 {
1051 	struct ifnet *ifp = &tp->tap_if;
1052 
1053 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1054 	IF_DRAIN(&tp->tap_devq);
1055 	tp->tap_flags &= ~TAP_CLOSEDOWN;
1056 	if (clear_flags)
1057 		ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1058 }
1059 
1060 static void
1061 tapifflags(struct tap_softc *tp)
1062 {
1063 	struct ifnet *ifp = &tp->arpcom.ac_if;
1064 
1065 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1066 	if ((tp->tap_flags & TAP_VMNET) == 0) {
1067 		/*
1068 		 * Only for non-vmnet tap(4)
1069 		 */
1070 		if (ifp->if_flags & IFF_UP) {
1071 			if ((ifp->if_flags & IFF_RUNNING) == 0)
1072 				tapifinit(tp);
1073 		} else {
1074 			tapifstop(tp, 1);
1075 		}
1076 	} else {
1077 		/* XXX */
1078 	}
1079 }
1080