xref: /dragonfly/sys/net/tap/if_tap.c (revision 6693db17)
1 /*
2  * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * BASED ON:
27  * -------------------------------------------------------------------------
28  *
29  * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30  * Nottingham University 1987.
31  */
32 
33 /*
34  * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35  * $DragonFly: src/sys/net/tap/if_tap.c,v 1.41 2008/09/05 17:03:15 dillon Exp $
36  * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
37  */
38 
39 #include "opt_inet.h"
40 
41 #include <sys/param.h>
42 #include <sys/conf.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/poll.h>
50 #include <sys/proc.h>
51 #include <sys/priv.h>
52 #include <sys/signalvar.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57 #include <sys/ttycom.h>
58 #include <sys/uio.h>
59 #include <sys/vnode.h>
60 #include <sys/serialize.h>
61 
62 #include <sys/thread2.h>
63 #include <sys/mplock2.h>
64 
65 #include <net/bpf.h>
66 #include <net/ethernet.h>
67 #include <net/if.h>
68 #include <net/ifq_var.h>
69 #include <net/if_arp.h>
70 #include <net/if_clone.h>
71 #include <net/route.h>
72 #include <sys/devfs.h>
73 
74 #include <netinet/in.h>
75 
76 #include "if_tapvar.h"
77 #include "if_tap.h"
78 
/* Interface flags every tap interface is created with. */
#define TAP_IFFLAGS	(IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST)

/* Units below this number get their device node at module load time. */
#define TAP_PREALLOCATED_UNITS	4

#define CDEV_NAME	"tap"
#define CDEV_MAJOR	149

/*
 * Debug printf; takes the same arguments as if_printf() and only prints
 * while `tapdebug' is non-zero.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement.  A bare "if (tapdebug) if_printf" expansion would
 * silently capture the `else' of an unbraced if/else written around a
 * call site.
 */
#define TAPDEBUG(...)	do {					\
	if (tapdebug)						\
		if_printf(__VA_ARGS__);				\
} while (0)

#define TAP		"tap"
#define VMNET		"vmnet"
#define VMNET_DEV_MASK	0x00010000
90 
91 DEVFS_DECLARE_CLONE_BITMAP(tap);
92 
93 /* module */
94 static int 		tapmodevent	(module_t, int, void *);
95 
96 /* device */
97 static struct tap_softc *tapcreate(int, cdev_t);
98 static void		tapdestroy(struct tap_softc *);
99 
100 /* clone */
101 static int		tap_clone_create(struct if_clone *, int);
102 static void		tap_clone_destroy(struct ifnet *);
103 
104 
105 /* network interface */
106 static void		tapifstart	(struct ifnet *);
107 static int		tapifioctl	(struct ifnet *, u_long, caddr_t,
108 					 struct ucred *);
109 static void		tapifinit	(void *);
110 static void		tapifstop(struct tap_softc *, int);
111 static void		tapifflags(struct tap_softc *);
112 
113 /* character device */
114 static d_open_t		tapopen;
115 static d_clone_t	tapclone;
116 static d_close_t	tapclose;
117 static d_read_t		tapread;
118 static d_write_t	tapwrite;
119 static d_ioctl_t	tapioctl;
120 static d_poll_t		tappoll;
121 static d_kqfilter_t	tapkqfilter;
122 
123 static struct dev_ops	tap_ops = {
124 	{ CDEV_NAME, CDEV_MAJOR, 0 },
125 	.d_open =	tapopen,
126 	.d_close =	tapclose,
127 	.d_read =	tapread,
128 	.d_write =	tapwrite,
129 	.d_ioctl =	tapioctl,
130 	.d_poll =	tappoll,
131 	.d_kqfilter =	tapkqfilter
132 };
133 
134 static int		taprefcnt = 0;		/* module ref. counter   */
135 static int		taplastunit = -1;	/* max. open unit number */
136 static int		tapdebug = 0;		/* debug flag            */
137 static int		tapuopen = 0;		/* all user open()       */
138 static int		tapuponopen = 0;	/* IFF_UP       */
139 
140 MALLOC_DECLARE(M_TAP);
141 MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
142 struct if_clone tap_cloner = IF_CLONE_INITIALIZER("tap",
143 			     tap_clone_create, tap_clone_destroy,
144 			     0, IF_MAXUNIT);
145 static SLIST_HEAD(,tap_softc) tap_listhead =
146 	SLIST_HEAD_INITIALIZER(&tap_listhead);
147 
148 SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
149 SYSCTL_DECL(_net_link);
150 SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
151 	    "Ethernet tunnel software network interface");
152 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
153 	   "Allow user to open /dev/tap (based on node permissions)");
154 SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
155 	   "Bring interface up when /dev/tap is opened");
156 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
157 
158 DEV_MODULE(if_tap, tapmodevent, NULL);
159 
160 /*
161  * tapmodevent
162  *
163  * module event handler
164  */
165 static int
166 tapmodevent(module_t mod, int type, void *data)
167 {
168 	static int attached = 0;
169 	struct tap_softc *tp, *ntp;
170 	int i;
171 
172 	switch (type) {
173 	case MOD_LOAD:
174 		if (attached)
175 			return (EEXIST);
176 
177 		make_autoclone_dev(&tap_ops, &DEVFS_CLONE_BITMAP(tap), tapclone,
178 				   UID_ROOT, GID_WHEEL, 0600, "tap");
179 		SLIST_INIT(&tap_listhead);
180 		if_clone_attach(&tap_cloner);
181 
182 		for (i = 0; i < TAP_PREALLOCATED_UNITS; ++i) {
183 			make_dev(&tap_ops, i, UID_ROOT, GID_WHEEL,
184 				 0600, "tap%d", i);
185 			devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(tap), i);
186 		}
187 
188 		attached = 1;
189 		break;
190 
191 	case MOD_UNLOAD:
192 		if (taprefcnt > 0)
193 			return (EBUSY);
194 
195 		if_clone_detach(&tap_cloner);
196 
197 		/* Maintain tap ifs in a local list */
198 		SLIST_FOREACH_MUTABLE(tp, &tap_listhead, tap_link, ntp)
199 			tapdestroy(tp);
200 
201 		attached = 0;
202 
203 		devfs_clone_handler_del("tap");
204 		dev_ops_remove_all(&tap_ops);
205 		devfs_clone_bitmap_uninit(&DEVFS_CLONE_BITMAP(tap));
206 		break;
207 
208 	default:
209 		return (EOPNOTSUPP);
210 	}
211 
212 	return (0);
213 } /* tapmodevent */
214 
215 
216 /*
217  * tapcreate - create or clone an interface
218  */
219 static struct tap_softc *
220 tapcreate(int unit, cdev_t dev)
221 {
222 	const char	*name = TAP;
223 	struct ifnet	*ifp;
224 	struct tap_softc *tp;
225 	uint8_t		ether_addr[ETHER_ADDR_LEN];
226 
227 	tp = kmalloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
228 	dev->si_drv1 = tp;
229 	tp->tap_dev = dev;
230 	tp->tap_unit = unit;
231 
232 	reference_dev(dev);	/* tp association */
233 
234 	/* generate fake MAC address: 00 bd xx xx xx unit_no */
235 	ether_addr[0] = 0x00;
236 	ether_addr[1] = 0xbd;
237 	bcopy(&ticks, &ether_addr[2], 3);
238 	ether_addr[5] = (u_char)unit;
239 
240 	/* fill the rest and attach interface */
241 	ifp = &tp->tap_if;
242 	ifp->if_softc = tp;
243 
244 	if_initname(ifp, name, unit);
245 	if (unit > taplastunit)
246 		taplastunit = unit;
247 
248 	ifp->if_init = tapifinit;
249 	ifp->if_start = tapifstart;
250 	ifp->if_ioctl = tapifioctl;
251 	ifp->if_mtu = ETHERMTU;
252 	ifp->if_flags = TAP_IFFLAGS;
253 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
254 	ifq_set_ready(&ifp->if_snd);
255 
256 	ether_ifattach(ifp, ether_addr, NULL);
257 
258 	tp->tap_flags |= TAP_INITED;
259 	tp->tap_devq.ifq_maxlen = ifqmaxlen;
260 
261 	SLIST_INSERT_HEAD(&tap_listhead, tp, tap_link);
262 
263 	TAPDEBUG(ifp, "created. minor = %#x\n", minor(dev));
264 	return (tp);
265 }
266 
267 static
268 struct tap_softc *
269 tapfind(int unit)
270 {
271 	struct tap_softc *tp;
272 
273 	SLIST_FOREACH(tp, &tap_listhead, tap_link) {
274 		if (tp->tap_unit == unit)
275 			return(tp);
276 	}
277 	return (NULL);
278 }
279 
280 /*
281  * tap_clone_create:
282  *
283  * Create a new tap instance via ifconfig.
284  */
285 static int
286 tap_clone_create(struct if_clone *ifc __unused, int unit)
287 {
288 	struct tap_softc *tp;
289 	cdev_t dev;
290 
291 	tp = tapfind(unit);
292 	if (tp == NULL) {
293 		if (!devfs_clone_bitmap_chk(&DEVFS_CLONE_BITMAP(tap), unit)) {
294 			devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(tap), unit);
295 			dev = make_dev(&tap_ops, unit, UID_ROOT, GID_WHEEL,
296 					   0600, "%s%d", TAP, unit);
297 		} else {
298 			dev = devfs_find_device_by_name("%s%d", TAP, unit);
299 		}
300 
301 		KKASSERT(dev != NULL);
302 		tp = tapcreate(unit, dev);
303 	}
304 	tp->tap_flags |= TAP_CLONE;
305 	TAPDEBUG(&tp->tap_if, "clone created. minor = %#x tap_flags = 0x%x\n",
306 		 minor(tp->tap_dev), tp->tap_flags);
307 
308 	return (0);
309 }
310 
311 /*
312  * tapopen
313  *
314  * to open tunnel. must be superuser
315  */
316 static int
317 tapopen(struct dev_open_args *ap)
318 {
319 	cdev_t dev = NULL;
320 	struct tap_softc *tp = NULL;
321 	struct ifnet *ifp = NULL;
322 	int error;
323 
324 	if (tapuopen == 0 &&
325 	    (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)) != 0)
326 		return (error);
327 
328 	get_mplock();
329 	dev = ap->a_head.a_dev;
330 	tp = dev->si_drv1;
331 	if (tp == NULL)
332 		tp = tapcreate(minor(dev), dev);
333 	if (tp->tap_flags & TAP_OPEN) {
334 		rel_mplock();
335 		return (EBUSY);
336 	}
337 	ifp = &tp->arpcom.ac_if;
338 
339 	if ((tp->tap_flags & TAP_CLONE) == 0) {
340 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
341 
342 		/* Announce the return of the interface. */
343 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
344 	}
345 
346 	bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
347 
348 	if (curthread->td_proc)
349 		fsetown(curthread->td_proc->p_pid, &tp->tap_sigtd);
350 	tp->tap_flags |= TAP_OPEN;
351 	taprefcnt ++;
352 
353 	if (tapuponopen && (ifp->if_flags & IFF_UP) == 0) {
354 		crit_enter();
355 		if_up(ifp);
356 		crit_exit();
357 
358 		ifnet_serialize_all(ifp);
359 		tapifflags(tp);
360 		ifnet_deserialize_all(ifp);
361 
362 		tp->tap_flags |= TAP_CLOSEDOWN;
363 	}
364 
365 	TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
366 		 minor(tp->tap_dev), taprefcnt, taplastunit);
367 
368 	rel_mplock();
369 	return (0);
370 }
371 
372 static int
373 tapclone(struct dev_clone_args *ap)
374 {
375 	int unit;
376 
377 	unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(tap), 0);
378 	ap->a_dev = make_only_dev(&tap_ops, unit, UID_ROOT, GID_WHEEL,
379 				  0600, "%s%d", TAP, unit);
380 	tapcreate(unit, ap->a_dev);
381 	return (0);
382 }
383 
384 /*
385  * tapclose
386  *
387  * close the device - mark i/f down & delete routing info
388  */
389 static int
390 tapclose(struct dev_close_args *ap)
391 {
392 	cdev_t dev = ap->a_head.a_dev;
393 	struct tap_softc *tp = dev->si_drv1;
394 	struct ifnet *ifp = &tp->tap_if;
395 	int clear_flags = 0;
396 
397 	get_mplock();
398 
399 	/* Junk all pending output */
400 	ifq_purge(&ifp->if_snd);
401 
402 	/*
403 	 * Do not bring the interface down, and do not anything with
404 	 * interface, if we are in VMnet mode. just close the device.
405 	 *
406 	 * If the interface is not cloned, we always bring it down.
407 	 *
408 	 * If the interface is cloned, then we bring it down during
409 	 * closing only if it was brought up during opening.
410 	 */
411 	if ((tp->tap_flags & TAP_VMNET) == 0 &&
412 	    ((tp->tap_flags & TAP_CLONE) == 0 ||
413 	     (tp->tap_flags & TAP_CLOSEDOWN))) {
414 		if (ifp->if_flags & IFF_UP)
415 			if_down(ifp);
416 		clear_flags = 1;
417 	}
418 	ifnet_serialize_all(ifp);
419 	tapifstop(tp, clear_flags);
420 	ifnet_deserialize_all(ifp);
421 
422 	if ((tp->tap_flags & TAP_CLONE) == 0) {
423 		if_purgeaddrs_nolink(ifp);
424 
425 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
426 
427 		/* Announce the departure of the interface. */
428 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
429 	}
430 
431 	funsetown(tp->tap_sigio);
432 	tp->tap_sigio = NULL;
433 	selwakeup(&tp->tap_rsel);
434 
435 	tp->tap_flags &= ~TAP_OPEN;
436 	funsetown(tp->tap_sigtd);
437 	tp->tap_sigtd = NULL;
438 
439 	taprefcnt --;
440 	if (taprefcnt < 0) {
441 		taprefcnt = 0;
442 		if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
443 			"set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
444 	}
445 
446 	TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
447 		 minor(tp->tap_dev), taprefcnt, taplastunit);
448 
449 	if (tp->tap_unit >= TAP_PREALLOCATED_UNITS)
450 		tapdestroy(tp);
451 
452 	rel_mplock();
453 	return (0);
454 }
455 
456 /*
457  * tapdestroy:
458  *
459  *	Destroy a tap instance.
460  */
461 static void
462 tapdestroy(struct tap_softc *tp)
463 {
464 	struct ifnet *ifp = &tp->arpcom.ac_if;
465 	cdev_t dev;
466 
467 	TAPDEBUG(ifp, "destroyed. minor = %#x, refcnt = %d, taplastunit = %d\n",
468 		 minor(tp->tap_dev), taprefcnt, taplastunit);
469 
470 	ifnet_serialize_all(ifp);
471 	tapifstop(tp, 1);
472 	ifnet_deserialize_all(ifp);
473 
474 	ether_ifdetach(ifp);
475 	SLIST_REMOVE(&tap_listhead, tp, tap_softc, tap_link);
476 
477 	dev = tp->tap_dev;
478 	tp->tap_dev = NULL;
479 	dev->si_drv1 = NULL;
480 
481 	release_dev(dev);	/* tp association */
482 
483 	/*
484 	 * Also destroy the cloned device
485 	 */
486 	if (tp->tap_unit >= TAP_PREALLOCATED_UNITS) {
487 		destroy_dev(dev);
488 		devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(tap), tp->tap_unit);
489 	}
490 
491 	kfree(tp, M_TAP);
492 
493 	taplastunit--;
494 }
495 
496 /*
497  * tap_clone_destroy:
498  *
499  *	Destroy a tap instance.
500  */
501 static void
502 tap_clone_destroy(struct ifnet *ifp)
503 {
504 	struct tap_softc *tp = ifp->if_softc;
505 
506 	TAPDEBUG(&tp->tap_if, "clone destroyed. minor = %#x tap_flags = 0x%x\n",
507 		 minor(tp->tap_dev), tp->tap_flags);
508 	if (tp->tap_flags & TAP_CLONE)
509 		tapdestroy(tp);
510 }
511 
512 /*
513  * tapifinit
514  *
515  * Network interface initialization function (called with if serializer held)
516  *
517  * MPSAFE
518  */
519 static void
520 tapifinit(void *xtp)
521 {
522 	struct tap_softc *tp = xtp;
523 	struct ifnet *ifp = &tp->tap_if;
524 
525 	TAPDEBUG(ifp, "initializing, minor = %#x tap_flags = 0x%x\n",
526 		 minor(tp->tap_dev), tp->tap_flags);
527 
528 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
529 
530 	tapifstop(tp, 1);
531 
532 	ifp->if_flags |= IFF_RUNNING;
533 	ifp->if_flags &= ~IFF_OACTIVE;
534 
535 	/* attempt to start output */
536 	tapifstart(ifp);
537 }
538 
539 
540 /*
541  * tapifioctl
542  *
543  * Process an ioctl request on network interface (called with if serializer
544  * held).
545  *
546  * MPSAFE
547  */
548 static int
549 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
550 {
551 	struct tap_softc 	*tp = (struct tap_softc *)(ifp->if_softc);
552 	struct ifstat		*ifs = NULL;
553 	int			 dummy;
554 
555 	switch (cmd) {
556 		case SIOCSIFADDR:
557 		case SIOCGIFADDR:
558 		case SIOCSIFMTU:
559 			dummy = ether_ioctl(ifp, cmd, data);
560 			return (dummy);
561 
562 		case SIOCSIFFLAGS:
563 			tapifflags(tp);
564 			break;
565 
566 		case SIOCADDMULTI: /* XXX -- just like vmnet does */
567 		case SIOCDELMULTI:
568 			break;
569 
570 		case SIOCGIFSTATUS:
571 			ifs = (struct ifstat *)data;
572 			dummy = strlen(ifs->ascii);
573 			if ((tp->tap_flags & TAP_OPEN) &&
574 			    dummy < sizeof(ifs->ascii)) {
575 				if (tp->tap_sigtd && tp->tap_sigtd->sio_proc) {
576 				    ksnprintf(ifs->ascii + dummy,
577 					sizeof(ifs->ascii) - dummy,
578 					"\tOpened by pid %d\n",
579 					(int)tp->tap_sigtd->sio_proc->p_pid);
580 				} else {
581 				    ksnprintf(ifs->ascii + dummy,
582 					sizeof(ifs->ascii) - dummy,
583 					"\tOpened by <unknown>\n");
584 				}
585 			}
586 			break;
587 
588 		default:
589 			return (EINVAL);
590 	}
591 
592 	return (0);
593 }
594 
595 
596 /*
597  * tapifstart
598  *
599  * Queue packets from higher level ready to put out (called with if serializer
600  * held)
601  *
602  * MPSAFE
603  */
604 static void
605 tapifstart(struct ifnet *ifp)
606 {
607 	struct tap_softc *tp = ifp->if_softc;
608 	struct ifqueue *ifq;
609 	struct mbuf *m;
610 	int has_data = 0;
611 
612 	TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));
613 
614 	/*
615 	 * do not junk pending output if we are in VMnet mode.
616 	 * XXX: can this do any harm because of queue overflow?
617 	 */
618 
619 	if (((tp->tap_flags & TAP_VMNET) == 0) &&
620 	    ((tp->tap_flags & TAP_READY) != TAP_READY)) {
621 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
622 			 minor(tp->tap_dev), tp->tap_flags);
623 		ifq_purge(&ifp->if_snd);
624 		return;
625 	}
626 
627 	ifp->if_flags |= IFF_OACTIVE;
628 
629 	ifq = &tp->tap_devq;
630 	while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
631 		if (IF_QFULL(ifq)) {
632 			IF_DROP(ifq);
633 			ifp->if_oerrors++;
634 			m_freem(m);
635 		} else {
636 			IF_ENQUEUE(ifq, m);
637 			ifp->if_opackets++;
638 			has_data = 1;
639 		}
640 	}
641 
642 	if (has_data) {
643 		if (tp->tap_flags & TAP_RWAIT) {
644 			tp->tap_flags &= ~TAP_RWAIT;
645 			wakeup((caddr_t)tp);
646 		}
647 
648 		get_mplock();
649 		KNOTE(&tp->tap_rsel.si_note, 0);
650 		rel_mplock();
651 
652 		if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
653 			get_mplock();
654 			pgsigio(tp->tap_sigio, SIGIO, 0);
655 			rel_mplock();
656 		}
657 
658 		/*
659 		 * selwakeup is not MPSAFE.  tapifstart is.
660 		 */
661 		get_mplock();
662 		selwakeup(&tp->tap_rsel);
663 		rel_mplock();
664 	}
665 
666 	ifp->if_flags &= ~IFF_OACTIVE;
667 }
668 
669 
670 /*
671  * tapioctl
672  *
673  * The ops interface is now pretty minimal.  Called via fileops with nothing
674  * held.
675  *
676  * MPSAFE
677  */
678 static int
679 tapioctl(struct dev_ioctl_args *ap)
680 {
681 	cdev_t dev = ap->a_head.a_dev;
682 	caddr_t data = ap->a_data;
683 	struct tap_softc	*tp = dev->si_drv1;
684 	struct ifnet		*ifp = &tp->tap_if;
685  	struct tapinfo		*tapp = NULL;
686 	struct mbuf *mb;
687 	short f;
688 	int error;
689 
690 	ifnet_serialize_all(ifp);
691 	error = 0;
692 
693 	switch (ap->a_cmd) {
694 	case TAPSIFINFO:
695 		tapp = (struct tapinfo *)data;
696 		ifp->if_mtu = tapp->mtu;
697 		ifp->if_type = tapp->type;
698 		ifp->if_baudrate = tapp->baudrate;
699 		break;
700 
701 	case TAPGIFINFO:
702 		tapp = (struct tapinfo *)data;
703 		tapp->mtu = ifp->if_mtu;
704 		tapp->type = ifp->if_type;
705 		tapp->baudrate = ifp->if_baudrate;
706 		break;
707 
708 	case TAPSDEBUG:
709 		tapdebug = *(int *)data;
710 		break;
711 
712 	case TAPGDEBUG:
713 		*(int *)data = tapdebug;
714 		break;
715 
716 	case FIOASYNC:
717 		if (*(int *)data)
718 			tp->tap_flags |= TAP_ASYNC;
719 		else
720 			tp->tap_flags &= ~TAP_ASYNC;
721 		break;
722 
723 	case FIONREAD:
724 		*(int *)data = 0;
725 
726 		/* Take a look at devq first */
727 		IF_POLL(&tp->tap_devq, mb);
728 		if (mb == NULL)
729 			mb = ifq_poll(&ifp->if_snd);
730 
731 		if (mb != NULL) {
732 			for(; mb != NULL; mb = mb->m_next)
733 				*(int *)data += mb->m_len;
734 		}
735 		break;
736 
737 	case FIOSETOWN:
738 		error = fsetown(*(int *)data, &tp->tap_sigio);
739 		break;
740 
741 	case FIOGETOWN:
742 		*(int *)data = fgetown(tp->tap_sigio);
743 		break;
744 
745 	/* this is deprecated, FIOSETOWN should be used instead */
746 	case TIOCSPGRP:
747 		error = fsetown(-(*(int *)data), &tp->tap_sigio);
748 		break;
749 
750 	/* this is deprecated, FIOGETOWN should be used instead */
751 	case TIOCGPGRP:
752 		*(int *)data = -fgetown(tp->tap_sigio);
753 		break;
754 
755 	/* VMware/VMnet port ioctl's */
756 
757 	case SIOCGIFFLAGS:	/* get ifnet flags */
758 		bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
759 		break;
760 
761 	case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
762 		f = *(short *)data;
763 		f &= 0x0fff;
764 		f &= ~IFF_CANTCHANGE;
765 		f |= IFF_UP;
766 		ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
767 		break;
768 
769 	case OSIOCGIFADDR:	/* get MAC address of the remote side */
770 	case SIOCGIFADDR:
771 		bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
772 		break;
773 
774 	case SIOCSIFADDR:	/* set MAC address of the remote side */
775 		bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
776 		break;
777 
778 	default:
779 		error = ENOTTY;
780 		break;
781 	}
782 	ifnet_deserialize_all(ifp);
783 	return (error);
784 }
785 
786 
787 /*
788  * tapread
789  *
790  * The ops read interface - reads a packet at a time, or at
791  * least as much of a packet as can be read.
792  *
793  * Called from the fileops interface with nothing held.
794  *
795  * MPSAFE
796  */
797 static int
798 tapread(struct dev_read_args *ap)
799 {
800 	cdev_t dev = ap->a_head.a_dev;
801 	struct uio *uio = ap->a_uio;
802 	struct tap_softc	*tp = dev->si_drv1;
803 	struct ifnet		*ifp = &tp->tap_if;
804 	struct mbuf		*m0 = NULL;
805 	int			 error = 0, len;
806 
807 	TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));
808 
809 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
810 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
811 			 minor(tp->tap_dev), tp->tap_flags);
812 
813 		return (EHOSTDOWN);
814 	}
815 
816 	tp->tap_flags &= ~TAP_RWAIT;
817 
818 	/* sleep until we get a packet */
819 	do {
820 		ifnet_serialize_all(ifp);
821 		IF_DEQUEUE(&tp->tap_devq, m0);
822 		if (m0 == NULL) {
823 			if (ap->a_ioflag & IO_NDELAY) {
824 				ifnet_deserialize_all(ifp);
825 				return (EWOULDBLOCK);
826 			}
827 			tp->tap_flags |= TAP_RWAIT;
828 			tsleep_interlock(tp, PCATCH);
829 			ifnet_deserialize_all(ifp);
830 			error = tsleep(tp, PCATCH | PINTERLOCKED, "taprd", 0);
831 			if (error)
832 				return (error);
833 		} else {
834 			ifnet_deserialize_all(ifp);
835 		}
836 	} while (m0 == NULL);
837 
838 	BPF_MTAP(ifp, m0);
839 
840 	/* xfer packet to user space */
841 	while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
842 		len = (int)szmin(uio->uio_resid, m0->m_len);
843 		if (len == 0)
844 			break;
845 
846 		error = uiomove(mtod(m0, caddr_t), (size_t)len, uio);
847 		m0 = m_free(m0);
848 	}
849 
850 	if (m0 != NULL) {
851 		TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
852 			 minor(tp->tap_dev));
853 		m_freem(m0);
854 	}
855 
856 	return (error);
857 }
858 
859 /*
860  * tapwrite
861  *
862  * The ops write interface - an atomic write is a packet - or else!
863  *
864  * Called from the fileops interface with nothing held.
865  *
866  * MPSAFE
867  */
868 static int
869 tapwrite(struct dev_write_args *ap)
870 {
871 	cdev_t dev = ap->a_head.a_dev;
872 	struct uio *uio = ap->a_uio;
873 	struct tap_softc	*tp = dev->si_drv1;
874 	struct ifnet		*ifp = &tp->tap_if;
875 	struct mbuf		*top = NULL, **mp = NULL, *m = NULL;
876 	int			error = 0;
877 	size_t			tlen, mlen;
878 
879 	TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));
880 
881 	if ((tp->tap_flags & TAP_READY) != TAP_READY) {
882 		TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
883 			 minor(tp->tap_dev), tp->tap_flags);
884 		return (EHOSTDOWN);
885 	}
886 
887 	if (uio->uio_resid == 0)
888 		return (0);
889 
890 	if (uio->uio_resid > TAPMRU) {
891 		TAPDEBUG(ifp, "invalid packet len = %zu, minor = %#x\n",
892 			 uio->uio_resid, minor(tp->tap_dev));
893 
894 		return (EIO);
895 	}
896 	tlen = uio->uio_resid;
897 
898 	/* get a header mbuf */
899 	MGETHDR(m, MB_DONTWAIT, MT_DATA);
900 	if (m == NULL)
901 		return (ENOBUFS);
902 	mlen = MHLEN;
903 
904 	top = 0;
905 	mp = &top;
906 	while ((error == 0) && (uio->uio_resid > 0)) {
907 		m->m_len = (int)szmin(mlen, uio->uio_resid);
908 		error = uiomove(mtod(m, caddr_t), (size_t)m->m_len, uio);
909 		*mp = m;
910 		mp = &m->m_next;
911 		if (uio->uio_resid > 0) {
912 			MGET(m, MB_DONTWAIT, MT_DATA);
913 			if (m == NULL) {
914 				error = ENOBUFS;
915 				break;
916 			}
917 			mlen = MLEN;
918 		}
919 	}
920 	if (error) {
921 		ifp->if_ierrors ++;
922 		if (top)
923 			m_freem(top);
924 		return (error);
925 	}
926 
927 	top->m_pkthdr.len = (int)tlen;
928 	top->m_pkthdr.rcvif = ifp;
929 
930 	/*
931 	 * Ethernet bridge and bpf are handled in ether_input
932 	 *
933 	 * adjust mbuf and give packet to the ether_input
934 	 */
935 	ifnet_serialize_all(ifp);
936 	ifp->if_input(ifp, top);
937 	ifp->if_ipackets ++; /* ibytes are counted in ether_input */
938 	ifnet_deserialize_all(ifp);
939 
940 	return (0);
941 }
942 
943 /*
944  * tappoll
945  *
946  * The poll interface, this is only useful on reads really. The write
947  * detect always returns true, write never blocks anyway, it either
948  * accepts the packet or drops it
949  *
950  * Called from the fileops interface with nothing held.
951  *
952  * MPSAFE
953  */
954 static int
955 tappoll(struct dev_poll_args *ap)
956 {
957 	cdev_t dev = ap->a_head.a_dev;
958 	struct tap_softc	*tp = dev->si_drv1;
959 	struct ifnet		*ifp = &tp->tap_if;
960 	int		 	 revents = 0;
961 
962 	TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
963 
964 	if (ap->a_events & (POLLIN | POLLRDNORM)) {
965 		if (!IF_QEMPTY(&tp->tap_devq)) {
966 			TAPDEBUG(ifp,
967 				 "has data in queue. minor = %#x\n",
968 				 minor(tp->tap_dev));
969 
970 			revents |= (ap->a_events & (POLLIN | POLLRDNORM));
971 		} else {
972 			TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
973 				 minor(tp->tap_dev));
974 
975 			get_mplock();
976 			selrecord(curthread, &tp->tap_rsel);
977 			rel_mplock();
978 		}
979 	}
980 
981 	if (ap->a_events & (POLLOUT | POLLWRNORM))
982 		revents |= (ap->a_events & (POLLOUT | POLLWRNORM));
983 	ap->a_events = revents;
984 	return (0);
985 }
986 
987 /*
988  * tapkqfilter - called from the fileops interface with nothing held
989  *
990  * MPSAFE
991  */
992 static int filt_tapread(struct knote *kn, long hint);
993 static void filt_tapdetach(struct knote *kn);
994 static struct filterops tapread_filtops =
995 	{ 1, NULL, filt_tapdetach, filt_tapread };
996 
997 static int
998 tapkqfilter(struct dev_kqfilter_args *ap)
999 {
1000 	cdev_t dev = ap->a_head.a_dev;
1001 	struct knote *kn = ap->a_kn;
1002 	struct tap_softc *tp;
1003 	struct klist *list;
1004 	struct ifnet *ifp;
1005 
1006 	get_mplock();
1007 	tp = dev->si_drv1;
1008 	ifp = &tp->tap_if;
1009 	ap->a_result =0;
1010 
1011 	switch(kn->kn_filter) {
1012 	case EVFILT_READ:
1013 		list = &tp->tap_rsel.si_note;
1014 		kn->kn_fop = &tapread_filtops;
1015 		kn->kn_hook = (void *)tp;
1016 		break;
1017 	case EVFILT_WRITE:
1018 		/* fall through */
1019 	default:
1020 		ap->a_result = 1;
1021 		rel_mplock();
1022 		return(0);
1023 	}
1024 	crit_enter();
1025 	SLIST_INSERT_HEAD(list, kn, kn_selnext);
1026 	crit_exit();
1027 	rel_mplock();
1028 	return(0);
1029 }
1030 
1031 static int
1032 filt_tapread(struct knote *kn, long hint)
1033 {
1034 	struct tap_softc *tp = (void *)kn->kn_hook;
1035 
1036 	if (IF_QEMPTY(&tp->tap_devq) == 0)	/* XXX serializer */
1037 		return(1);
1038 	else
1039 		return(0);
1040 }
1041 
1042 static void
1043 filt_tapdetach(struct knote *kn)
1044 {
1045 	struct tap_softc *tp = (void *)kn->kn_hook;
1046 
1047 	SLIST_REMOVE(&tp->tap_rsel.si_note, kn, knote, kn_selnext);
1048 }
1049 
1050 static void
1051 tapifstop(struct tap_softc *tp, int clear_flags)
1052 {
1053 	struct ifnet *ifp = &tp->tap_if;
1054 
1055 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1056 	IF_DRAIN(&tp->tap_devq);
1057 	tp->tap_flags &= ~TAP_CLOSEDOWN;
1058 	if (clear_flags)
1059 		ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1060 }
1061 
1062 static void
1063 tapifflags(struct tap_softc *tp)
1064 {
1065 	struct ifnet *ifp = &tp->arpcom.ac_if;
1066 
1067 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1068 	if ((tp->tap_flags & TAP_VMNET) == 0) {
1069 		/*
1070 		 * Only for non-vmnet tap(4)
1071 		 */
1072 		if (ifp->if_flags & IFF_UP) {
1073 			if ((ifp->if_flags & IFF_RUNNING) == 0)
1074 				tapifinit(tp);
1075 		} else {
1076 			tapifstop(tp, 1);
1077 		}
1078 	} else {
1079 		/* XXX */
1080 	}
1081 }
1082