xref: /dragonfly/sys/net/bpf.c (revision 820c5b08)
1 /*
2  * Copyright (c) 1990, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from the Stanford/CMU enet packet filter,
6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8  * Berkeley Laboratory.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)bpf.c	8.2 (Berkeley) 3/28/94
35  *
36  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.12 2002/04/14 21:41:48 luigi Exp $
37  */
38 
39 #include "use_bpf.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/conf.h>
44 #include <sys/device.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/time.h>
48 #include <sys/proc.h>
49 #include <sys/signalvar.h>
50 #include <sys/filio.h>
51 #include <sys/sockio.h>
52 #include <sys/ttycom.h>
53 #include <sys/filedesc.h>
54 
55 #include <sys/event.h>
56 
57 #include <sys/socket.h>
58 #include <sys/vnode.h>
59 
60 #include <sys/thread2.h>
61 
62 #include <net/if.h>
63 #include <net/bpf.h>
64 #include <net/bpfdesc.h>
65 #include <net/netmsg2.h>
66 #include <net/netisr2.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
72 
73 #include <netproto/802_11/ieee80211_dragonfly.h>
74 
75 #include <sys/devfs.h>
76 
77 struct netmsg_bpf_output {
78 	struct netmsg_base base;
79 	struct mbuf	*nm_mbuf;
80 	struct ifnet	*nm_ifp;
81 	struct sockaddr	*nm_dst;
82 };
83 
84 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
85 DEVFS_DECLARE_CLONE_BITMAP(bpf);
86 
87 #if NBPF <= 1
88 #define BPF_PREALLOCATED_UNITS	4
89 #else
90 #define BPF_PREALLOCATED_UNITS	NBPF
91 #endif
92 
93 #if NBPF > 0
94 
95 /*
96  * The default read buffer size is patchable.
97  */
98 static int bpf_bufsize = BPF_DEFAULTBUFSIZE;
99 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
100    &bpf_bufsize, 0, "Current size of bpf buffer");
101 int bpf_maxbufsize = BPF_MAXBUFSIZE;
102 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
103    &bpf_maxbufsize, 0, "Maximum size of bpf buffer");
104 
105 /*
106  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
107  */
108 static struct bpf_if	*bpf_iflist;
109 
110 static struct lwkt_token bpf_token = LWKT_TOKEN_INITIALIZER(bpf_token);
111 
112 static int	bpf_allocbufs(struct bpf_d *);
113 static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
114 static void	bpf_detachd(struct bpf_d *d);
115 static void	bpf_resetd(struct bpf_d *);
116 static void	bpf_freed(struct bpf_d *);
117 static void	bpf_mcopy(const void *, void *, size_t);
118 static int	bpf_movein(struct uio *, int, struct mbuf **,
119 			   struct sockaddr *, int *, struct bpf_insn *);
120 static int	bpf_setif(struct bpf_d *, struct ifreq *);
121 static void	bpf_timed_out(void *);
122 static void	bpf_wakeup(struct bpf_d *);
123 static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
124 			    void (*)(const void *, void *, size_t),
125 			    const struct timeval *);
126 static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
127 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
128 static int	bpf_setdlt(struct bpf_d *, u_int);
129 static void	bpf_drvinit(void *unused);
130 static void	bpf_filter_detach(struct knote *kn);
131 static int	bpf_filter_read(struct knote *kn, long hint);
132 
133 static d_open_t		bpfopen;
134 static d_clone_t	bpfclone;
135 static d_close_t	bpfclose;
136 static d_read_t		bpfread;
137 static d_write_t	bpfwrite;
138 static d_ioctl_t	bpfioctl;
139 static d_kqfilter_t	bpfkqfilter;
140 
141 #define CDEV_MAJOR 23
142 static struct dev_ops bpf_ops = {
143 	{ "bpf", 0, D_MPSAFE },
144 	.d_open =	bpfopen,
145 	.d_close =	bpfclose,
146 	.d_read =	bpfread,
147 	.d_write =	bpfwrite,
148 	.d_ioctl =	bpfioctl,
149 	.d_kqfilter =	bpfkqfilter
150 };
151 
152 
153 static int
154 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
155 	   struct sockaddr *sockp, int *datlen, struct bpf_insn *wfilter)
156 {
157 	const struct ieee80211_bpf_params *p;
158 	struct mbuf *m;
159 	int error;
160 	int len;
161 	int hlen;
162 	int slen;
163 
164 	*datlen = 0;
165 	*mp = NULL;
166 
167 	/*
168 	 * Build a sockaddr based on the data link layer type.
169 	 * We do this at this level because the ethernet header
170 	 * is copied directly into the data field of the sockaddr.
171 	 * In the case of SLIP, there is no header and the packet
172 	 * is forwarded as is.
173 	 * Also, we are careful to leave room at the front of the mbuf
174 	 * for the link level header.
175 	 */
176 	switch (linktype) {
177 	case DLT_SLIP:
178 		sockp->sa_family = AF_INET;
179 		hlen = 0;
180 		break;
181 
182 	case DLT_EN10MB:
183 		sockp->sa_family = AF_UNSPEC;
184 		/* XXX Would MAXLINKHDR be better? */
185 		hlen = sizeof(struct ether_header);
186 		break;
187 
188 	case DLT_RAW:
189 	case DLT_NULL:
190 		sockp->sa_family = AF_UNSPEC;
191 		hlen = 0;
192 		break;
193 
194 	case DLT_ATM_RFC1483:
195 		/*
196 		 * en atm driver requires 4-byte atm pseudo header.
197 		 * though it isn't standard, vpi:vci needs to be
198 		 * specified anyway.
199 		 */
200 		sockp->sa_family = AF_UNSPEC;
201 		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
202 		break;
203 
204 	case DLT_PPP:
205 		sockp->sa_family = AF_UNSPEC;
206 		hlen = 4;	/* This should match PPP_HDRLEN */
207 		break;
208 
209 	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
210 		sockp->sa_family = AF_IEEE80211;
211 		hlen = 0;
212 		break;
213 
214 	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
215 		sockp->sa_family = AF_IEEE80211;
216 		sockp->sa_len = 12;	/* XXX != 0 */
217 		hlen = sizeof(struct ieee80211_bpf_params);
218 		break;
219 
220 	default:
221 		return(EIO);
222 	}
223 
224 	len = uio->uio_resid;
225 	*datlen = len - hlen;
226 	if ((unsigned)len > MCLBYTES)
227 		return(EIO);
228 
229 	m = m_getl(len, M_WAITOK, MT_DATA, M_PKTHDR, NULL);
230 	if (m == NULL)
231 		return(ENOBUFS);
232 	m->m_pkthdr.len = m->m_len = len;
233 	m->m_pkthdr.rcvif = NULL;
234 	*mp = m;
235 
236 	if (m->m_len < hlen) {
237 		error = EPERM;
238 		goto bad;
239 	}
240 
241 	error = uiomove(mtod(m, u_char *), len, uio);
242 	if (error)
243 		goto bad;
244 
245 	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
246 	if (slen == 0) {
247 		error = EPERM;
248 		goto bad;
249 	}
250 
251 	/*
252 	 * Make room for link header, and copy it to sockaddr.
253 	 */
254 	if (hlen != 0) {
255 		if (sockp->sa_family == AF_IEEE80211) {
256 			/*
257 			 * Collect true length from the parameter header
258 			 * NB: sockp is known to be zero'd so if we do a
259 			 *     short copy unspecified parameters will be
260 			 *     zero.
261 			 * NB: packet may not be aligned after stripping
262 			 *     bpf params
263 			 * XXX check ibp_vers
264 			 */
265 			p = mtod(m, const struct ieee80211_bpf_params *);
266 			hlen = p->ibp_len;
267 			if (hlen > sizeof(sockp->sa_data)) {
268 				error = EINVAL;
269 				goto bad;
270 			}
271 		}
272 		bcopy(m->m_data, sockp->sa_data, hlen);
273 		m->m_pkthdr.len -= hlen;
274 		m->m_len -= hlen;
275 		m->m_data += hlen; /* XXX */
276 	}
277 	return (0);
278 bad:
279 	m_freem(m);
280 	return(error);
281 }
282 
283 /*
284  * Attach file to the bpf interface, i.e. make d listen on bp.
285  * Must be called at splimp.
286  */
287 static void
288 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
289 {
290 	/*
291 	 * Point d at bp, and add d to the interface's list of listeners.
292 	 * Finally, point the driver's bpf cookie at the interface so
293 	 * it will divert packets to bpf.
294 	 */
295 	lwkt_gettoken(&bpf_token);
296 	d->bd_bif = bp;
297 	SLIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
298 	*bp->bif_driverp = bp;
299 
300 	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
301 	lwkt_reltoken(&bpf_token);
302 }
303 
304 /*
305  * Detach a file from its interface.
306  */
307 static void
308 bpf_detachd(struct bpf_d *d)
309 {
310 	int error;
311 	struct bpf_if *bp;
312 	struct ifnet *ifp;
313 
314 	lwkt_gettoken(&bpf_token);
315 	bp = d->bd_bif;
316 	ifp = bp->bif_ifp;
317 
318 	/* Remove d from the interface's descriptor list. */
319 	SLIST_REMOVE(&bp->bif_dlist, d, bpf_d, bd_next);
320 
321 	if (SLIST_EMPTY(&bp->bif_dlist)) {
322 		/*
323 		 * Let the driver know that there are no more listeners.
324 		 */
325 		*bp->bif_driverp = NULL;
326 	}
327 	d->bd_bif = NULL;
328 
329 	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
330 
331 	/*
332 	 * Check if this descriptor had requested promiscuous mode.
333 	 * If so, turn it off.
334 	 */
335 	if (d->bd_promisc) {
336 		d->bd_promisc = 0;
337 		error = ifpromisc(ifp, 0);
338 		if (error != 0 && error != ENXIO) {
339 			/*
340 			 * ENXIO can happen if a pccard is unplugged,
341 			 * Something is really wrong if we were able to put
342 			 * the driver into promiscuous mode, but can't
343 			 * take it out.
344 			 */
345 			if_printf(ifp, "bpf_detach: ifpromisc failed(%d)\n",
346 				  error);
347 		}
348 	}
349 	lwkt_reltoken(&bpf_token);
350 }
351 
352 /*
353  * Open ethernet device.  Returns ENXIO for illegal minor device number,
354  * EBUSY if file is open by another process.
355  */
356 /* ARGSUSED */
357 static int
358 bpfopen(struct dev_open_args *ap)
359 {
360 	cdev_t dev = ap->a_head.a_dev;
361 	struct bpf_d *d;
362 
363 	lwkt_gettoken(&bpf_token);
364 	if (ap->a_cred->cr_prison) {
365 		lwkt_reltoken(&bpf_token);
366 		return(EPERM);
367 	}
368 
369 	d = dev->si_drv1;
370 	/*
371 	 * Each minor can be opened by only one process.  If the requested
372 	 * minor is in use, return EBUSY.
373 	 */
374 	if (d != NULL) {
375 		lwkt_reltoken(&bpf_token);
376 		return(EBUSY);
377 	}
378 
379 	d = kmalloc(sizeof *d, M_BPF, M_WAITOK | M_ZERO);
380 	dev->si_drv1 = d;
381 	d->bd_bufsize = bpf_bufsize;
382 	d->bd_sig = SIGIO;
383 	d->bd_seesent = 1;
384 	callout_init(&d->bd_callout);
385 	lwkt_reltoken(&bpf_token);
386 
387 	return(0);
388 }
389 
390 static int
391 bpfclone(struct dev_clone_args *ap)
392 {
393 	int unit;
394 
395 	unit = devfs_clone_bitmap_get(&DEVFS_CLONE_BITMAP(bpf), 0);
396 	ap->a_dev = make_only_dev(&bpf_ops, unit, 0, 0, 0600, "bpf%d", unit);
397 
398 	return 0;
399 }
400 
401 /*
402  * Close the descriptor by detaching it from its interface,
403  * deallocating its buffers, and marking it free.
404  */
405 /* ARGSUSED */
406 static int
407 bpfclose(struct dev_close_args *ap)
408 {
409 	cdev_t dev = ap->a_head.a_dev;
410 	struct bpf_d *d = dev->si_drv1;
411 
412 	lwkt_gettoken(&bpf_token);
413 	funsetown(&d->bd_sigio);
414 	if (d->bd_state == BPF_WAITING)
415 		callout_stop(&d->bd_callout);
416 	d->bd_state = BPF_IDLE;
417 	if (d->bd_bif != NULL)
418 		bpf_detachd(d);
419 	bpf_freed(d);
420 	dev->si_drv1 = NULL;
421 	if (dev->si_uminor >= BPF_PREALLOCATED_UNITS) {
422 		devfs_clone_bitmap_put(&DEVFS_CLONE_BITMAP(bpf), dev->si_uminor);
423 		destroy_dev(dev);
424 	}
425 	kfree(d, M_BPF);
426 	lwkt_reltoken(&bpf_token);
427 
428 	return(0);
429 }
430 
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe under an unbraced if/else.  Invoke
 * it with a trailing semicolon: ROTATE_BUFFERS(d);
 */
#define ROTATE_BUFFERS(d) \
	do { \
		(d)->bd_hbuf = (d)->bd_sbuf; \
		(d)->bd_hlen = (d)->bd_slen; \
		(d)->bd_sbuf = (d)->bd_fbuf; \
		(d)->bd_slen = 0; \
		(d)->bd_fbuf = NULL; \
	} while (0)
442 /*
443  *  bpfread - read next chunk of packets from buffers
444  */
445 static int
446 bpfread(struct dev_read_args *ap)
447 {
448 	cdev_t dev = ap->a_head.a_dev;
449 	struct bpf_d *d = dev->si_drv1;
450 	int timed_out;
451 	int error;
452 
453 	lwkt_gettoken(&bpf_token);
454 	/*
455 	 * Restrict application to use a buffer the same size as
456 	 * as kernel buffers.
457 	 */
458 	if (ap->a_uio->uio_resid != d->bd_bufsize) {
459 		lwkt_reltoken(&bpf_token);
460 		return(EINVAL);
461 	}
462 
463 	if (d->bd_state == BPF_WAITING)
464 		callout_stop(&d->bd_callout);
465 	timed_out = (d->bd_state == BPF_TIMED_OUT);
466 	d->bd_state = BPF_IDLE;
467 	/*
468 	 * If the hold buffer is empty, then do a timed sleep, which
469 	 * ends when the timeout expires or when enough packets
470 	 * have arrived to fill the store buffer.
471 	 */
472 	while (d->bd_hbuf == NULL) {
473 		if ((d->bd_immediate || (ap->a_ioflag & IO_NDELAY) || timed_out)
474 		    && d->bd_slen != 0) {
475 			/*
476 			 * A packet(s) either arrived since the previous,
477 			 * We're in immediate mode, or are reading
478 			 * in non-blocking mode, and a packet(s)
479 			 * either arrived since the previous
480 			 * read or arrived while we were asleep.
481 			 * Rotate the buffers and return what's here.
482 			 */
483 			ROTATE_BUFFERS(d);
484 			break;
485 		}
486 
487 		/*
488 		 * No data is available, check to see if the bpf device
489 		 * is still pointed at a real interface.  If not, return
490 		 * ENXIO so that the userland process knows to rebind
491 		 * it before using it again.
492 		 */
493 		if (d->bd_bif == NULL) {
494 			lwkt_reltoken(&bpf_token);
495 			return(ENXIO);
496 		}
497 
498 		if (ap->a_ioflag & IO_NDELAY) {
499 			lwkt_reltoken(&bpf_token);
500 			return(EWOULDBLOCK);
501 		}
502 		error = tsleep(d, PCATCH, "bpf", d->bd_rtout);
503 		if (error == EINTR || error == ERESTART) {
504 			lwkt_reltoken(&bpf_token);
505 			return(error);
506 		}
507 		if (error == EWOULDBLOCK) {
508 			/*
509 			 * On a timeout, return what's in the buffer,
510 			 * which may be nothing.  If there is something
511 			 * in the store buffer, we can rotate the buffers.
512 			 */
513 			if (d->bd_hbuf)
514 				/*
515 				 * We filled up the buffer in between
516 				 * getting the timeout and arriving
517 				 * here, so we don't need to rotate.
518 				 */
519 				break;
520 
521 			if (d->bd_slen == 0) {
522 				lwkt_reltoken(&bpf_token);
523 				return(0);
524 			}
525 			ROTATE_BUFFERS(d);
526 			break;
527 		}
528 	}
529 	/*
530 	 * At this point, we know we have something in the hold slot.
531 	 */
532 
533 	/*
534 	 * Move data from hold buffer into user space.
535 	 * We know the entire buffer is transferred since
536 	 * we checked above that the read buffer is bpf_bufsize bytes.
537 	 */
538 	error = uiomove(d->bd_hbuf, d->bd_hlen, ap->a_uio);
539 
540 	d->bd_fbuf = d->bd_hbuf;
541 	d->bd_hbuf = NULL;
542 	d->bd_hlen = 0;
543 	lwkt_reltoken(&bpf_token);
544 
545 	return(error);
546 }
547 
548 
549 /*
550  * If there are processes sleeping on this descriptor, wake them up.
551  */
552 static void
553 bpf_wakeup(struct bpf_d *d)
554 {
555 	if (d->bd_state == BPF_WAITING) {
556 		callout_stop(&d->bd_callout);
557 		d->bd_state = BPF_IDLE;
558 	}
559 	wakeup(d);
560 	if (d->bd_async && d->bd_sig && d->bd_sigio)
561 		pgsigio(d->bd_sigio, d->bd_sig, 0);
562 
563 	KNOTE(&d->bd_kq.ki_note, 0);
564 }
565 
566 static void
567 bpf_timed_out(void *arg)
568 {
569 	struct bpf_d *d = (struct bpf_d *)arg;
570 
571 	if (d->bd_state == BPF_WAITING) {
572 		d->bd_state = BPF_TIMED_OUT;
573 		if (d->bd_slen != 0)
574 			bpf_wakeup(d);
575 	}
576 }
577 
578 static void
579 bpf_output_dispatch(netmsg_t msg)
580 {
581 	struct netmsg_bpf_output *bmsg = (struct netmsg_bpf_output *)msg;
582 	struct ifnet *ifp = bmsg->nm_ifp;
583 	int error;
584 
585 	/*
586 	 * The driver frees the mbuf.
587 	 */
588 	error = ifp->if_output(ifp, bmsg->nm_mbuf, bmsg->nm_dst, NULL);
589 	lwkt_replymsg(&msg->lmsg, error);
590 }
591 
592 static int
593 bpfwrite(struct dev_write_args *ap)
594 {
595 	cdev_t dev = ap->a_head.a_dev;
596 	struct bpf_d *d = dev->si_drv1;
597 	struct ifnet *ifp;
598 	struct mbuf *m;
599 	int error, ret;
600 	struct sockaddr dst;
601 	int datlen;
602 	struct netmsg_bpf_output bmsg;
603 
604 	lwkt_gettoken(&bpf_token);
605 	if (d->bd_bif == NULL) {
606 		lwkt_reltoken(&bpf_token);
607 		return(ENXIO);
608 	}
609 
610 	ifp = d->bd_bif->bif_ifp;
611 
612 	if (ap->a_uio->uio_resid == 0) {
613 		lwkt_reltoken(&bpf_token);
614 		return(0);
615 	}
616 
617 	error = bpf_movein(ap->a_uio, (int)d->bd_bif->bif_dlt, &m,
618 			   &dst, &datlen, d->bd_wfilter);
619 	if (error) {
620 		lwkt_reltoken(&bpf_token);
621 		return(error);
622 	}
623 
624 	if (datlen > ifp->if_mtu) {
625 		m_freem(m);
626 		lwkt_reltoken(&bpf_token);
627 		return(EMSGSIZE);
628 	}
629 
630 	if (d->bd_hdrcmplt)
631 		dst.sa_family = pseudo_AF_HDRCMPLT;
632 
633 	netmsg_init(&bmsg.base, NULL, &curthread->td_msgport,
634 		    0, bpf_output_dispatch);
635 	bmsg.nm_mbuf = m;
636 	bmsg.nm_ifp = ifp;
637 	bmsg.nm_dst = &dst;
638 
639 	ret = lwkt_domsg(netisr_cpuport(0), &bmsg.base.lmsg, 0);
640 	lwkt_reltoken(&bpf_token);
641 
642 	return ret;
643 }
644 
645 /*
646  * Reset a descriptor by flushing its packet buffer and clearing the
647  * receive and drop counts.  Should be called at splimp.
648  */
649 static void
650 bpf_resetd(struct bpf_d *d)
651 {
652 	if (d->bd_hbuf) {
653 		/* Free the hold buffer. */
654 		d->bd_fbuf = d->bd_hbuf;
655 		d->bd_hbuf = NULL;
656 	}
657 	d->bd_slen = 0;
658 	d->bd_hlen = 0;
659 	d->bd_rcount = 0;
660 	d->bd_dcount = 0;
661 }
662 
663 /*
664  *  FIONREAD		Check for read packet available.
665  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
666  *  BIOCGBLEN		Get buffer len [for read()].
667  *  BIOCSETF		Set ethernet read filter.
668  *  BIOCSETWF		Set ethernet write filter.
669  *  BIOCFLUSH		Flush read packet buffer.
670  *  BIOCPROMISC		Put interface into promiscuous mode.
671  *  BIOCGDLT		Get link layer type.
672  *  BIOCGETIF		Get interface name.
673  *  BIOCSETIF		Set interface.
674  *  BIOCSRTIMEOUT	Set read timeout.
675  *  BIOCGRTIMEOUT	Get read timeout.
676  *  BIOCGSTATS		Get packet stats.
677  *  BIOCIMMEDIATE	Set immediate mode.
678  *  BIOCVERSION		Get filter language version.
679  *  BIOCGHDRCMPLT	Get "header already complete" flag
680  *  BIOCSHDRCMPLT	Set "header already complete" flag
681  *  BIOCGSEESENT	Get "see packets sent" flag
682  *  BIOCSSEESENT	Set "see packets sent" flag
683  *  BIOCLOCK		Set "locked" flag
684  */
685 /* ARGSUSED */
686 static int
687 bpfioctl(struct dev_ioctl_args *ap)
688 {
689 	cdev_t dev = ap->a_head.a_dev;
690 	struct bpf_d *d = dev->si_drv1;
691 	int error = 0;
692 
693 	lwkt_gettoken(&bpf_token);
694 	if (d->bd_state == BPF_WAITING)
695 		callout_stop(&d->bd_callout);
696 	d->bd_state = BPF_IDLE;
697 
698 	if (d->bd_locked == 1) {
699 		switch (ap->a_cmd) {
700 		case BIOCGBLEN:
701 		case BIOCFLUSH:
702 		case BIOCGDLT:
703 		case BIOCGDLTLIST:
704 		case BIOCGETIF:
705 		case BIOCGRTIMEOUT:
706 		case BIOCGSTATS:
707 		case BIOCVERSION:
708 		case BIOCGRSIG:
709 		case BIOCGHDRCMPLT:
710 		case FIONREAD:
711 		case BIOCLOCK:
712 		case BIOCSRTIMEOUT:
713 		case BIOCIMMEDIATE:
714 		case TIOCGPGRP:
715 			break;
716 		default:
717 			lwkt_reltoken(&bpf_token);
718 			return (EPERM);
719 		}
720 	}
721 	switch (ap->a_cmd) {
722 	default:
723 		error = EINVAL;
724 		break;
725 
726 	/*
727 	 * Check for read packet available.
728 	 */
729 	case FIONREAD:
730 		{
731 			int n;
732 
733 			n = d->bd_slen;
734 			if (d->bd_hbuf)
735 				n += d->bd_hlen;
736 
737 			*(int *)ap->a_data = n;
738 			break;
739 		}
740 
741 	case SIOCGIFADDR:
742 		{
743 			struct ifnet *ifp;
744 
745 			if (d->bd_bif == NULL) {
746 				error = EINVAL;
747 			} else {
748 				ifp = d->bd_bif->bif_ifp;
749 				ifnet_serialize_all(ifp);
750 				error = ifp->if_ioctl(ifp, ap->a_cmd,
751 						      ap->a_data, ap->a_cred);
752 				ifnet_deserialize_all(ifp);
753 			}
754 			break;
755 		}
756 
757 	/*
758 	 * Get buffer len [for read()].
759 	 */
760 	case BIOCGBLEN:
761 		*(u_int *)ap->a_data = d->bd_bufsize;
762 		break;
763 
764 	/*
765 	 * Set buffer length.
766 	 */
767 	case BIOCSBLEN:
768 		if (d->bd_bif != NULL) {
769 			error = EINVAL;
770 		} else {
771 			u_int size = *(u_int *)ap->a_data;
772 
773 			if (size > bpf_maxbufsize)
774 				*(u_int *)ap->a_data = size = bpf_maxbufsize;
775 			else if (size < BPF_MINBUFSIZE)
776 				*(u_int *)ap->a_data = size = BPF_MINBUFSIZE;
777 			d->bd_bufsize = size;
778 		}
779 		break;
780 
781 	/*
782 	 * Set link layer read filter.
783 	 */
784 	case BIOCSETF:
785 	case BIOCSETWF:
786 		error = bpf_setf(d, (struct bpf_program *)ap->a_data,
787 			ap->a_cmd);
788 		break;
789 
790 	/*
791 	 * Flush read packet buffer.
792 	 */
793 	case BIOCFLUSH:
794 		bpf_resetd(d);
795 		break;
796 
797 	/*
798 	 * Put interface into promiscuous mode.
799 	 */
800 	case BIOCPROMISC:
801 		if (d->bd_bif == NULL) {
802 			/*
803 			 * No interface attached yet.
804 			 */
805 			error = EINVAL;
806 			break;
807 		}
808 		if (d->bd_promisc == 0) {
809 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
810 			if (error == 0)
811 				d->bd_promisc = 1;
812 		}
813 		break;
814 
815 	/*
816 	 * Get device parameters.
817 	 */
818 	case BIOCGDLT:
819 		if (d->bd_bif == NULL)
820 			error = EINVAL;
821 		else
822 			*(u_int *)ap->a_data = d->bd_bif->bif_dlt;
823 		break;
824 
825 	/*
826 	 * Get a list of supported data link types.
827 	 */
828 	case BIOCGDLTLIST:
829 		if (d->bd_bif == NULL) {
830 			error = EINVAL;
831 		} else {
832 			error = bpf_getdltlist(d,
833 				(struct bpf_dltlist *)ap->a_data);
834 		}
835 		break;
836 
837 	/*
838 	 * Set data link type.
839 	 */
840 	case BIOCSDLT:
841 		if (d->bd_bif == NULL)
842 			error = EINVAL;
843 		else
844 			error = bpf_setdlt(d, *(u_int *)ap->a_data);
845 		break;
846 
847 	/*
848 	 * Get interface name.
849 	 */
850 	case BIOCGETIF:
851 		if (d->bd_bif == NULL) {
852 			error = EINVAL;
853 		} else {
854 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
855 			struct ifreq *const ifr = (struct ifreq *)ap->a_data;
856 
857 			strlcpy(ifr->ifr_name, ifp->if_xname,
858 				sizeof ifr->ifr_name);
859 		}
860 		break;
861 
862 	/*
863 	 * Set interface.
864 	 */
865 	case BIOCSETIF:
866 		error = bpf_setif(d, (struct ifreq *)ap->a_data);
867 		break;
868 
869 	/*
870 	 * Set read timeout.
871 	 */
872 	case BIOCSRTIMEOUT:
873 		{
874 			struct timeval *tv = (struct timeval *)ap->a_data;
875 
876 			/*
877 			 * Subtract 1 tick from tvtohz() since this isn't
878 			 * a one-shot timer.
879 			 */
880 			if ((error = itimerfix(tv)) == 0)
881 				d->bd_rtout = tvtohz_low(tv);
882 			break;
883 		}
884 
885 	/*
886 	 * Get read timeout.
887 	 */
888 	case BIOCGRTIMEOUT:
889 		{
890 			struct timeval *tv = (struct timeval *)ap->a_data;
891 
892 			tv->tv_sec = d->bd_rtout / hz;
893 			tv->tv_usec = (d->bd_rtout % hz) * ustick;
894 			break;
895 		}
896 
897 	/*
898 	 * Get packet stats.
899 	 */
900 	case BIOCGSTATS:
901 		{
902 			struct bpf_stat *bs = (struct bpf_stat *)ap->a_data;
903 
904 			bs->bs_recv = d->bd_rcount;
905 			bs->bs_drop = d->bd_dcount;
906 			break;
907 		}
908 
909 	/*
910 	 * Set immediate mode.
911 	 */
912 	case BIOCIMMEDIATE:
913 		d->bd_immediate = *(u_int *)ap->a_data;
914 		break;
915 
916 	case BIOCVERSION:
917 		{
918 			struct bpf_version *bv = (struct bpf_version *)ap->a_data;
919 
920 			bv->bv_major = BPF_MAJOR_VERSION;
921 			bv->bv_minor = BPF_MINOR_VERSION;
922 			break;
923 		}
924 
925 	/*
926 	 * Get "header already complete" flag
927 	 */
928 	case BIOCGHDRCMPLT:
929 		*(u_int *)ap->a_data = d->bd_hdrcmplt;
930 		break;
931 
932 	/*
933 	 * Set "header already complete" flag
934 	 */
935 	case BIOCSHDRCMPLT:
936 		d->bd_hdrcmplt = *(u_int *)ap->a_data ? 1 : 0;
937 		break;
938 
939 	/*
940 	 * Get "see sent packets" flag
941 	 */
942 	case BIOCGSEESENT:
943 		*(u_int *)ap->a_data = d->bd_seesent;
944 		break;
945 
946 	/*
947 	 * Set "see sent packets" flag
948 	 */
949 	case BIOCSSEESENT:
950 		d->bd_seesent = *(u_int *)ap->a_data;
951 		break;
952 
953 	case FIOASYNC:		/* Send signal on receive packets */
954 		d->bd_async = *(int *)ap->a_data;
955 		break;
956 
957 	case FIOSETOWN:
958 		error = fsetown(*(int *)ap->a_data, &d->bd_sigio);
959 		break;
960 
961 	case FIOGETOWN:
962 		*(int *)ap->a_data = fgetown(&d->bd_sigio);
963 		break;
964 
965 	/* This is deprecated, FIOSETOWN should be used instead. */
966 	case TIOCSPGRP:
967 		error = fsetown(-(*(int *)ap->a_data), &d->bd_sigio);
968 		break;
969 
970 	/* This is deprecated, FIOGETOWN should be used instead. */
971 	case TIOCGPGRP:
972 		*(int *)ap->a_data = -fgetown(&d->bd_sigio);
973 		break;
974 
975 	case BIOCSRSIG:		/* Set receive signal */
976 		{
977 			u_int sig;
978 
979 			sig = *(u_int *)ap->a_data;
980 
981 			if (sig >= NSIG)
982 				error = EINVAL;
983 			else
984 				d->bd_sig = sig;
985 			break;
986 		}
987 	case BIOCGRSIG:
988 		*(u_int *)ap->a_data = d->bd_sig;
989 		break;
990 	case BIOCLOCK:
991 		d->bd_locked = 1;
992 		break;
993 	}
994 	lwkt_reltoken(&bpf_token);
995 
996 	return(error);
997 }
998 
999 /*
1000  * Set d's packet filter program to fp.  If this file already has a filter,
1001  * free it and replace it.  Returns EINVAL for bogus requests.
1002  */
1003 static int
1004 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1005 {
1006 	struct bpf_insn *fcode, *old;
1007 	u_int wfilter, flen, size;
1008 
1009 	if (cmd == BIOCSETWF) {
1010 		old = d->bd_wfilter;
1011 		wfilter = 1;
1012 	} else {
1013 		wfilter = 0;
1014 		old = d->bd_rfilter;
1015 	}
1016 	if (fp->bf_insns == NULL) {
1017 		if (fp->bf_len != 0)
1018 			return(EINVAL);
1019 		if (wfilter)
1020 			d->bd_wfilter = NULL;
1021 		else
1022 			d->bd_rfilter = NULL;
1023 		bpf_resetd(d);
1024 		if (old != NULL)
1025 			kfree(old, M_BPF);
1026 		return(0);
1027 	}
1028 	flen = fp->bf_len;
1029 	if (flen > BPF_MAXINSNS)
1030 		return(EINVAL);
1031 
1032 	size = flen * sizeof *fp->bf_insns;
1033 	fcode = (struct bpf_insn *)kmalloc(size, M_BPF, M_WAITOK);
1034 	if (copyin(fp->bf_insns, fcode, size) == 0 &&
1035 	    bpf_validate(fcode, (int)flen)) {
1036 		if (wfilter)
1037 			d->bd_wfilter = fcode;
1038 		else
1039 			d->bd_rfilter = fcode;
1040 		bpf_resetd(d);
1041 		if (old != NULL)
1042 			kfree(old, M_BPF);
1043 
1044 		return(0);
1045 	}
1046 	kfree(fcode, M_BPF);
1047 	return(EINVAL);
1048 }
1049 
1050 /*
1051  * Detach a file from its current interface (if attached at all) and attach
1052  * to the interface indicated by the name stored in ifr.
1053  * Return an errno or 0.
1054  */
1055 static int
1056 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1057 {
1058 	struct bpf_if *bp;
1059 	int error;
1060 	struct ifnet *theywant;
1061 
1062 	ifnet_lock();
1063 
1064 	theywant = ifunit(ifr->ifr_name);
1065 	if (theywant == NULL) {
1066 		ifnet_unlock();
1067 		return(ENXIO);
1068 	}
1069 
1070 	/*
1071 	 * Look through attached interfaces for the named one.
1072 	 */
1073 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1074 		struct ifnet *ifp = bp->bif_ifp;
1075 
1076 		if (ifp == NULL || ifp != theywant)
1077 			continue;
1078 		/* skip additional entry */
1079 		if (bp->bif_driverp != &ifp->if_bpf)
1080 			continue;
1081 		/*
1082 		 * We found the requested interface.
1083 		 * Allocate the packet buffers if we need to.
1084 		 * If we're already attached to requested interface,
1085 		 * just flush the buffer.
1086 		 */
1087 		if (d->bd_sbuf == NULL) {
1088 			error = bpf_allocbufs(d);
1089 			if (error != 0) {
1090 				ifnet_unlock();
1091 				return(error);
1092 			}
1093 		}
1094 		if (bp != d->bd_bif) {
1095 			if (d->bd_bif != NULL) {
1096 				/*
1097 				 * Detach if attached to something else.
1098 				 */
1099 				bpf_detachd(d);
1100 			}
1101 
1102 			bpf_attachd(d, bp);
1103 		}
1104 		bpf_resetd(d);
1105 
1106 		ifnet_unlock();
1107 		return(0);
1108 	}
1109 
1110 	ifnet_unlock();
1111 
1112 	/* Not found. */
1113 	return(ENXIO);
1114 }
1115 
1116 static struct filterops bpf_read_filtops =
1117 	{ FILTEROP_ISFD, NULL, bpf_filter_detach, bpf_filter_read };
1118 
1119 static int
1120 bpfkqfilter(struct dev_kqfilter_args *ap)
1121 {
1122 	cdev_t dev = ap->a_head.a_dev;
1123 	struct knote *kn = ap->a_kn;
1124 	struct klist *klist;
1125 	struct bpf_d *d;
1126 
1127 	lwkt_gettoken(&bpf_token);
1128 	d = dev->si_drv1;
1129 	if (d->bd_bif == NULL) {
1130 		ap->a_result = 1;
1131 		lwkt_reltoken(&bpf_token);
1132 		return (0);
1133 	}
1134 
1135 	ap->a_result = 0;
1136 	switch (kn->kn_filter) {
1137 	case EVFILT_READ:
1138 		kn->kn_fop = &bpf_read_filtops;
1139 		kn->kn_hook = (caddr_t)d;
1140 		break;
1141 	default:
1142 		ap->a_result = EOPNOTSUPP;
1143 		lwkt_reltoken(&bpf_token);
1144 		return (0);
1145 	}
1146 
1147 	klist = &d->bd_kq.ki_note;
1148 	knote_insert(klist, kn);
1149 	lwkt_reltoken(&bpf_token);
1150 
1151 	return (0);
1152 }
1153 
1154 static void
1155 bpf_filter_detach(struct knote *kn)
1156 {
1157 	struct klist *klist;
1158 	struct bpf_d *d;
1159 
1160 	d = (struct bpf_d *)kn->kn_hook;
1161 	klist = &d->bd_kq.ki_note;
1162 	knote_remove(klist, kn);
1163 }
1164 
1165 static int
1166 bpf_filter_read(struct knote *kn, long hint)
1167 {
1168 	struct bpf_d *d;
1169 	int ready = 0;
1170 
1171 	d = (struct bpf_d *)kn->kn_hook;
1172 	if (d->bd_hlen != 0 ||
1173 	    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1174 	    d->bd_slen != 0)) {
1175 		ready = 1;
1176 	} else {
1177 		/* Start the read timeout if necessary. */
1178 		if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1179 			callout_reset(&d->bd_callout, d->bd_rtout,
1180 			    bpf_timed_out, d);
1181 			d->bd_state = BPF_WAITING;
1182 		}
1183 	}
1184 
1185 	return (ready);
1186 }
1187 
1188 
1189 /*
1190  * Process the packet pkt of length pktlen.  The packet is parsed
1191  * by each listener's filter, and if accepted, stashed into the
1192  * corresponding buffer.
1193  */
1194 void
1195 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1196 {
1197 	struct bpf_d *d;
1198 	struct timeval tv;
1199 	int gottime = 0;
1200 	u_int slen;
1201 
1202 	lwkt_gettoken(&bpf_token);
1203 	/* Re-check */
1204 	if (bp == NULL) {
1205 		lwkt_reltoken(&bpf_token);
1206 		return;
1207 	}
1208 
1209 	/*
1210 	 * Note that the ipl does not have to be raised at this point.
1211 	 * The only problem that could arise here is that if two different
1212 	 * interfaces shared any data.  This is not the case.
1213 	 */
1214 	SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1215 		++d->bd_rcount;
1216 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
1217 		if (slen != 0) {
1218 			if (!gottime) {
1219 				microtime(&tv);
1220 				gottime = 1;
1221 			}
1222 			catchpacket(d, pkt, pktlen, slen, ovbcopy, &tv);
1223 		}
1224 	}
1225 	lwkt_reltoken(&bpf_token);
1226 }
1227 
1228 /*
1229  * Copy data from an mbuf chain into a buffer.  This code is derived
1230  * from m_copydata in sys/uipc_mbuf.c.
1231  */
1232 static void
1233 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1234 {
1235 	const struct mbuf *m;
1236 	u_int count;
1237 	u_char *dst;
1238 
1239 	m = src_arg;
1240 	dst = dst_arg;
1241 	while (len > 0) {
1242 		if (m == NULL)
1243 			panic("bpf_mcopy");
1244 		count = min(m->m_len, len);
1245 		bcopy(mtod(m, void *), dst, count);
1246 		m = m->m_next;
1247 		dst += count;
1248 		len -= count;
1249 	}
1250 }
1251 
1252 /*
1253  * Process the packet in the mbuf chain m.  The packet is parsed by each
1254  * listener's filter, and if accepted, stashed into the corresponding
1255  * buffer.
1256  */
1257 void
1258 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1259 {
1260 	struct bpf_d *d;
1261 	u_int pktlen, slen;
1262 	struct timeval tv;
1263 	int gottime = 0;
1264 
1265 	lwkt_gettoken(&bpf_token);
1266 	/* Re-check */
1267 	if (bp == NULL) {
1268 		lwkt_reltoken(&bpf_token);
1269 		return;
1270 	}
1271 
1272 	/* Don't compute pktlen, if no descriptor is attached. */
1273 	if (SLIST_EMPTY(&bp->bif_dlist)) {
1274 		lwkt_reltoken(&bpf_token);
1275 		return;
1276 	}
1277 
1278 	pktlen = m_lengthm(m, NULL);
1279 
1280 	SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1281 		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1282 			continue;
1283 		++d->bd_rcount;
1284 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
1285 		if (slen != 0) {
1286 			if (!gottime) {
1287 				microtime(&tv);
1288 				gottime = 1;
1289 			}
1290 			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy,
1291 				    &tv);
1292 		}
1293 	}
1294 	lwkt_reltoken(&bpf_token);
1295 }
1296 
1297 /*
1298  * Incoming linkage from device drivers, where we have a mbuf chain
1299  * but need to prepend some arbitrary header from a linear buffer.
1300  *
1301  * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
1302  * struct m_hdr on the stack.  This is safe as bpf only reads from the
1303  * fields in this header that we initialize, and will not try to free
1304  * it or keep a pointer to it.
1305  */
1306 void
1307 bpf_mtap_hdr(struct bpf_if *arg, caddr_t data, u_int dlen, struct mbuf *m,
1308     u_int direction)
1309 {
1310 	struct m_hdr mh;
1311 
1312 	mh.mh_flags = 0;
1313 	mh.mh_next = m;
1314 	mh.mh_len = dlen;
1315 	mh.mh_data = data;
1316 
1317 	bpf_mtap(arg, (struct mbuf *) &mh);
1318 }
1319 
1320 void
1321 bpf_mtap_family(struct bpf_if *bp, struct mbuf *m, sa_family_t family)
1322 {
1323 	u_int family4;
1324 
1325 	KKASSERT(family != AF_UNSPEC);
1326 
1327 	family4 = (u_int)family;
1328 	bpf_ptap(bp, m, &family4, sizeof(family4));
1329 }
1330 
1331 /*
1332  * Process the packet in the mbuf chain m with the header in m prepended.
1333  * The packet is parsed by each listener's filter, and if accepted,
1334  * stashed into the corresponding buffer.
1335  */
1336 void
1337 bpf_ptap(struct bpf_if *bp, struct mbuf *m, const void *data, u_int dlen)
1338 {
1339 	struct mbuf mb;
1340 
1341 	/*
1342 	 * Craft on-stack mbuf suitable for passing to bpf_mtap.
1343 	 * Note that we cut corners here; we only setup what's
1344 	 * absolutely needed--this mbuf should never go anywhere else.
1345 	 */
1346 	mb.m_next = m;
1347 	mb.m_data = __DECONST(void *, data); /* LINTED */
1348 	mb.m_len = dlen;
1349 	mb.m_pkthdr.rcvif = m->m_pkthdr.rcvif;
1350 
1351 	bpf_mtap(bp, &mb);
1352 }
1353 
1354 /*
1355  * Move the packet data from interface memory (pkt) into the
1356  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1357  * otherwise 0.  "copy" is the routine called to do the actual data
1358  * transfer.  bcopy is passed in to copy contiguous chunks, while
1359  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1360  * pkt is really an mbuf.
1361  */
1362 static void
1363 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1364 	    void (*cpfn)(const void *, void *, size_t),
1365 	    const struct timeval *tv)
1366 {
1367 	struct bpf_hdr *hp;
1368 	int totlen, curlen;
1369 	int hdrlen = d->bd_bif->bif_hdrlen;
1370 	int wakeup = 0;
1371 	/*
1372 	 * Figure out how many bytes to move.  If the packet is
1373 	 * greater or equal to the snapshot length, transfer that
1374 	 * much.  Otherwise, transfer the whole packet (unless
1375 	 * we hit the buffer size limit).
1376 	 */
1377 	totlen = hdrlen + min(snaplen, pktlen);
1378 	if (totlen > d->bd_bufsize)
1379 		totlen = d->bd_bufsize;
1380 
1381 	/*
1382 	 * Round up the end of the previous packet to the next longword.
1383 	 */
1384 	curlen = BPF_WORDALIGN(d->bd_slen);
1385 	if (curlen + totlen > d->bd_bufsize) {
1386 		/*
1387 		 * This packet will overflow the storage buffer.
1388 		 * Rotate the buffers if we can, then wakeup any
1389 		 * pending reads.
1390 		 */
1391 		if (d->bd_fbuf == NULL) {
1392 			/*
1393 			 * We haven't completed the previous read yet,
1394 			 * so drop the packet.
1395 			 */
1396 			++d->bd_dcount;
1397 			return;
1398 		}
1399 		ROTATE_BUFFERS(d);
1400 		wakeup = 1;
1401 		curlen = 0;
1402 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
1403 		/*
1404 		 * Immediate mode is set, or the read timeout has
1405 		 * already expired during a select call.  A packet
1406 		 * arrived, so the reader should be woken up.
1407 		 */
1408 		wakeup = 1;
1409 	}
1410 
1411 	/*
1412 	 * Append the bpf header.
1413 	 */
1414 	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1415 	hp->bh_tstamp = *tv;
1416 	hp->bh_datalen = pktlen;
1417 	hp->bh_hdrlen = hdrlen;
1418 	/*
1419 	 * Copy the packet data into the store buffer and update its length.
1420 	 */
1421 	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
1422 	d->bd_slen = curlen + totlen;
1423 
1424 	if (wakeup)
1425 		bpf_wakeup(d);
1426 }
1427 
1428 /*
1429  * Initialize all nonzero fields of a descriptor.
1430  */
1431 static int
1432 bpf_allocbufs(struct bpf_d *d)
1433 {
1434 	d->bd_fbuf = kmalloc(d->bd_bufsize, M_BPF, M_WAITOK);
1435 	d->bd_sbuf = kmalloc(d->bd_bufsize, M_BPF, M_WAITOK);
1436 	d->bd_slen = 0;
1437 	d->bd_hlen = 0;
1438 	return(0);
1439 }
1440 
1441 /*
1442  * Free buffers and packet filter program currently in use by a descriptor.
1443  * Called on close.
1444  */
1445 static void
1446 bpf_freed(struct bpf_d *d)
1447 {
1448 	/*
1449 	 * We don't need to lock out interrupts since this descriptor has
1450 	 * been detached from its interface and it yet hasn't been marked
1451 	 * free.
1452 	 */
1453 	if (d->bd_sbuf != NULL) {
1454 		kfree(d->bd_sbuf, M_BPF);
1455 		if (d->bd_hbuf != NULL)
1456 			kfree(d->bd_hbuf, M_BPF);
1457 		if (d->bd_fbuf != NULL)
1458 			kfree(d->bd_fbuf, M_BPF);
1459 	}
1460 	if (d->bd_rfilter)
1461 		kfree(d->bd_rfilter, M_BPF);
1462 	if (d->bd_wfilter)
1463 		kfree(d->bd_wfilter, M_BPF);
1464 }
1465 
1466 /*
1467  * Attach an interface to bpf.  ifp is a pointer to the structure
1468  * defining the interface to be attached, dlt is the link layer type,
1469  * and hdrlen is the fixed size of the link header (variable length
1470  * headers are not yet supported).
1471  */
1472 void
1473 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1474 {
1475 	bpfattach_dlt(ifp, dlt, hdrlen, &ifp->if_bpf);
1476 }
1477 
1478 void
1479 bpfattach_dlt(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1480 {
1481 	struct bpf_if *bp;
1482 
1483 	bp = kmalloc(sizeof *bp, M_BPF, M_WAITOK | M_ZERO);
1484 
1485 	lwkt_gettoken(&bpf_token);
1486 
1487 	SLIST_INIT(&bp->bif_dlist);
1488 	bp->bif_ifp = ifp;
1489 	bp->bif_dlt = dlt;
1490 	bp->bif_driverp = driverp;
1491 	*bp->bif_driverp = NULL;
1492 
1493 	bp->bif_next = bpf_iflist;
1494 	bpf_iflist = bp;
1495 
1496 	/*
1497 	 * Compute the length of the bpf header.  This is not necessarily
1498 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1499 	 * that the network layer header begins on a longword boundary (for
1500 	 * performance reasons and to alleviate alignment restrictions).
1501 	 */
1502 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1503 
1504 	lwkt_reltoken(&bpf_token);
1505 
1506 	if (bootverbose)
1507 		if_printf(ifp, "bpf attached\n");
1508 }
1509 
1510 /*
1511  * Detach bpf from an interface.  This involves detaching each descriptor
1512  * associated with the interface, and leaving bd_bif NULL.  Notify each
1513  * descriptor as it's detached so that any sleepers wake up and get
1514  * ENXIO.
1515  */
1516 void
1517 bpfdetach(struct ifnet *ifp)
1518 {
1519 	struct bpf_if *bp, *bp_prev;
1520 	struct bpf_d *d;
1521 
1522 	lwkt_gettoken(&bpf_token);
1523 
1524 	/* Locate BPF interface information */
1525 	bp_prev = NULL;
1526 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1527 		if (ifp == bp->bif_ifp)
1528 			break;
1529 		bp_prev = bp;
1530 	}
1531 
1532 	/* Interface wasn't attached */
1533 	if (bp->bif_ifp == NULL) {
1534 		lwkt_reltoken(&bpf_token);
1535 		kprintf("bpfdetach: %s was not attached\n", ifp->if_xname);
1536 		return;
1537 	}
1538 
1539 	while ((d = SLIST_FIRST(&bp->bif_dlist)) != NULL) {
1540 		bpf_detachd(d);
1541 		bpf_wakeup(d);
1542 	}
1543 
1544 	if (bp_prev != NULL)
1545 		bp_prev->bif_next = bp->bif_next;
1546 	else
1547 		bpf_iflist = bp->bif_next;
1548 
1549 	kfree(bp, M_BPF);
1550 
1551 	lwkt_reltoken(&bpf_token);
1552 }
1553 
1554 /*
1555  * Get a list of available data link type of the interface.
1556  */
1557 static int
1558 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1559 {
1560 	int n, error;
1561 	struct ifnet *ifp;
1562 	struct bpf_if *bp;
1563 
1564 	ifp = d->bd_bif->bif_ifp;
1565 	n = 0;
1566 	error = 0;
1567 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1568 		if (bp->bif_ifp != ifp)
1569 			continue;
1570 		if (bfl->bfl_list != NULL) {
1571 			if (n >= bfl->bfl_len) {
1572 				return (ENOMEM);
1573 			}
1574 			error = copyout(&bp->bif_dlt,
1575 			    bfl->bfl_list + n, sizeof(u_int));
1576 		}
1577 		n++;
1578 	}
1579 	bfl->bfl_len = n;
1580 	return(error);
1581 }
1582 
1583 /*
1584  * Set the data link type of a BPF instance.
1585  */
1586 static int
1587 bpf_setdlt(struct bpf_d *d, u_int dlt)
1588 {
1589 	int error, opromisc;
1590 	struct ifnet *ifp;
1591 	struct bpf_if *bp;
1592 
1593 	if (d->bd_bif->bif_dlt == dlt)
1594 		return (0);
1595 	ifp = d->bd_bif->bif_ifp;
1596 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1597 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
1598 			break;
1599 	}
1600 	if (bp != NULL) {
1601 		opromisc = d->bd_promisc;
1602 		bpf_detachd(d);
1603 		bpf_attachd(d, bp);
1604 		bpf_resetd(d);
1605 		if (opromisc) {
1606 			error = ifpromisc(bp->bif_ifp, 1);
1607 			if (error) {
1608 				if_printf(bp->bif_ifp,
1609 					"bpf_setdlt: ifpromisc failed (%d)\n",
1610 					error);
1611 			} else {
1612 				d->bd_promisc = 1;
1613 			}
1614 		}
1615 	}
1616 	return(bp == NULL ? EINVAL : 0);
1617 }
1618 
1619 void
1620 bpf_gettoken(void)
1621 {
1622 	lwkt_gettoken(&bpf_token);
1623 }
1624 
1625 void
1626 bpf_reltoken(void)
1627 {
1628 	lwkt_reltoken(&bpf_token);
1629 }
1630 
1631 static void
1632 bpf_drvinit(void *unused)
1633 {
1634 	int i;
1635 
1636 	make_autoclone_dev(&bpf_ops, &DEVFS_CLONE_BITMAP(bpf),
1637 		bpfclone, 0, 0, 0600, "bpf");
1638 	for (i = 0; i < BPF_PREALLOCATED_UNITS; i++) {
1639 		make_dev(&bpf_ops, i, 0, 0, 0600, "bpf%d", i);
1640 		devfs_clone_bitmap_set(&DEVFS_CLONE_BITMAP(bpf), i);
1641 	}
1642 }
1643 
1644 static void
1645 bpf_drvuninit(void *unused)
1646 {
1647 	devfs_clone_handler_del("bpf");
1648 	dev_ops_remove_all(&bpf_ops);
1649 	devfs_clone_bitmap_uninit(&DEVFS_CLONE_BITMAP(bpf));
1650 }
1651 
1652 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR, bpf_drvinit, NULL);
1653 SYSUNINIT(bpfdev, SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvuninit, NULL);
1654 
1655 #else /* !BPF */
1656 /*
1657  * NOP stubs to allow bpf-using drivers to load and function.
1658  *
1659  * A 'better' implementation would allow the core bpf functionality
1660  * to be loaded at runtime.
1661  */
1662 
1663 void
1664 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1665 {
1666 }
1667 
/*
 * Stub for kernels built without BPF: the mbuf chain is ignored.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}
1672 
1673 void
1674 bpf_ptap(struct bpf_if *bp, struct mbuf *m, const void *data, u_int dlen)
1675 {
1676 }
1677 
1678 void
1679 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1680 {
1681 }
1682 
1683 void
1684 bpfattach_dlt(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1685 {
1686 }
1687 
/*
 * Stub for kernels built without BPF: detaching an interface does
 * nothing.
 */
void
bpfdetach(struct ifnet *ifp)
{
}
1692 
1693 u_int
1694 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
1695 {
1696 	return -1;	/* "no filter" behaviour */
1697 }
1698 
/*
 * Stub for kernels built without BPF: there is no token to acquire.
 */
void
bpf_gettoken(void)
{
}
1703 
/*
 * Stub for kernels built without BPF: there is no token to release.
 */
void
bpf_reltoken(void)
{
}
1708 
1709 #endif /* !BPF */
1710