xref: /dragonfly/sys/net/bpf.c (revision d5f516c3)
1 /*
2  * Copyright (c) 1990, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from the Stanford/CMU enet packet filter,
6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8  * Berkeley Laboratory.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *      @(#)bpf.c	8.2 (Berkeley) 3/28/94
39  *
40  * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.12 2002/04/14 21:41:48 luigi Exp $
41  * $DragonFly: src/sys/net/bpf.c,v 1.19 2004/07/07 15:16:04 joerg Exp $
42  */
43 
44 #include "use_bpf.h"
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/conf.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/time.h>
52 #include <sys/proc.h>
53 #include <sys/signalvar.h>
54 #include <sys/filio.h>
55 #include <sys/sockio.h>
56 #include <sys/ttycom.h>
57 #include <sys/filedesc.h>
58 
59 #include <sys/poll.h>
60 
61 #include <sys/socket.h>
62 #include <sys/vnode.h>
63 
64 #include <net/if.h>
65 #include <net/bpf.h>
66 #include <net/bpfdesc.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
72 
73 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
74 
75 #if NBPF > 0
76 
77 /*
78  * The default read buffer size is patchable.
79  */
80 static int bpf_bufsize = BPF_DEFAULTBUFSIZE;
81 SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW,
82 	   &bpf_bufsize, 0, "");
83 static int bpf_maxbufsize = BPF_MAXBUFSIZE;
84 SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW,
85 	   &bpf_maxbufsize, 0, "");
86 
87 /*
88  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
89  */
90 static struct bpf_if	*bpf_iflist;
91 
92 static int	bpf_allocbufs(struct bpf_d *);
93 static void	bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
94 static void	bpf_detachd(struct bpf_d *d);
95 static void	bpf_freed(struct bpf_d *);
96 static void	bpf_mcopy(const void *, void *, size_t);
97 static int	bpf_movein(struct uio *, int, struct mbuf **,
98 			   struct sockaddr *, int *);
99 static int	bpf_setif(struct bpf_d *, struct ifreq *);
100 static void	bpf_timed_out(void *);
101 static void	bpf_wakeup(struct bpf_d *);
102 static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
103 			    void (*)(const void *, void *, size_t));
104 static void	reset_d(struct bpf_d *);
105 static int	bpf_setf(struct bpf_d *, struct bpf_program *);
106 static void	bpf_drvinit(void *unused);
107 
108 static d_open_t		bpfopen;
109 static d_close_t	bpfclose;
110 static d_read_t		bpfread;
111 static d_write_t	bpfwrite;
112 static d_ioctl_t	bpfioctl;
113 static d_poll_t		bpfpoll;
114 
115 #define CDEV_MAJOR 23
/* Character device switch for the /dev/bpf%d minors (see bpfopen()). */
static struct cdevsw bpf_cdevsw = {
	/* name */	"bpf",
	/* maj */	CDEV_MAJOR,
	/* flags */	0,
	/* port */	NULL,
	/* clone */	NULL,

	/* open */	bpfopen,
	/* close */	bpfclose,
	/* read */	bpfread,
	/* write */	bpfwrite,
	/* ioctl */	bpfioctl,
	/* poll */	bpfpoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* dump */	nodump,
	/* psize */	nopsize
};
134 
135 
136 static int
137 bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
138 	   struct sockaddr *sockp, int *datlen)
139 {
140 	struct mbuf *m;
141 	int error;
142 	int len;
143 	int hlen;
144 
145 	/*
146 	 * Build a sockaddr based on the data link layer type.
147 	 * We do this at this level because the ethernet header
148 	 * is copied directly into the data field of the sockaddr.
149 	 * In the case of SLIP, there is no header and the packet
150 	 * is forwarded as is.
151 	 * Also, we are careful to leave room at the front of the mbuf
152 	 * for the link level header.
153 	 */
154 	switch (linktype) {
155 
156 	case DLT_SLIP:
157 		sockp->sa_family = AF_INET;
158 		hlen = 0;
159 		break;
160 
161 	case DLT_EN10MB:
162 		sockp->sa_family = AF_UNSPEC;
163 		/* XXX Would MAXLINKHDR be better? */
164 		hlen = sizeof(struct ether_header);
165 		break;
166 
167 	case DLT_FDDI:
168 		sockp->sa_family = AF_IMPLINK;
169 		hlen = 0;
170 		break;
171 
172 	case DLT_RAW:
173 	case DLT_NULL:
174 		sockp->sa_family = AF_UNSPEC;
175 		hlen = 0;
176 		break;
177 
178 	case DLT_ATM_RFC1483:
179 		/*
180 		 * en atm driver requires 4-byte atm pseudo header.
181 		 * though it isn't standard, vpi:vci needs to be
182 		 * specified anyway.
183 		 */
184 		sockp->sa_family = AF_UNSPEC;
185 		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
186 		break;
187 
188 	case DLT_PPP:
189 		sockp->sa_family = AF_UNSPEC;
190 		hlen = 4;	/* This should match PPP_HDRLEN */
191 		break;
192 
193 	default:
194 		return(EIO);
195 	}
196 
197 	len = uio->uio_resid;
198 	*datlen = len - hlen;
199 	if ((unsigned)len > MCLBYTES)
200 		return(EIO);
201 
202 	MGETHDR(m, MB_WAIT, MT_DATA);
203 	if (m == 0)
204 		return(ENOBUFS);
205 	if (len > MHLEN) {
206 		MCLGET(m, MB_WAIT);
207 		if ((m->m_flags & M_EXT) == 0) {
208 			error = ENOBUFS;
209 			goto bad;
210 		}
211 	}
212 	m->m_pkthdr.len = m->m_len = len;
213 	m->m_pkthdr.rcvif = NULL;
214 	*mp = m;
215 	/*
216 	 * Make room for link header.
217 	 */
218 	if (hlen != 0) {
219 		m->m_pkthdr.len -= hlen;
220 		m->m_len -= hlen;
221 		m->m_data += hlen; /* XXX */
222 		error = uiomove(sockp->sa_data, hlen, uio);
223 		if (error)
224 			goto bad;
225 	}
226 	error = uiomove(mtod(m, caddr_t), len - hlen, uio);
227 	if (!error)
228 		return(0);
229 bad:
230 	m_freem(m);
231 	return(error);
232 }
233 
234 /*
235  * Attach file to the bpf interface, i.e. make d listen on bp.
236  * Must be called at splimp.
237  */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	d->bd_bif = bp;
	/* Head insertion into the singly-linked listener list. */
	d->bd_next = bp->bif_dlist;
	bp->bif_dlist = d;

	/*
	 * Setting if_bpf non-NULL is what enables packet diversion;
	 * it is done last, after the listener is fully linked in.
	 */
	bp->bif_ifp->if_bpf = bp;
}
252 
253 /*
254  * Detach a file from its interface.
255  */
static void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_d **p;
	struct bpf_if *bp;

	bp = d->bd_bif;
	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		if (ifpromisc(bp->bif_ifp, 0)) {
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
		}
	}
	/*
	 * Remove d from the interface's descriptor list.
	 * Walk with a pointer-to-pointer so unlinking works the same
	 * for the head and for interior elements.
	 */
	p = &bp->bif_dlist;
	while (*p != d) {
		p = &(*p)->bd_next;
		if (*p == NULL)
			panic("bpf_detachd: descriptor not in list");
	}
	*p = (*p)->bd_next;
	if (bp->bif_dlist == NULL) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		d->bd_bif->bif_ifp->if_bpf = 0;
	}
	d->bd_bif = 0;
}
294 
295 /*
296  * Open ethernet device.  Returns ENXIO for illegal minor device number,
297  * EBUSY if file is open by another process.
298  */
299 /* ARGSUSED */
static int
bpfopen(dev_t dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;
	struct proc *p = td->td_proc;

	KKASSERT(p != NULL);

	/* Jailed processes may not open bpf devices. */
	if (p->p_ucred->cr_prison)
		return(EPERM);

	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL)
		return(EBUSY);
	/* Instantiate the device node for this minor. */
	make_dev(&bpf_cdevsw, minor(dev), 0, 0, 0600, "bpf%d", lminor(dev));
	/* Zeroed allocation: all state starts cleared. */
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;
	d->bd_bufsize = bpf_bufsize;	/* default size, sysctl-tunable */
	d->bd_sig = SIGIO;		/* default async-mode signal */
	d->bd_seesent = 1;		/* see locally sent packets too */
	callout_init(&d->bd_callout);
	return(0);
}
327 
328 /*
329  * Close the descriptor by detaching it from its interface,
330  * deallocating its buffers, and marking it free.
331  */
332 /* ARGSUSED */
static int
bpfclose(dev_t dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int s;

	/* Stop SIGIO delivery to the registered owner. */
	funsetown(d->bd_sigio);
	s = splimp();
	/* Cancel any pending read timeout before tearing down. */
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	if (d->bd_bif != NULL)
		bpf_detachd(d);
	splx(s);
	/* Release buffers and filter, then the descriptor itself. */
	bpf_freed(d);
	dev->si_drv1 = 0;
	free(d, M_BPF);

	return(0);
}
353 
354 /*
355  * Rotate the packet buffers in descriptor d.  Move the store buffer
356  * into the hold slot, and the free buffer into the store slot.
357  * Zero the length of the new store buffer.
358  */
/*
 * Wrapped in do { } while (0) so the macro expands to a single
 * statement and stays safe when used as e.g. "if (x) ROTATE_BUFFERS(d);".
 */
#define ROTATE_BUFFERS(d) \
	do { \
		(d)->bd_hbuf = (d)->bd_sbuf; \
		(d)->bd_hlen = (d)->bd_slen; \
		(d)->bd_sbuf = (d)->bd_fbuf; \
		(d)->bd_slen = 0; \
		(d)->bd_fbuf = 0; \
	} while (0)
365 /*
366  *  bpfread - read next chunk of packets from buffers
367  */
static int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;
	int s;

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return(EINVAL);

	s = splimp();
	/* A read supersedes any timeout armed by poll/select. */
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			splx(s);
			return(ENXIO);
		}

		if (ioflag & IO_NDELAY) {
			splx(s);
			return(EWOULDBLOCK);
		}
		/* bd_rtout == 0 means sleep with no timeout. */
		error = tsleep(d, PCATCH, "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			splx(s);
			return(error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				splx(s);
				return(0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	splx(s);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	error = uiomove(d->bd_hbuf, d->bd_hlen, uio);

	s = splimp();
	/* The drained hold buffer becomes the new free buffer. */
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = 0;
	d->bd_hlen = 0;
	splx(s);

	return(error);
}
466 
467 
468 /*
469  * If there are processes sleeping on this descriptor, wake them up.
470  */
static void
bpf_wakeup(struct bpf_d *d)
{
	/* A pending read timeout is moot once data is being delivered. */
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	/* Wake sleepers in bpfread()... */
	wakeup((caddr_t)d);
	/* ...deliver SIGIO if async mode was requested... */
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(d->bd_sigio, d->bd_sig, 0);

	/* ...and notify select()/poll() waiters. */
	selwakeup(&d->bd_sel);
	/* XXX */
	d->bd_sel.si_pid = 0;
}
486 
/*
 * Callout handler for the read timeout armed in bpfpoll().
 */
static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;
	int s;

	s = splimp();
	/*
	 * Only transition if a read is still waiting; wake the reader
	 * immediately if the store buffer already holds data.
	 */
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	splx(s);
}
501 
502 static	int
503 bpfwrite(dev_t dev, struct uio *uio, int ioflag)
504 {
505 	struct bpf_d *d = dev->si_drv1;
506 	struct ifnet *ifp;
507 	struct mbuf *m;
508 	int error, s;
509 	static struct sockaddr dst;
510 	int datlen;
511 
512 	if (d->bd_bif == 0)
513 		return(ENXIO);
514 
515 	ifp = d->bd_bif->bif_ifp;
516 
517 	if (uio->uio_resid == 0)
518 		return(0);
519 
520 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
521 	if (error)
522 		return(error);
523 
524 	if (datlen > ifp->if_mtu)
525 		return(EMSGSIZE);
526 
527 	if (d->bd_hdrcmplt)
528 		dst.sa_family = pseudo_AF_HDRCMPLT;
529 
530 	s = splnet();
531 	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)NULL);
532 	splx(s);
533 	/*
534 	 * The driver frees the mbuf.
535 	 */
536 	return(error);
537 }
538 
539 /*
540  * Reset a descriptor by flushing its packet buffer and clearing the
541  * receive and drop counts.  Should be called at splimp.
542  */
543 static void
544 reset_d(struct bpf_d *d)
545 {
546 	if (d->bd_hbuf) {
547 		/* Free the hold buffer. */
548 		d->bd_fbuf = d->bd_hbuf;
549 		d->bd_hbuf = 0;
550 	}
551 	d->bd_slen = 0;
552 	d->bd_hlen = 0;
553 	d->bd_rcount = 0;
554 	d->bd_dcount = 0;
555 }
556 
557 /*
558  *  FIONREAD		Check for read packet available.
559  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
560  *  BIOCGBLEN		Get buffer len [for read()].
561  *  BIOCSETF		Set ethernet read filter.
562  *  BIOCFLUSH		Flush read packet buffer.
563  *  BIOCPROMISC		Put interface into promiscuous mode.
564  *  BIOCGDLT		Get link layer type.
565  *  BIOCGETIF		Get interface name.
566  *  BIOCSETIF		Set interface.
567  *  BIOCSRTIMEOUT	Set read timeout.
568  *  BIOCGRTIMEOUT	Get read timeout.
569  *  BIOCGSTATS		Get packet stats.
570  *  BIOCIMMEDIATE	Set immediate mode.
571  *  BIOCVERSION		Get filter language version.
572  *  BIOCGHDRCMPLT	Get "header already complete" flag
573  *  BIOCSHDRCMPLT	Set "header already complete" flag
574  *  BIOCGSEESENT	Get "see packets sent" flag
575  *  BIOCSSEESENT	Set "see packets sent" flag
576  */
577 /* ARGSUSED */
static int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int s, error = 0;

	/* Any ioctl activity cancels a pending read timeout. */
	s = splimp();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	splx(s);

	switch (cmd) {

	/* Unknown commands are rejected outright. */
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			/* Count the store buffer plus any held data. */
			s = splimp();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == 0)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr,
							 td->td_proc->p_ucred);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.  Only allowed before an interface is
	 * attached; the requested size is clamped to the legal range
	 * and the clamped value is copied back out.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splimp();
		reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splimp();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz_low(tv);
			break;
		}

	/*
	 * Get read timeout (convert ticks back to a timeval).
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent packets" flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}
	return(error);
}
846 
847 /*
848  * Set d's packet filter program to fp.  If this file already has a filter,
849  * free it and replace it.  Returns EINVAL for bogus requests.
850  */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;
	int s;

	old = d->bd_filter;
	if (fp->bf_insns == 0) {
		/* A null instruction pointer means "remove the filter". */
		if (fp->bf_len != 0)
			return(EINVAL);
		s = splimp();
		d->bd_filter = 0;
		reset_d(d);
		splx(s);
		if (old != 0)
			free((caddr_t)old, M_BPF);
		return(0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return(EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	/*
	 * Install the new program only if it copies in cleanly and
	 * passes the in-kernel validator; the old program (if any)
	 * is freed after the swap.
	 */
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		s = splimp();
		d->bd_filter = fcode;
		reset_d(d);
		splx(s);
		if (old != 0)
			free((caddr_t)old, M_BPF);

		return(0);
	}
	free((caddr_t)fcode, M_BPF);
	return(EINVAL);
}
890 
891 /*
892  * Detach a file from its current interface (if attached at all) and attach
893  * to the interface indicated by the name stored in ifr.
894  * Return an errno or 0.
895  */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	int s, error;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == 0)
		return(ENXIO);

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 || ifp != theywant)
			continue;
		/*
		 * We found the requested interface.
		 * If it's not up, return an error.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return(ENETDOWN);

		/* First bind on this descriptor: allocate its buffers. */
		if (d->bd_sbuf == NULL) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return(error);
		}
		s = splimp();
		if (bp != d->bd_bif) {
			if (d->bd_bif != NULL) {
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);
			}

			bpf_attachd(d, bp);
		}
		/* Flush buffers and statistics for the (re)bound interface. */
		reset_d(d);
		splx(s);
		return(0);
	}

	/* Not found. */
	return(ENXIO);
}
949 
950 /*
951  * Support for select() and poll() system calls
952  *
953  * Return true iff the specific operation will not block indefinitely.
954  * Otherwise, return false but make a note that a selwakeup() must be done.
955  */
956 int
957 bpfpoll(dev_t dev, int events, struct thread *td)
958 {
959 	struct bpf_d *d;
960 	int s;
961 	int revents;
962 
963 	d = dev->si_drv1;
964 	if (d->bd_bif == NULL)
965 		return(ENXIO);
966 
967 	revents = events & (POLLOUT | POLLWRNORM);
968 	s = splimp();
969 	if (events & (POLLIN | POLLRDNORM)) {
970 		/*
971 		 * An imitation of the FIONREAD ioctl code.
972 		 * XXX not quite.  An exact imitation:
973 		 *	if (d->b_slen != 0 ||
974 		 *	    (d->bd_hbuf != NULL && d->bd_hlen != 0)
975 		 */
976 		if (d->bd_hlen != 0 ||
977 		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
978 		    d->bd_slen != 0))
979 			revents |= events & (POLLIN | POLLRDNORM);
980 		else {
981 			selrecord(td, &d->bd_sel);
982 			/* Start the read timeout if necessary. */
983 			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
984 				callout_reset(&d->bd_callout, d->bd_rtout,
985 				    bpf_timed_out, d);
986 				d->bd_state = BPF_WAITING;
987 			}
988 		}
989 	}
990 	splx(s);
991 	return(revents);
992 }
993 
994 /*
995  * Incoming linkage from device drivers.  Process the packet pkt, of length
996  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
997  * by each process' filter, and if accepted, stashed into the corresponding
998  * buffer.
999  */
1000 void
1001 bpf_tap(struct ifnet *ifp, u_char *pkt, u_int pktlen)
1002 {
1003 	struct bpf_if *bp;
1004 	struct bpf_d *d;
1005 	u_int slen;
1006 	/*
1007 	 * Note that the ipl does not have to be raised at this point.
1008 	 * The only problem that could arise here is that if two different
1009 	 * interfaces shared any data.  This is not the case.
1010 	 */
1011 	bp = ifp->if_bpf;
1012 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1013 		++d->bd_rcount;
1014 		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
1015 		if (slen != 0)
1016 			catchpacket(d, pkt, pktlen, slen, ovbcopy);
1017 	}
1018 }
1019 
1020 /*
1021  * Copy data from an mbuf chain into a buffer.  This code is derived
1022  * from m_copydata in sys/uipc_mbuf.c.
1023  */
1024 static void
1025 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1026 {
1027 	const struct mbuf *m;
1028 	u_int count;
1029 	u_char *dst;
1030 
1031 	m = src_arg;
1032 	dst = dst_arg;
1033 	while (len > 0) {
1034 		if (m == NULL)
1035 			panic("bpf_mcopy");
1036 		count = min(m->m_len, len);
1037 		bcopy(mtod(m, void *), dst, count);
1038 		m = m->m_next;
1039 		dst += count;
1040 		len -= count;
1041 	}
1042 }
1043 
1044 /*
1045  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1046  */
1047 void
1048 bpf_mtap(struct ifnet *ifp, struct mbuf *m)
1049 {
1050 	struct bpf_if *bp = ifp->if_bpf;
1051 	struct bpf_d *d;
1052 	u_int pktlen, slen;
1053 	struct mbuf *m0;
1054 
1055 	pktlen = 0;
1056 	for (m0 = m; m0 != 0; m0 = m0->m_next)
1057 		pktlen += m0->m_len;
1058 
1059 	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
1060 		if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL))
1061 			continue;
1062 		++d->bd_rcount;
1063 		slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0);
1064 		if (slen != 0)
1065 			catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy);
1066 	}
1067 }
1068 
1069 /*
1070  * Move the packet data from interface memory (pkt) into the
1071  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1072  * otherwise 0.  "copy" is the routine called to do the actual data
1073  * transfer.  bcopy is passed in to copy contiguous chunks, while
1074  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
1075  * pkt is really an mbuf.
1076  */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
	    void (*cpfn)(const void *, void *, size_t))
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		bpf_wakeup(d);
		curlen = 0;
	}
	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		bpf_wakeup(d);

	/*
	 * Append the bpf header.
	 */
	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	microtime(&hp->bh_tstamp);
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 * Note bh_caplen is assigned as a side effect of the length
	 * expression passed to the copy routine.
	 */
	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;
}
1137 
1138 /*
1139  * Initialize all nonzero fields of a descriptor.
1140  */
1141 static int
1142 bpf_allocbufs(struct bpf_d *d)
1143 {
1144 	d->bd_fbuf = malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1145 	if (d->bd_fbuf == NULL)
1146 		return(ENOBUFS);
1147 
1148 	d->bd_sbuf = malloc(d->bd_bufsize, M_BPF, M_WAITOK);
1149 	if (d->bd_sbuf == NULL) {
1150 		free(d->bd_fbuf, M_BPF);
1151 		return(ENOBUFS);
1152 	}
1153 	d->bd_slen = 0;
1154 	d->bd_hlen = 0;
1155 	return(0);
1156 }
1157 
1158 /*
1159  * Free buffers currently in use by a descriptor.
1160  * Called on close.
1161  */
1162 static void
1163 bpf_freed(struct bpf_d *d)
1164 {
1165 	/*
1166 	 * We don't need to lock out interrupts since this descriptor has
1167 	 * been detached from its interface and it yet hasn't been marked
1168 	 * free.
1169 	 */
1170 	if (d->bd_sbuf != 0) {
1171 		free(d->bd_sbuf, M_BPF);
1172 		if (d->bd_hbuf != 0)
1173 			free(d->bd_hbuf, M_BPF);
1174 		if (d->bd_fbuf != 0)
1175 			free(d->bd_fbuf, M_BPF);
1176 	}
1177 	if (d->bd_filter)
1178 		free((caddr_t)d->bd_filter, M_BPF);
1179 }
1180 
1181 /*
1182  * Attach an interface to bpf.  ifp is a pointer to the structure
1183  * defining the interface to be attached, dlt is the link layer type,
1184  * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
1186  */
1187 void
1188 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1189 {
1190 	struct bpf_if *bp;
1191 
1192 	bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
1193 
1194 	bp->bif_ifp = ifp;
1195 	bp->bif_dlt = dlt;
1196 
1197 	bp->bif_next = bpf_iflist;
1198 	bpf_iflist = bp;
1199 
1200 	bp->bif_ifp->if_bpf = 0;
1201 
1202 	/*
1203 	 * Compute the length of the bpf header.  This is not necessarily
1204 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1205 	 * that the network layer header begins on a longword boundary (for
1206 	 * performance reasons and to alleviate alignment restrictions).
1207 	 */
1208 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1209 
1210 	if (bootverbose)
1211 		printf("bpf: %s attached\n", ifp->if_xname);
1212 }
1213 
1214 /*
1215  * Detach bpf from an interface.  This involves detaching each descriptor
1216  * associated with the interface, and leaving bd_bif NULL.  Notify each
1217  * descriptor as it's detached so that any sleepers wake up and get
1218  * ENXIO.
1219  */
1220 void
1221 bpfdetach(struct ifnet *ifp)
1222 {
1223 	struct bpf_if *bp, *bp_prev;
1224 	struct bpf_d *d;
1225 	int s;
1226 
1227 	s = splimp();
1228 
1229 	/* Locate BPF interface information */
1230 	bp_prev = NULL;
1231 	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1232 		if (ifp == bp->bif_ifp)
1233 			break;
1234 		bp_prev = bp;
1235 	}
1236 
1237 	/* Interface wasn't attached */
1238 	if (bp->bif_ifp == NULL) {
1239 		splx(s);
1240 		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1241 		return;
1242 	}
1243 
1244 	while ((d = bp->bif_dlist) != NULL) {
1245 		bpf_detachd(d);
1246 		bpf_wakeup(d);
1247 	}
1248 
1249 	if (bp_prev != NULL)
1250 		bp_prev->bif_next = bp->bif_next;
1251 	else
1252 		bpf_iflist = bp->bif_next;
1253 
1254 	free(bp, M_BPF);
1255 
1256 	splx(s);
1257 }
1258 
static void
bpf_drvinit(void *unused)
{
	/* Register the bpf character device entry points. */
	cdevsw_add(&bpf_cdevsw, 0, 0);
}
1264 
1265 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL)
1266 
1267 #else /* !BPF */
1268 /*
1269  * NOP stubs to allow bpf-using drivers to load and function.
1270  *
1271  * A 'better' implementation would allow the core bpf functionality
1272  * to be loaded at runtime.
1273  */
1274 
void
bpf_tap(struct ifnet *ifp, u_char *pkt, u_int pktlen)
{
	/* NOP stub: BPF not compiled into this kernel. */
}
1279 
void
bpf_mtap(struct ifnet *ifp, struct mbuf *m)
{
	/* NOP stub: BPF not compiled into this kernel. */
}
1284 
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	/* NOP stub: BPF not compiled into this kernel. */
}
1289 
void
bpfdetach(struct ifnet *ifp)
{
	/* NOP stub: BPF not compiled into this kernel. */
}
1294 
u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	/* Stub: match everything. */
	return -1;	/* "no filter" behaviour */
}
1300 
1301 #endif /* !BPF */
1302