1 /* $OpenBSD: bpf.c,v 1.222 2024/01/26 21:14:08 jan Exp $ */
2 /* $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */
3
4 /*
5 * Copyright (c) 1990, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
8 *
9 * This code is derived from the Stanford/CMU enet packet filter,
10 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
11 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
12 * Berkeley Laboratory.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
39 */
40
41 #include "bpfilter.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/ioctl.h>
49 #include <sys/conf.h>
50 #include <sys/vnode.h>
51 #include <sys/fcntl.h>
52 #include <sys/socket.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/rwlock.h>
56 #include <sys/atomic.h>
57 #include <sys/event.h>
58 #include <sys/mutex.h>
59 #include <sys/refcnt.h>
60 #include <sys/smr.h>
61 #include <sys/specdev.h>
62 #include <sys/sigio.h>
63 #include <sys/task.h>
64 #include <sys/time.h>
65
66 #include <net/if.h>
67 #include <net/bpf.h>
68 #include <net/bpfdesc.h>
69
70 #include <netinet/in.h>
71 #include <netinet/if_ether.h>
72
73 #include "vlan.h"
74 #if NVLAN > 0
75 #include <net/if_vlan_var.h>
76 #endif
77
#define BPF_BUFSIZE 32768

/*
 * Read-side buffer states (bd_state) for the wait-timeout machinery:
 * IDLE means no wait timeout is armed, WAIT means the timeout is
 * ticking, DONE means it fired and the store buffer may be rotated.
 */
#define BPF_S_IDLE	0
#define BPF_S_WAIT	1
#define BPF_S_DONE	2

#define PRINET  26			/* interruptible */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;		/* default per-descriptor buffer size */
int bpf_maxbufsize = BPF_MAXBUFSIZE;	/* upper bound enforced by BIOCSBLEN */

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if *bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

/* Local helper prototypes; implemented below. */
int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_wait_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

/* kqueue EVFILT_READ filter operations. */
void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

/* SMR destructors for filter programs and descriptors. */
void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


/* Serializes updates of bpf_bufsize/bpf_maxbufsize via sysctl. */
struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");
142
/*
 * Copy a packet written from userland (uio) into a freshly allocated
 * mbuf for transmission.  Builds *sockp from the descriptor's link
 * type, runs the write filter over the packet, strips the link-level
 * header into the sockaddr, and tags the mbuf with its DLT.
 * Returns 0 and sets *mp on success, otherwise an errno.
 */
int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen, alen, mlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EMSGSIZE);
	len = uio->uio_resid;
	if (len < hlen)
		return (EINVAL);

	/*
	 * Get the length of the payload so we can align it properly.
	 */
	alen = len - hlen;

	/*
	 * Allocate enough space for headers and the aligned payload.
	 */
	mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long));
	if (mlen > MAXMCLBYTES)
		return (EMSGSIZE);

	MGETHDR(m, M_WAIT, MT_DATA);
	if (mlen > MHLEN) {
		/* Too large for a plain header mbuf; attach a cluster. */
		MCLGETL(m, M_WAIT, mlen);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	m_align(m, alen);	/* Align the payload. */
	m->m_data -= hlen;	/* ...then make room for the link header. */

	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len;
	m->m_len = len;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	/* Run the write filter under SMR; a NULL program accepts all. */
	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		/* Filter rejected (truncated) the packet. */
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);

		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen;
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	*mp = m;
	return (0);
 bad:
	m_freem(m);
	return (error);
}
288
289 /*
290 * Attach file to the bpf interface, i.e. make d listen on bp.
291 */
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Called with ``bd_mtx'' held; the SMR list insert additionally
 * requires the kernel lock as the update-side serialization.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	/* Non-NULL cookie tells the driver to start diverting to bpf. */
	*bp->bif_driverp = bp;
}
310
311 /*
312 * Detach a file from its interface.
313 */
/*
 * Detach a file from its interface.  Called with ``bd_mtx'' held;
 * the mutex is dropped and re-taken around ifpromisc() since that
 * needs the net lock.  A no-op if the descriptor is not attached.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		/* Clear the flag first so we only try once. */
		d->bd_promisc = 0;

		/* Hold a reference while bd_mtx is dropped for NET_LOCK. */
		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}
368
/*
 * Pseudo-device attach routine: initialize the global list of
 * descriptors.  ``n'' (requested instance count) is unused because
 * descriptors are cloned on open.
 */
void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}
374
375 /*
376 * Open ethernet device. Returns ENXIO for illegal minor device number,
377 * EBUSY if file is open by another process.
378 */
/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 *
 * Each open clones a fresh descriptor keyed by the minor number; the
 * low CLONE_SHIFT bits of the minor must be zero.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	/* NOTE: allocation failure is reported as EBUSY (historical). */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	timeout_set(&bd->bd_wait_tmo, bpf_wait_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);
	klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);

	bd->bd_rtout = 0;	/* no timeout by default */
	bd->bd_wtout = INFSLP;	/* wait for the buffer to fill by default */

	refcnt_init(&bd->bd_refcnt);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}
413
414 /*
415 * Close the descriptor by detaching it from its interface,
416 * deallocating its buffers, and marking it free.
417 */
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 *
 * The final bpf_put() releases the open reference; the descriptor
 * itself is freed once all other references (tasks, knotes) drop.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);		/* wake any sleeping readers */
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}
433
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.  Must be called with
 * ``bd_mtx'' held and never while a read is copying out via uiomove.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement (safe in unbraced if/else), and every use of the argument
 * is parenthesized.
 */
#define ROTATE_BUFFERS(d)					\
do {								\
	KASSERT((d)->bd_in_uiomove == 0);			\
	MUTEX_ASSERT_LOCKED(&(d)->bd_mtx);			\
	(d)->bd_hbuf = (d)->bd_sbuf;				\
	(d)->bd_hlen = (d)->bd_slen;				\
	(d)->bd_sbuf = (d)->bd_fbuf;				\
	(d)->bd_state = BPF_S_IDLE;				\
	(d)->bd_slen = 0;					\
	(d)->bd_fbuf = NULL;					\
} while (0)
448
449 /*
450 * bpfread - read next chunk of packets from buffers
451 */
/*
 * bpfread - read next chunk of packets from buffers
 *
 * Blocks (subject to BIOCSRTIMEOUT and O_NONBLOCK) until the hold
 * buffer is available, then copies it out in one piece.  The caller
 * must supply a buffer exactly bd_bufsize bytes long.
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)		/* clamp on uint64 overflow */
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_state == BPF_S_DONE) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (ISSET(ioflag, IO_NDELAY)) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;	/* forbid rotation while unlocked below */

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;	/* recycle the drained hold buffer */
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
585
586 /*
587 * If there are processes sleeping on this descriptor, wake them up.
588 */
/*
 * If there are processes sleeping on this descriptor, wake them up.
 * Called with ``bd_mtx'' held.  Also activates kqueue notes and, for
 * async descriptors, schedules SIGIO delivery on the system taskq.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	knote_locked(&d->bd_klist, 0);

	/*
	 * As long as pgsigio() needs to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if (d->bd_async && d->bd_sig) {
		/* Reference is released by bpf_wakeup_cb(). */
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);	/* task already queued */
	}
}
610
611 void
bpf_wakeup_cb(void * xd)612 bpf_wakeup_cb(void *xd)
613 {
614 struct bpf_d *d = xd;
615
616 if (d->bd_async && d->bd_sig)
617 pgsigio(&d->bd_sigio, d->bd_sig, 0);
618
619 bpf_put(d);
620 }
621
/*
 * Wait-timeout callback (bd_wait_tmo): the BIOCSWTIMEOUT period has
 * elapsed, so if the descriptor is still waiting, mark the store
 * buffer ready (BPF_S_DONE) and wake readers.  Drops the reference
 * taken when the timeout was armed.
 */
void
bpf_wait_cb(void *xd)
{
	struct bpf_d *d = xd;

	mtx_enter(&d->bd_mtx);
	if (d->bd_state == BPF_S_WAIT) {
		d->bd_state = BPF_S_DONE;
		bpf_wakeup(d);
	}
	mtx_leave(&d->bd_mtx);

	bpf_put(d);
}
636
/*
 * bpfwrite - inject a packet on the attached interface.
 *
 * The written bytes are turned into an mbuf by bpf_movein() (which
 * also applies the write filter), checked against the interface MTU,
 * and handed to the interface output routine under the net lock.
 */
int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		/* Zero-length writes succeed trivially. */
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	/* Inherit routing domain and link-level priority from the ifp. */
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}
689
690 /*
691 * Reset a descriptor by flushing its packet buffer and clearing the
692 * receive and drop counts.
693 */
/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Called with ``bd_mtx'' held and never
 * while a read is in uiomove.  Also disarms any pending wait timeout.
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	/* Drop the reference held by the armed timeout, if any. */
	if (timeout_del(&d->bd_wait_tmo))
		bpf_put(d);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_state = BPF_S_IDLE;
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}
714
715 static int
bpf_set_wtout(struct bpf_d * d,uint64_t wtout)716 bpf_set_wtout(struct bpf_d *d, uint64_t wtout)
717 {
718 mtx_enter(&d->bd_mtx);
719 d->bd_wtout = wtout;
720 mtx_leave(&d->bd_mtx);
721
722 return (0);
723 }
724
725 static int
bpf_set_wtimeout(struct bpf_d * d,const struct timeval * tv)726 bpf_set_wtimeout(struct bpf_d *d, const struct timeval *tv)
727 {
728 uint64_t nsec;
729
730 if (tv->tv_sec < 0 || !timerisvalid(tv))
731 return (EINVAL);
732
733 nsec = TIMEVAL_TO_NSEC(tv);
734 if (nsec > MAXTSLP)
735 return (EOVERFLOW);
736
737 return (bpf_set_wtout(d, nsec));
738 }
739
740 static int
bpf_get_wtimeout(struct bpf_d * d,struct timeval * tv)741 bpf_get_wtimeout(struct bpf_d *d, struct timeval *tv)
742 {
743 uint64_t nsec;
744
745 mtx_enter(&d->bd_mtx);
746 nsec = d->bd_wtout;
747 mtx_leave(&d->bd_mtx);
748
749 if (nsec == INFSLP)
750 return (ENXIO);
751
752 memset(tv, 0, sizeof(*tv));
753 NSEC_TO_TIMEVAL(nsec, tv);
754
755 return (0);
756 }
757
758 /*
759 * FIONREAD Check for read packet available.
760 * BIOCGBLEN Get buffer len [for read()].
761 * BIOCSETF Set ethernet read filter.
762 * BIOCFLUSH Flush read packet buffer.
763 * BIOCPROMISC Put interface into promiscuous mode.
764 * BIOCGDLTLIST Get supported link layer types.
765 * BIOCGDLT Get link layer type.
766 * BIOCSDLT Set link layer type.
767 * BIOCGETIF Get interface name.
768 * BIOCSETIF Set interface.
769 * BIOCSRTIMEOUT Set read timeout.
770 * BIOCGRTIMEOUT Get read timeout.
771 * BIOCSWTIMEOUT Set wait timeout.
772 * BIOCGWTIMEOUT Get wait timeout.
773 * BIOCDWTIMEOUT Del wait timeout.
774 * BIOCGSTATS Get packet stats.
775 * BIOCIMMEDIATE Set immediate mode.
776 * BIOCVERSION Get filter language version.
777 * BIOCGHDRCMPLT Get "header already complete" flag
778 * BIOCSHDRCMPLT Set "header already complete" flag
779 */
/*
 * Device ioctl entry point.  Looks up the descriptor by minor number;
 * when the descriptor is locked (BIOCLOCK) and the caller is not root,
 * only the read-only/harmless subset of commands below is permitted.
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGWTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCSWTIMEOUT:
		case BIOCDWTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.  Only allowed before an interface is
	 * attached (buffers are allocated at BIOCSETIF time); the
	 * value is clamped into [BPF_MINBUFSIZE, bpf_maxbufsize]
	 * and the clamped value is copied back to the caller.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			uint64_t rtout;

			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			rtout = TIMEVAL_TO_NSEC(tv);
			if (rtout > MAXTSLP) {
				error = EOVERFLOW;
				break;
			}
			mtx_enter(&d->bd_mtx);
			d->bd_rtout = rtout;
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			memset(tv, 0, sizeof(*tv));
			mtx_enter(&d->bd_mtx);
			NSEC_TO_TIMEVAL(d->bd_rtout, tv);
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode: a zero wait timeout makes reads return
	 * as soon as any packet is captured.
	 */
	case BIOCIMMEDIATE:
		error = bpf_set_wtout(d, *(int *)addr ? 0 : INFSLP);
		break;

	/*
	 * Wait timeout.
	 */
	case BIOCSWTIMEOUT:
		error = bpf_set_wtimeout(d, (const struct timeval *)addr);
		break;
	case BIOCGWTIMEOUT:
		error = bpf_get_wtimeout(d, (struct timeval *)addr);
		break;
	case BIOCDWTIMEOUT:
		error = bpf_set_wtout(d, INFSLP);
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/* let vfs to keep track of this */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}
1115
1116 /*
1117 * Set d's packet filter program to fp. If this file already has a filter,
1118 * free it and replace it. Returns EINVAL for bogus requests.
1119 */
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 *
 * ``wf'' selects the write filter (non-zero) or the read filter (zero).
 * The old program is retired via smr_call() so concurrent SMR readers
 * in the capture path finish safely; the descriptor is reset afterward.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		/* NULL program means "accept everything"; length must be 0. */
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		/* Copy in and verify the program before installing it. */
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}
1172
1173 /*
1174 * Detach a file from its current interface (if attached at all) and attach
1175 * to the interface indicated by the name stored in ifr.
1176 * Return an errno or 0.
1177 */
/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 *
 * When several bpf_ifs share the name, the one with the lowest DLT
 * is chosen.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}
1221
1222 /*
1223 * Copy the interface name to the ifreq.
1224 */
/*
 * Copy the interface name to the ifreq.
 * bif_name and ifr_name are both IFNAMSIZ-sized, so a full-size
 * copy carries the NUL terminator along.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}
1230
/*
 * kqueue EVFILT_READ filter operations; MPSAFE because all event
 * state is protected by the per-descriptor mutex (bd_klist is
 * initialized with klist_init_mutex on bd_mtx).
 */
const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
	.f_modify	= filt_bpfreadmodify,
	.f_process	= filt_bpfreadprocess,
};
1239
1240 int
bpfkqfilter(dev_t dev,struct knote * kn)1241 bpfkqfilter(dev_t dev, struct knote *kn)
1242 {
1243 struct bpf_d *d;
1244 struct klist *klist;
1245
1246 KERNEL_ASSERT_LOCKED();
1247
1248 d = bpfilter_lookup(minor(dev));
1249 if (d == NULL)
1250 return (ENXIO);
1251
1252 switch (kn->kn_filter) {
1253 case EVFILT_READ:
1254 klist = &d->bd_klist;
1255 kn->kn_fop = &bpfread_filtops;
1256 break;
1257 default:
1258 return (EINVAL);
1259 }
1260
1261 bpf_get(d);
1262 kn->kn_hook = d;
1263 klist_insert(klist, kn);
1264
1265 return (0);
1266 }
1267
/*
 * Detach a knote from the descriptor and release the reference
 * taken by bpfkqfilter().
 */
void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	klist_remove(&d->bd_klist, kn);
	bpf_put(d);
}
1276
/*
 * Event test for EVFILT_READ: readable data is what sits in the hold
 * buffer, plus the store buffer in immediate mode (zero wait timeout).
 * Called with ``bd_mtx'' held.
 */
int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	kn->kn_data = d->bd_hlen;
	if (d->bd_wtout == 0)
		kn->kn_data += d->bd_slen;

	return (kn->kn_data > 0);
}
1290
/*
 * f_modify hook: run the read event test with the descriptor mutex
 * held, as required by the MPSAFE filterops contract.
 */
int
filt_bpfreadmodify(struct kevent *kev, struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_modify_fn(kev, kn, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}
1303
/*
 * f_process hook: same as filt_bpfreadmodify but for event delivery;
 * serializes the read event test with the descriptor mutex.
 */
int
filt_bpfreadprocess(struct knote *kn, struct kevent *kev)
{
	struct bpf_d *d = kn->kn_hook;
	int active;

	mtx_enter(&d->bd_mtx);
	active = knote_process_fn(kn, kev, filt_bpfread);
	mtx_leave(&d->bd_mtx);

	return (active);
}
1316
1317 /*
1318 * Copy data from an mbuf chain into a buffer. This code is derived
1319 * from m_copydata in sys/uipc_mbuf.c.
1320 */
1321 void
bpf_mcopy(const void * src_arg,void * dst_arg,size_t len)1322 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1323 {
1324 const struct mbuf *m;
1325 u_int count;
1326 u_char *dst;
1327
1328 m = src_arg;
1329 dst = dst_arg;
1330 while (len > 0) {
1331 if (m == NULL)
1332 panic("bpf_mcopy");
1333 count = min(m->m_len, len);
1334 bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
1335 m = m->m_next;
1336 dst += count;
1337 len -= count;
1338 }
1339 }
1340
1341 int
bpf_mtap(caddr_t arg,const struct mbuf * m,u_int direction)1342 bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
1343 {
1344 return _bpf_mtap(arg, m, m, direction);
1345 }
1346
/*
 * Run packet m past every descriptor listening on tap point arg.
 * mp is the mbuf carrying the packet header metadata (it may differ
 * from m when a fake header mbuf was conned up by a bpf_mtap_*()
 * wrapper).  Returns nonzero if at least one descriptor whose filter
 * matched has a drop policy set (BPF_FILDROP_CAPTURE/DROP).
 *
 * Runs inside an SMR read section; descriptors may not be freed
 * while we walk bif_dlist.
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	/* Total wire length of the packet, summed over the chain. */
	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		/* Skip descriptors not interested in this direction. */
		if (ISSET(d->bd_dirfilt, direction))
			continue;

		/* NULL fcode means "accept all" in bpf_mfilter(). */
		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			/*
			 * Build the bpf header template once, lazily,
			 * and reuse it for every matching descriptor.
			 */
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);
					tbh.bh_csumflags =
					    mp->m_pkthdr.csum_flags;

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}
1423
1424 /*
1425 * Incoming linkage from device drivers, where a data buffer should be
1426 * prepended by an arbitrary header. In this situation we already have a
1427 * way of representing a chain of memory buffers, ie, mbufs, so reuse
1428 * the existing functionality by attaching the buffers to mbufs.
1429 *
1430 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
1431 * struct m_hdr each for the header and data on the stack.
1432 */
1433 int
bpf_tap_hdr(caddr_t arg,const void * hdr,unsigned int hdrlen,const void * buf,unsigned int buflen,u_int direction)1434 bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
1435 const void *buf, unsigned int buflen, u_int direction)
1436 {
1437 struct m_hdr mh, md;
1438 struct mbuf *m0 = NULL;
1439 struct mbuf **mp = &m0;
1440
1441 if (hdr != NULL) {
1442 mh.mh_flags = 0;
1443 mh.mh_next = NULL;
1444 mh.mh_len = hdrlen;
1445 mh.mh_data = (void *)hdr;
1446
1447 *mp = (struct mbuf *)&mh;
1448 mp = &mh.mh_next;
1449 }
1450
1451 if (buf != NULL) {
1452 md.mh_flags = 0;
1453 md.mh_next = NULL;
1454 md.mh_len = buflen;
1455 md.mh_data = (void *)buf;
1456
1457 *mp = (struct mbuf *)&md;
1458 }
1459
1460 return bpf_mtap(arg, m0, direction);
1461 }
1462
1463 /*
1464 * Incoming linkage from device drivers, where we have a mbuf chain
1465 * but need to prepend some arbitrary header from a linear buffer.
1466 *
1467 * Con up a minimal dummy header to pacify bpf. Allocate (only) a
1468 * struct m_hdr on the stack. This is safe as bpf only reads from the
1469 * fields in this header that we initialize, and will not try to free
1470 * it or keep a pointer to it.
1471 */
1472 int
bpf_mtap_hdr(caddr_t arg,const void * data,u_int dlen,const struct mbuf * m,u_int direction)1473 bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
1474 u_int direction)
1475 {
1476 struct m_hdr mh;
1477 const struct mbuf *m0;
1478
1479 if (dlen > 0) {
1480 mh.mh_flags = 0;
1481 mh.mh_next = (struct mbuf *)m;
1482 mh.mh_len = dlen;
1483 mh.mh_data = (void *)data;
1484 m0 = (struct mbuf *)&mh;
1485 } else
1486 m0 = m;
1487
1488 return _bpf_mtap(arg, m, m0, direction);
1489 }
1490
1491 /*
1492 * Incoming linkage from device drivers, where we have a mbuf chain
1493 * but need to prepend the address family.
1494 *
1495 * Con up a minimal dummy header to pacify bpf. We allocate (only) a
1496 * struct m_hdr on the stack. This is safe as bpf only reads from the
1497 * fields in this header that we initialize, and will not try to free
1498 * it or keep a pointer to it.
1499 */
1500 int
bpf_mtap_af(caddr_t arg,u_int32_t af,const struct mbuf * m,u_int direction)1501 bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
1502 {
1503 u_int32_t afh;
1504
1505 afh = htonl(af);
1506
1507 return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
1508 }
1509
1510 /*
1511 * Incoming linkage from device drivers, where we have a mbuf chain
1512 * but need to prepend a VLAN encapsulation header.
1513 *
1514 * Con up a minimal dummy header to pacify bpf. Allocate (only) a
1515 * struct m_hdr on the stack. This is safe as bpf only reads from the
1516 * fields in this header that we initialize, and will not try to free
1517 * it or keep a pointer to it.
1518 */
/*
 * Tap an Ethernet frame.  If the frame carries an offloaded VLAN tag
 * (M_VLANTAG), reconstruct the 802.1Q encapsulation on the stack so
 * listeners see the tag on the wire format; otherwise tap as-is.
 */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;

	/* Untagged frames need no rewriting. */
	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	/*
	 * Splice the stripped tag back in: the original ethertype moves
	 * to evl_proto and ETHERTYPE_VLAN takes its place.
	 */
	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag);

	/* Stack mbuf for the rebuilt header ... */
	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	/* ... chained to the payload past the original Ethernet header. */
	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}
1553
1554 /*
1555 * Move the packet data from interface memory (pkt) into the
1556 * store buffer. Wake up listeners if needed.
1557 * "copy" is the routine called to do the actual data
1558 * transfer. bcopy is passed in to copy contiguous chunks, while
1559 * bpf_mcopy is passed in to copy mbuf chains. In the latter case,
1560 * pkt is really an mbuf.
1561 */
/*
 * Append one captured packet (bpf header + up to snaplen bytes of pkt,
 * always an mbuf chain here, copied via bpf_mcopy()) to d's store
 * buffer, rotating buffers and waking readers as needed.  tbh is the
 * pre-built header template from _bpf_mtap(); the per-packet length
 * fields are filled in below.  Called with bd_mtx held.
 */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}

		/* cancel pending wtime */
		if (timeout_del(&d->bd_wait_tmo))
			bpf_put(d);	/* drop the timeout's reference */

		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	switch (d->bd_wtout) {
	case 0:
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		if (d->bd_state == BPF_S_IDLE)
			d->bd_state = BPF_S_DONE;
		do_wakeup = 1;
		break;
	case INFSLP:
		/* No read timeout: readers are only woken on rotation. */
		break;
	default:
		/* Arm the read timeout once per wait period. */
		if (d->bd_state == BPF_S_IDLE) {
			d->bd_state = BPF_S_WAIT;

			/*
			 * The timeout holds a reference; give it back
			 * immediately if the timeout was already armed.
			 */
			bpf_get(d);
			if (!timeout_add_nsec(&d->bd_wait_tmo, d->bd_wtout))
				bpf_put(d);
		}
		break;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}
1655
1656 /*
1657 * Initialize all nonzero fields of a descriptor.
1658 */
1659 int
bpf_allocbufs(struct bpf_d * d)1660 bpf_allocbufs(struct bpf_d *d)
1661 {
1662 MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1663
1664 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1665 if (d->bd_fbuf == NULL)
1666 return (ENOMEM);
1667
1668 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1669 if (d->bd_sbuf == NULL) {
1670 free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
1671 d->bd_fbuf = NULL;
1672 return (ENOMEM);
1673 }
1674
1675 d->bd_slen = 0;
1676 d->bd_hlen = 0;
1677
1678 return (0);
1679 }
1680
1681 void
bpf_prog_smr(void * bps_arg)1682 bpf_prog_smr(void *bps_arg)
1683 {
1684 struct bpf_program_smr *bps = bps_arg;
1685
1686 free(bps->bps_bf.bf_insns, M_DEVBUF,
1687 bps->bps_bf.bf_len * sizeof(struct bpf_insn));
1688 free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
1689 }
1690
1691 void
bpf_d_smr(void * smr)1692 bpf_d_smr(void *smr)
1693 {
1694 struct bpf_d *bd = smr;
1695
1696 sigio_free(&bd->bd_sigio);
1697 free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
1698 free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
1699 free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);
1700
1701 if (bd->bd_rfilter != NULL)
1702 bpf_prog_smr(bd->bd_rfilter);
1703 if (bd->bd_wfilter != NULL)
1704 bpf_prog_smr(bd->bd_wfilter);
1705
1706 klist_free(&bd->bd_klist);
1707 free(bd, M_DEVBUF, sizeof(*bd));
1708 }
1709
/*
 * Take a reference on a descriptor so it stays alive while in use;
 * paired with bpf_put().
 */
void
bpf_get(struct bpf_d *bd)
{
	refcnt_take(&bd->bd_refcnt);
}
1715
1716 /*
1717 * Free buffers currently in use by a descriptor
1718 * when the reference count drops to zero.
1719 */
1720 void
bpf_put(struct bpf_d * bd)1721 bpf_put(struct bpf_d *bd)
1722 {
1723 if (refcnt_rele(&bd->bd_refcnt) == 0)
1724 return;
1725
1726 smr_call(&bd->bd_smr, bpf_d_smr, bd);
1727 }
1728
/*
 * Create a tap point named "name" with data link type dlt and link it
 * onto the global bpf_iflist.  bpfp points at the driver's bpf hook,
 * which is cleared here; it becomes non-NULL only once a descriptor
 * attaches.  Returns the new tap point.
 */
void *
bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
{
	struct bpf_if *bp;

	if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
		panic("bpfattach");
	SMR_SLIST_INIT(&bp->bif_dlist);
	bp->bif_driverp = (struct bpf_if **)bpfp;
	bp->bif_name = name;
	bp->bif_ifp = NULL;
	bp->bif_dlt = dlt;

	/* Push onto the head of the global tap point list. */
	bp->bif_next = bpf_iflist;
	bpf_iflist = bp;

	/* No listeners yet: clear the driver's hook. */
	*bp->bif_driverp = NULL;

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	return (bp);
}
1757
1758 void
bpfattach(caddr_t * driverp,struct ifnet * ifp,u_int dlt,u_int hdrlen)1759 bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
1760 {
1761 struct bpf_if *bp;
1762
1763 bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
1764 bp->bif_ifp = ifp;
1765 }
1766
1767 /* Detach an interface from its attached bpf device. */
1768 void
bpfdetach(struct ifnet * ifp)1769 bpfdetach(struct ifnet *ifp)
1770 {
1771 struct bpf_if *bp, *nbp;
1772
1773 KERNEL_ASSERT_LOCKED();
1774
1775 for (bp = bpf_iflist; bp; bp = nbp) {
1776 nbp = bp->bif_next;
1777 if (bp->bif_ifp == ifp)
1778 bpfsdetach(bp);
1779 }
1780 ifp->if_bpf = NULL;
1781 }
1782
/*
 * Tear down a tap point: revoke every open descriptor attached to it,
 * unlink it from the global list and free it.
 */
void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	/*
	 * Revoke each attached descriptor's device node; vdevgone()
	 * detaches it from bif_dlist, so keep taking the list head.
	 */
	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) {
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
		klist_invalidate(&bd->bd_klist);
	}

	/* Unlink bp if it sits in the middle of the list... */
	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	/* ...or at its head. */
	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}
1814
1815 int
bpf_sysctl_locked(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)1816 bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1817 void *newp, size_t newlen)
1818 {
1819 switch (name[0]) {
1820 case NET_BPF_BUFSIZE:
1821 return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1822 &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize);
1823 case NET_BPF_MAXBUFSIZE:
1824 return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1825 &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
1826 default:
1827 return (EOPNOTSUPP);
1828 }
1829 }
1830
1831 int
bpf_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)1832 bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
1833 size_t newlen)
1834 {
1835 int flags = RW_INTR;
1836 int error;
1837
1838 if (namelen != 1)
1839 return (ENOTDIR);
1840
1841 flags |= (newp == NULL) ? RW_READ : RW_WRITE;
1842
1843 error = rw_enter(&bpf_sysctl_lk, flags);
1844 if (error != 0)
1845 return (error);
1846
1847 error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);
1848
1849 rw_exit(&bpf_sysctl_lk);
1850
1851 return (error);
1852 }
1853
1854 struct bpf_d *
bpfilter_lookup(int unit)1855 bpfilter_lookup(int unit)
1856 {
1857 struct bpf_d *bd;
1858
1859 KERNEL_ASSERT_LOCKED();
1860
1861 LIST_FOREACH(bd, &bpf_d_list, bd_list)
1862 if (bd->bd_unit == unit)
1863 return (bd);
1864 return (NULL);
1865 }
1866
1867 /*
1868 * Get a list of available data link type of the interface.
1869 */
1870 int
bpf_getdltlist(struct bpf_d * d,struct bpf_dltlist * bfl)1871 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1872 {
1873 int n, error;
1874 struct bpf_if *bp;
1875 const char *name;
1876
1877 name = d->bd_bif->bif_name;
1878 n = 0;
1879 error = 0;
1880 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1881 if (strcmp(name, bp->bif_name) != 0)
1882 continue;
1883 if (bfl->bfl_list != NULL) {
1884 if (n >= bfl->bfl_len)
1885 return (ENOMEM);
1886 error = copyout(&bp->bif_dlt,
1887 bfl->bfl_list + n, sizeof(u_int));
1888 if (error)
1889 break;
1890 }
1891 n++;
1892 }
1893
1894 bfl->bfl_len = n;
1895 return (error);
1896 }
1897
1898 /*
1899 * Set the data link type of a BPF instance.
1900 */
1901 int
bpf_setdlt(struct bpf_d * d,u_int dlt)1902 bpf_setdlt(struct bpf_d *d, u_int dlt)
1903 {
1904 const char *name;
1905 struct bpf_if *bp;
1906
1907 MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1908 if (d->bd_bif->bif_dlt == dlt)
1909 return (0);
1910 name = d->bd_bif->bif_name;
1911 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
1912 if (strcmp(name, bp->bif_name) != 0)
1913 continue;
1914 if (bp->bif_dlt == dlt)
1915 break;
1916 }
1917 if (bp == NULL)
1918 return (EINVAL);
1919 bpf_detachd(d);
1920 bpf_attachd(d, bp);
1921 bpf_resetd(d);
1922 return (0);
1923 }
1924
/*
 * Load helpers used by the BPF virtual machine to read words,
 * halfwords and bytes out of an mbuf chain, plus the bounded copy
 * routine the multi-byte loaders share.
 */
u_int32_t bpf_mbuf_ldw(const void *, u_int32_t, int *);
u_int32_t bpf_mbuf_ldh(const void *, u_int32_t, int *);
u_int32_t bpf_mbuf_ldb(const void *, u_int32_t, int *);

int bpf_mbuf_copy(const struct mbuf *, u_int32_t,
    void *, u_int32_t);

/* Accessor table handed to _bpf_filter() when the packet is an mbuf. */
const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};
1937
/*
 * Copy len bytes starting at byte offset off out of mbuf chain m into
 * buf.  Returns 0 on success, or -1 when the chain holds fewer than
 * off + len bytes.
 */
int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	/* Skip whole mbufs until off falls inside the current one. */
	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		/* Copy what this mbuf can contribute past off. */
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;	/* subsequent mbufs are read from the start */
	}

	/* Chain ended before len bytes were gathered. */
	return (-1);
}
1971
/*
 * Load a 32-bit word at offset k from mbuf chain m0, converting from
 * network byte order.  *err is set nonzero (and 0 returned) if the
 * chain is too short.
 */
u_int32_t
bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
{
	u_int32_t word;

	if (bpf_mbuf_copy(m0, k, &word, sizeof(word)) == 0) {
		*err = 0;
		return ntohl(word);
	}

	*err = 1;
	return (0);
}
1985
/*
 * Load a 16-bit halfword at offset k from mbuf chain m0, converting
 * from network byte order.  *err is set nonzero (and 0 returned) if
 * the chain is too short.
 */
u_int32_t
bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
{
	u_int16_t half;

	if (bpf_mbuf_copy(m0, k, &half, sizeof(half)) == 0) {
		*err = 0;
		return ntohs(half);
	}

	*err = 1;
	return (0);
}
1999
2000 u_int32_t
bpf_mbuf_ldb(const void * m0,u_int32_t k,int * err)2001 bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
2002 {
2003 const struct mbuf *m = m0;
2004 u_int8_t v;
2005
2006 while (k >= m->m_len) {
2007 k -= m->m_len;
2008
2009 m = m->m_next;
2010 if (m == NULL) {
2011 *err = 1;
2012 return (0);
2013 }
2014 }
2015 v = m->m_data[k];
2016
2017 *err = 0;
2018 return v;
2019 }
2020
2021 u_int
bpf_mfilter(const struct bpf_insn * pc,const struct mbuf * m,u_int wirelen)2022 bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
2023 {
2024 return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
2025 }
2026