1 /* $NetBSD: bpf.c,v 1.162 2011/01/22 19:12:58 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from the Stanford/CMU enet packet filter, 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 10 * Berkeley Laboratory. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 37 * static char rcsid[] = 38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.162 2011/01/22 19:12:58 christos Exp $"); 43 44 #if defined(_KERNEL_OPT) 45 #include "opt_bpf.h" 46 #include "sl.h" 47 #include "strip.h" 48 #endif 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/mbuf.h> 53 #include <sys/buf.h> 54 #include <sys/time.h> 55 #include <sys/proc.h> 56 #include <sys/ioctl.h> 57 #include <sys/conf.h> 58 #include <sys/vnode.h> 59 #include <sys/queue.h> 60 #include <sys/stat.h> 61 #include <sys/module.h> 62 #include <sys/once.h> 63 #include <sys/atomic.h> 64 65 #include <sys/file.h> 66 #include <sys/filedesc.h> 67 #include <sys/tty.h> 68 #include <sys/uio.h> 69 70 #include <sys/protosw.h> 71 #include <sys/socket.h> 72 #include <sys/errno.h> 73 #include <sys/kernel.h> 74 #include <sys/poll.h> 75 #include <sys/sysctl.h> 76 #include <sys/kauth.h> 77 78 #include <net/if.h> 79 #include <net/slip.h> 80 81 #include <net/bpf.h> 82 #include <net/bpfdesc.h> 83 84 #include <net/if_arc.h> 85 #include <net/if_ether.h> 86 87 #include <netinet/in.h> 88 #include <netinet/if_inarp.h> 89 90 91 #include <compat/sys/sockio.h> 92 93 #ifndef BPF_BUFSIZE 94 /* 95 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet 96 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k). 97 */ 98 # define BPF_BUFSIZE 32768 99 #endif 100 101 #define PRINET 26 /* interruptible */ 102 103 /* 104 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able. 105 * XXX the default values should be computed dynamically based 106 * on available memory size and available mbuf clusters. 107 */ 108 int bpf_bufsize = BPF_BUFSIZE; 109 int bpf_maxbufsize = BPF_DFLTBUFSIZE; /* XXX set dynamically, see above */ 110 111 112 /* 113 * Global BPF statistics returned by net.bpf.stats sysctl. 
114 */ 115 struct bpf_stat bpf_gstats; 116 117 /* 118 * Use a mutex to avoid a race condition between gathering the stats/peers 119 * and opening/closing the device. 120 */ 121 static kmutex_t bpf_mtx; 122 123 /* 124 * bpf_iflist is the list of interfaces; each corresponds to an ifnet 125 * bpf_dtab holds the descriptors, indexed by minor device # 126 */ 127 struct bpf_if *bpf_iflist; 128 LIST_HEAD(, bpf_d) bpf_list; 129 130 static int bpf_allocbufs(struct bpf_d *); 131 static void bpf_deliver(struct bpf_if *, 132 void *(*cpfn)(void *, const void *, size_t), 133 void *, u_int, u_int, struct ifnet *); 134 static void bpf_freed(struct bpf_d *); 135 static void bpf_ifname(struct ifnet *, struct ifreq *); 136 static void *bpf_mcpy(void *, const void *, size_t); 137 static int bpf_movein(struct uio *, int, uint64_t, 138 struct mbuf **, struct sockaddr *); 139 static void bpf_attachd(struct bpf_d *, struct bpf_if *); 140 static void bpf_detachd(struct bpf_d *); 141 static int bpf_setif(struct bpf_d *, struct ifreq *); 142 static void bpf_timed_out(void *); 143 static inline void 144 bpf_wakeup(struct bpf_d *); 145 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 146 void *(*)(void *, const void *, size_t), struct timespec *); 147 static void reset_d(struct bpf_d *); 148 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 149 static int bpf_setdlt(struct bpf_d *, u_int); 150 151 static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t, 152 int); 153 static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t, 154 int); 155 static int bpf_ioctl(struct file *, u_long, void *); 156 static int bpf_poll(struct file *, int); 157 static int bpf_stat(struct file *, struct stat *); 158 static int bpf_close(struct file *); 159 static int bpf_kqfilter(struct file *, struct knote *); 160 static void bpf_softintr(void *); 161 162 static const struct fileops bpf_fileops = { 163 .fo_read = bpf_read, 164 .fo_write = bpf_write, 165 
.fo_ioctl = bpf_ioctl, 166 .fo_fcntl = fnullop_fcntl, 167 .fo_poll = bpf_poll, 168 .fo_stat = bpf_stat, 169 .fo_close = bpf_close, 170 .fo_kqfilter = bpf_kqfilter, 171 .fo_restart = fnullop_restart, 172 }; 173 174 dev_type_open(bpfopen); 175 176 const struct cdevsw bpf_cdevsw = { 177 bpfopen, noclose, noread, nowrite, noioctl, 178 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER 179 }; 180 181 static int 182 bpf_movein(struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp, 183 struct sockaddr *sockp) 184 { 185 struct mbuf *m; 186 int error; 187 size_t len; 188 size_t hlen; 189 size_t align; 190 191 /* 192 * Build a sockaddr based on the data link layer type. 193 * We do this at this level because the ethernet header 194 * is copied directly into the data field of the sockaddr. 195 * In the case of SLIP, there is no header and the packet 196 * is forwarded as is. 197 * Also, we are careful to leave room at the front of the mbuf 198 * for the link level header. 199 */ 200 switch (linktype) { 201 202 case DLT_SLIP: 203 sockp->sa_family = AF_INET; 204 hlen = 0; 205 align = 0; 206 break; 207 208 case DLT_PPP: 209 sockp->sa_family = AF_UNSPEC; 210 hlen = 0; 211 align = 0; 212 break; 213 214 case DLT_EN10MB: 215 sockp->sa_family = AF_UNSPEC; 216 /* XXX Would MAXLINKHDR be better? 
*/ 217 /* 6(dst)+6(src)+2(type) */ 218 hlen = sizeof(struct ether_header); 219 align = 2; 220 break; 221 222 case DLT_ARCNET: 223 sockp->sa_family = AF_UNSPEC; 224 hlen = ARC_HDRLEN; 225 align = 5; 226 break; 227 228 case DLT_FDDI: 229 sockp->sa_family = AF_LINK; 230 /* XXX 4(FORMAC)+6(dst)+6(src) */ 231 hlen = 16; 232 align = 0; 233 break; 234 235 case DLT_ECONET: 236 sockp->sa_family = AF_UNSPEC; 237 hlen = 6; 238 align = 2; 239 break; 240 241 case DLT_NULL: 242 sockp->sa_family = AF_UNSPEC; 243 hlen = 0; 244 align = 0; 245 break; 246 247 default: 248 return (EIO); 249 } 250 251 len = uio->uio_resid; 252 /* 253 * If there aren't enough bytes for a link level header or the 254 * packet length exceeds the interface mtu, return an error. 255 */ 256 if (len - hlen > mtu) 257 return (EMSGSIZE); 258 259 /* 260 * XXX Avoid complicated buffer chaining --- 261 * bail if it won't fit in a single mbuf. 262 * (Take into account possible alignment bytes) 263 */ 264 if (len + align > MCLBYTES) 265 return (EIO); 266 267 m = m_gethdr(M_WAIT, MT_DATA); 268 m->m_pkthdr.rcvif = 0; 269 m->m_pkthdr.len = (int)(len - hlen); 270 if (len + align > MHLEN) { 271 m_clget(m, M_WAIT); 272 if ((m->m_flags & M_EXT) == 0) { 273 error = ENOBUFS; 274 goto bad; 275 } 276 } 277 278 /* Insure the data is properly aligned */ 279 if (align > 0) { 280 m->m_data += align; 281 m->m_len -= (int)align; 282 } 283 284 error = uiomove(mtod(m, void *), len, uio); 285 if (error) 286 goto bad; 287 if (hlen != 0) { 288 memcpy(sockp->sa_data, mtod(m, void *), hlen); 289 m->m_data += hlen; /* XXX */ 290 len -= hlen; 291 } 292 m->m_len = (int)len; 293 *mp = m; 294 return (0); 295 296 bad: 297 m_freem(m); 298 return (error); 299 } 300 301 /* 302 * Attach file to the bpf interface, i.e. make d listen on bp. 303 * Must be called at splnet. 304 */ 305 static void 306 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 307 { 308 /* 309 * Point d at bp, and add d to the interface's list of listeners. 
310 * Finally, point the driver's bpf cookie at the interface so 311 * it will divert packets to bpf. 312 */ 313 d->bd_bif = bp; 314 d->bd_next = bp->bif_dlist; 315 bp->bif_dlist = d; 316 317 *bp->bif_driverp = bp; 318 } 319 320 /* 321 * Detach a file from its interface. 322 */ 323 static void 324 bpf_detachd(struct bpf_d *d) 325 { 326 struct bpf_d **p; 327 struct bpf_if *bp; 328 329 bp = d->bd_bif; 330 /* 331 * Check if this descriptor had requested promiscuous mode. 332 * If so, turn it off. 333 */ 334 if (d->bd_promisc) { 335 int error; 336 337 d->bd_promisc = 0; 338 /* 339 * Take device out of promiscuous mode. Since we were 340 * able to enter promiscuous mode, we should be able 341 * to turn it off. But we can get an error if 342 * the interface was configured down, so only panic 343 * if we don't get an unexpected error. 344 */ 345 error = ifpromisc(bp->bif_ifp, 0); 346 if (error && error != EINVAL) 347 panic("%s: ifpromisc failed: %d", __func__, error); 348 } 349 /* Remove d from the interface's descriptor list. */ 350 p = &bp->bif_dlist; 351 while (*p != d) { 352 p = &(*p)->bd_next; 353 if (*p == 0) 354 panic("%s: descriptor not in list", __func__); 355 } 356 *p = (*p)->bd_next; 357 if (bp->bif_dlist == 0) 358 /* 359 * Let the driver know that there are no more listeners. 360 */ 361 *d->bd_bif->bif_driverp = 0; 362 d->bd_bif = 0; 363 } 364 365 static int 366 doinit(void) 367 { 368 369 mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE); 370 371 LIST_INIT(&bpf_list); 372 373 bpf_gstats.bs_recv = 0; 374 bpf_gstats.bs_drop = 0; 375 bpf_gstats.bs_capt = 0; 376 377 return 0; 378 } 379 380 /* 381 * bpfilterattach() is called at boot time. 382 */ 383 /* ARGSUSED */ 384 void 385 bpfilterattach(int n) 386 { 387 static ONCE_DECL(control); 388 389 RUN_ONCE(&control, doinit); 390 } 391 392 /* 393 * Open ethernet device. Clones. 
 */
/* ARGSUSED */
int
bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct bpf_d *d;
	struct file *fp;
	int error, fd;

	/* Get the file and descriptor that fd_clone() is handed below. */
	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return error;

	/* Zeroed descriptor; only the non-zero defaults are set here. */
	d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_ZERO);
	d->bd_bufsize = bpf_bufsize;
	d->bd_seesent = 1;
	d->bd_feedback = 0;
	d->bd_pid = l->l_proc->p_pid;
	getnanotime(&d->bd_btime);
	d->bd_atime = d->bd_mtime = d->bd_btime;
	callout_init(&d->bd_callout, 0);
	selinit(&d->bd_sel);
	d->bd_sih = softint_establish(SOFTINT_CLOCK, bpf_softintr, d);

	/* Publish the descriptor on the global list. */
	mutex_enter(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
	mutex_exit(&bpf_mtx);

	return fd_clone(fp, fd, flag, &bpf_fileops, d);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpf_close(struct file *fp)
{
	struct bpf_d *d = fp->f_data;
	int s;

	KERNEL_LOCK(1, NULL);

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	d->bd_pid = curproc->p_pid;

	/* Cancel a pending read timeout and detach, all at splnet. */
	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	if (d->bd_bif)
		bpf_detachd(d);
	splx(s);
	bpf_freed(d);
	/* Unlink from the global list before tearing the descriptor down. */
	mutex_enter(&bpf_mtx);
	LIST_REMOVE(d, bd_list);
	mutex_exit(&bpf_mtx);
	/* Undo, in turn, what bpfopen() set up. */
	callout_destroy(&d->bd_callout);
	seldestroy(&d->bd_sel);
	softint_disestablish(d->bd_sih);
	free(d, M_DEVBUF);
	fp->f_data = NULL;

	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
469 */ 470 #define ROTATE_BUFFERS(d) \ 471 (d)->bd_hbuf = (d)->bd_sbuf; \ 472 (d)->bd_hlen = (d)->bd_slen; \ 473 (d)->bd_sbuf = (d)->bd_fbuf; \ 474 (d)->bd_slen = 0; \ 475 (d)->bd_fbuf = 0; 476 /* 477 * bpfread - read next chunk of packets from buffers 478 */ 479 static int 480 bpf_read(struct file *fp, off_t *offp, struct uio *uio, 481 kauth_cred_t cred, int flags) 482 { 483 struct bpf_d *d = fp->f_data; 484 int timed_out; 485 int error; 486 int s; 487 488 getnanotime(&d->bd_atime); 489 /* 490 * Restrict application to use a buffer the same size as 491 * the kernel buffers. 492 */ 493 if (uio->uio_resid != d->bd_bufsize) 494 return (EINVAL); 495 496 KERNEL_LOCK(1, NULL); 497 s = splnet(); 498 if (d->bd_state == BPF_WAITING) 499 callout_stop(&d->bd_callout); 500 timed_out = (d->bd_state == BPF_TIMED_OUT); 501 d->bd_state = BPF_IDLE; 502 /* 503 * If the hold buffer is empty, then do a timed sleep, which 504 * ends when the timeout expires or when enough packets 505 * have arrived to fill the store buffer. 506 */ 507 while (d->bd_hbuf == 0) { 508 if (fp->f_flag & FNONBLOCK) { 509 if (d->bd_slen == 0) { 510 splx(s); 511 KERNEL_UNLOCK_ONE(NULL); 512 return (EWOULDBLOCK); 513 } 514 ROTATE_BUFFERS(d); 515 break; 516 } 517 518 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 519 /* 520 * A packet(s) either arrived since the previous 521 * read or arrived while we were asleep. 522 * Rotate the buffers and return what's here. 523 */ 524 ROTATE_BUFFERS(d); 525 break; 526 } 527 error = tsleep(d, PRINET|PCATCH, "bpf", 528 d->bd_rtout); 529 if (error == EINTR || error == ERESTART) { 530 splx(s); 531 KERNEL_UNLOCK_ONE(NULL); 532 return (error); 533 } 534 if (error == EWOULDBLOCK) { 535 /* 536 * On a timeout, return what's in the buffer, 537 * which may be nothing. If there is something 538 * in the store buffer, we can rotate the buffers. 
539 */ 540 if (d->bd_hbuf) 541 /* 542 * We filled up the buffer in between 543 * getting the timeout and arriving 544 * here, so we don't need to rotate. 545 */ 546 break; 547 548 if (d->bd_slen == 0) { 549 splx(s); 550 KERNEL_UNLOCK_ONE(NULL); 551 return (0); 552 } 553 ROTATE_BUFFERS(d); 554 break; 555 } 556 if (error != 0) 557 goto done; 558 } 559 /* 560 * At this point, we know we have something in the hold slot. 561 */ 562 splx(s); 563 564 /* 565 * Move data from hold buffer into user space. 566 * We know the entire buffer is transferred since 567 * we checked above that the read buffer is bpf_bufsize bytes. 568 */ 569 error = uiomove(d->bd_hbuf, d->bd_hlen, uio); 570 571 s = splnet(); 572 d->bd_fbuf = d->bd_hbuf; 573 d->bd_hbuf = 0; 574 d->bd_hlen = 0; 575 done: 576 splx(s); 577 KERNEL_UNLOCK_ONE(NULL); 578 return (error); 579 } 580 581 582 /* 583 * If there are processes sleeping on this descriptor, wake them up. 584 */ 585 static inline void 586 bpf_wakeup(struct bpf_d *d) 587 { 588 wakeup(d); 589 if (d->bd_async) 590 softint_schedule(d->bd_sih); 591 selnotify(&d->bd_sel, 0, 0); 592 } 593 594 static void 595 bpf_softintr(void *cookie) 596 { 597 struct bpf_d *d; 598 599 d = cookie; 600 if (d->bd_async) 601 fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL); 602 } 603 604 static void 605 bpf_timed_out(void *arg) 606 { 607 struct bpf_d *d = arg; 608 int s; 609 610 s = splnet(); 611 if (d->bd_state == BPF_WAITING) { 612 d->bd_state = BPF_TIMED_OUT; 613 if (d->bd_slen != 0) 614 bpf_wakeup(d); 615 } 616 splx(s); 617 } 618 619 620 static int 621 bpf_write(struct file *fp, off_t *offp, struct uio *uio, 622 kauth_cred_t cred, int flags) 623 { 624 struct bpf_d *d = fp->f_data; 625 struct ifnet *ifp; 626 struct mbuf *m, *mc; 627 int error, s; 628 static struct sockaddr_storage dst; 629 630 m = NULL; /* XXX gcc */ 631 632 KERNEL_LOCK(1, NULL); 633 634 if (d->bd_bif == 0) { 635 KERNEL_UNLOCK_ONE(NULL); 636 return (ENXIO); 637 } 638 getnanotime(&d->bd_mtime); 639 640 ifp = 
d->bd_bif->bif_ifp; 641 642 if (uio->uio_resid == 0) { 643 KERNEL_UNLOCK_ONE(NULL); 644 return (0); 645 } 646 647 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp->if_mtu, &m, 648 (struct sockaddr *) &dst); 649 if (error) { 650 KERNEL_UNLOCK_ONE(NULL); 651 return (error); 652 } 653 654 if (m->m_pkthdr.len > ifp->if_mtu) { 655 KERNEL_UNLOCK_ONE(NULL); 656 m_freem(m); 657 return (EMSGSIZE); 658 } 659 660 if (d->bd_hdrcmplt) 661 dst.ss_family = pseudo_AF_HDRCMPLT; 662 663 if (d->bd_feedback) { 664 mc = m_dup(m, 0, M_COPYALL, M_NOWAIT); 665 if (mc != NULL) 666 mc->m_pkthdr.rcvif = ifp; 667 /* Set M_PROMISC for outgoing packets to be discarded. */ 668 if (1 /*d->bd_direction == BPF_D_INOUT*/) 669 m->m_flags |= M_PROMISC; 670 } else 671 mc = NULL; 672 673 s = splsoftnet(); 674 error = (*ifp->if_output)(ifp, m, (struct sockaddr *) &dst, NULL); 675 676 if (mc != NULL) { 677 if (error == 0) 678 (*ifp->if_input)(ifp, mc); 679 } else 680 m_freem(mc); 681 splx(s); 682 KERNEL_UNLOCK_ONE(NULL); 683 /* 684 * The driver frees the mbuf. 685 */ 686 return (error); 687 } 688 689 /* 690 * Reset a descriptor by flushing its packet buffer and clearing the 691 * receive and drop counts. Should be called at splnet. 692 */ 693 static void 694 reset_d(struct bpf_d *d) 695 { 696 if (d->bd_hbuf) { 697 /* Free the hold buffer. */ 698 d->bd_fbuf = d->bd_hbuf; 699 d->bd_hbuf = 0; 700 } 701 d->bd_slen = 0; 702 d->bd_hlen = 0; 703 d->bd_rcount = 0; 704 d->bd_dcount = 0; 705 d->bd_ccount = 0; 706 } 707 708 /* 709 * FIONREAD Check for read packet available. 710 * BIOCGBLEN Get buffer len [for read()]. 711 * BIOCSETF Set ethernet read filter. 712 * BIOCFLUSH Flush read packet buffer. 713 * BIOCPROMISC Put interface into promiscuous mode. 714 * BIOCGDLT Get link layer type. 715 * BIOCGETIF Get interface name. 716 * BIOCSETIF Set interface. 717 * BIOCSRTIMEOUT Set read timeout. 718 * BIOCGRTIMEOUT Get read timeout. 719 * BIOCGSTATS Get packet stats. 720 * BIOCIMMEDIATE Set immediate mode. 
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCSFEEDBACK	Set packet feedback mode.
 *  BIOCGFEEDBACK	Get packet feedback mode.
 *  BIOCGSEESENT  	Get "see sent packets" mode.
 *  BIOCSSEESENT  	Set "see sent packets" mode.
 */
/* ARGSUSED */
static int
bpf_ioctl(struct file *fp, u_long cmd, void *addr)
{
	struct bpf_d *d = fp->f_data;
	int s, error = 0;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	/* Any ioctl cancels a read timeout armed by bpf_poll(). */
	s = splnet();
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	splx(s);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			/* Bytes in the store buffer plus any held buffer. */
			s = splnet();
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			splx(s);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.  Refused once an interface is attached;
	 * the request is clamped to [BPF_MINBUFSIZE, bpf_maxbufsize]
	 * and the value actually used is written back to the caller.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != 0)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			d->bd_bufsize = size;
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, addr);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		s = splnet();
		reset_d(d);
		splx(s);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == 0) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		s = splnet();
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		splx(s);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, addr);
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
#ifdef OBIOCGETIF
	case OBIOCGETIF:
#endif
	case BIOCGETIF:
		if (d->bd_bif == 0)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif->bif_ifp, addr);
		break;

	/*
	 * Set interface.
	 */
#ifdef OBIOCSETIF
	case OBIOCSETIF:
#endif
	case BIOCSETIF:
		error = bpf_setif(d, addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			/* A non-zero sub-tick timeout rounds up to one tick. */
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}

#ifdef BIOCGORTIMEOUT
	/*
	 * Get read timeout (struct timeval50 variant).
	 */
	case BIOCGORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
#endif

#ifdef BIOCSORTIMEOUT
	/*
	 * Set read timeout (struct timeval50 variant).
	 */
	case BIOCSORTIMEOUT:
		{
			struct timeval50 *tv = addr;

			/* Compute number of ticks. */
			d->bd_rtout = tv->tv_sec * hz + tv->tv_usec / tick;
			if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
				d->bd_rtout = 1;
			break;
		}
#endif

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}
	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			bs->bs_capt = d->bd_ccount;
			break;
		}

	/* Get packet stats, older layout without the capture count. */
	case BIOCGSTATSOLD:
		{
			struct bpf_stat_old *bs = addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get "see sent packets" flag
	 */
	case BIOCGSEESENT:
		*(u_int *)addr = d->bd_seesent;
		break;

	/*
	 * Set "see sent" packets flag
	 */
	case BIOCSSEESENT:
		d->bd_seesent = *(u_int *)addr;
		break;

	/*
	 * Set "feed packets from bpf back to input" mode
	 */
	case BIOCSFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	/*
	 * Get "feed packets from bpf back to input" mode
	 */
	case BIOCGFEEDBACK:
		*(u_int *)addr = d->bd_feedback;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/*
		 * No need to do anything special: bpf_read() tests
		 * FNONBLOCK in fp->f_flag to decide whether or not to
		 * block the read.
		 */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case TIOCSPGRP:		/* Process or group to send signals to */
	case FIOSETOWN:
		error = fsetown(&d->bd_pgid, cmd, addr);
		break;

	case TIOCGPGRP:
	case FIOGETOWN:
		error = fgetown(d->bd_pgid, cmd, addr);
		break;
	}
	KERNEL_UNLOCK_ONE(NULL);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 *
 * A NULL bf_insns with bf_len of 0 removes the filter.  Installing or
 * removing a filter also resets the descriptor's buffers and counters.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp)
{
	struct bpf_insn *fcode, *old;
	u_int flen, size;
	int s;

	old = d->bd_filter;
	if (fp->bf_insns == 0) {
		if (fp->bf_len != 0)
			return (EINVAL);
		s = splnet();
		d->bd_filter = 0;
		reset_d(d);
		splx(s);
		if (old != 0)
			free(old, M_DEVBUF);
		return (0);
	}
	flen = fp->bf_len;
	if (flen > BPF_MAXINSNS)
		return (EINVAL);

	/* flen is bounded by BPF_MAXINSNS above. */
	size = flen * sizeof(*fp->bf_insns);
	fcode = malloc(size, M_DEVBUF, M_WAITOK);
	/* Install the new program only if it copies in and validates. */
	if (copyin(fp->bf_insns, fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		s = splnet();
		d->bd_filter = fcode;
		reset_d(d);
		splx(s);
		if (old != 0)
			free(old, M_DEVBUF);

		return (0);
	}
	free(fcode, M_DEVBUF);
	return (EINVAL);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	char *cp;
	int unit_seen, i, s, error;

	/*
	 * Make sure the provided name has a unit number, and default
	 * it to '0' if not specified.
	 * XXX This is ugly ... do this differently?
	 *
	 * NOTE(review): the scan below increments cp before testing it,
	 * so the first character of the name is never examined.  When no
	 * digit is found, the fallback loop overwrites EVERY non-letter
	 * byte in the first IFNAMSIZ - 1 positions with '0' (including
	 * the terminator and anything after it) rather than appending a
	 * single '0' -- confirm whether that is intended.
	 */
	unit_seen = 0;
	cp = ifr->ifr_name;
	cp[sizeof(ifr->ifr_name) - 1] = '\0';	/* sanity */
	while (*cp++)
		if (*cp >= '0' && *cp <= '9')
			unit_seen = 1;
	if (!unit_seen) {
		/* Make sure to leave room for the '\0'. */
		for (i = 0; i < (IFNAMSIZ - 1); ++i) {
			if ((ifr->ifr_name[i] >= 'a' &&
			     ifr->ifr_name[i] <= 'z') ||
			    (ifr->ifr_name[i] >= 'A' &&
			     ifr->ifr_name[i] <= 'Z'))
				continue;
			ifr->ifr_name[i] = '0';
		}
	}

	/*
	 * Look through attached interfaces for the named one.
	 */
	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
		struct ifnet *ifp = bp->bif_ifp;

		if (ifp == 0 ||
		    strcmp(ifp->if_xname, ifr->ifr_name) != 0)
			continue;
		/* skip additional entry */
		if (bp->bif_driverp != &ifp->if_bpf)
			continue;
		/*
		 * We found the requested interface.
		 * Allocate the packet buffers if we need to.
		 * If we're already attached to requested interface,
		 * just flush the buffer.
		 */
		if (d->bd_sbuf == 0) {
			error = bpf_allocbufs(d);
			if (error != 0)
				return (error);
		}
		s = splnet();
		if (bp != d->bd_bif) {
			if (d->bd_bif)
				/*
				 * Detach if attached to something else.
				 */
				bpf_detachd(d);

			bpf_attachd(d, bp);
		}
		reset_d(d);
		splx(s);
		return (0);
	}
	/* Not found. */
	return (ENXIO);
}

/*
 * Copy the interface name to the ifreq.
 */
static void
bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
{
	/* Fixed-size copy of the whole IFNAMSIZ-byte name field. */
	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
}

/*
 * fo_stat handler: fill *st from the descriptor's timestamps and the
 * credentials of the open file.  The device number is built from the
 * bpf character-device major and the owning PID.
 */
static int
bpf_stat(struct file *fp, struct stat *st)
{
	struct bpf_d *d = fp->f_data;

	(void)memset(st, 0, sizeof(*st));
	KERNEL_LOCK(1, NULL);
	st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
	st->st_atimespec = d->bd_atime;
	st->st_mtimespec = d->bd_mtime;
	st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	KERNEL_UNLOCK_ONE(NULL);
	return 0;
}

/*
 * Support for poll() system call
 *
 * Return true iff the specific operation will not block indefinitely - with
 * the assumption that it is safe to positively acknowledge a request for the
 * ability to write to the BPF device.
 * Otherwise, return false but make a note that a selnotify() must be done.
 */
static int
bpf_poll(struct file *fp, int events)
{
	struct bpf_d *d = fp->f_data;
	int s = splnet();
	int revents;

	/*
	 * Refresh the PID associated with this bpf file.
	 */
	KERNEL_LOCK(1, NULL);
	d->bd_pid = curproc->p_pid;

	/* Writing is always reported as possible. */
	revents = events & (POLLOUT | POLLWRNORM);
	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		     d->bd_slen != 0)) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curlwp, &d->bd_sel);
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
					      bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}

	KERNEL_UNLOCK_ONE(NULL);
	splx(s);
	return (revents);
}

/*
 * kqueue detach hook: remove the knote from the descriptor's klist.
 */
static void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;
	int s;

	KERNEL_LOCK(1, NULL);
	s = splnet();
	SLIST_REMOVE(&d->bd_sel.sel_klist, kn, knote, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);
}

/*
 * kqueue read filter: report the number of readable bytes in kn_data
 * (hold buffer, plus the store buffer in immediate mode) and return
 * non-zero when that count is positive.
 */
static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;
	int rv;

	KERNEL_LOCK(1, NULL);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	rv = (kn->kn_data > 0);
	KERNEL_UNLOCK_ONE(NULL);
	return rv;
}

/* Filter operations attached to EVFILT_READ knotes by bpf_kqfilter(). */
static const struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfrdetach, filt_bpfread };

/*
 * fo_kqfilter handler: only EVFILT_READ is supported; anything else
 * yields EINVAL.  On success the knote is hooked to the descriptor and
 * inserted on its klist.
 */
static int
bpf_kqfilter(struct file *fp, struct knote *kn)
{
	struct bpf_d *d = fp->f_data;
	struct klist *klist;
	int s;

	KERNEL_LOCK(1, NULL);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.sel_klist;
		kn->kn_fop = &bpfread_filtops;
		break;

	default:
		KERNEL_UNLOCK_ONE(NULL);
		return (EINVAL);
	}

	kn->kn_hook = d;

	s = splnet();
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	splx(s);
	KERNEL_UNLOCK_ONE(NULL);

	return (0);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.
 The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
static void
_bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	struct timespec ts;
	int gottime=0;

	/*
	 * Note that the ipl does not have to be raised at this point.
	 * The only problem that could arise here is that if two different
	 * interfaces shared any data.  This is not the case.
	 */
	for (d = bp->bif_dlist; d != 0; d = d->bd_next) {
		++d->bd_rcount;
		++bpf_gstats.bs_recv;
		/* A zero snap length means the filter rejected the packet. */
		slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen);
		if (slen != 0) {
			/* Take the timestamp once, on the first match. */
			if (!gottime) {
				nanotime(&ts);
				gottime = 1;
			}
			catchpacket(d, pkt, pktlen, slen, memcpy, &ts);
		}
	}
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 *
 * src_arg is the head of the chain and len the number of bytes to
 * copy; panics if the chain ends before len bytes have been copied.
 * Returns dst_arg, mirroring memcpy().
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcpy");
		count = min(m->m_len, len);
		memcpy(dst, mtod(m, const void *), count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
	return dst_arg;
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * marg    pointer to the packet, either a data buffer or an mbuf chain
 * buflen  buffer length, if marg is a data buffer
 * cpfn    a function that can copy marg into the listener's buffer
 * pktlen  length of the packet
 * rcvif   either NULL or the interface the packet came in on.
1358 */ 1359 static inline void 1360 bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t), 1361 void *marg, u_int pktlen, u_int buflen, struct ifnet *rcvif) 1362 { 1363 u_int slen; 1364 struct bpf_d *d; 1365 struct timespec ts; 1366 int gottime = 0; 1367 1368 for (d = bp->bif_dlist; d != 0; d = d->bd_next) { 1369 if (!d->bd_seesent && (rcvif == NULL)) 1370 continue; 1371 ++d->bd_rcount; 1372 ++bpf_gstats.bs_recv; 1373 slen = bpf_filter(d->bd_filter, marg, pktlen, buflen); 1374 if (slen != 0) { 1375 if(!gottime) { 1376 nanotime(&ts); 1377 gottime = 1; 1378 } 1379 catchpacket(d, marg, pktlen, slen, cpfn, &ts); 1380 } 1381 } 1382 } 1383 1384 /* 1385 * Incoming linkage from device drivers, when the head of the packet is in 1386 * a buffer, and the tail is in an mbuf chain. 1387 */ 1388 static void 1389 _bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 1390 { 1391 u_int pktlen; 1392 struct mbuf mb; 1393 1394 /* Skip outgoing duplicate packets. */ 1395 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 1396 m->m_flags &= ~M_PROMISC; 1397 return; 1398 } 1399 1400 pktlen = m_length(m) + dlen; 1401 1402 /* 1403 * Craft on-stack mbuf suitable for passing to bpf_filter. 1404 * Note that we cut corners here; we only setup what's 1405 * absolutely needed--this mbuf should never go anywhere else. 1406 */ 1407 (void)memset(&mb, 0, sizeof(mb)); 1408 mb.m_next = m; 1409 mb.m_data = data; 1410 mb.m_len = dlen; 1411 1412 bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, m->m_pkthdr.rcvif); 1413 } 1414 1415 /* 1416 * Incoming linkage from device drivers, when packet is in an mbuf chain. 1417 */ 1418 static void 1419 _bpf_mtap(struct bpf_if *bp, struct mbuf *m) 1420 { 1421 void *(*cpfn)(void *, const void *, size_t); 1422 u_int pktlen, buflen; 1423 void *marg; 1424 1425 /* Skip outgoing duplicate packets. 
*/ 1426 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 1427 m->m_flags &= ~M_PROMISC; 1428 return; 1429 } 1430 1431 pktlen = m_length(m); 1432 1433 if (pktlen == m->m_len) { 1434 cpfn = (void *)memcpy; 1435 marg = mtod(m, void *); 1436 buflen = pktlen; 1437 } else { 1438 cpfn = bpf_mcpy; 1439 marg = m; 1440 buflen = 0; 1441 } 1442 1443 bpf_deliver(bp, cpfn, marg, pktlen, buflen, m->m_pkthdr.rcvif); 1444 } 1445 1446 /* 1447 * We need to prepend the address family as 1448 * a four byte field. Cons up a dummy header 1449 * to pacify bpf. This is safe because bpf 1450 * will only read from the mbuf (i.e., it won't 1451 * try to free it or keep a pointer a to it). 1452 */ 1453 static void 1454 _bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m) 1455 { 1456 struct mbuf m0; 1457 1458 m0.m_flags = 0; 1459 m0.m_next = m; 1460 m0.m_len = 4; 1461 m0.m_data = (char *)⁡ 1462 1463 _bpf_mtap(bp, &m0); 1464 } 1465 1466 /* 1467 * Put the SLIP pseudo-"link header" in place. 1468 * Note this M_PREPEND() should never fail, 1469 * swince we know we always have enough space 1470 * in the input buffer. 1471 */ 1472 static void 1473 _bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m) 1474 { 1475 int s; 1476 u_char *hp; 1477 1478 M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT); 1479 if (*m == NULL) 1480 return; 1481 1482 hp = mtod(*m, u_char *); 1483 hp[SLX_DIR] = SLIPDIR_IN; 1484 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN); 1485 1486 s = splnet(); 1487 _bpf_mtap(bp, *m); 1488 splx(s); 1489 1490 m_adj(*m, SLIP_HDRLEN); 1491 } 1492 1493 /* 1494 * Put the SLIP pseudo-"link header" in 1495 * place. The compressed header is now 1496 * at the beginning of the mbuf. 
1497 */ 1498 static void 1499 _bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m) 1500 { 1501 struct mbuf m0; 1502 u_char *hp; 1503 int s; 1504 1505 m0.m_flags = 0; 1506 m0.m_next = m; 1507 m0.m_data = m0.m_dat; 1508 m0.m_len = SLIP_HDRLEN; 1509 1510 hp = mtod(&m0, u_char *); 1511 1512 hp[SLX_DIR] = SLIPDIR_OUT; 1513 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN); 1514 1515 s = splnet(); 1516 _bpf_mtap(bp, &m0); 1517 splx(s); 1518 m_freem(m); 1519 } 1520 1521 /* 1522 * Move the packet data from interface memory (pkt) into the 1523 * store buffer. Return 1 if it's time to wakeup a listener (buffer full), 1524 * otherwise 0. "copy" is the routine called to do the actual data 1525 * transfer. memcpy is passed in to copy contiguous chunks, while 1526 * bpf_mcpy is passed in to copy mbuf chains. In the latter case, 1527 * pkt is really an mbuf. 1528 */ 1529 static void 1530 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 1531 void *(*cpfn)(void *, const void *, size_t), struct timespec *ts) 1532 { 1533 struct bpf_hdr *hp; 1534 int totlen, curlen; 1535 int hdrlen = d->bd_bif->bif_hdrlen; 1536 int do_wakeup = 0; 1537 1538 ++d->bd_ccount; 1539 ++bpf_gstats.bs_capt; 1540 /* 1541 * Figure out how many bytes to move. If the packet is 1542 * greater or equal to the snapshot length, transfer that 1543 * much. Otherwise, transfer the whole packet (unless 1544 * we hit the buffer size limit). 1545 */ 1546 totlen = hdrlen + min(snaplen, pktlen); 1547 if (totlen > d->bd_bufsize) 1548 totlen = d->bd_bufsize; 1549 1550 /* 1551 * Round up the end of the previous packet to the next longword. 1552 */ 1553 curlen = BPF_WORDALIGN(d->bd_slen); 1554 if (curlen + totlen > d->bd_bufsize) { 1555 /* 1556 * This packet will overflow the storage buffer. 1557 * Rotate the buffers if we can, then wakeup any 1558 * pending reads. 1559 */ 1560 if (d->bd_fbuf == 0) { 1561 /* 1562 * We haven't completed the previous read yet, 1563 * so drop the packet. 
1564 */ 1565 ++d->bd_dcount; 1566 ++bpf_gstats.bs_drop; 1567 return; 1568 } 1569 ROTATE_BUFFERS(d); 1570 do_wakeup = 1; 1571 curlen = 0; 1572 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) { 1573 /* 1574 * Immediate mode is set, or the read timeout has 1575 * already expired during a select call. A packet 1576 * arrived, so the reader should be woken up. 1577 */ 1578 do_wakeup = 1; 1579 } 1580 1581 /* 1582 * Append the bpf header. 1583 */ 1584 hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen); 1585 hp->bh_tstamp.tv_sec = ts->tv_sec; 1586 hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000; 1587 hp->bh_datalen = pktlen; 1588 hp->bh_hdrlen = hdrlen; 1589 /* 1590 * Copy the packet data into the store buffer and update its length. 1591 */ 1592 (*cpfn)((u_char *)hp + hdrlen, pkt, (hp->bh_caplen = totlen - hdrlen)); 1593 d->bd_slen = curlen + totlen; 1594 1595 /* 1596 * Call bpf_wakeup after bd_slen has been updated so that kevent(2) 1597 * will cause filt_bpfread() to be called with it adjusted. 1598 */ 1599 if (do_wakeup) 1600 bpf_wakeup(d); 1601 } 1602 1603 /* 1604 * Initialize all nonzero fields of a descriptor. 1605 */ 1606 static int 1607 bpf_allocbufs(struct bpf_d *d) 1608 { 1609 1610 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1611 if (!d->bd_fbuf) 1612 return (ENOBUFS); 1613 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1614 if (!d->bd_sbuf) { 1615 free(d->bd_fbuf, M_DEVBUF); 1616 return (ENOBUFS); 1617 } 1618 d->bd_slen = 0; 1619 d->bd_hlen = 0; 1620 return (0); 1621 } 1622 1623 /* 1624 * Free buffers currently in use by a descriptor. 1625 * Called on close. 1626 */ 1627 static void 1628 bpf_freed(struct bpf_d *d) 1629 { 1630 /* 1631 * We don't need to lock out interrupts since this descriptor has 1632 * been detached from its interface and it yet hasn't been marked 1633 * free. 
1634 */ 1635 if (d->bd_sbuf != 0) { 1636 free(d->bd_sbuf, M_DEVBUF); 1637 if (d->bd_hbuf != 0) 1638 free(d->bd_hbuf, M_DEVBUF); 1639 if (d->bd_fbuf != 0) 1640 free(d->bd_fbuf, M_DEVBUF); 1641 } 1642 if (d->bd_filter) 1643 free(d->bd_filter, M_DEVBUF); 1644 } 1645 1646 /* 1647 * Attach an interface to bpf. dlt is the link layer type; 1648 * hdrlen is the fixed size of the link header for the specified dlt 1649 * (variable length headers not yet supported). 1650 */ 1651 static void 1652 _bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 1653 { 1654 struct bpf_if *bp; 1655 bp = malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT); 1656 if (bp == 0) 1657 panic("bpfattach"); 1658 1659 bp->bif_dlist = 0; 1660 bp->bif_driverp = driverp; 1661 bp->bif_ifp = ifp; 1662 bp->bif_dlt = dlt; 1663 1664 bp->bif_next = bpf_iflist; 1665 bpf_iflist = bp; 1666 1667 *bp->bif_driverp = 0; 1668 1669 /* 1670 * Compute the length of the bpf header. This is not necessarily 1671 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1672 * that the network layer header begins on a longword boundary (for 1673 * performance reasons and to alleviate alignment restrictions). 1674 */ 1675 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1676 1677 #if 0 1678 printf("bpf: %s attached\n", ifp->if_xname); 1679 #endif 1680 } 1681 1682 /* 1683 * Remove an interface from bpf. 1684 */ 1685 static void 1686 _bpfdetach(struct ifnet *ifp) 1687 { 1688 struct bpf_if *bp, **pbp; 1689 struct bpf_d *d; 1690 int s; 1691 1692 /* Nuke the vnodes for any open instances */ 1693 LIST_FOREACH(d, &bpf_list, bd_list) { 1694 if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) { 1695 /* 1696 * Detach the descriptor from an interface now. 1697 * It will be free'ed later by close routine. 1698 */ 1699 s = splnet(); 1700 d->bd_promisc = 0; /* we can't touch device. 
*/ 1701 bpf_detachd(d); 1702 splx(s); 1703 } 1704 } 1705 1706 again: 1707 for (bp = bpf_iflist, pbp = &bpf_iflist; 1708 bp != NULL; pbp = &bp->bif_next, bp = bp->bif_next) { 1709 if (bp->bif_ifp == ifp) { 1710 *pbp = bp->bif_next; 1711 free(bp, M_DEVBUF); 1712 goto again; 1713 } 1714 } 1715 } 1716 1717 /* 1718 * Change the data link type of a interface. 1719 */ 1720 static void 1721 _bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen) 1722 { 1723 struct bpf_if *bp; 1724 1725 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1726 if (bp->bif_driverp == &ifp->if_bpf) 1727 break; 1728 } 1729 if (bp == NULL) 1730 panic("bpf_change_type"); 1731 1732 bp->bif_dlt = dlt; 1733 1734 /* 1735 * Compute the length of the bpf header. This is not necessarily 1736 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1737 * that the network layer header begins on a longword boundary (for 1738 * performance reasons and to alleviate alignment restrictions). 1739 */ 1740 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1741 } 1742 1743 /* 1744 * Get a list of available data link type of the interface. 1745 */ 1746 static int 1747 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1748 { 1749 int n, error; 1750 struct ifnet *ifp; 1751 struct bpf_if *bp; 1752 1753 ifp = d->bd_bif->bif_ifp; 1754 n = 0; 1755 error = 0; 1756 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1757 if (bp->bif_ifp != ifp) 1758 continue; 1759 if (bfl->bfl_list != NULL) { 1760 if (n >= bfl->bfl_len) 1761 return ENOMEM; 1762 error = copyout(&bp->bif_dlt, 1763 bfl->bfl_list + n, sizeof(u_int)); 1764 } 1765 n++; 1766 } 1767 bfl->bfl_len = n; 1768 return error; 1769 } 1770 1771 /* 1772 * Set the data link type of a BPF instance. 
1773 */ 1774 static int 1775 bpf_setdlt(struct bpf_d *d, u_int dlt) 1776 { 1777 int s, error, opromisc; 1778 struct ifnet *ifp; 1779 struct bpf_if *bp; 1780 1781 if (d->bd_bif->bif_dlt == dlt) 1782 return 0; 1783 ifp = d->bd_bif->bif_ifp; 1784 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1785 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 1786 break; 1787 } 1788 if (bp == NULL) 1789 return EINVAL; 1790 s = splnet(); 1791 opromisc = d->bd_promisc; 1792 bpf_detachd(d); 1793 bpf_attachd(d, bp); 1794 reset_d(d); 1795 if (opromisc) { 1796 error = ifpromisc(bp->bif_ifp, 1); 1797 if (error) 1798 printf("%s: bpf_setdlt: ifpromisc failed (%d)\n", 1799 bp->bif_ifp->if_xname, error); 1800 else 1801 d->bd_promisc = 1; 1802 } 1803 splx(s); 1804 return 0; 1805 } 1806 1807 static int 1808 sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS) 1809 { 1810 int newsize, error; 1811 struct sysctlnode node; 1812 1813 node = *rnode; 1814 node.sysctl_data = &newsize; 1815 newsize = bpf_maxbufsize; 1816 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1817 if (error || newp == NULL) 1818 return (error); 1819 1820 if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE) 1821 return (EINVAL); 1822 1823 bpf_maxbufsize = newsize; 1824 1825 return (0); 1826 } 1827 1828 static int 1829 sysctl_net_bpf_peers(SYSCTLFN_ARGS) 1830 { 1831 int error, elem_count; 1832 struct bpf_d *dp; 1833 struct bpf_d_ext dpe; 1834 size_t len, needed, elem_size, out_size; 1835 char *sp; 1836 1837 if (namelen == 1 && name[0] == CTL_QUERY) 1838 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1839 1840 if (namelen != 2) 1841 return (EINVAL); 1842 1843 /* BPF peers is privileged information. */ 1844 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE, 1845 KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL); 1846 if (error) 1847 return (EPERM); 1848 1849 len = (oldp != NULL) ? 
*oldlenp : 0; 1850 sp = oldp; 1851 elem_size = name[0]; 1852 elem_count = name[1]; 1853 out_size = MIN(sizeof(dpe), elem_size); 1854 needed = 0; 1855 1856 if (elem_size < 1 || elem_count < 0) 1857 return (EINVAL); 1858 1859 mutex_enter(&bpf_mtx); 1860 LIST_FOREACH(dp, &bpf_list, bd_list) { 1861 if (len >= elem_size && elem_count > 0) { 1862 #define BPF_EXT(field) dpe.bde_ ## field = dp->bd_ ## field 1863 BPF_EXT(bufsize); 1864 BPF_EXT(promisc); 1865 BPF_EXT(promisc); 1866 BPF_EXT(state); 1867 BPF_EXT(immediate); 1868 BPF_EXT(hdrcmplt); 1869 BPF_EXT(seesent); 1870 BPF_EXT(pid); 1871 BPF_EXT(rcount); 1872 BPF_EXT(dcount); 1873 BPF_EXT(ccount); 1874 #undef BPF_EXT 1875 if (dp->bd_bif) 1876 (void)strlcpy(dpe.bde_ifname, 1877 dp->bd_bif->bif_ifp->if_xname, 1878 IFNAMSIZ - 1); 1879 else 1880 dpe.bde_ifname[0] = '\0'; 1881 1882 error = copyout(&dpe, sp, out_size); 1883 if (error) 1884 break; 1885 sp += elem_size; 1886 len -= elem_size; 1887 } 1888 needed += elem_size; 1889 if (elem_count > 0 && elem_count != INT_MAX) 1890 elem_count--; 1891 } 1892 mutex_exit(&bpf_mtx); 1893 1894 *oldlenp = needed; 1895 1896 return (error); 1897 } 1898 1899 static struct sysctllog *bpf_sysctllog; 1900 static void 1901 sysctl_net_bpf_setup(void) 1902 { 1903 const struct sysctlnode *node; 1904 1905 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL, 1906 CTLFLAG_PERMANENT, 1907 CTLTYPE_NODE, "net", NULL, 1908 NULL, 0, NULL, 0, 1909 CTL_NET, CTL_EOL); 1910 1911 node = NULL; 1912 sysctl_createv(&bpf_sysctllog, 0, NULL, &node, 1913 CTLFLAG_PERMANENT, 1914 CTLTYPE_NODE, "bpf", 1915 SYSCTL_DESCR("BPF options"), 1916 NULL, 0, NULL, 0, 1917 CTL_NET, CTL_CREATE, CTL_EOL); 1918 if (node != NULL) { 1919 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL, 1920 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1921 CTLTYPE_INT, "maxbufsize", 1922 SYSCTL_DESCR("Maximum size for data capture buffer"), 1923 sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0, 1924 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL); 1925 
sysctl_createv(&bpf_sysctllog, 0, NULL, NULL, 1926 CTLFLAG_PERMANENT, 1927 CTLTYPE_STRUCT, "stats", 1928 SYSCTL_DESCR("BPF stats"), 1929 NULL, 0, &bpf_gstats, sizeof(bpf_gstats), 1930 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL); 1931 sysctl_createv(&bpf_sysctllog, 0, NULL, NULL, 1932 CTLFLAG_PERMANENT, 1933 CTLTYPE_STRUCT, "peers", 1934 SYSCTL_DESCR("BPF peers"), 1935 sysctl_net_bpf_peers, 0, NULL, 0, 1936 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL); 1937 } 1938 1939 } 1940 1941 struct bpf_ops bpf_ops_kernel = { 1942 .bpf_attach = _bpfattach, 1943 .bpf_detach = _bpfdetach, 1944 .bpf_change_type = _bpf_change_type, 1945 1946 .bpf_tap = _bpf_tap, 1947 .bpf_mtap = _bpf_mtap, 1948 .bpf_mtap2 = _bpf_mtap2, 1949 .bpf_mtap_af = _bpf_mtap_af, 1950 .bpf_mtap_sl_in = _bpf_mtap_sl_in, 1951 .bpf_mtap_sl_out = _bpf_mtap_sl_out, 1952 }; 1953 1954 MODULE(MODULE_CLASS_DRIVER, bpf, NULL); 1955 1956 static int 1957 bpf_modcmd(modcmd_t cmd, void *arg) 1958 { 1959 devmajor_t bmajor, cmajor; 1960 int error; 1961 1962 bmajor = cmajor = NODEVMAJOR; 1963 1964 switch (cmd) { 1965 case MODULE_CMD_INIT: 1966 bpfilterattach(0); 1967 error = devsw_attach("bpf", NULL, &bmajor, 1968 &bpf_cdevsw, &cmajor); 1969 if (error == EEXIST) 1970 error = 0; /* maybe built-in ... improve eventually */ 1971 if (error) 1972 break; 1973 1974 bpf_ops_handover_enter(&bpf_ops_kernel); 1975 atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel); 1976 bpf_ops_handover_exit(); 1977 sysctl_net_bpf_setup(); 1978 break; 1979 1980 case MODULE_CMD_FINI: 1981 /* 1982 * While there is no reference counting for bpf callers, 1983 * unload could at least in theory be done similarly to 1984 * system call disestablishment. This should even be 1985 * a little simpler: 1986 * 1987 * 1) replace op vector with stubs 1988 * 2) post update to all cpus with xc 1989 * 3) check that nobody is in bpf anymore 1990 * (it's doubtful we'd want something like l_sysent, 1991 * but we could do something like *signed* percpu 1992 * counters. 
if the sum is 0, we're good). 1993 * 4) if fail, unroll changes 1994 * 1995 * NOTE: change won't be atomic to the outside. some 1996 * packets may be not captured even if unload is 1997 * not succesful. I think packet capture not working 1998 * is a perfectly logical consequence of trying to 1999 * disable packet capture. 2000 */ 2001 error = EOPNOTSUPP; 2002 /* insert sysctl teardown */ 2003 break; 2004 2005 default: 2006 error = ENOTTY; 2007 break; 2008 } 2009 2010 return error; 2011 } 2012