1 /* $OpenBSD: bpf.c,v 1.204 2021/04/23 03:43:19 dlg Exp $ */ 2 /* $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */ 3 4 /* 5 * Copyright (c) 1990, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org> 8 * 9 * This code is derived from the Stanford/CMU enet packet filter, 10 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 11 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 12 * Berkeley Laboratory. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.2 (Berkeley) 3/28/94
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/rwlock.h>
#include <sys/atomic.h>
#include <sys/smr.h>
#include <sys/specdev.h>
#include <sys/selinfo.h>
#include <sys/sigio.h>
#include <sys/task.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

/* Default per-descriptor capture buffer size in bytes. */
#define BPF_BUFSIZE 32768

/* Sleep priority used for msleep_nsec() in bpfread(). */
#define PRINET  26			/* interruptible */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if *, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfpoll(dev_t, int, struct proc *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");

/*
 * Copy a packet written by userland (uio) into a newly allocated mbuf,
 * run it through the descriptor's write filter, and fill in *sockp with
 * a link-level destination address derived from the packet itself.
 *
 * On success *mp holds the mbuf (ownership passes to the caller) and 0
 * is returned; on failure the mbuf is freed and an errno is returned.
 * EPERM means the write filter rejected the packet or it was shorter
 * than the expected link header.
 */
int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGETL(m, M_WAIT, len);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	/* Apply the write filter under an SMR read section. */
	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	/* The filter must accept the whole packet, else refuse the write. */
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	*bp->bif_driverp = bp;
}

/*
 * Detach a file from its interface.  Clears promiscuous mode on the
 * interface if this descriptor had enabled it.  Called with ``bd_mtx''
 * held; the mutex is temporarily dropped around ifpromisc().
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		/* Hold a reference across the lock dance below. */
		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}

void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	/* Only cloned minors (multiples of 1 << CLONE_SHIFT) are valid. */
	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);

	bd->bd_rtout = 0;	/* no timeout by default */

	bpf_get(bd);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	/* Drop the reference taken in bpfopen(); frees d when it hits 0. */
	bpf_put(d);

	return (0);
}

/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 */
#define ROTATE_BUFFERS(d) \
	KASSERT(d->bd_in_uiomove == 0); \
	MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
	(d)->bd_hbuf = (d)->bd_sbuf; \
	(d)->bd_hlen = (d)->bd_slen; \
	(d)->bd_sbuf = (d)->bd_fbuf; \
	(d)->bd_slen = 0; \
	(d)->bd_fbuf = NULL;

/*
 * TODO Move nsecuptime() into kern_tc.c and document it when we have
 * more users elsewhere in the kernel.
426 */ 427 static uint64_t 428 nsecuptime(void) 429 { 430 struct timespec now; 431 432 nanouptime(&now); 433 return TIMESPEC_TO_NSEC(&now); 434 } 435 436 /* 437 * bpfread - read next chunk of packets from buffers 438 */ 439 int 440 bpfread(dev_t dev, struct uio *uio, int ioflag) 441 { 442 uint64_t end, now; 443 struct bpf_d *d; 444 caddr_t hbuf; 445 int error, hlen; 446 447 KERNEL_ASSERT_LOCKED(); 448 449 d = bpfilter_lookup(minor(dev)); 450 if (d->bd_bif == NULL) 451 return (ENXIO); 452 453 bpf_get(d); 454 mtx_enter(&d->bd_mtx); 455 456 /* 457 * Restrict application to use a buffer the same size as 458 * as kernel buffers. 459 */ 460 if (uio->uio_resid != d->bd_bufsize) { 461 error = EINVAL; 462 goto out; 463 } 464 465 /* 466 * If there's a timeout, mark when the read should end. 467 */ 468 if (d->bd_rtout != 0) { 469 now = nsecuptime(); 470 end = now + d->bd_rtout; 471 if (end < now) 472 end = UINT64_MAX; 473 } 474 475 /* 476 * If the hold buffer is empty, then do a timed sleep, which 477 * ends when the timeout expires or when enough packets 478 * have arrived to fill the store buffer. 479 */ 480 while (d->bd_hbuf == NULL) { 481 if (d->bd_bif == NULL) { 482 /* interface is gone */ 483 if (d->bd_slen == 0) { 484 error = EIO; 485 goto out; 486 } 487 ROTATE_BUFFERS(d); 488 break; 489 } 490 if (d->bd_immediate && d->bd_slen != 0) { 491 /* 492 * A packet(s) either arrived since the previous 493 * read or arrived while we were asleep. 494 * Rotate the buffers and return what's here. 495 */ 496 ROTATE_BUFFERS(d); 497 break; 498 } 499 if (ISSET(ioflag, IO_NDELAY)) { 500 /* User requested non-blocking I/O */ 501 error = EWOULDBLOCK; 502 } else if (d->bd_rtout == 0) { 503 /* No read timeout set. */ 504 d->bd_nreaders++; 505 error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH, 506 "bpf", INFSLP); 507 d->bd_nreaders--; 508 } else if ((now = nsecuptime()) < end) { 509 /* Read timeout has not expired yet. 
*/ 510 d->bd_nreaders++; 511 error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH, 512 "bpf", end - now); 513 d->bd_nreaders--; 514 } else { 515 /* Read timeout has expired. */ 516 error = EWOULDBLOCK; 517 } 518 if (error == EINTR || error == ERESTART) 519 goto out; 520 if (error == EWOULDBLOCK) { 521 /* 522 * On a timeout, return what's in the buffer, 523 * which may be nothing. If there is something 524 * in the store buffer, we can rotate the buffers. 525 */ 526 if (d->bd_hbuf != NULL) 527 /* 528 * We filled up the buffer in between 529 * getting the timeout and arriving 530 * here, so we don't need to rotate. 531 */ 532 break; 533 534 if (d->bd_slen == 0) { 535 error = 0; 536 goto out; 537 } 538 ROTATE_BUFFERS(d); 539 break; 540 } 541 } 542 /* 543 * At this point, we know we have something in the hold slot. 544 */ 545 hbuf = d->bd_hbuf; 546 hlen = d->bd_hlen; 547 d->bd_hbuf = NULL; 548 d->bd_hlen = 0; 549 d->bd_fbuf = NULL; 550 d->bd_in_uiomove = 1; 551 552 /* 553 * Move data from hold buffer into user space. 554 * We know the entire buffer is transferred since 555 * we checked above that the read buffer is bpf_bufsize bytes. 556 */ 557 mtx_leave(&d->bd_mtx); 558 error = uiomove(hbuf, hlen, uio); 559 mtx_enter(&d->bd_mtx); 560 561 /* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */ 562 KASSERT(d->bd_fbuf == NULL); 563 KASSERT(d->bd_hbuf == NULL); 564 d->bd_fbuf = hbuf; 565 d->bd_in_uiomove = 0; 566 out: 567 mtx_leave(&d->bd_mtx); 568 bpf_put(d); 569 570 return (error); 571 } 572 573 /* 574 * If there are processes sleeping on this descriptor, wake them up. 575 */ 576 void 577 bpf_wakeup(struct bpf_d *d) 578 { 579 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 580 581 if (d->bd_nreaders) 582 wakeup(d); 583 584 /* 585 * As long as pgsigio() and selwakeup() need to be protected 586 * by the KERNEL_LOCK() we have to delay the wakeup to 587 * another context to keep the hot path KERNEL_LOCK()-free. 
588 */ 589 if ((d->bd_async && d->bd_sig) || 590 (!klist_empty(&d->bd_sel.si_note) || d->bd_sel.si_seltid != 0)) { 591 bpf_get(d); 592 if (!task_add(systq, &d->bd_wake_task)) 593 bpf_put(d); 594 } 595 } 596 597 void 598 bpf_wakeup_cb(void *xd) 599 { 600 struct bpf_d *d = xd; 601 602 if (d->bd_async && d->bd_sig) 603 pgsigio(&d->bd_sigio, d->bd_sig, 0); 604 605 selwakeup(&d->bd_sel); 606 bpf_put(d); 607 } 608 609 int 610 bpfwrite(dev_t dev, struct uio *uio, int ioflag) 611 { 612 struct bpf_d *d; 613 struct ifnet *ifp; 614 struct mbuf *m; 615 int error; 616 struct sockaddr_storage dst; 617 618 KERNEL_ASSERT_LOCKED(); 619 620 d = bpfilter_lookup(minor(dev)); 621 if (d->bd_bif == NULL) 622 return (ENXIO); 623 624 bpf_get(d); 625 ifp = d->bd_bif->bif_ifp; 626 627 if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) { 628 error = ENETDOWN; 629 goto out; 630 } 631 632 if (uio->uio_resid == 0) { 633 error = 0; 634 goto out; 635 } 636 637 error = bpf_movein(uio, d, &m, sstosa(&dst)); 638 if (error) 639 goto out; 640 641 if (m->m_pkthdr.len > ifp->if_mtu) { 642 m_freem(m); 643 error = EMSGSIZE; 644 goto out; 645 } 646 647 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 648 m->m_pkthdr.pf.prio = ifp->if_llprio; 649 650 if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC) 651 dst.ss_family = pseudo_AF_HDRCMPLT; 652 653 NET_LOCK(); 654 error = ifp->if_output(ifp, m, sstosa(&dst), NULL); 655 NET_UNLOCK(); 656 657 out: 658 bpf_put(d); 659 return (error); 660 } 661 662 /* 663 * Reset a descriptor by flushing its packet buffer and clearing the 664 * receive and drop counts. 665 */ 666 void 667 bpf_resetd(struct bpf_d *d) 668 { 669 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 670 KASSERT(d->bd_in_uiomove == 0); 671 672 if (d->bd_hbuf != NULL) { 673 /* Free the hold buffer. */ 674 d->bd_fbuf = d->bd_hbuf; 675 d->bd_hbuf = NULL; 676 } 677 d->bd_slen = 0; 678 d->bd_hlen = 0; 679 d->bd_rcount = 0; 680 d->bd_dcount = 0; 681 } 682 683 /* 684 * FIONREAD Check for read packet available. 
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set ethernet read filter.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLTLIST	Get supported link layer types.
 * BIOCGDLT		Get link layer type.
 * BIOCSDLT		Set link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag
 * BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked && suser(p) != 0) {
		/* list of allowed ioctls when locked and not root */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
	{
		int n;

		mtx_enter(&d->bd_mtx);
		n = d->bd_slen;
		if (d->bd_hbuf != NULL)
			n += d->bd_hlen;
		mtx_leave(&d->bd_mtx);

		*(int *)addr = n;
		break;
	}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.  Only allowed before an interface is
	 * attached; the requested size is clamped to
	 * [BPF_MINBUFSIZE, bpf_maxbufsize] and the clamped value is
	 * written back to userland.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;

			if (size > bpf_maxbufsize)
				*(u_int *)addr = size = bpf_maxbufsize;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
		error = bpf_setf(d, (struct bpf_program *)addr, 0);
		break;

	/*
	 * Set link layer write filter.
	 */
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, 1);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.  Rejects invalid timevals and timeouts
	 * longer than the maximum sleep time (MAXTSLP).
	 */
	case BIOCSRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;
		uint64_t rtout;

		if (tv->tv_sec < 0 || !timerisvalid(tv)) {
			error = EINVAL;
			break;
		}
		rtout = TIMEVAL_TO_NSEC(tv);
		if (rtout > MAXTSLP) {
			error = EOVERFLOW;
			break;
		}
		mtx_enter(&d->bd_mtx);
		d->bd_rtout = rtout;
		mtx_leave(&d->bd_mtx);
		break;
	}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
	{
		struct timeval *tv = (struct timeval *)addr;

		memset(tv, 0, sizeof(*tv));
		mtx_enter(&d->bd_mtx);
		NSEC_TO_TIMEVAL(d->bd_rtout, tv);
		mtx_leave(&d->bd_mtx);
		break;
	}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
	{
		struct bpf_stat *bs = (struct bpf_stat *)addr;

		bs->bs_recv = d->bd_rcount;
		bs->bs_drop = d->bd_dcount;
		break;
	}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
	{
		struct bpf_version *bv = (struct bpf_version *)addr;

		bv->bv_major = BPF_MAJOR_VERSION;
		bv->bv_minor = BPF_MINOR_VERSION;
		break;
	}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		/* let vfs to keep track of this */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
	{
		u_int sig;

		sig = *(u_int *)addr;

		if (sig >= NSIG)
			error = EINVAL;
		else
			d->bd_sig = sig;
		break;
	}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		/* A NULL instruction pointer with bf_len == 0 clears
		 * the filter. */
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	/* wf selects the write filter (1) or the read filter (0). */
	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	/* Defer freeing the old program until all SMR readers are done. */
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 * Among multiple DLTs for the same name, prefer the lowest.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}

/*
 * Copy the interface name to the ifreq.
 */
void
bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
{
	bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
}

/*
 * Support for poll() system call
 */
int
bpfpoll(dev_t dev, int events, struct proc *p)
{
	struct bpf_d *d;
	int revents;

	KERNEL_ASSERT_LOCKED();

	/*
	 * An imitation of the FIONREAD ioctl code.
	 */
	d = bpfilter_lookup(minor(dev));

	/*
	 * XXX The USB stack manages it to trigger some race condition
	 * which causes bpfilter_lookup to return NULL when a USB device
	 * gets detached while it is up and has an open bpf handler (e.g.
	 * dhclient).  We still should recheck if we can fix the root
	 * cause of this issue.
	 */
	if (d == NULL)
		return (POLLERR);

	/* Always ready to write data */
	revents = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&d->bd_mtx);
		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(p, &d->bd_sel);
		mtx_leave(&d->bd_mtx);
	}
	return (revents);
}

const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
};

/*
 * kqueue attach: only EVFILT_READ is supported; takes a descriptor
 * reference that is released in filt_bpfrdetach().
 */
int
bpfkqfilter(dev_t dev, struct knote *kn)
{
	struct bpf_d *d;
	struct klist *klist;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));

	switch (kn->kn_filter) {
	case EVFILT_READ:
		klist = &d->bd_sel.si_note;
		kn->kn_fop = &bpfread_filtops;
		break;
	default:
		return (EINVAL);
	}

	bpf_get(d);
	kn->kn_hook = d;
	klist_insert_locked(klist, kn);

	return (0);
}

/* kqueue detach: drop the knote and the reference from bpfkqfilter(). */
void
filt_bpfrdetach(struct knote *kn)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	klist_remove_locked(&d->bd_sel.si_note, kn);
	bpf_put(d);
}

/* kqueue event: readable when the hold buffer (plus, in immediate
 * mode, the store buffer) contains data. */
int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = kn->kn_hook;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&d->bd_mtx);
	kn->kn_data = d->bd_hlen;
	if (d->bd_immediate)
		kn->kn_data += d->bd_slen;
	mtx_leave(&d->bd_mtx);

	return (kn->kn_data > 0);
}

/*
 * Copy data from an mbuf chain into a buffer.  This code is derived
 * from m_copydata in sys/uipc_mbuf.c.
 */
void
bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
{
	const struct mbuf *m;
	u_int count;
	u_char *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		if (m == NULL)
			panic("bpf_mcopy");
		count = min(m->m_len, len);
		bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
		m = m->m_next;
		dst += count;
		len -= count;
	}
}

/*
 * Tap an mbuf chain: hand it to every listener on the interface.
 * Returns nonzero if any matching listener requested the packet be
 * dropped (filter-drop).
 */
int
bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
{
	return _bpf_mtap(arg, m, m, direction);
}

/*
 * Common tap path.  ``mp'' is the original packet (used for the packet
 * header metadata and timestamp), ``m'' is the chain actually matched
 * and captured (it may carry a prepended dummy header).
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	/* Walk the listener list under an SMR read section. */
	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			/* Build the capture header once, lazily, and
			 * reuse it for every matching listener. */
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}

/*
 * Incoming linkage from device drivers, where a data buffer should be
 * prepended by an arbitrary header.  In this situation we already have a
 * way of representing a chain of memory buffers, ie, mbufs, so reuse
 * the existing functionality by attaching the buffers to mbufs.
 *
 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 * struct m_hdr each for the header and data on the stack.
 */
int
bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
    const void *buf, unsigned int buflen, u_int direction)
{
	struct m_hdr mh, md;
	struct mbuf *m0 = NULL;
	struct mbuf **mp = &m0;

	if (hdr != NULL) {
		mh.mh_flags = 0;
		mh.mh_next = NULL;
		mh.mh_len = hdrlen;
		mh.mh_data = (void *)hdr;

		*mp = (struct mbuf *)&mh;
		mp = &mh.mh_next;
	}

	if (buf != NULL) {
		md.mh_flags = 0;
		md.mh_next = NULL;
		md.mh_len = buflen;
		md.mh_data = (void *)buf;

		*mp = (struct mbuf *)&md;
	}

	return bpf_mtap(arg, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend some arbitrary header from a linear buffer.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
    u_int direction)
{
	struct m_hdr mh;
	const struct mbuf *m0;

	if (dlen > 0) {
		mh.mh_flags = 0;
		mh.mh_next = (struct mbuf *)m;
		mh.mh_len = dlen;
		mh.mh_data = (void *)data;
		m0 = (struct mbuf *)&mh;
	} else
		m0 = m;

	return _bpf_mtap(arg, m, m0, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend the address family.
 *
 * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
 */
int
bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
{
	u_int32_t afh;

	/* Address family is prepended in network byte order. */
	afh = htonl(af);

	return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
}

/*
 * Incoming linkage from device drivers, where we have a mbuf chain
 * but need to prepend a VLAN encapsulation header.
 *
 * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 * struct m_hdr on the stack.  This is safe as bpf only reads from the
 * fields in this header that we initialize, and will not try to free
 * it or keep a pointer to it.
1436 */ 1437 int 1438 bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction) 1439 { 1440 #if NVLAN > 0 1441 struct ether_vlan_header evh; 1442 struct m_hdr mh, md; 1443 1444 if ((m->m_flags & M_VLANTAG) == 0) 1445 #endif 1446 { 1447 return _bpf_mtap(arg, m, m, direction); 1448 } 1449 1450 #if NVLAN > 0 1451 KASSERT(m->m_len >= ETHER_HDR_LEN); 1452 1453 memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN); 1454 evh.evl_proto = evh.evl_encap_proto; 1455 evh.evl_encap_proto = htons(ETHERTYPE_VLAN); 1456 evh.evl_tag = htons(m->m_pkthdr.ether_vtag); 1457 1458 mh.mh_flags = 0; 1459 mh.mh_data = (caddr_t)&evh; 1460 mh.mh_len = sizeof(evh); 1461 mh.mh_next = (struct mbuf *)&md; 1462 1463 md.mh_flags = 0; 1464 md.mh_data = m->m_data + ETHER_HDR_LEN; 1465 md.mh_len = m->m_len - ETHER_HDR_LEN; 1466 md.mh_next = m->m_next; 1467 1468 return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction); 1469 #endif 1470 } 1471 1472 /* 1473 * Move the packet data from interface memory (pkt) into the 1474 * store buffer. Wake up listeners if needed. 1475 * "copy" is the routine called to do the actual data 1476 * transfer. bcopy is passed in to copy contiguous chunks, while 1477 * bpf_mcopy is passed in to copy mbuf chains. In the latter case, 1478 * pkt is really an mbuf. 1479 */ 1480 void 1481 bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen, 1482 const struct bpf_hdr *tbh) 1483 { 1484 struct bpf_hdr *bh; 1485 int totlen, curlen; 1486 int hdrlen, do_wakeup = 0; 1487 1488 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1489 if (d->bd_bif == NULL) 1490 return; 1491 1492 hdrlen = d->bd_bif->bif_hdrlen; 1493 1494 /* 1495 * Figure out how many bytes to move. If the packet is 1496 * greater or equal to the snapshot length, transfer that 1497 * much. Otherwise, transfer the whole packet (unless 1498 * we hit the buffer size limit). 
1499 */ 1500 totlen = hdrlen + min(snaplen, pktlen); 1501 if (totlen > d->bd_bufsize) 1502 totlen = d->bd_bufsize; 1503 1504 /* 1505 * Round up the end of the previous packet to the next longword. 1506 */ 1507 curlen = BPF_WORDALIGN(d->bd_slen); 1508 if (curlen + totlen > d->bd_bufsize) { 1509 /* 1510 * This packet will overflow the storage buffer. 1511 * Rotate the buffers if we can, then wakeup any 1512 * pending reads. 1513 */ 1514 if (d->bd_fbuf == NULL) { 1515 /* 1516 * We haven't completed the previous read yet, 1517 * so drop the packet. 1518 */ 1519 ++d->bd_dcount; 1520 return; 1521 } 1522 ROTATE_BUFFERS(d); 1523 do_wakeup = 1; 1524 curlen = 0; 1525 } 1526 1527 /* 1528 * Append the bpf header. 1529 */ 1530 bh = (struct bpf_hdr *)(d->bd_sbuf + curlen); 1531 *bh = *tbh; 1532 bh->bh_datalen = pktlen; 1533 bh->bh_hdrlen = hdrlen; 1534 bh->bh_caplen = totlen - hdrlen; 1535 1536 /* 1537 * Copy the packet data into the store buffer and update its length. 1538 */ 1539 bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen); 1540 d->bd_slen = curlen + totlen; 1541 1542 if (d->bd_immediate) { 1543 /* 1544 * Immediate mode is set. A packet arrived so any 1545 * reads should be woken up. 1546 */ 1547 do_wakeup = 1; 1548 } 1549 1550 if (do_wakeup) 1551 bpf_wakeup(d); 1552 } 1553 1554 /* 1555 * Initialize all nonzero fields of a descriptor. 
1556 */ 1557 int 1558 bpf_allocbufs(struct bpf_d *d) 1559 { 1560 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1561 1562 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1563 if (d->bd_fbuf == NULL) 1564 return (ENOMEM); 1565 1566 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1567 if (d->bd_sbuf == NULL) { 1568 free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize); 1569 return (ENOMEM); 1570 } 1571 1572 d->bd_slen = 0; 1573 d->bd_hlen = 0; 1574 1575 return (0); 1576 } 1577 1578 void 1579 bpf_prog_smr(void *bps_arg) 1580 { 1581 struct bpf_program_smr *bps = bps_arg; 1582 1583 free(bps->bps_bf.bf_insns, M_DEVBUF, 1584 bps->bps_bf.bf_len * sizeof(struct bpf_insn)); 1585 free(bps, M_DEVBUF, sizeof(struct bpf_program_smr)); 1586 } 1587 1588 void 1589 bpf_d_smr(void *smr) 1590 { 1591 struct bpf_d *bd = smr; 1592 1593 sigio_free(&bd->bd_sigio); 1594 free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize); 1595 free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize); 1596 free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize); 1597 1598 if (bd->bd_rfilter != NULL) 1599 bpf_prog_smr(bd->bd_rfilter); 1600 if (bd->bd_wfilter != NULL) 1601 bpf_prog_smr(bd->bd_wfilter); 1602 1603 free(bd, M_DEVBUF, sizeof(*bd)); 1604 } 1605 1606 void 1607 bpf_get(struct bpf_d *bd) 1608 { 1609 atomic_inc_int(&bd->bd_ref); 1610 } 1611 1612 /* 1613 * Free buffers currently in use by a descriptor 1614 * when the reference count drops to zero. 
1615 */ 1616 void 1617 bpf_put(struct bpf_d *bd) 1618 { 1619 if (atomic_dec_int_nv(&bd->bd_ref) > 0) 1620 return; 1621 1622 smr_call(&bd->bd_smr, bpf_d_smr, bd); 1623 } 1624 1625 void * 1626 bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen) 1627 { 1628 struct bpf_if *bp; 1629 1630 if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL) 1631 panic("bpfattach"); 1632 SMR_SLIST_INIT(&bp->bif_dlist); 1633 bp->bif_driverp = (struct bpf_if **)bpfp; 1634 bp->bif_name = name; 1635 bp->bif_ifp = NULL; 1636 bp->bif_dlt = dlt; 1637 1638 bp->bif_next = bpf_iflist; 1639 bpf_iflist = bp; 1640 1641 *bp->bif_driverp = NULL; 1642 1643 /* 1644 * Compute the length of the bpf header. This is not necessarily 1645 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1646 * that the network layer header begins on a longword boundary (for 1647 * performance reasons and to alleviate alignment restrictions). 1648 */ 1649 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1650 1651 return (bp); 1652 } 1653 1654 void 1655 bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen) 1656 { 1657 struct bpf_if *bp; 1658 1659 bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen); 1660 bp->bif_ifp = ifp; 1661 } 1662 1663 /* Detach an interface from its attached bpf device. */ 1664 void 1665 bpfdetach(struct ifnet *ifp) 1666 { 1667 struct bpf_if *bp, *nbp; 1668 1669 KERNEL_ASSERT_LOCKED(); 1670 1671 for (bp = bpf_iflist; bp; bp = nbp) { 1672 nbp = bp->bif_next; 1673 if (bp->bif_ifp == ifp) 1674 bpfsdetach(bp); 1675 } 1676 ifp->if_bpf = NULL; 1677 } 1678 1679 void 1680 bpfsdetach(void *p) 1681 { 1682 struct bpf_if *bp = p, *tbp; 1683 struct bpf_d *bd; 1684 int maj; 1685 1686 KERNEL_ASSERT_LOCKED(); 1687 1688 /* Locate the major number. 
*/ 1689 for (maj = 0; maj < nchrdev; maj++) 1690 if (cdevsw[maj].d_open == bpfopen) 1691 break; 1692 1693 while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) { 1694 vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR); 1695 klist_invalidate(&bd->bd_sel.si_note); 1696 } 1697 1698 for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) { 1699 if (tbp->bif_next == bp) { 1700 tbp->bif_next = bp->bif_next; 1701 break; 1702 } 1703 } 1704 1705 if (bpf_iflist == bp) 1706 bpf_iflist = bp->bif_next; 1707 1708 free(bp, M_DEVBUF, sizeof(*bp)); 1709 } 1710 1711 int 1712 bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp, 1713 void *newp, size_t newlen) 1714 { 1715 switch (name[0]) { 1716 case NET_BPF_BUFSIZE: 1717 return sysctl_int_bounded(oldp, oldlenp, newp, newlen, 1718 &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize); 1719 case NET_BPF_MAXBUFSIZE: 1720 return sysctl_int_bounded(oldp, oldlenp, newp, newlen, 1721 &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX); 1722 default: 1723 return (EOPNOTSUPP); 1724 } 1725 } 1726 1727 int 1728 bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 1729 size_t newlen) 1730 { 1731 int flags = RW_INTR; 1732 int error; 1733 1734 if (namelen != 1) 1735 return (ENOTDIR); 1736 1737 flags |= (newp == NULL) ? RW_READ : RW_WRITE; 1738 1739 error = rw_enter(&bpf_sysctl_lk, flags); 1740 if (error != 0) 1741 return (error); 1742 1743 error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen); 1744 1745 rw_exit(&bpf_sysctl_lk); 1746 1747 return (error); 1748 } 1749 1750 struct bpf_d * 1751 bpfilter_lookup(int unit) 1752 { 1753 struct bpf_d *bd; 1754 1755 KERNEL_ASSERT_LOCKED(); 1756 1757 LIST_FOREACH(bd, &bpf_d_list, bd_list) 1758 if (bd->bd_unit == unit) 1759 return (bd); 1760 return (NULL); 1761 } 1762 1763 /* 1764 * Get a list of available data link type of the interface. 
1765 */ 1766 int 1767 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1768 { 1769 int n, error; 1770 struct bpf_if *bp; 1771 const char *name; 1772 1773 name = d->bd_bif->bif_name; 1774 n = 0; 1775 error = 0; 1776 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1777 if (strcmp(name, bp->bif_name) != 0) 1778 continue; 1779 if (bfl->bfl_list != NULL) { 1780 if (n >= bfl->bfl_len) 1781 return (ENOMEM); 1782 error = copyout(&bp->bif_dlt, 1783 bfl->bfl_list + n, sizeof(u_int)); 1784 if (error) 1785 break; 1786 } 1787 n++; 1788 } 1789 1790 bfl->bfl_len = n; 1791 return (error); 1792 } 1793 1794 /* 1795 * Set the data link type of a BPF instance. 1796 */ 1797 int 1798 bpf_setdlt(struct bpf_d *d, u_int dlt) 1799 { 1800 const char *name; 1801 struct bpf_if *bp; 1802 1803 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1804 if (d->bd_bif->bif_dlt == dlt) 1805 return (0); 1806 name = d->bd_bif->bif_name; 1807 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) { 1808 if (strcmp(name, bp->bif_name) != 0) 1809 continue; 1810 if (bp->bif_dlt == dlt) 1811 break; 1812 } 1813 if (bp == NULL) 1814 return (EINVAL); 1815 bpf_detachd(d); 1816 bpf_attachd(d, bp); 1817 bpf_resetd(d); 1818 return (0); 1819 } 1820 1821 u_int32_t bpf_mbuf_ldw(const void *, u_int32_t, int *); 1822 u_int32_t bpf_mbuf_ldh(const void *, u_int32_t, int *); 1823 u_int32_t bpf_mbuf_ldb(const void *, u_int32_t, int *); 1824 1825 int bpf_mbuf_copy(const struct mbuf *, u_int32_t, 1826 void *, u_int32_t); 1827 1828 const struct bpf_ops bpf_mbuf_ops = { 1829 bpf_mbuf_ldw, 1830 bpf_mbuf_ldh, 1831 bpf_mbuf_ldb, 1832 }; 1833 1834 int 1835 bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len) 1836 { 1837 u_int8_t *cp = buf; 1838 u_int32_t count; 1839 1840 while (off >= m->m_len) { 1841 off -= m->m_len; 1842 1843 m = m->m_next; 1844 if (m == NULL) 1845 return (-1); 1846 } 1847 1848 for (;;) { 1849 count = min(m->m_len - off, len); 1850 1851 memcpy(cp, m->m_data + off, count); 1852 len -= count; 1853 
1854 if (len == 0) 1855 return (0); 1856 1857 m = m->m_next; 1858 if (m == NULL) 1859 break; 1860 1861 cp += count; 1862 off = 0; 1863 } 1864 1865 return (-1); 1866 } 1867 1868 u_int32_t 1869 bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err) 1870 { 1871 u_int32_t v; 1872 1873 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1874 *err = 1; 1875 return (0); 1876 } 1877 1878 *err = 0; 1879 return ntohl(v); 1880 } 1881 1882 u_int32_t 1883 bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err) 1884 { 1885 u_int16_t v; 1886 1887 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1888 *err = 1; 1889 return (0); 1890 } 1891 1892 *err = 0; 1893 return ntohs(v); 1894 } 1895 1896 u_int32_t 1897 bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err) 1898 { 1899 const struct mbuf *m = m0; 1900 u_int8_t v; 1901 1902 while (k >= m->m_len) { 1903 k -= m->m_len; 1904 1905 m = m->m_next; 1906 if (m == NULL) { 1907 *err = 1; 1908 return (0); 1909 } 1910 } 1911 v = m->m_data[k]; 1912 1913 *err = 0; 1914 return v; 1915 } 1916 1917 u_int 1918 bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen) 1919 { 1920 return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen); 1921 } 1922