10a0e9771SDarren Reed /* $NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $ */ 20a0e9771SDarren Reed 30a0e9771SDarren Reed /* 40a0e9771SDarren Reed * Copyright (c) 1990, 1991, 1993 50a0e9771SDarren Reed * The Regents of the University of California. All rights reserved. 60a0e9771SDarren Reed * 70a0e9771SDarren Reed * This code is derived from the Stanford/CMU enet packet filter, 80a0e9771SDarren Reed * (net/enet.c) distributed as part of 4.3BSD, and code contributed 90a0e9771SDarren Reed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 100a0e9771SDarren Reed * Berkeley Laboratory. 110a0e9771SDarren Reed * 120a0e9771SDarren Reed * Redistribution and use in source and binary forms, with or without 130a0e9771SDarren Reed * modification, are permitted provided that the following conditions 140a0e9771SDarren Reed * are met: 150a0e9771SDarren Reed * 1. Redistributions of source code must retain the above copyright 160a0e9771SDarren Reed * notice, this list of conditions and the following disclaimer. 170a0e9771SDarren Reed * 2. Redistributions in binary form must reproduce the above copyright 180a0e9771SDarren Reed * notice, this list of conditions and the following disclaimer in the 190a0e9771SDarren Reed * documentation and/or other materials provided with the distribution. 200a0e9771SDarren Reed * 3. Neither the name of the University nor the names of its contributors 210a0e9771SDarren Reed * may be used to endorse or promote products derived from this software 220a0e9771SDarren Reed * without specific prior written permission. 230a0e9771SDarren Reed * 240a0e9771SDarren Reed * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 250a0e9771SDarren Reed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 260a0e9771SDarren Reed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 270a0e9771SDarren Reed * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 280a0e9771SDarren Reed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 290a0e9771SDarren Reed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 300a0e9771SDarren Reed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 310a0e9771SDarren Reed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 320a0e9771SDarren Reed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 330a0e9771SDarren Reed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 340a0e9771SDarren Reed * SUCH DAMAGE. 350a0e9771SDarren Reed * 360a0e9771SDarren Reed * @(#)bpf.c 8.4 (Berkeley) 1/9/95 370a0e9771SDarren Reed * static char rcsid[] = 380a0e9771SDarren Reed * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 390a0e9771SDarren Reed */ 400a0e9771SDarren Reed /* 410a0e9771SDarren Reed * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 420a0e9771SDarren Reed * Use is subject to license terms. 4380d5689fSPatrick Mooney * Copyright 2017 Joyent, Inc. 440a0e9771SDarren Reed */ 450a0e9771SDarren Reed 460a0e9771SDarren Reed /* 470a0e9771SDarren Reed * The BPF implements the following access controls for zones attempting 480a0e9771SDarren Reed * to read and write data. Writing of data requires that the net_rawaccess 490a0e9771SDarren Reed * privilege is held whilst reading data requires either net_rawaccess or 500a0e9771SDarren Reed * net_observability. 
510a0e9771SDarren Reed * 520a0e9771SDarren Reed * | Shared | Exclusive | Global 530a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 540a0e9771SDarren Reed * DLT_IPNET in local zone | Read | Read | Read | 550a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 560a0e9771SDarren Reed * Raw access to local zone NIC | None | Read/Write | Read/Write | 570a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 580a0e9771SDarren Reed * Raw access to all NICs | None | None | Read/Write | 590a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 600a0e9771SDarren Reed * 610a0e9771SDarren Reed * The BPF driver is written as a cloning driver: each call to bpfopen() 620a0e9771SDarren Reed * allocates a new minor number. This provides BPF with a 1:1 relationship 630a0e9771SDarren Reed * between open's and close's. There is some amount of "descriptor state" 640a0e9771SDarren Reed * that is kept per open. Pointers to this data are stored in a hash table 650a0e9771SDarren Reed * (bpf_hash) that is index'd by the minor device number for each open file. 
660a0e9771SDarren Reed */ 670a0e9771SDarren Reed #include <sys/param.h> 680a0e9771SDarren Reed #include <sys/systm.h> 690a0e9771SDarren Reed #include <sys/time.h> 700a0e9771SDarren Reed #include <sys/ioctl.h> 710a0e9771SDarren Reed #include <sys/queue.h> 720a0e9771SDarren Reed #include <sys/filio.h> 730a0e9771SDarren Reed #include <sys/policy.h> 740a0e9771SDarren Reed #include <sys/cmn_err.h> 750a0e9771SDarren Reed #include <sys/uio.h> 760a0e9771SDarren Reed #include <sys/file.h> 770a0e9771SDarren Reed #include <sys/sysmacros.h> 780a0e9771SDarren Reed #include <sys/zone.h> 790a0e9771SDarren Reed 800a0e9771SDarren Reed #include <sys/socket.h> 810a0e9771SDarren Reed #include <sys/errno.h> 820a0e9771SDarren Reed #include <sys/poll.h> 830a0e9771SDarren Reed #include <sys/dlpi.h> 840a0e9771SDarren Reed #include <sys/neti.h> 850a0e9771SDarren Reed 860a0e9771SDarren Reed #include <net/if.h> 870a0e9771SDarren Reed 880a0e9771SDarren Reed #include <net/bpf.h> 890a0e9771SDarren Reed #include <net/bpfdesc.h> 900a0e9771SDarren Reed #include <net/dlt.h> 910a0e9771SDarren Reed 920a0e9771SDarren Reed #include <netinet/in.h> 930a0e9771SDarren Reed #include <sys/mac.h> 940a0e9771SDarren Reed #include <sys/mac_client.h> 950a0e9771SDarren Reed #include <sys/mac_impl.h> 960a0e9771SDarren Reed #include <sys/time_std_impl.h> 970a0e9771SDarren Reed #include <sys/hook.h> 980a0e9771SDarren Reed #include <sys/hook_event.h> 990a0e9771SDarren Reed 1000a0e9771SDarren Reed 1010a0e9771SDarren Reed #define mtod(_v, _t) (_t)((_v)->b_rptr) 1020a0e9771SDarren Reed #define M_LEN(_m) ((_m)->b_wptr - (_m)->b_rptr) 1030a0e9771SDarren Reed 1040a0e9771SDarren Reed /* 1050a0e9771SDarren Reed * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet 1060a0e9771SDarren Reed * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k). 
1070a0e9771SDarren Reed */ 1080a0e9771SDarren Reed #define BPF_BUFSIZE (32 * 1024) 1090a0e9771SDarren Reed 1100a0e9771SDarren Reed typedef void *(*cp_fn_t)(void *, const void *, size_t); 1110a0e9771SDarren Reed 1120a0e9771SDarren Reed /* 1130a0e9771SDarren Reed * The default read buffer size, and limit for BIOCSBLEN. 1140a0e9771SDarren Reed */ 1150a0e9771SDarren Reed int bpf_bufsize = BPF_BUFSIZE; 1160a0e9771SDarren Reed int bpf_maxbufsize = (16 * 1024 * 1024); 117b7ea883bSDarren Reed static mod_hash_t *bpf_hash = NULL; 1180a0e9771SDarren Reed 1190a0e9771SDarren Reed /* 1200a0e9771SDarren Reed * Use a mutex to avoid a race condition between gathering the stats/peers 1210a0e9771SDarren Reed * and opening/closing the device. 1220a0e9771SDarren Reed */ 1230a0e9771SDarren Reed static kcondvar_t bpf_dlt_waiter; 1240a0e9771SDarren Reed static kmutex_t bpf_mtx; 1250a0e9771SDarren Reed static bpf_kstats_t ks_stats; 1260a0e9771SDarren Reed static bpf_kstats_t bpf_kstats = { 1270a0e9771SDarren Reed { "readWait", KSTAT_DATA_UINT64 }, 1280a0e9771SDarren Reed { "writeOk", KSTAT_DATA_UINT64 }, 1290a0e9771SDarren Reed { "writeError", KSTAT_DATA_UINT64 }, 1300a0e9771SDarren Reed { "receive", KSTAT_DATA_UINT64 }, 1310a0e9771SDarren Reed { "captured", KSTAT_DATA_UINT64 }, 1320a0e9771SDarren Reed { "dropped", KSTAT_DATA_UINT64 }, 1330a0e9771SDarren Reed }; 1340a0e9771SDarren Reed static kstat_t *bpf_ksp; 1350a0e9771SDarren Reed 1360a0e9771SDarren Reed /* 137b7ea883bSDarren Reed * bpf_list is a list of the BPF descriptors currently open 1380a0e9771SDarren Reed */ 1390a0e9771SDarren Reed LIST_HEAD(, bpf_d) bpf_list; 1400a0e9771SDarren Reed 1410a0e9771SDarren Reed static int bpf_allocbufs(struct bpf_d *); 1420a0e9771SDarren Reed static void bpf_clear_timeout(struct bpf_d *); 1430a0e9771SDarren Reed static void bpf_deliver(struct bpf_d *, cp_fn_t, 1440a0e9771SDarren Reed void *, uint_t, uint_t, boolean_t); 1450a0e9771SDarren Reed static void bpf_freed(struct bpf_d *); 1460a0e9771SDarren 
Reed static int bpf_ifname(struct bpf_d *d, char *, int); 1470a0e9771SDarren Reed static void *bpf_mcpy(void *, const void *, size_t); 148b7ea883bSDarren Reed static int bpf_attachd(struct bpf_d *, const char *, int); 1490a0e9771SDarren Reed static void bpf_detachd(struct bpf_d *); 1500a0e9771SDarren Reed static int bpf_setif(struct bpf_d *, char *, int); 1510a0e9771SDarren Reed static void bpf_timed_out(void *); 1520a0e9771SDarren Reed static inline void 1530a0e9771SDarren Reed bpf_wakeup(struct bpf_d *); 1540a0e9771SDarren Reed static void catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t, 1550a0e9771SDarren Reed cp_fn_t, struct timeval *); 1560a0e9771SDarren Reed static void reset_d(struct bpf_d *); 1570a0e9771SDarren Reed static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 1580a0e9771SDarren Reed static int bpf_setdlt(struct bpf_d *, void *); 1590a0e9771SDarren Reed static void bpf_dev_add(struct bpf_d *); 1600a0e9771SDarren Reed static struct bpf_d *bpf_dev_find(minor_t); 1610a0e9771SDarren Reed static struct bpf_d *bpf_dev_get(minor_t); 1620a0e9771SDarren Reed static void bpf_dev_remove(struct bpf_d *); 1630a0e9771SDarren Reed 1640a0e9771SDarren Reed static int 1650a0e9771SDarren Reed bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp) 1660a0e9771SDarren Reed { 1670a0e9771SDarren Reed mblk_t *m; 1680a0e9771SDarren Reed int error; 1690a0e9771SDarren Reed int len; 1700a0e9771SDarren Reed int hlen; 1710a0e9771SDarren Reed int align; 1720a0e9771SDarren Reed 1730a0e9771SDarren Reed /* 1740a0e9771SDarren Reed * Build a sockaddr based on the data link layer type. 1750a0e9771SDarren Reed * We do this at this level because the ethernet header 1760a0e9771SDarren Reed * is copied directly into the data field of the sockaddr. 1770a0e9771SDarren Reed * In the case of SLIP, there is no header and the packet 1780a0e9771SDarren Reed * is forwarded as is. 
1790a0e9771SDarren Reed * Also, we are careful to leave room at the front of the mbuf 1800a0e9771SDarren Reed * for the link level header. 1810a0e9771SDarren Reed */ 1820a0e9771SDarren Reed switch (linktype) { 1830a0e9771SDarren Reed 1840a0e9771SDarren Reed case DLT_EN10MB: 1850a0e9771SDarren Reed hlen = sizeof (struct ether_header); 1860a0e9771SDarren Reed break; 1870a0e9771SDarren Reed 1880a0e9771SDarren Reed case DLT_FDDI: 1890a0e9771SDarren Reed hlen = 16; 1900a0e9771SDarren Reed break; 1910a0e9771SDarren Reed 1920a0e9771SDarren Reed case DLT_NULL: 1930a0e9771SDarren Reed hlen = 0; 1940a0e9771SDarren Reed break; 1950a0e9771SDarren Reed 1960a0e9771SDarren Reed case DLT_IPOIB: 1970a0e9771SDarren Reed hlen = 44; 1980a0e9771SDarren Reed break; 1990a0e9771SDarren Reed 2000a0e9771SDarren Reed default: 2010a0e9771SDarren Reed return (EIO); 2020a0e9771SDarren Reed } 2030a0e9771SDarren Reed 2040a0e9771SDarren Reed align = 4 - (hlen & 3); 2050a0e9771SDarren Reed 2060a0e9771SDarren Reed len = uio->uio_resid; 2070a0e9771SDarren Reed /* 2080a0e9771SDarren Reed * If there aren't enough bytes for a link level header or the 2090a0e9771SDarren Reed * packet length exceeds the interface mtu, return an error. 
2100a0e9771SDarren Reed */ 2110a0e9771SDarren Reed if (len < hlen || len - hlen > mtu) 2120a0e9771SDarren Reed return (EMSGSIZE); 2130a0e9771SDarren Reed 2140a0e9771SDarren Reed m = allocb(len + align, BPRI_MED); 2150a0e9771SDarren Reed if (m == NULL) { 2160a0e9771SDarren Reed error = ENOBUFS; 2170a0e9771SDarren Reed goto bad; 2180a0e9771SDarren Reed } 2190a0e9771SDarren Reed 2200a0e9771SDarren Reed /* Insure the data is properly aligned */ 2210a0e9771SDarren Reed if (align > 0) 2220a0e9771SDarren Reed m->b_rptr += align; 2230a0e9771SDarren Reed m->b_wptr = m->b_rptr + len; 2240a0e9771SDarren Reed 2250a0e9771SDarren Reed error = uiomove(mtod(m, void *), len, UIO_WRITE, uio); 2260a0e9771SDarren Reed if (error) 2270a0e9771SDarren Reed goto bad; 2280a0e9771SDarren Reed *mp = m; 2290a0e9771SDarren Reed return (0); 2300a0e9771SDarren Reed 2310a0e9771SDarren Reed bad: 2320a0e9771SDarren Reed if (m != NULL) 2330a0e9771SDarren Reed freemsg(m); 2340a0e9771SDarren Reed return (error); 2350a0e9771SDarren Reed } 2360a0e9771SDarren Reed 2370a0e9771SDarren Reed 2380a0e9771SDarren Reed /* 2390a0e9771SDarren Reed * Attach file to the bpf interface, i.e. make d listen on bp. 
2400a0e9771SDarren Reed */ 241b7ea883bSDarren Reed static int 242b7ea883bSDarren Reed bpf_attachd(struct bpf_d *d, const char *ifname, int dlt) 2430a0e9771SDarren Reed { 244b7ea883bSDarren Reed bpf_provider_list_t *bp; 245b7ea883bSDarren Reed bpf_provider_t *bpr; 246b7ea883bSDarren Reed boolean_t zonematch; 247b7ea883bSDarren Reed zoneid_t niczone; 248b7ea883bSDarren Reed uintptr_t mcip; 249b7ea883bSDarren Reed zoneid_t zone; 250b7ea883bSDarren Reed uint_t nicdlt; 251b7ea883bSDarren Reed uintptr_t mh; 252b7ea883bSDarren Reed int hdrlen; 253b7ea883bSDarren Reed int error; 2540a0e9771SDarren Reed 255*359e2726SToomas Soome ASSERT(d->bd_bif == (uintptr_t)NULL); 256*359e2726SToomas Soome ASSERT(d->bd_mcip == (uintptr_t)NULL); 257b7ea883bSDarren Reed zone = d->bd_zone; 258b7ea883bSDarren Reed zonematch = B_TRUE; 259b7ea883bSDarren Reed again: 260b7ea883bSDarren Reed mh = 0; 261b7ea883bSDarren Reed mcip = 0; 262b7ea883bSDarren Reed LIST_FOREACH(bp, &bpf_providers, bpl_next) { 263b7ea883bSDarren Reed bpr = bp->bpl_what; 264b7ea883bSDarren Reed error = MBPF_OPEN(bpr, ifname, &mh, zone); 265b7ea883bSDarren Reed if (error != 0) 266b7ea883bSDarren Reed goto next; 267b7ea883bSDarren Reed error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 268b7ea883bSDarren Reed if (error != 0) 269b7ea883bSDarren Reed goto next; 270b7ea883bSDarren Reed error = MBPF_GET_DLT(bpr, mh, &nicdlt); 271b7ea883bSDarren Reed if (error != 0) 272b7ea883bSDarren Reed goto next; 2730a0e9771SDarren Reed 274b7ea883bSDarren Reed nicdlt = bpf_dl_to_dlt(nicdlt); 275b7ea883bSDarren Reed if (dlt != -1 && dlt != nicdlt) { 276b7ea883bSDarren Reed error = ENOENT; 277b7ea883bSDarren Reed goto next; 278b7ea883bSDarren Reed } 279b7ea883bSDarren Reed 280b7ea883bSDarren Reed error = MBPF_GET_ZONE(bpr, mh, &niczone); 281b7ea883bSDarren Reed if (error != 0) 282b7ea883bSDarren Reed goto next; 283b7ea883bSDarren Reed 284b7ea883bSDarren Reed DTRACE_PROBE4(bpf__attach, struct bpf_provider_s *, bpr, 285b7ea883bSDarren Reed uintptr_t, mh, 
int, nicdlt, zoneid_t, niczone); 286b7ea883bSDarren Reed 287b7ea883bSDarren Reed if (zonematch && niczone != zone) { 288b7ea883bSDarren Reed error = ENOENT; 289b7ea883bSDarren Reed goto next; 290b7ea883bSDarren Reed } 291b7ea883bSDarren Reed break; 292b7ea883bSDarren Reed next: 293b7ea883bSDarren Reed if (mcip != 0) { 294b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(bpr, mcip); 295b7ea883bSDarren Reed mcip = 0; 296b7ea883bSDarren Reed } 297a3f59f6eSToomas Soome if (mh != 0) { 298b7ea883bSDarren Reed MBPF_CLOSE(bpr, mh); 299b7ea883bSDarren Reed mh = 0; 300b7ea883bSDarren Reed } 301b7ea883bSDarren Reed } 302b7ea883bSDarren Reed if (error != 0) { 303b7ea883bSDarren Reed if (zonematch && (zone == GLOBAL_ZONEID)) { 304b7ea883bSDarren Reed /* 305b7ea883bSDarren Reed * If we failed to do an exact match for the global 306b7ea883bSDarren Reed * zone using the global zoneid, try again in case 307b7ea883bSDarren Reed * the network interface is owned by a local zone. 308b7ea883bSDarren Reed */ 309b7ea883bSDarren Reed zonematch = B_FALSE; 310b7ea883bSDarren Reed goto again; 311b7ea883bSDarren Reed } 312b7ea883bSDarren Reed return (error); 313b7ea883bSDarren Reed } 314b7ea883bSDarren Reed 315b7ea883bSDarren Reed d->bd_mac = *bpr; 316b7ea883bSDarren Reed d->bd_mcip = mcip; 317b7ea883bSDarren Reed d->bd_bif = mh; 318b7ea883bSDarren Reed d->bd_dlt = nicdlt; 319b7ea883bSDarren Reed hdrlen = bpf_dl_hdrsize(nicdlt); 320b7ea883bSDarren Reed d->bd_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 321b7ea883bSDarren Reed 322b7ea883bSDarren Reed (void) strlcpy(d->bd_ifname, MBPF_CLIENT_NAME(&d->bd_mac, mcip), 323b7ea883bSDarren Reed sizeof (d->bd_ifname)); 324b7ea883bSDarren Reed 325b7ea883bSDarren Reed (void) MBPF_GET_LINKID(&d->bd_mac, d->bd_ifname, &d->bd_linkid, 326b7ea883bSDarren Reed zone); 327b7ea883bSDarren Reed (void) MBPF_PROMISC_ADD(&d->bd_mac, d->bd_mcip, 0, d, 3280a0e9771SDarren Reed &d->bd_promisc_handle, d->bd_promisc_flags); 329b7ea883bSDarren Reed return (0); 
3300a0e9771SDarren Reed } 3310a0e9771SDarren Reed 3320a0e9771SDarren Reed /* 3330a0e9771SDarren Reed * Detach a file from its interface. 3340a0e9771SDarren Reed */ 3350a0e9771SDarren Reed static void 3360a0e9771SDarren Reed bpf_detachd(struct bpf_d *d) 3370a0e9771SDarren Reed { 3380a0e9771SDarren Reed uintptr_t mph; 3390a0e9771SDarren Reed uintptr_t mch; 340b7ea883bSDarren Reed uintptr_t mh; 3410a0e9771SDarren Reed 342b7ea883bSDarren Reed ASSERT(d->bd_inuse == -1); 3430a0e9771SDarren Reed mch = d->bd_mcip; 3440a0e9771SDarren Reed d->bd_mcip = 0; 345b7ea883bSDarren Reed mh = d->bd_bif; 346b7ea883bSDarren Reed d->bd_bif = 0; 3470a0e9771SDarren Reed 3480a0e9771SDarren Reed /* 3490a0e9771SDarren Reed * Check if this descriptor had requested promiscuous mode. 3500a0e9771SDarren Reed * If so, turn it off. There's no need to take any action 3510a0e9771SDarren Reed * here, that is done when MBPF_PROMISC_REMOVE is used; 3520a0e9771SDarren Reed * bd_promisc is just a local flag to stop promiscuous mode 3530a0e9771SDarren Reed * from being set more than once. 3540a0e9771SDarren Reed */ 3550a0e9771SDarren Reed if (d->bd_promisc) 3560a0e9771SDarren Reed d->bd_promisc = 0; 3570a0e9771SDarren Reed 3580a0e9771SDarren Reed /* 3590a0e9771SDarren Reed * Take device out of "promiscuous" mode. Since we were able to 3600a0e9771SDarren Reed * enter "promiscuous" mode, we should be able to turn it off. 3610a0e9771SDarren Reed * Note, this field stores a pointer used to support both 3620a0e9771SDarren Reed * promiscuous and non-promiscuous callbacks for packets. 3630a0e9771SDarren Reed */ 3640a0e9771SDarren Reed mph = d->bd_promisc_handle; 3650a0e9771SDarren Reed d->bd_promisc_handle = 0; 3660a0e9771SDarren Reed 3670a0e9771SDarren Reed /* 3680a0e9771SDarren Reed * The lock has to be dropped here because mac_promisc_remove may 3690a0e9771SDarren Reed * need to wait for mac_promisc_dispatch, which has called into 3700a0e9771SDarren Reed * bpf and catchpacket is waiting for bd_lock... 
3710a0e9771SDarren Reed * i.e mac_promisc_remove() needs to be called with none of the 3720a0e9771SDarren Reed * locks held that are part of the bpf_mtap() call path. 3730a0e9771SDarren Reed */ 3740a0e9771SDarren Reed mutex_exit(&d->bd_lock); 3750a0e9771SDarren Reed if (mph != 0) 376b7ea883bSDarren Reed MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 3770a0e9771SDarren Reed 3780a0e9771SDarren Reed if (mch != 0) 379b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(&d->bd_mac, mch); 3800a0e9771SDarren Reed 381b7ea883bSDarren Reed if (mh != 0) 382b7ea883bSDarren Reed MBPF_CLOSE(&d->bd_mac, mh); 3830a0e9771SDarren Reed 3840a0e9771SDarren Reed /* 3850a0e9771SDarren Reed * Because this function is called with bd_lock held, so it must 3860a0e9771SDarren Reed * exit with it held. 3870a0e9771SDarren Reed */ 3880a0e9771SDarren Reed mutex_enter(&d->bd_lock); 389b7ea883bSDarren Reed *d->bd_ifname = '\0'; 390b50f3686SDarren Reed (void) memset(&d->bd_mac, 0, sizeof (d->bd_mac)); 3910a0e9771SDarren Reed } 3920a0e9771SDarren Reed 3930a0e9771SDarren Reed 3940a0e9771SDarren Reed /* 3950a0e9771SDarren Reed * bpfilterattach() is called at load time. 
3960a0e9771SDarren Reed */ 3970a0e9771SDarren Reed int 3980a0e9771SDarren Reed bpfilterattach(void) 3990a0e9771SDarren Reed { 4000a0e9771SDarren Reed 4010a0e9771SDarren Reed bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31, 4020a0e9771SDarren Reed mod_hash_null_keydtor); 4030a0e9771SDarren Reed if (bpf_hash == NULL) 4040a0e9771SDarren Reed return (ENOMEM); 4050a0e9771SDarren Reed 4060a0e9771SDarren Reed (void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats)); 4070a0e9771SDarren Reed 4080a0e9771SDarren Reed bpf_ksp = kstat_create("bpf", 0, "global", "misc", 4090a0e9771SDarren Reed KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t), 4100a0e9771SDarren Reed KSTAT_FLAG_VIRTUAL); 4110a0e9771SDarren Reed if (bpf_ksp != NULL) { 4120a0e9771SDarren Reed bpf_ksp->ks_data = &ks_stats; 4130a0e9771SDarren Reed kstat_install(bpf_ksp); 4140a0e9771SDarren Reed } else { 4150a0e9771SDarren Reed mod_hash_destroy_idhash(bpf_hash); 4160a0e9771SDarren Reed bpf_hash = NULL; 4170a0e9771SDarren Reed return (EEXIST); 4180a0e9771SDarren Reed } 4190a0e9771SDarren Reed 4200a0e9771SDarren Reed cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL); 4210a0e9771SDarren Reed mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL); 4220a0e9771SDarren Reed 4230a0e9771SDarren Reed LIST_INIT(&bpf_list); 4240a0e9771SDarren Reed 4250a0e9771SDarren Reed return (0); 4260a0e9771SDarren Reed } 4270a0e9771SDarren Reed 4280a0e9771SDarren Reed 4290a0e9771SDarren Reed /* 4300a0e9771SDarren Reed * bpfilterdetach() is called at unload time. 
4310a0e9771SDarren Reed */ 4320a0e9771SDarren Reed int 4330a0e9771SDarren Reed bpfilterdetach(void) 4340a0e9771SDarren Reed { 4350a0e9771SDarren Reed 4360a0e9771SDarren Reed if (bpf_ksp != NULL) { 4370a0e9771SDarren Reed kstat_delete(bpf_ksp); 4380a0e9771SDarren Reed bpf_ksp = NULL; 4390a0e9771SDarren Reed } 4400a0e9771SDarren Reed 4410a0e9771SDarren Reed mod_hash_destroy_idhash(bpf_hash); 4420a0e9771SDarren Reed bpf_hash = NULL; 4430a0e9771SDarren Reed 4440a0e9771SDarren Reed cv_destroy(&bpf_dlt_waiter); 4450a0e9771SDarren Reed mutex_destroy(&bpf_mtx); 4460a0e9771SDarren Reed 4470a0e9771SDarren Reed return (0); 4480a0e9771SDarren Reed } 4490a0e9771SDarren Reed 4500a0e9771SDarren Reed /* 4510a0e9771SDarren Reed * Open ethernet device. Clones. 4520a0e9771SDarren Reed */ 4530a0e9771SDarren Reed /* ARGSUSED */ 4540a0e9771SDarren Reed int 4550a0e9771SDarren Reed bpfopen(dev_t *devp, int flag, int mode, cred_t *cred) 4560a0e9771SDarren Reed { 4570a0e9771SDarren Reed struct bpf_d *d; 4580a0e9771SDarren Reed uint_t dmin; 4590a0e9771SDarren Reed 4600a0e9771SDarren Reed /* 4610a0e9771SDarren Reed * The security policy described at the top of this file is 4620a0e9771SDarren Reed * enforced here. 
4630a0e9771SDarren Reed */ 4640a0e9771SDarren Reed if ((flag & FWRITE) != 0) { 4650a0e9771SDarren Reed if (secpolicy_net_rawaccess(cred) != 0) 4660a0e9771SDarren Reed return (EACCES); 4670a0e9771SDarren Reed } 4680a0e9771SDarren Reed 4690a0e9771SDarren Reed if ((flag & FREAD) != 0) { 4700a0e9771SDarren Reed if ((secpolicy_net_observability(cred) != 0) && 4710a0e9771SDarren Reed (secpolicy_net_rawaccess(cred) != 0)) 4720a0e9771SDarren Reed return (EACCES); 4730a0e9771SDarren Reed } 4740a0e9771SDarren Reed 4750a0e9771SDarren Reed if ((flag & (FWRITE|FREAD)) == 0) 4760a0e9771SDarren Reed return (ENXIO); 4770a0e9771SDarren Reed 4780a0e9771SDarren Reed /* 4790a0e9771SDarren Reed * A structure is allocated per open file in BPF to store settings 4800a0e9771SDarren Reed * such as buffer capture size, provide private buffers, etc. 4810a0e9771SDarren Reed */ 4820a0e9771SDarren Reed d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP); 4830a0e9771SDarren Reed d->bd_bufsize = bpf_bufsize; 4840a0e9771SDarren Reed d->bd_fmode = flag; 4850a0e9771SDarren Reed d->bd_zone = crgetzoneid(cred); 4860a0e9771SDarren Reed d->bd_seesent = 1; 4870a0e9771SDarren Reed d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS| 4880a0e9771SDarren Reed MAC_PROMISC_FLAGS_NO_COPY; 4890a0e9771SDarren Reed mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL); 4900a0e9771SDarren Reed cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL); 4910a0e9771SDarren Reed 4920a0e9771SDarren Reed mutex_enter(&bpf_mtx); 4930a0e9771SDarren Reed /* 4940a0e9771SDarren Reed * Find an unused minor number. Obviously this is an O(n) algorithm 4950a0e9771SDarren Reed * and doesn't scale particularly well, so if there are large numbers 4960a0e9771SDarren Reed * of open file descriptors happening in real use, this design may 4970a0e9771SDarren Reed * need to be revisited. 
4980a0e9771SDarren Reed */ 4990a0e9771SDarren Reed for (dmin = 0; dmin < L_MAXMIN; dmin++) 5000a0e9771SDarren Reed if (bpf_dev_find(dmin) == NULL) 5010a0e9771SDarren Reed break; 5020a0e9771SDarren Reed if (dmin == L_MAXMIN) { 5030a0e9771SDarren Reed mutex_exit(&bpf_mtx); 5040a0e9771SDarren Reed kmem_free(d, sizeof (*d)); 5050a0e9771SDarren Reed return (ENXIO); 5060a0e9771SDarren Reed } 5070a0e9771SDarren Reed d->bd_dev = dmin; 5080a0e9771SDarren Reed LIST_INSERT_HEAD(&bpf_list, d, bd_list); 5090a0e9771SDarren Reed bpf_dev_add(d); 5100a0e9771SDarren Reed mutex_exit(&bpf_mtx); 5110a0e9771SDarren Reed 5120a0e9771SDarren Reed *devp = makedevice(getmajor(*devp), dmin); 5130a0e9771SDarren Reed 5140a0e9771SDarren Reed return (0); 5150a0e9771SDarren Reed } 5160a0e9771SDarren Reed 5170a0e9771SDarren Reed /* 5180a0e9771SDarren Reed * Close the descriptor by detaching it from its interface, 5190a0e9771SDarren Reed * deallocating its buffers, and marking it free. 5200a0e9771SDarren Reed * 5210a0e9771SDarren Reed * Because we only allow a device to be opened once, there is always a 5220a0e9771SDarren Reed * 1 to 1 relationship between opens and closes supporting this function. 
5230a0e9771SDarren Reed */ 5240a0e9771SDarren Reed /* ARGSUSED */ 5250a0e9771SDarren Reed int 5260a0e9771SDarren Reed bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p) 5270a0e9771SDarren Reed { 5280a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 5290a0e9771SDarren Reed 5300a0e9771SDarren Reed mutex_enter(&d->bd_lock); 531b7ea883bSDarren Reed 532b7ea883bSDarren Reed while (d->bd_inuse != 0) { 533b7ea883bSDarren Reed d->bd_waiting++; 534b7ea883bSDarren Reed if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 535b7ea883bSDarren Reed d->bd_waiting--; 536b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 537b7ea883bSDarren Reed return (EINTR); 538b7ea883bSDarren Reed } 539b7ea883bSDarren Reed d->bd_waiting--; 540b7ea883bSDarren Reed } 541b7ea883bSDarren Reed 542b7ea883bSDarren Reed d->bd_inuse = -1; 5430a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) 5440a0e9771SDarren Reed bpf_clear_timeout(d); 5450a0e9771SDarren Reed d->bd_state = BPF_IDLE; 5460a0e9771SDarren Reed if (d->bd_bif) 5470a0e9771SDarren Reed bpf_detachd(d); 5480a0e9771SDarren Reed mutex_exit(&d->bd_lock); 5490a0e9771SDarren Reed 5500a0e9771SDarren Reed mutex_enter(&bpf_mtx); 5510a0e9771SDarren Reed LIST_REMOVE(d, bd_list); 5520a0e9771SDarren Reed bpf_dev_remove(d); 5530a0e9771SDarren Reed mutex_exit(&bpf_mtx); 5540a0e9771SDarren Reed 5550a0e9771SDarren Reed mutex_enter(&d->bd_lock); 5560a0e9771SDarren Reed mutex_destroy(&d->bd_lock); 5570a0e9771SDarren Reed cv_destroy(&d->bd_wait); 5580a0e9771SDarren Reed 5590a0e9771SDarren Reed bpf_freed(d); 5600a0e9771SDarren Reed kmem_free(d, sizeof (*d)); 5610a0e9771SDarren Reed 5620a0e9771SDarren Reed return (0); 5630a0e9771SDarren Reed } 5640a0e9771SDarren Reed 5650a0e9771SDarren Reed /* 5660a0e9771SDarren Reed * Rotate the packet buffers in descriptor d. Move the store buffer 5670a0e9771SDarren Reed * into the hold slot, and the free buffer into the store slot. 5680a0e9771SDarren Reed * Zero the length of the new store buffer. 
5690a0e9771SDarren Reed */ 5700a0e9771SDarren Reed #define ROTATE_BUFFERS(d) \ 5710a0e9771SDarren Reed (d)->bd_hbuf = (d)->bd_sbuf; \ 5720a0e9771SDarren Reed (d)->bd_hlen = (d)->bd_slen; \ 5730a0e9771SDarren Reed (d)->bd_sbuf = (d)->bd_fbuf; \ 5740a0e9771SDarren Reed (d)->bd_slen = 0; \ 5750a0e9771SDarren Reed (d)->bd_fbuf = 0; 5760a0e9771SDarren Reed /* 5770a0e9771SDarren Reed * bpfread - read next chunk of packets from buffers 5780a0e9771SDarren Reed */ 5790a0e9771SDarren Reed /* ARGSUSED */ 5800a0e9771SDarren Reed int 5810a0e9771SDarren Reed bpfread(dev_t dev, struct uio *uio, cred_t *cred) 5820a0e9771SDarren Reed { 5830a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 5840a0e9771SDarren Reed int timed_out; 5850a0e9771SDarren Reed ulong_t delay; 5860a0e9771SDarren Reed int error; 5870a0e9771SDarren Reed 5880a0e9771SDarren Reed if ((d->bd_fmode & FREAD) == 0) 5890a0e9771SDarren Reed return (EBADF); 5900a0e9771SDarren Reed 5910a0e9771SDarren Reed /* 5920a0e9771SDarren Reed * Restrict application to use a buffer the same size as 5930a0e9771SDarren Reed * the kernel buffers. 5940a0e9771SDarren Reed */ 5950a0e9771SDarren Reed if (uio->uio_resid != d->bd_bufsize) 5960a0e9771SDarren Reed return (EINVAL); 5970a0e9771SDarren Reed 5980a0e9771SDarren Reed mutex_enter(&d->bd_lock); 5990a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) 6000a0e9771SDarren Reed bpf_clear_timeout(d); 6010a0e9771SDarren Reed timed_out = (d->bd_state == BPF_TIMED_OUT); 6020a0e9771SDarren Reed d->bd_state = BPF_IDLE; 6030a0e9771SDarren Reed /* 6040a0e9771SDarren Reed * If the hold buffer is empty, then do a timed sleep, which 6050a0e9771SDarren Reed * ends when the timeout expires or when enough packets 6060a0e9771SDarren Reed * have arrived to fill the store buffer. 
6070a0e9771SDarren Reed */ 6080a0e9771SDarren Reed while (d->bd_hbuf == 0) { 6090a0e9771SDarren Reed if (d->bd_nonblock) { 6100a0e9771SDarren Reed if (d->bd_slen == 0) { 6110a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6120a0e9771SDarren Reed return (EWOULDBLOCK); 6130a0e9771SDarren Reed } 6140a0e9771SDarren Reed ROTATE_BUFFERS(d); 6150a0e9771SDarren Reed break; 6160a0e9771SDarren Reed } 6170a0e9771SDarren Reed 6180a0e9771SDarren Reed if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 6190a0e9771SDarren Reed /* 6200a0e9771SDarren Reed * A packet(s) either arrived since the previous 6210a0e9771SDarren Reed * read or arrived while we were asleep. 6220a0e9771SDarren Reed * Rotate the buffers and return what's here. 6230a0e9771SDarren Reed */ 6240a0e9771SDarren Reed ROTATE_BUFFERS(d); 6250a0e9771SDarren Reed break; 6260a0e9771SDarren Reed } 6270a0e9771SDarren Reed ks_stats.kp_read_wait.value.ui64++; 6280a0e9771SDarren Reed delay = ddi_get_lbolt() + d->bd_rtout; 6290a0e9771SDarren Reed error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay); 6300a0e9771SDarren Reed if (error == 0) { 6310a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6320a0e9771SDarren Reed return (EINTR); 6330a0e9771SDarren Reed } 6340a0e9771SDarren Reed if (error == -1) { 6350a0e9771SDarren Reed /* 6360a0e9771SDarren Reed * On a timeout, return what's in the buffer, 6370a0e9771SDarren Reed * which may be nothing. If there is something 6380a0e9771SDarren Reed * in the store buffer, we can rotate the buffers. 6390a0e9771SDarren Reed */ 6400a0e9771SDarren Reed if (d->bd_hbuf) 6410a0e9771SDarren Reed /* 6420a0e9771SDarren Reed * We filled up the buffer in between 6430a0e9771SDarren Reed * getting the timeout and arriving 6440a0e9771SDarren Reed * here, so we don't need to rotate. 
6450a0e9771SDarren Reed */ 6460a0e9771SDarren Reed break; 6470a0e9771SDarren Reed 6480a0e9771SDarren Reed if (d->bd_slen == 0) { 6490a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6500a0e9771SDarren Reed return (0); 6510a0e9771SDarren Reed } 6520a0e9771SDarren Reed ROTATE_BUFFERS(d); 6530a0e9771SDarren Reed } 6540a0e9771SDarren Reed } 6550a0e9771SDarren Reed /* 6560a0e9771SDarren Reed * At this point, we know we have something in the hold slot. 6570a0e9771SDarren Reed */ 6580a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6590a0e9771SDarren Reed 6600a0e9771SDarren Reed /* 6610a0e9771SDarren Reed * Move data from hold buffer into user space. 6620a0e9771SDarren Reed * We know the entire buffer is transferred since 6630a0e9771SDarren Reed * we checked above that the read buffer is bpf_bufsize bytes. 6640a0e9771SDarren Reed */ 6650a0e9771SDarren Reed error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); 6660a0e9771SDarren Reed 6670a0e9771SDarren Reed mutex_enter(&d->bd_lock); 6680a0e9771SDarren Reed d->bd_fbuf = d->bd_hbuf; 6690a0e9771SDarren Reed d->bd_hbuf = 0; 6700a0e9771SDarren Reed d->bd_hlen = 0; 6710a0e9771SDarren Reed done: 6720a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6730a0e9771SDarren Reed return (error); 6740a0e9771SDarren Reed } 6750a0e9771SDarren Reed 6760a0e9771SDarren Reed 6770a0e9771SDarren Reed /* 6780a0e9771SDarren Reed * If there are processes sleeping on this descriptor, wake them up. 6790a0e9771SDarren Reed * NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver, 6800a0e9771SDarren Reed * so there is no code here grabbing it. 
6810a0e9771SDarren Reed */ 6820a0e9771SDarren Reed static inline void 6830a0e9771SDarren Reed bpf_wakeup(struct bpf_d *d) 6840a0e9771SDarren Reed { 6850a0e9771SDarren Reed cv_signal(&d->bd_wait); 6860a0e9771SDarren Reed } 6870a0e9771SDarren Reed 6880a0e9771SDarren Reed static void 6890a0e9771SDarren Reed bpf_timed_out(void *arg) 6900a0e9771SDarren Reed { 6910a0e9771SDarren Reed struct bpf_d *d = arg; 6920a0e9771SDarren Reed 6930a0e9771SDarren Reed mutex_enter(&d->bd_lock); 6940a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) { 6950a0e9771SDarren Reed d->bd_state = BPF_TIMED_OUT; 6960a0e9771SDarren Reed if (d->bd_slen != 0) 6970a0e9771SDarren Reed cv_signal(&d->bd_wait); 6980a0e9771SDarren Reed } 6990a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7000a0e9771SDarren Reed } 7010a0e9771SDarren Reed 7020a0e9771SDarren Reed 7030a0e9771SDarren Reed /* ARGSUSED */ 7040a0e9771SDarren Reed int 7050a0e9771SDarren Reed bpfwrite(dev_t dev, struct uio *uio, cred_t *cred) 7060a0e9771SDarren Reed { 7070a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 7080a0e9771SDarren Reed uintptr_t mch; 7090a0e9771SDarren Reed uint_t mtu; 7100a0e9771SDarren Reed mblk_t *m; 7110a0e9771SDarren Reed int error; 7120a0e9771SDarren Reed int dlt; 7130a0e9771SDarren Reed 7140a0e9771SDarren Reed if ((d->bd_fmode & FWRITE) == 0) 7150a0e9771SDarren Reed return (EBADF); 7160a0e9771SDarren Reed 7170a0e9771SDarren Reed mutex_enter(&d->bd_lock); 718b7ea883bSDarren Reed if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif == 0) { 7190a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7200a0e9771SDarren Reed return (EINTR); 7210a0e9771SDarren Reed } 7220a0e9771SDarren Reed 7230a0e9771SDarren Reed if (uio->uio_resid == 0) { 7240a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7250a0e9771SDarren Reed return (0); 7260a0e9771SDarren Reed } 7270a0e9771SDarren Reed 7280a0e9771SDarren Reed while (d->bd_inuse < 0) { 7290a0e9771SDarren Reed d->bd_waiting++; 7300a0e9771SDarren Reed if (cv_wait_sig(&d->bd_wait, 
&d->bd_lock) <= 0) { 7310a0e9771SDarren Reed d->bd_waiting--; 7320a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7330a0e9771SDarren Reed return (EINTR); 7340a0e9771SDarren Reed } 7350a0e9771SDarren Reed d->bd_waiting--; 7360a0e9771SDarren Reed } 7370a0e9771SDarren Reed 7380a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7390a0e9771SDarren Reed 740b7ea883bSDarren Reed dlt = d->bd_dlt; 7410a0e9771SDarren Reed mch = d->bd_mcip; 742b7ea883bSDarren Reed MBPF_SDU_GET(&d->bd_mac, d->bd_bif, &mtu); 7430a0e9771SDarren Reed d->bd_inuse++; 7440a0e9771SDarren Reed 7450a0e9771SDarren Reed m = NULL; 7460a0e9771SDarren Reed if (dlt == DLT_IPNET) { 7470a0e9771SDarren Reed error = EIO; 7480a0e9771SDarren Reed goto done; 7490a0e9771SDarren Reed } 7500a0e9771SDarren Reed 7510a0e9771SDarren Reed error = bpf_movein(uio, dlt, mtu, &m); 7520a0e9771SDarren Reed if (error) 7530a0e9771SDarren Reed goto done; 7540a0e9771SDarren Reed 755b7ea883bSDarren Reed DTRACE_PROBE4(bpf__tx, struct bpf_d *, d, int, dlt, 756b7ea883bSDarren Reed uint_t, mtu, mblk_t *, m); 7570a0e9771SDarren Reed 7580a0e9771SDarren Reed if (M_LEN(m) > mtu) { 7590a0e9771SDarren Reed error = EMSGSIZE; 7600a0e9771SDarren Reed goto done; 7610a0e9771SDarren Reed } 7620a0e9771SDarren Reed 763b7ea883bSDarren Reed error = MBPF_TX(&d->bd_mac, mch, m); 7640a0e9771SDarren Reed /* 7650a0e9771SDarren Reed * The "tx" action here is required to consume the mblk_t. 
7660a0e9771SDarren Reed */ 7670a0e9771SDarren Reed m = NULL; 7680a0e9771SDarren Reed 7690a0e9771SDarren Reed done: 7700a0e9771SDarren Reed if (error == 0) 7710a0e9771SDarren Reed ks_stats.kp_write_ok.value.ui64++; 7720a0e9771SDarren Reed else 7730a0e9771SDarren Reed ks_stats.kp_write_error.value.ui64++; 7740a0e9771SDarren Reed if (m != NULL) 7750a0e9771SDarren Reed freemsg(m); 7760a0e9771SDarren Reed 7770a0e9771SDarren Reed mutex_enter(&d->bd_lock); 7780a0e9771SDarren Reed d->bd_inuse--; 7790a0e9771SDarren Reed if ((d->bd_inuse == 0) && (d->bd_waiting != 0)) 7800a0e9771SDarren Reed cv_signal(&d->bd_wait); 7810a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7820a0e9771SDarren Reed 7830a0e9771SDarren Reed /* 7840a0e9771SDarren Reed * The driver frees the mbuf. 7850a0e9771SDarren Reed */ 7860a0e9771SDarren Reed return (error); 7870a0e9771SDarren Reed } 7880a0e9771SDarren Reed 7890a0e9771SDarren Reed 7900a0e9771SDarren Reed /* 7910a0e9771SDarren Reed * Reset a descriptor by flushing its packet buffer and clearing the 7920a0e9771SDarren Reed * receive and drop counts. Should be called at splnet. 7930a0e9771SDarren Reed */ 7940a0e9771SDarren Reed static void 7950a0e9771SDarren Reed reset_d(struct bpf_d *d) 7960a0e9771SDarren Reed { 7970a0e9771SDarren Reed if (d->bd_hbuf) { 7980a0e9771SDarren Reed /* Free the hold buffer. */ 7990a0e9771SDarren Reed d->bd_fbuf = d->bd_hbuf; 8000a0e9771SDarren Reed d->bd_hbuf = 0; 8010a0e9771SDarren Reed } 8020a0e9771SDarren Reed d->bd_slen = 0; 8030a0e9771SDarren Reed d->bd_hlen = 0; 8040a0e9771SDarren Reed d->bd_rcount = 0; 8050a0e9771SDarren Reed d->bd_dcount = 0; 8060a0e9771SDarren Reed d->bd_ccount = 0; 8070a0e9771SDarren Reed } 8080a0e9771SDarren Reed 8090a0e9771SDarren Reed /* 8100a0e9771SDarren Reed * FIONREAD Check for read packet available. 8110a0e9771SDarren Reed * BIOCGBLEN Get buffer len [for read()]. 8120a0e9771SDarren Reed * BIOCSETF Set ethernet read filter. 8130a0e9771SDarren Reed * BIOCFLUSH Flush read packet buffer. 
8140a0e9771SDarren Reed * BIOCPROMISC Put interface into promiscuous mode. 8150a0e9771SDarren Reed * BIOCGDLT Get link layer type. 8160a0e9771SDarren Reed * BIOCGETIF Get interface name. 8170a0e9771SDarren Reed * BIOCSETIF Set interface. 8180a0e9771SDarren Reed * BIOCSRTIMEOUT Set read timeout. 8190a0e9771SDarren Reed * BIOCGRTIMEOUT Get read timeout. 8200a0e9771SDarren Reed * BIOCGSTATS Get packet stats. 8210a0e9771SDarren Reed * BIOCIMMEDIATE Set immediate mode. 8220a0e9771SDarren Reed * BIOCVERSION Get filter language version. 8230a0e9771SDarren Reed * BIOCGHDRCMPLT Get "header already complete" flag. 8240a0e9771SDarren Reed * BIOCSHDRCMPLT Set "header already complete" flag. 8250a0e9771SDarren Reed */ 8260a0e9771SDarren Reed /* ARGSUSED */ 8270a0e9771SDarren Reed int 8280a0e9771SDarren Reed bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval) 8290a0e9771SDarren Reed { 8300a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 8310a0e9771SDarren Reed struct bpf_program prog; 8320a0e9771SDarren Reed struct lifreq lifreq; 8330a0e9771SDarren Reed struct ifreq ifreq; 8340a0e9771SDarren Reed int error = 0; 8350a0e9771SDarren Reed uint_t size; 8360a0e9771SDarren Reed 8370a0e9771SDarren Reed /* 8380a0e9771SDarren Reed * Refresh the PID associated with this bpf file. 8390a0e9771SDarren Reed */ 8400a0e9771SDarren Reed mutex_enter(&d->bd_lock); 8410a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) 8420a0e9771SDarren Reed bpf_clear_timeout(d); 8430a0e9771SDarren Reed d->bd_state = BPF_IDLE; 8440a0e9771SDarren Reed mutex_exit(&d->bd_lock); 8450a0e9771SDarren Reed 8460a0e9771SDarren Reed switch (cmd) { 8470a0e9771SDarren Reed 8480a0e9771SDarren Reed default: 8490a0e9771SDarren Reed error = EINVAL; 8500a0e9771SDarren Reed break; 8510a0e9771SDarren Reed 8520a0e9771SDarren Reed /* 8530a0e9771SDarren Reed * Check for read packet available. 
8540a0e9771SDarren Reed */ 8550a0e9771SDarren Reed case FIONREAD: 8560a0e9771SDarren Reed { 8570a0e9771SDarren Reed int n; 8580a0e9771SDarren Reed 8590a0e9771SDarren Reed mutex_enter(&d->bd_lock); 8600a0e9771SDarren Reed n = d->bd_slen; 8610a0e9771SDarren Reed if (d->bd_hbuf) 8620a0e9771SDarren Reed n += d->bd_hlen; 8630a0e9771SDarren Reed mutex_exit(&d->bd_lock); 8640a0e9771SDarren Reed 8650a0e9771SDarren Reed *(int *)addr = n; 8660a0e9771SDarren Reed break; 8670a0e9771SDarren Reed } 8680a0e9771SDarren Reed 8690a0e9771SDarren Reed /* 8700a0e9771SDarren Reed * Get buffer len [for read()]. 8710a0e9771SDarren Reed */ 8720a0e9771SDarren Reed case BIOCGBLEN: 8730a0e9771SDarren Reed error = copyout(&d->bd_bufsize, (void *)addr, 8740a0e9771SDarren Reed sizeof (d->bd_bufsize)); 8750a0e9771SDarren Reed break; 8760a0e9771SDarren Reed 8770a0e9771SDarren Reed /* 8780a0e9771SDarren Reed * Set buffer length. 8790a0e9771SDarren Reed */ 8800a0e9771SDarren Reed case BIOCSBLEN: 8810a0e9771SDarren Reed if (copyin((void *)addr, &size, sizeof (size)) != 0) { 8820a0e9771SDarren Reed error = EFAULT; 8830a0e9771SDarren Reed break; 8840a0e9771SDarren Reed } 8850a0e9771SDarren Reed 8860a0e9771SDarren Reed mutex_enter(&d->bd_lock); 8870a0e9771SDarren Reed if (d->bd_bif != 0) { 8880a0e9771SDarren Reed error = EINVAL; 8890a0e9771SDarren Reed } else { 8900a0e9771SDarren Reed if (size > bpf_maxbufsize) 8910a0e9771SDarren Reed size = bpf_maxbufsize; 8920a0e9771SDarren Reed else if (size < BPF_MINBUFSIZE) 8930a0e9771SDarren Reed size = BPF_MINBUFSIZE; 8940a0e9771SDarren Reed 8950a0e9771SDarren Reed d->bd_bufsize = size; 8960a0e9771SDarren Reed } 8970a0e9771SDarren Reed mutex_exit(&d->bd_lock); 8980a0e9771SDarren Reed 8990a0e9771SDarren Reed if (error == 0) 9000a0e9771SDarren Reed error = copyout(&size, (void *)addr, sizeof (size)); 9010a0e9771SDarren Reed break; 9020a0e9771SDarren Reed 9030a0e9771SDarren Reed /* 9040a0e9771SDarren Reed * Set link layer read filter. 
9050a0e9771SDarren Reed */ 9060a0e9771SDarren Reed case BIOCSETF: 9070a0e9771SDarren Reed if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) { 9080a0e9771SDarren Reed error = EFAULT; 9090a0e9771SDarren Reed break; 9100a0e9771SDarren Reed } 9110a0e9771SDarren Reed error = bpf_setf(d, &prog); 9120a0e9771SDarren Reed break; 9130a0e9771SDarren Reed 9140a0e9771SDarren Reed /* 9150a0e9771SDarren Reed * Flush read packet buffer. 9160a0e9771SDarren Reed */ 9170a0e9771SDarren Reed case BIOCFLUSH: 9180a0e9771SDarren Reed mutex_enter(&d->bd_lock); 9190a0e9771SDarren Reed reset_d(d); 9200a0e9771SDarren Reed mutex_exit(&d->bd_lock); 9210a0e9771SDarren Reed break; 9220a0e9771SDarren Reed 9230a0e9771SDarren Reed /* 9240a0e9771SDarren Reed * Put interface into promiscuous mode. 9250a0e9771SDarren Reed * This is a one-way ioctl, it is not used to turn promiscuous 9260a0e9771SDarren Reed * mode off. 9270a0e9771SDarren Reed */ 9280a0e9771SDarren Reed case BIOCPROMISC: 9290a0e9771SDarren Reed if (d->bd_bif == 0) { 9300a0e9771SDarren Reed /* 9310a0e9771SDarren Reed * No interface attached yet. 
9320a0e9771SDarren Reed */ 9330a0e9771SDarren Reed error = EINVAL; 9340a0e9771SDarren Reed break; 9350a0e9771SDarren Reed } 9360a0e9771SDarren Reed mutex_enter(&d->bd_lock); 9370a0e9771SDarren Reed if (d->bd_promisc == 0) { 9380a0e9771SDarren Reed 9390a0e9771SDarren Reed if (d->bd_promisc_handle) { 9400a0e9771SDarren Reed uintptr_t mph; 9410a0e9771SDarren Reed 9420a0e9771SDarren Reed mph = d->bd_promisc_handle; 9430a0e9771SDarren Reed d->bd_promisc_handle = 0; 9440a0e9771SDarren Reed 9450a0e9771SDarren Reed mutex_exit(&d->bd_lock); 946b7ea883bSDarren Reed MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 9470a0e9771SDarren Reed mutex_enter(&d->bd_lock); 9480a0e9771SDarren Reed } 9490a0e9771SDarren Reed 9500a0e9771SDarren Reed d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY; 951b7ea883bSDarren Reed error = MBPF_PROMISC_ADD(&d->bd_mac, 9520a0e9771SDarren Reed d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d, 9530a0e9771SDarren Reed &d->bd_promisc_handle, d->bd_promisc_flags); 9540a0e9771SDarren Reed if (error == 0) 9550a0e9771SDarren Reed d->bd_promisc = 1; 9560a0e9771SDarren Reed } 9570a0e9771SDarren Reed mutex_exit(&d->bd_lock); 9580a0e9771SDarren Reed break; 9590a0e9771SDarren Reed 9600a0e9771SDarren Reed /* 9610a0e9771SDarren Reed * Get device parameters. 9620a0e9771SDarren Reed */ 9630a0e9771SDarren Reed case BIOCGDLT: 9640a0e9771SDarren Reed if (d->bd_bif == 0) 9650a0e9771SDarren Reed error = EINVAL; 9660a0e9771SDarren Reed else 967b7ea883bSDarren Reed error = copyout(&d->bd_dlt, (void *)addr, 968b7ea883bSDarren Reed sizeof (d->bd_dlt)); 9690a0e9771SDarren Reed break; 9700a0e9771SDarren Reed 9710a0e9771SDarren Reed /* 9720a0e9771SDarren Reed * Get a list of supported device parameters. 
9730a0e9771SDarren Reed */ 9740a0e9771SDarren Reed case BIOCGDLTLIST: 9750a0e9771SDarren Reed if (d->bd_bif == 0) { 9760a0e9771SDarren Reed error = EINVAL; 9770a0e9771SDarren Reed } else { 9780a0e9771SDarren Reed struct bpf_dltlist list; 9790a0e9771SDarren Reed 9800a0e9771SDarren Reed if (copyin((void *)addr, &list, sizeof (list)) != 0) { 9810a0e9771SDarren Reed error = EFAULT; 9820a0e9771SDarren Reed break; 9830a0e9771SDarren Reed } 9840a0e9771SDarren Reed error = bpf_getdltlist(d, &list); 9850a0e9771SDarren Reed if ((error == 0) && 9860a0e9771SDarren Reed copyout(&list, (void *)addr, sizeof (list)) != 0) 9870a0e9771SDarren Reed error = EFAULT; 9880a0e9771SDarren Reed } 9890a0e9771SDarren Reed break; 9900a0e9771SDarren Reed 9910a0e9771SDarren Reed /* 9920a0e9771SDarren Reed * Set device parameters. 9930a0e9771SDarren Reed */ 9940a0e9771SDarren Reed case BIOCSDLT: 9950a0e9771SDarren Reed error = bpf_setdlt(d, (void *)addr); 9960a0e9771SDarren Reed break; 9970a0e9771SDarren Reed 9980a0e9771SDarren Reed /* 9990a0e9771SDarren Reed * Get interface name. 10000a0e9771SDarren Reed */ 10010a0e9771SDarren Reed case BIOCGETIF: 10020a0e9771SDarren Reed if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 10030a0e9771SDarren Reed error = EFAULT; 10040a0e9771SDarren Reed break; 10050a0e9771SDarren Reed } 10060a0e9771SDarren Reed error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 10070a0e9771SDarren Reed if ((error == 0) && 10080a0e9771SDarren Reed copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0) { 10090a0e9771SDarren Reed error = EFAULT; 10100a0e9771SDarren Reed break; 10110a0e9771SDarren Reed } 10120a0e9771SDarren Reed break; 10130a0e9771SDarren Reed 10140a0e9771SDarren Reed /* 10150a0e9771SDarren Reed * Set interface. 
10160a0e9771SDarren Reed */ 10170a0e9771SDarren Reed case BIOCSETIF: 10180a0e9771SDarren Reed if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 10190a0e9771SDarren Reed error = EFAULT; 10200a0e9771SDarren Reed break; 10210a0e9771SDarren Reed } 10220a0e9771SDarren Reed error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 10230a0e9771SDarren Reed break; 10240a0e9771SDarren Reed 10250a0e9771SDarren Reed /* 10260a0e9771SDarren Reed * Get interface name. 10270a0e9771SDarren Reed */ 10280a0e9771SDarren Reed case BIOCGETLIF: 10290a0e9771SDarren Reed if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 10300a0e9771SDarren Reed error = EFAULT; 10310a0e9771SDarren Reed break; 10320a0e9771SDarren Reed } 10330a0e9771SDarren Reed error = bpf_ifname(d, lifreq.lifr_name, 10340a0e9771SDarren Reed sizeof (lifreq.lifr_name)); 10350a0e9771SDarren Reed if ((error == 0) && 10360a0e9771SDarren Reed copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0) { 10370a0e9771SDarren Reed error = EFAULT; 10380a0e9771SDarren Reed break; 10390a0e9771SDarren Reed } 10400a0e9771SDarren Reed break; 10410a0e9771SDarren Reed 10420a0e9771SDarren Reed /* 10430a0e9771SDarren Reed * Set interface. 10440a0e9771SDarren Reed */ 10450a0e9771SDarren Reed case BIOCSETLIF: 10460a0e9771SDarren Reed if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 10470a0e9771SDarren Reed error = EFAULT; 10480a0e9771SDarren Reed break; 10490a0e9771SDarren Reed } 10500a0e9771SDarren Reed error = bpf_setif(d, lifreq.lifr_name, 10510a0e9771SDarren Reed sizeof (lifreq.lifr_name)); 10520a0e9771SDarren Reed break; 10530a0e9771SDarren Reed 10540a0e9771SDarren Reed #ifdef _SYSCALL32_IMPL 10550a0e9771SDarren Reed /* 10560a0e9771SDarren Reed * Set read timeout. 
10570a0e9771SDarren Reed */ 10580a0e9771SDarren Reed case BIOCSRTIMEOUT32: 10590a0e9771SDarren Reed { 10600a0e9771SDarren Reed struct timeval32 tv; 10610a0e9771SDarren Reed 10620a0e9771SDarren Reed if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 10630a0e9771SDarren Reed error = EFAULT; 10640a0e9771SDarren Reed break; 10650a0e9771SDarren Reed } 10660a0e9771SDarren Reed 10670a0e9771SDarren Reed /* Convert the timeout in microseconds to ticks */ 10680a0e9771SDarren Reed d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 10690a0e9771SDarren Reed tv.tv_usec); 10700a0e9771SDarren Reed if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 10710a0e9771SDarren Reed d->bd_rtout = 1; 10720a0e9771SDarren Reed break; 10730a0e9771SDarren Reed } 10740a0e9771SDarren Reed 10750a0e9771SDarren Reed /* 10760a0e9771SDarren Reed * Get read timeout. 10770a0e9771SDarren Reed */ 10780a0e9771SDarren Reed case BIOCGRTIMEOUT32: 10790a0e9771SDarren Reed { 10800a0e9771SDarren Reed struct timeval32 tv; 10810a0e9771SDarren Reed clock_t ticks; 10820a0e9771SDarren Reed 10830a0e9771SDarren Reed ticks = drv_hztousec(d->bd_rtout); 10840a0e9771SDarren Reed tv.tv_sec = ticks / 1000000; 10850a0e9771SDarren Reed tv.tv_usec = ticks - (tv.tv_sec * 1000000); 10860a0e9771SDarren Reed error = copyout(&tv, (void *)addr, sizeof (tv)); 10870a0e9771SDarren Reed break; 10880a0e9771SDarren Reed } 10890a0e9771SDarren Reed 10900a0e9771SDarren Reed /* 10910a0e9771SDarren Reed * Get a list of supported device parameters. 
10920a0e9771SDarren Reed */ 10930a0e9771SDarren Reed case BIOCGDLTLIST32: 10940a0e9771SDarren Reed if (d->bd_bif == 0) { 10950a0e9771SDarren Reed error = EINVAL; 10960a0e9771SDarren Reed } else { 10970a0e9771SDarren Reed struct bpf_dltlist32 lst32; 10980a0e9771SDarren Reed struct bpf_dltlist list; 10990a0e9771SDarren Reed 11000a0e9771SDarren Reed if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) { 11010a0e9771SDarren Reed error = EFAULT; 11020a0e9771SDarren Reed break; 11030a0e9771SDarren Reed } 11040a0e9771SDarren Reed 11050a0e9771SDarren Reed list.bfl_len = lst32.bfl_len; 11060a0e9771SDarren Reed list.bfl_list = (void *)(uint64_t)lst32.bfl_list; 11070a0e9771SDarren Reed error = bpf_getdltlist(d, &list); 11080a0e9771SDarren Reed if (error == 0) { 11090a0e9771SDarren Reed lst32.bfl_len = list.bfl_len; 11100a0e9771SDarren Reed 11110a0e9771SDarren Reed if (copyout(&lst32, (void *)addr, 11120a0e9771SDarren Reed sizeof (lst32)) != 0) 11130a0e9771SDarren Reed error = EFAULT; 11140a0e9771SDarren Reed } 11150a0e9771SDarren Reed } 11160a0e9771SDarren Reed break; 11170a0e9771SDarren Reed 11180a0e9771SDarren Reed /* 11190a0e9771SDarren Reed * Set link layer read filter. 11200a0e9771SDarren Reed */ 11210a0e9771SDarren Reed case BIOCSETF32: { 11220a0e9771SDarren Reed struct bpf_program32 prog32; 11230a0e9771SDarren Reed 11240a0e9771SDarren Reed if (ddi_copyin((void *)addr, &prog32, sizeof (prog), mode)) { 11250a0e9771SDarren Reed error = EFAULT; 11260a0e9771SDarren Reed break; 11270a0e9771SDarren Reed } 11280a0e9771SDarren Reed prog.bf_len = prog32.bf_len; 11290a0e9771SDarren Reed prog.bf_insns = (void *)(uint64_t)prog32.bf_insns; 11300a0e9771SDarren Reed error = bpf_setf(d, &prog); 11310a0e9771SDarren Reed break; 11320a0e9771SDarren Reed } 11330a0e9771SDarren Reed #endif 11340a0e9771SDarren Reed 11350a0e9771SDarren Reed /* 11360a0e9771SDarren Reed * Set read timeout. 
11370a0e9771SDarren Reed */ 11380a0e9771SDarren Reed case BIOCSRTIMEOUT: 11390a0e9771SDarren Reed { 11400a0e9771SDarren Reed struct timeval tv; 11410a0e9771SDarren Reed 11420a0e9771SDarren Reed if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 11430a0e9771SDarren Reed error = EFAULT; 11440a0e9771SDarren Reed break; 11450a0e9771SDarren Reed } 11460a0e9771SDarren Reed 11470a0e9771SDarren Reed /* Convert the timeout in microseconds to ticks */ 11480a0e9771SDarren Reed d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 11490a0e9771SDarren Reed tv.tv_usec); 11500a0e9771SDarren Reed if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 11510a0e9771SDarren Reed d->bd_rtout = 1; 11520a0e9771SDarren Reed break; 11530a0e9771SDarren Reed } 11540a0e9771SDarren Reed 11550a0e9771SDarren Reed /* 11560a0e9771SDarren Reed * Get read timeout. 11570a0e9771SDarren Reed */ 11580a0e9771SDarren Reed case BIOCGRTIMEOUT: 11590a0e9771SDarren Reed { 11600a0e9771SDarren Reed struct timeval tv; 11610a0e9771SDarren Reed clock_t ticks; 11620a0e9771SDarren Reed 11630a0e9771SDarren Reed ticks = drv_hztousec(d->bd_rtout); 11640a0e9771SDarren Reed tv.tv_sec = ticks / 1000000; 11650a0e9771SDarren Reed tv.tv_usec = ticks - (tv.tv_sec * 1000000); 11660a0e9771SDarren Reed if (copyout(&tv, (void *)addr, sizeof (tv)) != 0) 11670a0e9771SDarren Reed error = EFAULT; 11680a0e9771SDarren Reed break; 11690a0e9771SDarren Reed } 11700a0e9771SDarren Reed 11710a0e9771SDarren Reed /* 11720a0e9771SDarren Reed * Get packet stats. 
11730a0e9771SDarren Reed */ 11740a0e9771SDarren Reed case BIOCGSTATS: 11750a0e9771SDarren Reed { 11760a0e9771SDarren Reed struct bpf_stat bs; 11770a0e9771SDarren Reed 11780a0e9771SDarren Reed bs.bs_recv = d->bd_rcount; 11790a0e9771SDarren Reed bs.bs_drop = d->bd_dcount; 11800a0e9771SDarren Reed bs.bs_capt = d->bd_ccount; 11810a0e9771SDarren Reed if (copyout(&bs, (void *)addr, sizeof (bs)) != 0) 11820a0e9771SDarren Reed error = EFAULT; 11830a0e9771SDarren Reed break; 11840a0e9771SDarren Reed } 11850a0e9771SDarren Reed 11860a0e9771SDarren Reed /* 11870a0e9771SDarren Reed * Set immediate mode. 11880a0e9771SDarren Reed */ 11890a0e9771SDarren Reed case BIOCIMMEDIATE: 11900a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_immediate, 11910a0e9771SDarren Reed sizeof (d->bd_immediate)) != 0) 11920a0e9771SDarren Reed error = EFAULT; 11930a0e9771SDarren Reed break; 11940a0e9771SDarren Reed 11950a0e9771SDarren Reed case BIOCVERSION: 11960a0e9771SDarren Reed { 11970a0e9771SDarren Reed struct bpf_version bv; 11980a0e9771SDarren Reed 11990a0e9771SDarren Reed bv.bv_major = BPF_MAJOR_VERSION; 12000a0e9771SDarren Reed bv.bv_minor = BPF_MINOR_VERSION; 12010a0e9771SDarren Reed if (copyout(&bv, (void *)addr, sizeof (bv)) != 0) 12020a0e9771SDarren Reed error = EFAULT; 12030a0e9771SDarren Reed break; 12040a0e9771SDarren Reed } 12050a0e9771SDarren Reed 12060a0e9771SDarren Reed case BIOCGHDRCMPLT: /* get "header already complete" flag */ 12070a0e9771SDarren Reed if (copyout(&d->bd_hdrcmplt, (void *)addr, 12080a0e9771SDarren Reed sizeof (d->bd_hdrcmplt)) != 0) 12090a0e9771SDarren Reed error = EFAULT; 12100a0e9771SDarren Reed break; 12110a0e9771SDarren Reed 12120a0e9771SDarren Reed case BIOCSHDRCMPLT: /* set "header already complete" flag */ 12130a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_hdrcmplt, 12140a0e9771SDarren Reed sizeof (d->bd_hdrcmplt)) != 0) 12150a0e9771SDarren Reed error = EFAULT; 12160a0e9771SDarren Reed break; 12170a0e9771SDarren Reed 12180a0e9771SDarren Reed /* 
12190a0e9771SDarren Reed * Get "see sent packets" flag 12200a0e9771SDarren Reed */ 12210a0e9771SDarren Reed case BIOCGSEESENT: 12220a0e9771SDarren Reed if (copyout(&d->bd_seesent, (void *)addr, 12230a0e9771SDarren Reed sizeof (d->bd_seesent)) != 0) 12240a0e9771SDarren Reed error = EFAULT; 12250a0e9771SDarren Reed break; 12260a0e9771SDarren Reed 12270a0e9771SDarren Reed /* 12280a0e9771SDarren Reed * Set "see sent" packets flag 12290a0e9771SDarren Reed */ 12300a0e9771SDarren Reed case BIOCSSEESENT: 12310a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_seesent, 12320a0e9771SDarren Reed sizeof (d->bd_seesent)) != 0) 12330a0e9771SDarren Reed error = EFAULT; 12340a0e9771SDarren Reed break; 12350a0e9771SDarren Reed 12360a0e9771SDarren Reed case FIONBIO: /* Non-blocking I/O */ 12370a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_nonblock, 12380a0e9771SDarren Reed sizeof (d->bd_nonblock)) != 0) 12390a0e9771SDarren Reed error = EFAULT; 12400a0e9771SDarren Reed break; 12410a0e9771SDarren Reed } 12420a0e9771SDarren Reed return (error); 12430a0e9771SDarren Reed } 12440a0e9771SDarren Reed 12450a0e9771SDarren Reed /* 12460a0e9771SDarren Reed * Set d's packet filter program to fp. If this file already has a filter, 12470a0e9771SDarren Reed * free it and replace it. If the new filter is "empty" (has a 0 size), then 12480a0e9771SDarren Reed * the result is to just remove and free the existing filter. 12490a0e9771SDarren Reed * Returns EINVAL for bogus requests. 
12500a0e9771SDarren Reed */ 12510a0e9771SDarren Reed int 12520a0e9771SDarren Reed bpf_setf(struct bpf_d *d, struct bpf_program *fp) 12530a0e9771SDarren Reed { 12540a0e9771SDarren Reed struct bpf_insn *fcode, *old; 12550a0e9771SDarren Reed uint_t flen, size; 12560a0e9771SDarren Reed size_t oldsize; 12570a0e9771SDarren Reed 12580a0e9771SDarren Reed if (fp->bf_insns == 0) { 12590a0e9771SDarren Reed if (fp->bf_len != 0) 12600a0e9771SDarren Reed return (EINVAL); 12610a0e9771SDarren Reed mutex_enter(&d->bd_lock); 12620a0e9771SDarren Reed old = d->bd_filter; 12630a0e9771SDarren Reed oldsize = d->bd_filter_size; 12640a0e9771SDarren Reed d->bd_filter = 0; 12650a0e9771SDarren Reed d->bd_filter_size = 0; 12660a0e9771SDarren Reed reset_d(d); 12670a0e9771SDarren Reed mutex_exit(&d->bd_lock); 12680a0e9771SDarren Reed if (old != 0) 12690a0e9771SDarren Reed kmem_free(old, oldsize); 12700a0e9771SDarren Reed return (0); 12710a0e9771SDarren Reed } 12720a0e9771SDarren Reed flen = fp->bf_len; 12730a0e9771SDarren Reed if (flen > BPF_MAXINSNS) 12740a0e9771SDarren Reed return (EINVAL); 12750a0e9771SDarren Reed 12760a0e9771SDarren Reed size = flen * sizeof (*fp->bf_insns); 12770a0e9771SDarren Reed fcode = kmem_alloc(size, KM_SLEEP); 12780a0e9771SDarren Reed if (copyin(fp->bf_insns, fcode, size) != 0) 12790a0e9771SDarren Reed return (EFAULT); 12800a0e9771SDarren Reed 12810a0e9771SDarren Reed if (bpf_validate(fcode, (int)flen)) { 12820a0e9771SDarren Reed mutex_enter(&d->bd_lock); 12830a0e9771SDarren Reed old = d->bd_filter; 12840a0e9771SDarren Reed oldsize = d->bd_filter_size; 12850a0e9771SDarren Reed d->bd_filter = fcode; 12860a0e9771SDarren Reed d->bd_filter_size = size; 12870a0e9771SDarren Reed reset_d(d); 12880a0e9771SDarren Reed mutex_exit(&d->bd_lock); 12890a0e9771SDarren Reed if (old != 0) 12900a0e9771SDarren Reed kmem_free(old, oldsize); 12910a0e9771SDarren Reed 12920a0e9771SDarren Reed return (0); 12930a0e9771SDarren Reed } 12940a0e9771SDarren Reed kmem_free(fcode, size); 
12950a0e9771SDarren Reed return (EINVAL); 12960a0e9771SDarren Reed } 12970a0e9771SDarren Reed 12980a0e9771SDarren Reed /* 12990a0e9771SDarren Reed * Detach a file from its current interface (if attached at all) and attach 1300b7ea883bSDarren Reed * to the interface indicated by the name stored in ifname. 13010a0e9771SDarren Reed * Return an errno or 0. 13020a0e9771SDarren Reed */ 13030a0e9771SDarren Reed static int 13040a0e9771SDarren Reed bpf_setif(struct bpf_d *d, char *ifname, int namesize) 13050a0e9771SDarren Reed { 13060a0e9771SDarren Reed int unit_seen; 1307b7ea883bSDarren Reed int error = 0; 13080a0e9771SDarren Reed char *cp; 13090a0e9771SDarren Reed int i; 13100a0e9771SDarren Reed 13110a0e9771SDarren Reed /* 13120a0e9771SDarren Reed * Make sure the provided name has a unit number, and default 13130a0e9771SDarren Reed * it to '0' if not specified. 13140a0e9771SDarren Reed * XXX This is ugly ... do this differently? 13150a0e9771SDarren Reed */ 13160a0e9771SDarren Reed unit_seen = 0; 13170a0e9771SDarren Reed cp = ifname; 13180a0e9771SDarren Reed cp[namesize - 1] = '\0'; /* sanity */ 13190a0e9771SDarren Reed while (*cp++) 13200a0e9771SDarren Reed if (*cp >= '0' && *cp <= '9') 13210a0e9771SDarren Reed unit_seen = 1; 13220a0e9771SDarren Reed if (!unit_seen) { 13230a0e9771SDarren Reed /* Make sure to leave room for the '\0'. 
*/ 13240a0e9771SDarren Reed for (i = 0; i < (namesize - 1); ++i) { 13250a0e9771SDarren Reed if ((ifname[i] >= 'a' && ifname[i] <= 'z') || 13260a0e9771SDarren Reed (ifname[i] >= 'A' && ifname[i] <= 'Z')) 13270a0e9771SDarren Reed continue; 13280a0e9771SDarren Reed ifname[i] = '0'; 13290a0e9771SDarren Reed } 13300a0e9771SDarren Reed } 13310a0e9771SDarren Reed 13320a0e9771SDarren Reed /* 13330a0e9771SDarren Reed * Make sure that only one call to this function happens at a time 13340a0e9771SDarren Reed * and that we're not interleaving a read/write 13350a0e9771SDarren Reed */ 13360a0e9771SDarren Reed mutex_enter(&d->bd_lock); 13370a0e9771SDarren Reed while (d->bd_inuse != 0) { 13380a0e9771SDarren Reed d->bd_waiting++; 13390a0e9771SDarren Reed if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 13400a0e9771SDarren Reed d->bd_waiting--; 13410a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13420a0e9771SDarren Reed return (EINTR); 13430a0e9771SDarren Reed } 13440a0e9771SDarren Reed d->bd_waiting--; 13450a0e9771SDarren Reed } 13460a0e9771SDarren Reed d->bd_inuse = -1; 13470a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13480a0e9771SDarren Reed 13490a0e9771SDarren Reed if (d->bd_sbuf == 0) 13500a0e9771SDarren Reed error = bpf_allocbufs(d); 13510a0e9771SDarren Reed 1352b7ea883bSDarren Reed if (error == 0) { 13530a0e9771SDarren Reed mutex_enter(&d->bd_lock); 13540a0e9771SDarren Reed if (d->bd_bif) 13550a0e9771SDarren Reed /* 13560a0e9771SDarren Reed * Detach if attached to something else. 
13570a0e9771SDarren Reed */ 13580a0e9771SDarren Reed bpf_detachd(d); 13590a0e9771SDarren Reed 1360b7ea883bSDarren Reed error = bpf_attachd(d, ifname, -1); 13610a0e9771SDarren Reed reset_d(d); 13620a0e9771SDarren Reed d->bd_inuse = 0; 13630a0e9771SDarren Reed if (d->bd_waiting != 0) 13640a0e9771SDarren Reed cv_signal(&d->bd_wait); 13650a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13660a0e9771SDarren Reed return (error); 13670a0e9771SDarren Reed } 13680a0e9771SDarren Reed 13690a0e9771SDarren Reed mutex_enter(&d->bd_lock); 13700a0e9771SDarren Reed d->bd_inuse = 0; 13710a0e9771SDarren Reed if (d->bd_waiting != 0) 13720a0e9771SDarren Reed cv_signal(&d->bd_wait); 13730a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13740a0e9771SDarren Reed 13750a0e9771SDarren Reed /* 13760a0e9771SDarren Reed * Try tickle the mac layer into attaching the device... 13770a0e9771SDarren Reed */ 13780a0e9771SDarren Reed return (bpf_provider_tickle(ifname, d->bd_zone)); 13790a0e9771SDarren Reed } 13800a0e9771SDarren Reed 13810a0e9771SDarren Reed /* 13820a0e9771SDarren Reed * Copy the interface name to the ifreq. 
13830a0e9771SDarren Reed */ 13840a0e9771SDarren Reed static int 13850a0e9771SDarren Reed bpf_ifname(struct bpf_d *d, char *buffer, int bufsize) 13860a0e9771SDarren Reed { 13870a0e9771SDarren Reed 13880a0e9771SDarren Reed mutex_enter(&d->bd_lock); 1389a3f59f6eSToomas Soome if (d->bd_bif == 0) { 13900a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13910a0e9771SDarren Reed return (EINVAL); 13920a0e9771SDarren Reed } 13930a0e9771SDarren Reed 1394b7ea883bSDarren Reed (void) strlcpy(buffer, d->bd_ifname, bufsize); 13950a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13960a0e9771SDarren Reed 13970a0e9771SDarren Reed return (0); 13980a0e9771SDarren Reed } 13990a0e9771SDarren Reed 140080d5689fSPatrick Mooney /* ARGSUSED */ 14010a0e9771SDarren Reed int 14020a0e9771SDarren Reed bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp, 14030a0e9771SDarren Reed struct pollhead **phpp) 14040a0e9771SDarren Reed { 14050a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 14060a0e9771SDarren Reed 140780d5689fSPatrick Mooney /* 140880d5689fSPatrick Mooney * Until this driver is modified to issue proper pollwakeup() calls on 140980d5689fSPatrick Mooney * its pollhead, edge-triggered polling is not allowed. 141080d5689fSPatrick Mooney */ 141180d5689fSPatrick Mooney if (events & POLLET) { 141280d5689fSPatrick Mooney return (EPERM); 141380d5689fSPatrick Mooney } 141480d5689fSPatrick Mooney 14150a0e9771SDarren Reed if (events & (POLLIN | POLLRDNORM)) { 14160a0e9771SDarren Reed /* 14170a0e9771SDarren Reed * An imitation of the FIONREAD ioctl code. 
14180a0e9771SDarren Reed */ 14190a0e9771SDarren Reed mutex_enter(&d->bd_lock); 14200a0e9771SDarren Reed if (d->bd_hlen != 0 || 14210a0e9771SDarren Reed ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 14220a0e9771SDarren Reed d->bd_slen != 0)) { 14230a0e9771SDarren Reed *reventsp |= events & (POLLIN | POLLRDNORM); 14240a0e9771SDarren Reed } else { 142580d5689fSPatrick Mooney /* 142680d5689fSPatrick Mooney * Until the bpf driver has been updated to include 142780d5689fSPatrick Mooney * adequate pollwakeup() logic, no pollhead will be 142880d5689fSPatrick Mooney * emitted here, preventing the resource from being 142980d5689fSPatrick Mooney * cached by poll()/devpoll/epoll. 143080d5689fSPatrick Mooney */ 14310a0e9771SDarren Reed *reventsp = 0; 14320a0e9771SDarren Reed /* Start the read timeout if necessary */ 14330a0e9771SDarren Reed if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 14340a0e9771SDarren Reed bpf_clear_timeout(d); 14350a0e9771SDarren Reed /* 14360a0e9771SDarren Reed * Only allow the timeout to be set once. 14370a0e9771SDarren Reed */ 14380a0e9771SDarren Reed if (d->bd_callout == 0) 14390a0e9771SDarren Reed d->bd_callout = timeout(bpf_timed_out, 14400a0e9771SDarren Reed d, d->bd_rtout); 14410a0e9771SDarren Reed d->bd_state = BPF_WAITING; 14420a0e9771SDarren Reed } 14430a0e9771SDarren Reed } 14440a0e9771SDarren Reed mutex_exit(&d->bd_lock); 14450a0e9771SDarren Reed } 14460a0e9771SDarren Reed 14470a0e9771SDarren Reed return (0); 14480a0e9771SDarren Reed } 14490a0e9771SDarren Reed 14500a0e9771SDarren Reed /* 14510a0e9771SDarren Reed * Copy data from an mblk_t chain into a buffer. This works for ipnet 14520a0e9771SDarren Reed * because the dl_ipnetinfo_t is placed in an mblk_t that leads the 14530a0e9771SDarren Reed * packet itself. 
14540a0e9771SDarren Reed */ 14550a0e9771SDarren Reed static void * 14560a0e9771SDarren Reed bpf_mcpy(void *dst_arg, const void *src_arg, size_t len) 14570a0e9771SDarren Reed { 14580a0e9771SDarren Reed const mblk_t *m; 14590a0e9771SDarren Reed uint_t count; 14600a0e9771SDarren Reed uchar_t *dst; 14610a0e9771SDarren Reed 14620a0e9771SDarren Reed m = src_arg; 14630a0e9771SDarren Reed dst = dst_arg; 14640a0e9771SDarren Reed while (len > 0) { 14650a0e9771SDarren Reed if (m == NULL) 14660a0e9771SDarren Reed panic("bpf_mcpy"); 14670a0e9771SDarren Reed count = (uint_t)min(M_LEN(m), len); 14680a0e9771SDarren Reed (void) memcpy(dst, mtod(m, const void *), count); 14690a0e9771SDarren Reed m = m->b_cont; 14700a0e9771SDarren Reed dst += count; 14710a0e9771SDarren Reed len -= count; 14720a0e9771SDarren Reed } 14730a0e9771SDarren Reed return (dst_arg); 14740a0e9771SDarren Reed } 14750a0e9771SDarren Reed 14760a0e9771SDarren Reed /* 14770a0e9771SDarren Reed * Dispatch a packet to all the listeners on interface bp. 
14780a0e9771SDarren Reed * 14790a0e9771SDarren Reed * marg pointer to the packet, either a data buffer or an mbuf chain 14800a0e9771SDarren Reed * buflen buffer length, if marg is a data buffer 14810a0e9771SDarren Reed * cpfn a function that can copy marg into the listener's buffer 14820a0e9771SDarren Reed * pktlen length of the packet 14830a0e9771SDarren Reed * issent boolean indicating whether the packet was sent or receive 14840a0e9771SDarren Reed */ 14850a0e9771SDarren Reed static inline void 14860a0e9771SDarren Reed bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen, 14870a0e9771SDarren Reed uint_t buflen, boolean_t issent) 14880a0e9771SDarren Reed { 14890a0e9771SDarren Reed struct timeval tv; 14900a0e9771SDarren Reed uint_t slen; 14910a0e9771SDarren Reed 14920a0e9771SDarren Reed if (!d->bd_seesent && issent) 14930a0e9771SDarren Reed return; 14940a0e9771SDarren Reed 14950a0e9771SDarren Reed /* 14960a0e9771SDarren Reed * Accuracy of the packet counters in BPF is vital so it 14970a0e9771SDarren Reed * is important to protect even the outer ones. 14980a0e9771SDarren Reed */ 14990a0e9771SDarren Reed mutex_enter(&d->bd_lock); 15000a0e9771SDarren Reed slen = bpf_filter(d->bd_filter, marg, pktlen, buflen); 15010a0e9771SDarren Reed DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif, 15020a0e9771SDarren Reed struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, slen); 15030a0e9771SDarren Reed d->bd_rcount++; 15040a0e9771SDarren Reed ks_stats.kp_receive.value.ui64++; 15050a0e9771SDarren Reed if (slen != 0) { 15060a0e9771SDarren Reed uniqtime(&tv); 15070a0e9771SDarren Reed catchpacket(d, marg, pktlen, slen, cpfn, &tv); 15080a0e9771SDarren Reed } 15090a0e9771SDarren Reed mutex_exit(&d->bd_lock); 15100a0e9771SDarren Reed } 15110a0e9771SDarren Reed 15120a0e9771SDarren Reed /* 15130a0e9771SDarren Reed * Incoming linkage from device drivers. 
15140a0e9771SDarren Reed */ 15150a0e9771SDarren Reed /* ARGSUSED */ 15160a0e9771SDarren Reed void 15170a0e9771SDarren Reed bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent) 15180a0e9771SDarren Reed { 15190a0e9771SDarren Reed cp_fn_t cpfn; 15200a0e9771SDarren Reed struct bpf_d *d = arg; 15210a0e9771SDarren Reed uint_t pktlen, buflen; 15220a0e9771SDarren Reed void *marg; 15230a0e9771SDarren Reed 15240a0e9771SDarren Reed pktlen = msgdsize(m); 15250a0e9771SDarren Reed 15260a0e9771SDarren Reed if (pktlen == M_LEN(m)) { 15270a0e9771SDarren Reed cpfn = (cp_fn_t)memcpy; 15280a0e9771SDarren Reed marg = mtod(m, void *); 15290a0e9771SDarren Reed buflen = pktlen; 15300a0e9771SDarren Reed } else { 15310a0e9771SDarren Reed cpfn = bpf_mcpy; 15320a0e9771SDarren Reed marg = m; 15330a0e9771SDarren Reed buflen = 0; 15340a0e9771SDarren Reed } 15350a0e9771SDarren Reed 15360a0e9771SDarren Reed bpf_deliver(d, cpfn, marg, pktlen, buflen, issent); 15370a0e9771SDarren Reed } 15380a0e9771SDarren Reed 15390a0e9771SDarren Reed /* 15400a0e9771SDarren Reed * Incoming linkage from ipnet. 15410a0e9771SDarren Reed * In ipnet, there is only one event, NH_OBSERVE, that delivers packets 15420a0e9771SDarren Reed * from all network interfaces. Thus the tap function needs to apply a 15430a0e9771SDarren Reed * filter using the interface index/id to immitate snoop'ing on just the 15440a0e9771SDarren Reed * specified interface. 
15450a0e9771SDarren Reed */ 15460a0e9771SDarren Reed /* ARGSUSED */ 15470a0e9771SDarren Reed void 15480a0e9771SDarren Reed bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length) 15490a0e9771SDarren Reed { 15500a0e9771SDarren Reed hook_pkt_observe_t *hdr; 15510a0e9771SDarren Reed struct bpf_d *d = arg; 15520a0e9771SDarren Reed 15530a0e9771SDarren Reed hdr = (hook_pkt_observe_t *)m->b_rptr; 1554b7ea883bSDarren Reed if (ntohl(hdr->hpo_ifindex) != d->bd_linkid) 15550a0e9771SDarren Reed return; 15560a0e9771SDarren Reed bpf_deliver(d, bpf_mcpy, m, length, 0, issent); 15570a0e9771SDarren Reed 15580a0e9771SDarren Reed } 15590a0e9771SDarren Reed 15600a0e9771SDarren Reed /* 15610a0e9771SDarren Reed * Move the packet data from interface memory (pkt) into the 15620a0e9771SDarren Reed * store buffer. Return 1 if it's time to wakeup a listener (buffer full), 15630a0e9771SDarren Reed * otherwise 0. "copy" is the routine called to do the actual data 15640a0e9771SDarren Reed * transfer. memcpy is passed in to copy contiguous chunks, while 15650a0e9771SDarren Reed * bpf_mcpy is passed in to copy mbuf chains. In the latter case, 15660a0e9771SDarren Reed * pkt is really an mbuf. 15670a0e9771SDarren Reed */ 15680a0e9771SDarren Reed static void 15690a0e9771SDarren Reed catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen, 15700a0e9771SDarren Reed cp_fn_t cpfn, struct timeval *tv) 15710a0e9771SDarren Reed { 15720a0e9771SDarren Reed struct bpf_hdr *hp; 15730a0e9771SDarren Reed int totlen, curlen; 1574b7ea883bSDarren Reed int hdrlen = d->bd_hdrlen; 15750a0e9771SDarren Reed int do_wakeup = 0; 15760a0e9771SDarren Reed 15770a0e9771SDarren Reed ++d->bd_ccount; 15780a0e9771SDarren Reed ks_stats.kp_capture.value.ui64++; 15790a0e9771SDarren Reed /* 15800a0e9771SDarren Reed * Figure out how many bytes to move. If the packet is 15810a0e9771SDarren Reed * greater or equal to the snapshot length, transfer that 15820a0e9771SDarren Reed * much. 
Otherwise, transfer the whole packet (unless 15830a0e9771SDarren Reed * we hit the buffer size limit). 15840a0e9771SDarren Reed */ 15850a0e9771SDarren Reed totlen = hdrlen + min(snaplen, pktlen); 15860a0e9771SDarren Reed if (totlen > d->bd_bufsize) 15870a0e9771SDarren Reed totlen = d->bd_bufsize; 15880a0e9771SDarren Reed 15890a0e9771SDarren Reed /* 15900a0e9771SDarren Reed * Round up the end of the previous packet to the next longword. 15910a0e9771SDarren Reed */ 15920a0e9771SDarren Reed curlen = BPF_WORDALIGN(d->bd_slen); 15930a0e9771SDarren Reed if (curlen + totlen > d->bd_bufsize) { 15940a0e9771SDarren Reed /* 15950a0e9771SDarren Reed * This packet will overflow the storage buffer. 15960a0e9771SDarren Reed * Rotate the buffers if we can, then wakeup any 15970a0e9771SDarren Reed * pending reads. 15980a0e9771SDarren Reed */ 15990a0e9771SDarren Reed if (d->bd_fbuf == 0) { 16000a0e9771SDarren Reed /* 16010a0e9771SDarren Reed * We haven't completed the previous read yet, 16020a0e9771SDarren Reed * so drop the packet. 16030a0e9771SDarren Reed */ 16040a0e9771SDarren Reed ++d->bd_dcount; 16050a0e9771SDarren Reed ks_stats.kp_dropped.value.ui64++; 16060a0e9771SDarren Reed return; 16070a0e9771SDarren Reed } 16080a0e9771SDarren Reed ROTATE_BUFFERS(d); 16090a0e9771SDarren Reed do_wakeup = 1; 16100a0e9771SDarren Reed curlen = 0; 16110a0e9771SDarren Reed } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) { 16120a0e9771SDarren Reed /* 16130a0e9771SDarren Reed * Immediate mode is set, or the read timeout has 16140a0e9771SDarren Reed * already expired during a select call. A packet 16150a0e9771SDarren Reed * arrived, so the reader should be woken up. 16160a0e9771SDarren Reed */ 16170a0e9771SDarren Reed do_wakeup = 1; 16180a0e9771SDarren Reed } 16190a0e9771SDarren Reed 16200a0e9771SDarren Reed /* 16210a0e9771SDarren Reed * Append the bpf header to the existing buffer before we add 16220a0e9771SDarren Reed * on the actual packet data. 
16230a0e9771SDarren Reed */ 16240a0e9771SDarren Reed hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen); 16250a0e9771SDarren Reed hp->bh_tstamp.tv_sec = tv->tv_sec; 16260a0e9771SDarren Reed hp->bh_tstamp.tv_usec = tv->tv_usec; 16270a0e9771SDarren Reed hp->bh_datalen = pktlen; 16280a0e9771SDarren Reed hp->bh_hdrlen = (uint16_t)hdrlen; 16290a0e9771SDarren Reed /* 16300a0e9771SDarren Reed * Copy the packet data into the store buffer and update its length. 16310a0e9771SDarren Reed */ 16320a0e9771SDarren Reed (*cpfn)((uchar_t *)hp + hdrlen, pkt, 16330a0e9771SDarren Reed (hp->bh_caplen = totlen - hdrlen)); 16340a0e9771SDarren Reed d->bd_slen = curlen + totlen; 16350a0e9771SDarren Reed 16360a0e9771SDarren Reed /* 16370a0e9771SDarren Reed * Call bpf_wakeup after bd_slen has been updated. 16380a0e9771SDarren Reed */ 16390a0e9771SDarren Reed if (do_wakeup) 16400a0e9771SDarren Reed bpf_wakeup(d); 16410a0e9771SDarren Reed } 16420a0e9771SDarren Reed 16430a0e9771SDarren Reed /* 16440a0e9771SDarren Reed * Initialize all nonzero fields of a descriptor. 16450a0e9771SDarren Reed */ 16460a0e9771SDarren Reed static int 16470a0e9771SDarren Reed bpf_allocbufs(struct bpf_d *d) 16480a0e9771SDarren Reed { 16490a0e9771SDarren Reed 16500a0e9771SDarren Reed d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 16510a0e9771SDarren Reed if (!d->bd_fbuf) 16520a0e9771SDarren Reed return (ENOBUFS); 16530a0e9771SDarren Reed d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 16540a0e9771SDarren Reed if (!d->bd_sbuf) { 16550a0e9771SDarren Reed kmem_free(d->bd_fbuf, d->bd_bufsize); 16560a0e9771SDarren Reed return (ENOBUFS); 16570a0e9771SDarren Reed } 16580a0e9771SDarren Reed d->bd_slen = 0; 16590a0e9771SDarren Reed d->bd_hlen = 0; 16600a0e9771SDarren Reed return (0); 16610a0e9771SDarren Reed } 16620a0e9771SDarren Reed 16630a0e9771SDarren Reed /* 16640a0e9771SDarren Reed * Free buffers currently in use by a descriptor. 16650a0e9771SDarren Reed * Called on close. 
16660a0e9771SDarren Reed */ 16670a0e9771SDarren Reed static void 16680a0e9771SDarren Reed bpf_freed(struct bpf_d *d) 16690a0e9771SDarren Reed { 16700a0e9771SDarren Reed /* 16710a0e9771SDarren Reed * At this point the descriptor has been detached from its 16720a0e9771SDarren Reed * interface and it yet hasn't been marked free. 16730a0e9771SDarren Reed */ 16740a0e9771SDarren Reed if (d->bd_sbuf != 0) { 16750a0e9771SDarren Reed kmem_free(d->bd_sbuf, d->bd_bufsize); 16760a0e9771SDarren Reed if (d->bd_hbuf != 0) 16770a0e9771SDarren Reed kmem_free(d->bd_hbuf, d->bd_bufsize); 16780a0e9771SDarren Reed if (d->bd_fbuf != 0) 16790a0e9771SDarren Reed kmem_free(d->bd_fbuf, d->bd_bufsize); 16800a0e9771SDarren Reed } 16810a0e9771SDarren Reed if (d->bd_filter) 16820a0e9771SDarren Reed kmem_free(d->bd_filter, d->bd_filter_size); 16830a0e9771SDarren Reed } 16840a0e9771SDarren Reed 16850a0e9771SDarren Reed /* 16860a0e9771SDarren Reed * Get a list of available data link type of the interface. 16870a0e9771SDarren Reed */ 16880a0e9771SDarren Reed static int 16890a0e9771SDarren Reed bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp) 16900a0e9771SDarren Reed { 1691b7ea883bSDarren Reed bpf_provider_list_t *bp; 1692b7ea883bSDarren Reed bpf_provider_t *bpr; 1693b7ea883bSDarren Reed zoneid_t zoneid; 1694b7ea883bSDarren Reed uintptr_t mcip; 1695b7ea883bSDarren Reed uint_t nicdlt; 1696b7ea883bSDarren Reed uintptr_t mh; 1697b7ea883bSDarren Reed int error; 1698b7ea883bSDarren Reed int n; 16990a0e9771SDarren Reed 17000a0e9771SDarren Reed n = 0; 1701b7ea883bSDarren Reed mh = 0; 1702b7ea883bSDarren Reed mcip = 0; 17030a0e9771SDarren Reed error = 0; 1704b7ea883bSDarren Reed mutex_enter(&d->bd_lock); 1705b7ea883bSDarren Reed LIST_FOREACH(bp, &bpf_providers, bpl_next) { 1706b7ea883bSDarren Reed bpr = bp->bpl_what; 1707b7ea883bSDarren Reed error = MBPF_OPEN(bpr, d->bd_ifname, &mh, d->bd_zone); 1708b7ea883bSDarren Reed if (error != 0) 1709b7ea883bSDarren Reed goto next; 1710b7ea883bSDarren Reed 
error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 1711b7ea883bSDarren Reed if (error != 0) 1712b7ea883bSDarren Reed goto next; 1713b7ea883bSDarren Reed error = MBPF_GET_ZONE(bpr, mh, &zoneid); 1714b7ea883bSDarren Reed if (error != 0) 1715b7ea883bSDarren Reed goto next; 17160a0e9771SDarren Reed if (d->bd_zone != GLOBAL_ZONEID && 1717b7ea883bSDarren Reed d->bd_zone != zoneid) 1718b7ea883bSDarren Reed goto next; 1719b7ea883bSDarren Reed error = MBPF_GET_DLT(bpr, mh, &nicdlt); 1720b7ea883bSDarren Reed if (error != 0) 1721b7ea883bSDarren Reed goto next; 1722b7ea883bSDarren Reed nicdlt = bpf_dl_to_dlt(nicdlt); 17230a0e9771SDarren Reed if (listp->bfl_list != NULL) { 1724b7ea883bSDarren Reed if (n >= listp->bfl_len) { 1725b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(bpr, mcip); 1726b7ea883bSDarren Reed MBPF_CLOSE(bpr, mh); 1727b7ea883bSDarren Reed break; 1728b7ea883bSDarren Reed } 17290a0e9771SDarren Reed /* 1730b7ea883bSDarren Reed * Bumping of bd_inuse ensures the structure does not 17310a0e9771SDarren Reed * disappear while the copyout runs and allows the for 17320a0e9771SDarren Reed * loop to be continued. 
17330a0e9771SDarren Reed */ 1734b7ea883bSDarren Reed d->bd_inuse++; 1735b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 1736b7ea883bSDarren Reed if (copyout(&nicdlt, 17370a0e9771SDarren Reed listp->bfl_list + n, sizeof (uint_t)) != 0) 17380a0e9771SDarren Reed error = EFAULT; 1739b7ea883bSDarren Reed mutex_enter(&d->bd_lock); 1740b7ea883bSDarren Reed if (error != 0) 1741b7ea883bSDarren Reed break; 1742b7ea883bSDarren Reed d->bd_inuse--; 17430a0e9771SDarren Reed } 17440a0e9771SDarren Reed n++; 1745b7ea883bSDarren Reed next: 1746b7ea883bSDarren Reed if (mcip != 0) { 1747b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(bpr, mcip); 1748b7ea883bSDarren Reed mcip = 0; 17490a0e9771SDarren Reed } 1750b7ea883bSDarren Reed if (mh != 0) { 1751b7ea883bSDarren Reed MBPF_CLOSE(bpr, mh); 1752b7ea883bSDarren Reed mh = 0; 1753b7ea883bSDarren Reed } 1754b7ea883bSDarren Reed } 1755b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 1756b7ea883bSDarren Reed 1757b7ea883bSDarren Reed /* 1758b7ea883bSDarren Reed * It is quite possible that one or more provider to BPF may not 1759b7ea883bSDarren Reed * know about a link name whlist others do. In that case, so long 1760b7ea883bSDarren Reed * as we have one success, do not declare an error unless it was 1761b7ea883bSDarren Reed * an EFAULT as this indicates a problem that needs to be reported. 1762b7ea883bSDarren Reed */ 1763b7ea883bSDarren Reed if ((error != EFAULT) && (n > 0)) 1764b7ea883bSDarren Reed error = 0; 1765b7ea883bSDarren Reed 17660a0e9771SDarren Reed listp->bfl_len = n; 17670a0e9771SDarren Reed return (error); 17680a0e9771SDarren Reed } 17690a0e9771SDarren Reed 17700a0e9771SDarren Reed /* 17710a0e9771SDarren Reed * Set the data link type of a BPF instance. 
17720a0e9771SDarren Reed */ 17730a0e9771SDarren Reed static int 17740a0e9771SDarren Reed bpf_setdlt(struct bpf_d *d, void *addr) 17750a0e9771SDarren Reed { 17760a0e9771SDarren Reed char ifname[LIFNAMSIZ+1]; 1777b7ea883bSDarren Reed zoneid_t niczone; 17780a0e9771SDarren Reed int error; 17790a0e9771SDarren Reed int dlt; 17800a0e9771SDarren Reed 17810a0e9771SDarren Reed if (copyin(addr, &dlt, sizeof (dlt)) != 0) 17820a0e9771SDarren Reed return (EFAULT); 1783b7ea883bSDarren Reed 17840a0e9771SDarren Reed mutex_enter(&d->bd_lock); 17850a0e9771SDarren Reed 17860a0e9771SDarren Reed if (d->bd_bif == 0) { /* Interface not set */ 17870a0e9771SDarren Reed mutex_exit(&d->bd_lock); 17880a0e9771SDarren Reed return (EINVAL); 17890a0e9771SDarren Reed } 1790b7ea883bSDarren Reed if (d->bd_dlt == dlt) { /* NULL-op */ 17910a0e9771SDarren Reed mutex_exit(&d->bd_lock); 17920a0e9771SDarren Reed return (0); 17930a0e9771SDarren Reed } 17940a0e9771SDarren Reed 1795b7ea883bSDarren Reed error = MBPF_GET_ZONE(&d->bd_mac, d->bd_bif, &niczone); 1796b7ea883bSDarren Reed if (error != 0) { 1797b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 1798b7ea883bSDarren Reed return (error); 1799b7ea883bSDarren Reed } 1800b7ea883bSDarren Reed 18010a0e9771SDarren Reed /* 18020a0e9771SDarren Reed * See the matrix at the top of the file for the permissions table 18030a0e9771SDarren Reed * enforced by this driver. 
18040a0e9771SDarren Reed */ 18050a0e9771SDarren Reed if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) && 1806b7ea883bSDarren Reed (niczone != d->bd_zone)) { 18070a0e9771SDarren Reed mutex_exit(&d->bd_lock); 18080a0e9771SDarren Reed return (EINVAL); 18090a0e9771SDarren Reed } 18100a0e9771SDarren Reed 1811b7ea883bSDarren Reed (void) strlcpy(ifname, d->bd_ifname, sizeof (ifname)); 1812b7ea883bSDarren Reed d->bd_inuse = -1; 18130a0e9771SDarren Reed bpf_detachd(d); 1814b7ea883bSDarren Reed error = bpf_attachd(d, ifname, dlt); 18150a0e9771SDarren Reed reset_d(d); 1816b7ea883bSDarren Reed d->bd_inuse = 0; 18170a0e9771SDarren Reed 18180a0e9771SDarren Reed mutex_exit(&d->bd_lock); 18190a0e9771SDarren Reed return (error); 18200a0e9771SDarren Reed } 18210a0e9771SDarren Reed 18220a0e9771SDarren Reed /* 18230a0e9771SDarren Reed * bpf_clear_timeout is called with the bd_lock mutex held, providing it 18240a0e9771SDarren Reed * with the necessary protection to retrieve and modify bd_callout but it 18250a0e9771SDarren Reed * does not hold the lock for its entire duration... see below... 18260a0e9771SDarren Reed */ 18270a0e9771SDarren Reed static void 18280a0e9771SDarren Reed bpf_clear_timeout(struct bpf_d *d) 18290a0e9771SDarren Reed { 18300a0e9771SDarren Reed timeout_id_t tid = d->bd_callout; 18310a0e9771SDarren Reed d->bd_callout = 0; 18320a0e9771SDarren Reed d->bd_inuse++; 18330a0e9771SDarren Reed 18340a0e9771SDarren Reed /* 18350a0e9771SDarren Reed * If the timeout has fired and is waiting on bd_lock, we could 18360a0e9771SDarren Reed * deadlock here because untimeout if bd_lock is held and would 18370a0e9771SDarren Reed * wait for bpf_timed_out to finish and it never would. 
18380a0e9771SDarren Reed */ 18390a0e9771SDarren Reed if (tid != 0) { 18400a0e9771SDarren Reed mutex_exit(&d->bd_lock); 18410a0e9771SDarren Reed (void) untimeout(tid); 18420a0e9771SDarren Reed mutex_enter(&d->bd_lock); 18430a0e9771SDarren Reed } 18440a0e9771SDarren Reed 18450a0e9771SDarren Reed d->bd_inuse--; 18460a0e9771SDarren Reed } 18470a0e9771SDarren Reed 18480a0e9771SDarren Reed /* 18490a0e9771SDarren Reed * As a cloning device driver, BPF needs to keep track of which device 18500a0e9771SDarren Reed * numbers are in use and which ones are not. A hash table, indexed by 18510a0e9771SDarren Reed * the minor device number, is used to store the pointers to the 18520a0e9771SDarren Reed * individual descriptors that are allocated in bpfopen(). 18530a0e9771SDarren Reed * The functions below present the interface for that hash table to 18540a0e9771SDarren Reed * the rest of the driver. 18550a0e9771SDarren Reed */ 18560a0e9771SDarren Reed static struct bpf_d * 18570a0e9771SDarren Reed bpf_dev_find(minor_t minor) 18580a0e9771SDarren Reed { 18590a0e9771SDarren Reed struct bpf_d *d = NULL; 18600a0e9771SDarren Reed 18610a0e9771SDarren Reed (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 18620a0e9771SDarren Reed (mod_hash_val_t *)&d); 18630a0e9771SDarren Reed 18640a0e9771SDarren Reed return (d); 18650a0e9771SDarren Reed } 18660a0e9771SDarren Reed 18670a0e9771SDarren Reed static void 18680a0e9771SDarren Reed bpf_dev_add(struct bpf_d *d) 18690a0e9771SDarren Reed { 18700a0e9771SDarren Reed (void) mod_hash_insert(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 18710a0e9771SDarren Reed (mod_hash_val_t)d); 18720a0e9771SDarren Reed } 18730a0e9771SDarren Reed 18740a0e9771SDarren Reed static void 18750a0e9771SDarren Reed bpf_dev_remove(struct bpf_d *d) 18760a0e9771SDarren Reed { 18770a0e9771SDarren Reed struct bpf_d *stor; 18780a0e9771SDarren Reed 18790a0e9771SDarren Reed (void) mod_hash_remove(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 18800a0e9771SDarren Reed 
(mod_hash_val_t *)&stor); 18810a0e9771SDarren Reed ASSERT(stor == d); 18820a0e9771SDarren Reed } 18830a0e9771SDarren Reed 18840a0e9771SDarren Reed /* 18850a0e9771SDarren Reed * bpf_def_get should only ever be called for a minor number that exists, 18860a0e9771SDarren Reed * thus there should always be a pointer in the hash table that corresponds 18870a0e9771SDarren Reed * to it. 18880a0e9771SDarren Reed */ 18890a0e9771SDarren Reed static struct bpf_d * 18900a0e9771SDarren Reed bpf_dev_get(minor_t minor) 18910a0e9771SDarren Reed { 18920a0e9771SDarren Reed struct bpf_d *d = NULL; 18930a0e9771SDarren Reed 18940a0e9771SDarren Reed (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 18950a0e9771SDarren Reed (mod_hash_val_t *)&d); 18960a0e9771SDarren Reed ASSERT(d != NULL); 18970a0e9771SDarren Reed 18980a0e9771SDarren Reed return (d); 18990a0e9771SDarren Reed } 1900