// SPDX-License-Identifier: GPL-2.0
/* XDP sockets
 *
 * AF_XDP sockets allow a channel between XDP programs and userspace
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */

#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__

#include <linux/if_xdp.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
#include <net/xdp.h>

#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"

#define TX_BATCH_SIZE 32

static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);

void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
		return;

	pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
	pool->cached_need_wakeup |= XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_set_rx_need_wakeup);

void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
	}
	rcu_read_unlock();

	pool->cached_need_wakeup |= XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_set_tx_need_wakeup);

void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
	if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX))
		return;

	pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
	pool->cached_need_wakeup &= ~XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);

void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
	}
	rcu_read_unlock();

	pool->cached_need_wakeup &= ~XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);

bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
{
	return pool->uses_need_wakeup;
}
EXPORT_SYMBOL(xsk_uses_need_wakeup);

struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
					    u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		return dev->_rx[queue_id].pool;
	if (queue_id < dev->real_num_tx_queues)
		return dev->_tx[queue_id].pool;

	return NULL;
}
EXPORT_SYMBOL(xsk_get_pool_from_qid);

void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
{
	if (queue_id < dev->num_rx_queues)
		dev->_rx[queue_id].pool = NULL;
	if (queue_id < dev->num_tx_queues)
		dev->_tx[queue_id].pool = NULL;
}

/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
 * not know if the device has more tx queues than rx, or the opposite.
 * This might also change during run time.
 */
int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
			u16 queue_id)
{
	if (queue_id >= max_t(unsigned int,
			      dev->real_num_rx_queues,
			      dev->real_num_tx_queues))
		return -EINVAL;

	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].pool = pool;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].pool = pool;

	return 0;
}

static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
	u64 addr;
	int err;

	addr = xp_get_handle(xskb);
	err = xskq_prod_reserve_desc(xs->rx, addr, len);
	if (err) {
		xs->rx_queue_full++;
		return err;
	}

	xp_release(xskb);
	return 0;
}

static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
{
	void *from_buf, *to_buf;
	u32 metalen;

	if (unlikely(xdp_data_meta_unsupported(from))) {
		from_buf = from->data;
		to_buf = to->data;
		metalen = 0;
	} else {
		from_buf = from->data_meta;
		metalen = from->data - from->data_meta;
		to_buf = to->data - metalen;
	}

	memcpy(to_buf, from_buf, len + metalen);
}

static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	struct xdp_buff *xsk_xdp;
	int err;
	u32 len;

	len = xdp->data_end - xdp->data;
	if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	xsk_xdp = xsk_buff_alloc(xs->pool);
	if (!xsk_xdp) {
		xs->rx_dropped++;
		return -ENOMEM;
	}

	xsk_copy_xdp(xsk_xdp, xdp, len);
	err = __xsk_rcv_zc(xs, xsk_xdp, len);
	if (err) {
		xsk_buff_free(xsk_xdp);
		return err;
	}
	return 0;
}

static bool xsk_tx_writeable(struct xdp_sock *xs)
{
	if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
		return false;

	return true;
}

static bool xsk_is_bound(struct xdp_sock *xs)
{
	if (READ_ONCE(xs->state) == XSK_BOUND) {
		/* Matches smp_wmb() in bind(). */
		smp_rmb();
		return true;
	}
	return false;
}

static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	if (!xsk_is_bound(xs))
		return -ENXIO;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	sk_mark_napi_id_once_xdp(&xs->sk, xdp);
	return 0;
}

static void xsk_flush(struct xdp_sock *xs)
{
	xskq_prod_submit(xs->rx);
	__xskq_cons_release(xs->pool->fq);
	sock_def_readable(&xs->sk);
}

int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	int err;

	spin_lock_bh(&xs->rx_lock);
	err = xsk_rcv_check(xs, xdp);
	if (!err) {
		err = __xsk_rcv(xs, xdp);
		xsk_flush(xs);
	}
	spin_unlock_bh(&xs->rx_lock);
	return err;
}

static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	int err;
	u32 len;

	err = xsk_rcv_check(xs, xdp);
	if (err)
		return err;

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
		len = xdp->data_end - xdp->data;
		return __xsk_rcv_zc(xs, xdp, len);
	}

	err = __xsk_rcv(xs, xdp);
	if (!err)
		xdp_return_buff(xdp);
	return err;
}

int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
	int err;

	err = xsk_rcv(xs, xdp);
	if (err)
		return err;

	if (!xs->flush_node.prev)
		list_add(&xs->flush_node, flush_list);

	return 0;
}

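/* Descriptive note (not in the original): this flush is typically run at the
 * end of a receive batch (e.g. via xdp_do_flush()) and completes every socket
 * queued on this CPU by __xsk_map_redirect() above, submitting each Rx ring
 * once and emptying the per-cpu flush list.
 */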
void __xsk_map_flush(void)
{
	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
	struct xdp_sock *xs, *tmp;

	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
		xsk_flush(xs);
		__list_del_clearprev(&xs->flush_node);
	}
}

void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
	xskq_prod_submit_n(pool->cq, nb_entries);
}
EXPORT_SYMBOL(xsk_tx_completed);

void xsk_tx_release(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		__xskq_cons_release(xs->tx);
		if (xsk_tx_writeable(xs))
			xs->sk.sk_write_space(&xs->sk);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(xsk_tx_release);

bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
			xs->tx->queue_empty_descs++;
			continue;
		}

		/* This is the backpressure mechanism for the Tx path.
		 * Reserve space in the completion queue and only proceed
		 * if there is space in it. This avoids having to implement
		 * any buffering in the Tx path.
		 */
		if (xskq_prod_reserve_addr(pool->cq, desc->addr))
			goto out;

		xskq_cons_release(xs->tx);
		rcu_read_unlock();
		return true;
	}

out:
	rcu_read_unlock();
	return false;
}
EXPORT_SYMBOL(xsk_tx_peek_desc);

static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries)
{
	struct xdp_desc *descs = pool->tx_descs;
	u32 nb_pkts = 0;

	while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
		nb_pkts++;

	xsk_tx_release(pool);
	return nb_pkts;
}

u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 nb_pkts)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	if (!list_is_singular(&pool->xsk_tx_list)) {
		/* Fallback to the non-batched version */
		rcu_read_unlock();
		return xsk_tx_peek_release_fallback(pool, nb_pkts);
	}

	xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
	if (!xs) {
		nb_pkts = 0;
		goto out;
	}

	nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts);

	/* This is the backpressure mechanism for the Tx path. Try to
	 * reserve space in the completion queue for all packets, but
	 * if there are fewer slots available, just process that many
	 * packets. This avoids having to implement any buffering in
	 * the Tx path.
	 */
	nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts);
	if (!nb_pkts)
		goto out;

	nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);
	if (!nb_pkts) {
		xs->tx->queue_empty_descs++;
		goto out;
	}

	__xskq_cons_release(xs->tx);
	xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
	xs->sk.sk_write_space(&xs->sk);

out:
	rcu_read_unlock();
	return nb_pkts;
}
EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);

static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
	struct net_device *dev = xs->dev;

	return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}

static void xsk_destruct_skb(struct sk_buff *skb)
{
	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
	struct xdp_sock *xs = xdp_sk(skb->sk);
	unsigned long flags;

	spin_lock_irqsave(&xs->pool->cq_lock, flags);
	xskq_prod_submit_addr(xs->pool->cq, addr);
	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

	sock_wfree(skb);
}

static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
					      struct xdp_desc *desc)
{
	struct xsk_buff_pool *pool = xs->pool;
	u32 hr, len, ts, offset, copy, copied;
	struct sk_buff *skb;
	struct page *page;
	void *buffer;
	int err, i;
	u64 addr;

	hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));

	skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
	if (unlikely(!skb))
		return ERR_PTR(err);

	skb_reserve(skb, hr);

	addr = desc->addr;
	len = desc->len;
	ts = pool->unaligned ? len : pool->chunk_size;

	buffer = xsk_buff_raw_get_data(pool, addr);
	offset = offset_in_page(buffer);
	addr = buffer - pool->addrs;

	for (copied = 0, i = 0; copied < len; i++) {
		page = pool->umem->pgs[addr >> PAGE_SHIFT];
		get_page(page);

		copy = min_t(u32, PAGE_SIZE - offset, len - copied);
		skb_fill_page_desc(skb, i, page, offset, copy);

		copied += copy;
		addr += copy;
		offset = 0;
	}

	skb->len += len;
	skb->data_len += len;
	skb->truesize += ts;

	refcount_add(ts, &xs->sk.sk_wmem_alloc);

	return skb;
}

static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
				     struct xdp_desc *desc)
{
	struct net_device *dev = xs->dev;
	struct sk_buff *skb;

	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
		skb = xsk_build_skb_zerocopy(xs, desc);
		if (IS_ERR(skb))
			return skb;
	} else {
		u32 hr, tr, len;
		void *buffer;
		int err;

		hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
		tr = dev->needed_tailroom;
		len = desc->len;

		skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
		if (unlikely(!skb))
			return ERR_PTR(err);

		skb_reserve(skb, hr);
		skb_put(skb, len);

		buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
		err = skb_store_bits(skb, 0, buffer, len);
		if (unlikely(err)) {
			kfree_skb(skb);
			return ERR_PTR(err);
		}
	}

	skb->dev = dev;
	skb->priority = xs->sk.sk_priority;
	skb->mark = xs->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
	skb->destructor = xsk_destruct_skb;

	return skb;
}

static int __xsk_generic_xmit(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);
	u32 max_batch = TX_BATCH_SIZE;
	bool sent_frame = false;
	struct xdp_desc desc;
	struct sk_buff *skb;
	unsigned long flags;
	int err = 0;

	mutex_lock(&xs->mutex);

	/* Since we dropped the RCU read lock, the socket state might have changed. */
	if (unlikely(!xsk_is_bound(xs))) {
		err = -ENXIO;
		goto out;
	}

	if (xs->queue_id >= xs->dev->real_num_tx_queues)
		goto out;

	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
		if (max_batch-- == 0) {
			err = -EAGAIN;
			goto out;
		}

		/* This is the backpressure mechanism for the Tx path.
		 * Reserve space in the completion queue and only proceed
		 * if there is space in it. This avoids having to implement
		 * any buffering in the Tx path.
		 */
		spin_lock_irqsave(&xs->pool->cq_lock, flags);
		if (xskq_prod_reserve(xs->pool->cq)) {
			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
			goto out;
		}
		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

		skb = xsk_build_skb(xs, &desc);
		if (IS_ERR(skb)) {
			err = PTR_ERR(skb);
			spin_lock_irqsave(&xs->pool->cq_lock, flags);
			xskq_prod_cancel(xs->pool->cq);
			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
			goto out;
		}

		err = __dev_direct_xmit(skb, xs->queue_id);
		if (err == NETDEV_TX_BUSY) {
			/* Tell user-space to retry the send */
			skb->destructor = sock_wfree;
			spin_lock_irqsave(&xs->pool->cq_lock, flags);
			xskq_prod_cancel(xs->pool->cq);
			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
			/* Free skb without triggering the perf drop trace */
			consume_skb(skb);
			err = -EAGAIN;
			goto out;
		}

		xskq_cons_release(xs->tx);
		/* Ignore NET_XMIT_CN as packet might have been sent */
		if (err == NET_XMIT_DROP) {
			/* SKB completed but not sent */
			err = -EBUSY;
			goto out;
		}

		sent_frame = true;
	}

	xs->tx->queue_empty_descs++;

out:
	if (sent_frame)
		if (xsk_tx_writeable(xs))
			sk->sk_write_space(sk);

	mutex_unlock(&xs->mutex);
	return err;
}

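/* Descriptive note (not in the original): the sendmsg and poll paths below
 * call this wrapper while holding rcu_read_lock(). The copy-mode transmit
 * path may sleep (mutex, skb allocation), so the RCU read lock is dropped
 * around __xsk_generic_xmit() and re-taken before returning to common code.
 */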
static int xsk_generic_xmit(struct sock *sk)
{
	int ret;

	/* Drop the RCU lock since the SKB path might sleep. */
	rcu_read_unlock();
	ret = __xsk_generic_xmit(sk);
	/* Reacquire RCU lock before going into common code. */
	rcu_read_lock();

	return ret;
}

static bool xsk_no_wakeup(struct sock *sk)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* Prefer busy-polling, skip the wakeup. */
	return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) &&
	       READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID;
#else
	return false;
#endif
}

static int xsk_check_common(struct xdp_sock *xs)
{
	if (unlikely(!xsk_is_bound(xs)))
		return -ENXIO;
	if (unlikely(!(xs->dev->flags & IFF_UP)))
		return -ENETDOWN;

	return 0;
}

static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct xsk_buff_pool *pool;
	int err;

	err = xsk_check_common(xs);
	if (err)
		return err;
	if (unlikely(need_wait))
		return -EOPNOTSUPP;
	if (unlikely(!xs->tx))
		return -ENOBUFS;

	if (sk_can_busy_loop(sk)) {
		if (xs->zc)
			__sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool));
		sk_busy_loop(sk, 1); /* only support non-blocking sockets */
	}

	if (xs->zc && xsk_no_wakeup(sk))
		return 0;

	pool = xs->pool;
	if (pool->cached_need_wakeup & XDP_WAKEUP_TX) {
		if (xs->zc)
			return xsk_wakeup(xs, XDP_WAKEUP_TX);
		return xsk_generic_xmit(sk);
	}
	return 0;
}

static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	int ret;

	rcu_read_lock();
	ret = __xsk_sendmsg(sock, m, total_len);
	rcu_read_unlock();

	return ret;
}

static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
{
	bool need_wait = !(flags & MSG_DONTWAIT);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int err;

	err = xsk_check_common(xs);
	if (err)
		return err;
	if (unlikely(!xs->rx))
		return -ENOBUFS;
	if (unlikely(need_wait))
		return -EOPNOTSUPP;

	if (sk_can_busy_loop(sk))
		sk_busy_loop(sk, 1); /* only support non-blocking sockets */

	if (xsk_no_wakeup(sk))
		return 0;

	if (xs->pool->cached_need_wakeup & XDP_WAKEUP_RX && xs->zc)
		return xsk_wakeup(xs, XDP_WAKEUP_RX);
	return 0;
}

static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
{
	int ret;

	rcu_read_lock();
	ret = __xsk_recvmsg(sock, m, len, flags);
	rcu_read_unlock();

	return ret;
}

static __poll_t xsk_poll(struct file *file, struct socket *sock,
			 struct poll_table_struct *wait)
{
	__poll_t mask = 0;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct xsk_buff_pool *pool;

	sock_poll_wait(file, sock, wait);

	rcu_read_lock();
	if (xsk_check_common(xs))
		goto skip_tx;

	pool = xs->pool;

	if (pool->cached_need_wakeup) {
		if (xs->zc)
			xsk_wakeup(xs, pool->cached_need_wakeup);
		else if (xs->tx)
			/* Poll needs to drive Tx also in copy mode */
			xsk_generic_xmit(sk);
	}

skip_tx:
	if (xs->rx && !xskq_prod_is_empty(xs->rx))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (xs->tx && xsk_tx_writeable(xs))
		mask |= EPOLLOUT | EPOLLWRNORM;

	rcu_read_unlock();
	return mask;
}

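/* Descriptive note (not in the original): allocate a descriptor ring with a
 * power-of-2 number of entries and publish it. The smp_wmb() before
 * WRITE_ONCE() ensures readers such as xsk_mmap() never observe a queue
 * pointer before the queue itself is fully initialized.
 */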
static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
			  bool umem_queue)
{
	struct xsk_queue *q;

	if (entries == 0 || *queue || !is_power_of_2(entries))
		return -EINVAL;

	q = xskq_create(entries, umem_queue);
	if (!q)
		return -ENOMEM;

	/* Make sure queue is ready before it can be seen by others */
	smp_wmb();
	WRITE_ONCE(*queue, q);
	return 0;
}

static void xsk_unbind_dev(struct xdp_sock *xs)
{
	struct net_device *dev = xs->dev;

	if (xs->state != XSK_BOUND)
		return;
	WRITE_ONCE(xs->state, XSK_UNBOUND);

	/* Wait for driver to stop using the xdp socket. */
	xp_del_xsk(xs->pool, xs);
	synchronize_net();
	dev_put(dev);
}

static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
					      struct xdp_sock __rcu ***map_entry)
{
	struct xsk_map *map = NULL;
	struct xsk_map_node *node;

	*map_entry = NULL;

	spin_lock_bh(&xs->map_list_lock);
	node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
					node);
	if (node) {
		bpf_map_inc(&node->map->map);
		map = node->map;
		*map_entry = node->map_entry;
	}
	spin_unlock_bh(&xs->map_list_lock);
	return map;
}

static void xsk_delete_from_maps(struct xdp_sock *xs)
{
	/* This function removes the current XDP socket from all the
	 * maps it resides in. We need to take extra care here, due to
	 * the two locks involved. Each map has a lock synchronizing
	 * updates to the entries, and each socket has a lock that
	 * synchronizes access to the list of maps (map_list). For
	 * deadlock avoidance the locks need to be taken in the order
	 * "map lock"->"socket map list lock". We start off by
	 * accessing the socket map list, and take a reference to the
	 * map to guarantee existence between the
	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
	 * calls. Then we ask the map to remove the socket, which
	 * tries to remove the socket from the map. Note that there
	 * might be updates to the map between
	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
	 */
	struct xdp_sock __rcu **map_entry = NULL;
	struct xsk_map *map;

	while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
		xsk_map_try_sock_delete(map, xs, map_entry);
		bpf_map_put(&map->map);
	}
}

static int xsk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net *net;

	if (!sk)
		return 0;

	net = sock_net(sk);

	mutex_lock(&net->xdp.lock);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&net->xdp.lock);

	sock_prot_inuse_add(net, sk->sk_prot, -1);

	xsk_delete_from_maps(xs);
	mutex_lock(&xs->mutex);
	xsk_unbind_dev(xs);
	mutex_unlock(&xs->mutex);

	xskq_destroy(xs->rx);
	xskq_destroy(xs->tx);
	xskq_destroy(xs->fq_tmp);
	xskq_destroy(xs->cq_tmp);

	sock_orphan(sk);
	sock->sk = NULL;

	sock_put(sk);

	return 0;
}

static struct socket *xsk_lookup_xsk_from_fd(int fd)
{
	struct socket *sock;
	int err;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return ERR_PTR(-ENOPROTOOPT);
	}

	return sock;
}

static bool xsk_validate_queues(struct xdp_sock *xs)
{
	return xs->fq_tmp && xs->cq_tmp;
}

static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev;
	int bound_dev_if;
	u32 flags, qid;
	int err = 0;

	if (addr_len < sizeof(struct sockaddr_xdp))
		return -EINVAL;
	if (sxdp->sxdp_family != AF_XDP)
		return -EINVAL;

	flags = sxdp->sxdp_flags;
	if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
		      XDP_USE_NEED_WAKEUP))
		return -EINVAL;

	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
	if (bound_dev_if && bound_dev_if != sxdp->sxdp_ifindex)
		return -EINVAL;

	rtnl_lock();
	mutex_lock(&xs->mutex);
	if (xs->state != XSK_READY) {
		err = -EBUSY;
		goto out_release;
	}

	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
	if (!dev) {
		err = -ENODEV;
		goto out_release;
	}

	if (!xs->rx && !xs->tx) {
		err = -EINVAL;
		goto out_unlock;
	}

	qid = sxdp->sxdp_queue_id;

	if (flags & XDP_SHARED_UMEM) {
		struct xdp_sock *umem_xs;
		struct socket *sock;

		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
		    (flags & XDP_USE_NEED_WAKEUP)) {
			/* Cannot specify flags for shared sockets. */
			err = -EINVAL;
			goto out_unlock;
		}

		if (xs->umem) {
			/* We already have our own. */
			err = -EINVAL;
			goto out_unlock;
		}

		sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
		if (IS_ERR(sock)) {
			err = PTR_ERR(sock);
			goto out_unlock;
		}

		umem_xs = xdp_sk(sock->sk);
		if (!xsk_is_bound(umem_xs)) {
			err = -EBADF;
			sockfd_put(sock);
			goto out_unlock;
		}

		if (umem_xs->queue_id != qid || umem_xs->dev != dev) {
			/* Share the umem with another socket on another qid
			 * and/or device.
			 */
			xs->pool = xp_create_and_assign_umem(xs,
							     umem_xs->umem);
			if (!xs->pool) {
				err = -ENOMEM;
				sockfd_put(sock);
				goto out_unlock;
			}

			err = xp_assign_dev_shared(xs->pool, umem_xs, dev,
						   qid);
			if (err) {
				xp_destroy(xs->pool);
				xs->pool = NULL;
				sockfd_put(sock);
				goto out_unlock;
			}
		} else {
			/* Share the buffer pool with the other socket. */
			if (xs->fq_tmp || xs->cq_tmp) {
				/* Do not allow setting your own fq or cq. */
				err = -EINVAL;
				sockfd_put(sock);
				goto out_unlock;
			}

			xp_get_pool(umem_xs->pool);
			xs->pool = umem_xs->pool;

			/* If underlying shared umem was created without Tx
			 * ring, allocate Tx descs array that Tx batching API
			 * utilizes
			 */
			if (xs->tx && !xs->pool->tx_descs) {
				err = xp_alloc_tx_descs(xs->pool, xs);
				if (err) {
					xp_put_pool(xs->pool);
					sockfd_put(sock);
					goto out_unlock;
				}
			}
		}

		xdp_get_umem(umem_xs->umem);
		WRITE_ONCE(xs->umem, umem_xs->umem);
		sockfd_put(sock);
	} else if (!xs->umem || !xsk_validate_queues(xs)) {
		err = -EINVAL;
		goto out_unlock;
	} else {
		/* This xsk has its own umem. */
		xs->pool = xp_create_and_assign_umem(xs, xs->umem);
		if (!xs->pool) {
			err = -ENOMEM;
			goto out_unlock;
		}

		err = xp_assign_dev(xs->pool, dev, qid, flags);
		if (err) {
			xp_destroy(xs->pool);
			xs->pool = NULL;
			goto out_unlock;
		}
	}

	/* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
	xs->fq_tmp = NULL;
	xs->cq_tmp = NULL;

	xs->dev = dev;
	xs->zc = xs->umem->zc;
	xs->queue_id = qid;
	xp_add_xsk(xs->pool, xs);

out_unlock:
	if (err) {
		dev_put(dev);
	} else {
		/* Matches smp_rmb() in bind() for shared umem
		 * sockets, and xsk_is_bound().
		 */
		smp_wmb();
		WRITE_ONCE(xs->state, XSK_BOUND);
	}
out_release:
	mutex_unlock(&xs->mutex);
	rtnl_unlock();
	return err;
}

struct xdp_umem_reg_v1 {
	__u64 addr; /* Start of packet data area */
	__u64 len; /* Length of packet data area */
	__u32 chunk_size;
	__u32 headroom;
};

static int xsk_setsockopt(struct socket *sock, int level, int optname,
			  sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int err;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	switch (optname) {
	case XDP_RX_RING:
	case XDP_TX_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (optlen < sizeof(entries))
			return -EINVAL;
		if (copy_from_sockptr(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (xs->state != XSK_READY) {
			mutex_unlock(&xs->mutex);
			return -EBUSY;
		}
		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
		err = xsk_init_queue(entries, q, false);
		if (!err && optname == XDP_TX_RING)
			/* Tx needs to be explicitly woken up the first time */
			xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
		mutex_unlock(&xs->mutex);
		return err;
	}
	case XDP_UMEM_REG:
	{
		size_t mr_size = sizeof(struct xdp_umem_reg);
		struct xdp_umem_reg mr = {};
		struct xdp_umem *umem;

		if (optlen < sizeof(struct xdp_umem_reg_v1))
			return -EINVAL;
		else if (optlen < sizeof(mr))
			mr_size = sizeof(struct xdp_umem_reg_v1);

		if (copy_from_sockptr(&mr, optval, mr_size))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (xs->state != XSK_READY || xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EBUSY;
		}

		umem = xdp_umem_create(&mr);
		if (IS_ERR(umem)) {
			mutex_unlock(&xs->mutex);
			return PTR_ERR(umem);
		}

		/* Make sure umem is ready before it can be seen by others */
		smp_wmb();
		WRITE_ONCE(xs->umem, umem);
		mutex_unlock(&xs->mutex);
		return 0;
	}
	case XDP_UMEM_FILL_RING:
	case XDP_UMEM_COMPLETION_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (copy_from_sockptr(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (xs->state != XSK_READY) {
			mutex_unlock(&xs->mutex);
			return -EBUSY;
		}

		q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp :
			&xs->cq_tmp;
		err = xsk_init_queue(entries, q, true);
		mutex_unlock(&xs->mutex);
		return err;
	}
	default:
		break;
	}

	return -ENOPROTOOPT;
}

static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
{
	ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
	ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
	ring->desc = offsetof(struct xdp_rxtx_ring, desc);
}

static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
{
	ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
	ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
	ring->desc = offsetof(struct xdp_umem_ring, desc);
}

struct xdp_statistics_v1 {
	__u64 rx_dropped;
	__u64 rx_invalid_descs;
	__u64 tx_invalid_descs;
};

static int xsk_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int len;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case XDP_STATISTICS:
	{
		struct xdp_statistics stats = {};
		bool extra_stats = true;
		size_t stats_size;

		if (len < sizeof(struct xdp_statistics_v1)) {
			return -EINVAL;
		} else if (len < sizeof(stats)) {
			extra_stats = false;
			stats_size = sizeof(struct xdp_statistics_v1);
		} else {
			stats_size = sizeof(stats);
		}

		mutex_lock(&xs->mutex);
		stats.rx_dropped = xs->rx_dropped;
		if (extra_stats) {
			stats.rx_ring_full = xs->rx_queue_full;
			stats.rx_fill_ring_empty_descs =
				xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;

static int xsk_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int len;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case XDP_STATISTICS:
	{
		struct xdp_statistics stats = {};
		bool extra_stats = true;
		size_t stats_size;

		if (len < sizeof(struct xdp_statistics_v1)) {
			return -EINVAL;
		} else if (len < sizeof(stats)) {
			extra_stats = false;
			stats_size = sizeof(struct xdp_statistics_v1);
		} else {
			stats_size = sizeof(stats);
		}

		mutex_lock(&xs->mutex);
		stats.rx_dropped = xs->rx_dropped;
		if (extra_stats) {
			stats.rx_ring_full = xs->rx_queue_full;
			stats.rx_fill_ring_empty_descs =
				xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
			stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
		} else {
			stats.rx_dropped += xs->rx_queue_full;
		}
		stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
		stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
		mutex_unlock(&xs->mutex);

		if (copy_to_user(optval, &stats, stats_size))
			return -EFAULT;
		if (put_user(stats_size, optlen))
			return -EFAULT;

		return 0;
	}
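	/* XDP_MMAP_OFFSETS tells user space where, inside each mmap()ed ring
	 * region, the producer and consumer indices, the descriptor array and
	 * (on newer kernels) the flags word live. The shorter _v1 layout is
	 * kept for callers that predate the flags field.
	 */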
	case XDP_MMAP_OFFSETS:
	{
		struct xdp_mmap_offsets off;
		struct xdp_mmap_offsets_v1 off_v1;
		bool flags_supported = true;
		void *to_copy;

		if (len < sizeof(off_v1))
			return -EINVAL;
		else if (len < sizeof(off))
			flags_supported = false;

		if (flags_supported) {
			/* xdp_ring_offset is identical to xdp_ring_offset_v1
			 * except for the flags field added to the end.
			 */
			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
					       &off.rx);
			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
					       &off.tx);
			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
					       &off.fr);
			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
					       &off.cr);
			off.rx.flags = offsetof(struct xdp_rxtx_ring,
						ptrs.flags);
			off.tx.flags = offsetof(struct xdp_rxtx_ring,
						ptrs.flags);
			off.fr.flags = offsetof(struct xdp_umem_ring,
						ptrs.flags);
			off.cr.flags = offsetof(struct xdp_umem_ring,
						ptrs.flags);

			len = sizeof(off);
			to_copy = &off;
		} else {
			xsk_enter_rxtx_offsets(&off_v1.rx);
			xsk_enter_rxtx_offsets(&off_v1.tx);
			xsk_enter_umem_offsets(&off_v1.fr);
			xsk_enter_umem_offsets(&off_v1.cr);

			len = sizeof(off_v1);
			to_copy = &off_v1;
		}

		if (copy_to_user(optval, to_copy, len))
			return -EFAULT;
		if (put_user(len, optlen))
			return -EFAULT;

		return 0;
	}
	case XDP_OPTIONS:
	{
		struct xdp_options opts = {};

		if (len < sizeof(opts))
			return -EINVAL;

		mutex_lock(&xs->mutex);
		if (xs->zc)
			opts.flags |= XDP_OPTIONS_ZEROCOPY;
		mutex_unlock(&xs->mutex);

		len = sizeof(opts);
		if (copy_to_user(optval, &opts, len))
			return -EFAULT;
		if (put_user(len, optlen))
			return -EFAULT;

		return 0;
	}
	default:
		break;
	}

	return -EOPNOTSUPP;
}
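
/* Illustrative user-space mapping of the Rx ring (a sketch; NUM_DESCS stands
 * for whatever ring size was configured via XDP_RX_RING, and "off" was filled
 * in by XDP_MMAP_OFFSETS above):
 *
 *	rx_map = mmap(NULL, off.rx.desc + NUM_DESCS * sizeof(struct xdp_desc),
 *		      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
 *		      fd, XDP_PGOFF_RX_RING);
 *
 * xsk_mmap() below selects the kernel-side ring based on that page offset.
 */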

static int xsk_mmap(struct file *file, struct socket *sock,
		    struct vm_area_struct *vma)
{
	loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
	unsigned long size = vma->vm_end - vma->vm_start;
	struct xdp_sock *xs = xdp_sk(sock->sk);
	int state = READ_ONCE(xs->state);
	struct xsk_queue *q = NULL;

	if (state != XSK_READY && state != XSK_BOUND)
		return -EBUSY;

	if (offset == XDP_PGOFF_RX_RING) {
		q = READ_ONCE(xs->rx);
	} else if (offset == XDP_PGOFF_TX_RING) {
		q = READ_ONCE(xs->tx);
	} else {
		/* Matches the smp_wmb() in XDP_UMEM_REG */
		smp_rmb();
		if (offset == XDP_UMEM_PGOFF_FILL_RING)
			q = state == XSK_READY ? READ_ONCE(xs->fq_tmp) :
						 READ_ONCE(xs->pool->fq);
		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
			q = state == XSK_READY ? READ_ONCE(xs->cq_tmp) :
						 READ_ONCE(xs->pool->cq);
	}

	if (!q)
		return -EINVAL;

	/* Matches the smp_wmb() in xsk_init_queue */
	smp_rmb();
	if (size > q->ring_vmalloc_size)
		return -EINVAL;

	return remap_vmalloc_range(vma, q->ring, 0);
}

static int xsk_notifier(struct notifier_block *this,
			unsigned long msg, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct sock *sk;

	switch (msg) {
	case NETDEV_UNREGISTER:
		mutex_lock(&net->xdp.lock);
		sk_for_each(sk, &net->xdp.list) {
			struct xdp_sock *xs = xdp_sk(sk);

			mutex_lock(&xs->mutex);
			if (xs->dev == dev) {
				sk->sk_err = ENETDOWN;
				if (!sock_flag(sk, SOCK_DEAD))
					sk_error_report(sk);

				xsk_unbind_dev(xs);

				/* Clear device references. */
				xp_clear_dev(xs->pool);
			}
			mutex_unlock(&xs->mutex);
		}
		mutex_unlock(&net->xdp.lock);
		break;
	}
	return NOTIFY_DONE;
}

static struct proto xsk_proto = {
	.name = "XDP",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct xdp_sock),
};

static const struct proto_ops xsk_proto_ops = {
	.family		= PF_XDP,
	.owner		= THIS_MODULE,
	.release	= xsk_release,
	.bind		= xsk_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= xsk_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= xsk_setsockopt,
	.getsockopt	= xsk_getsockopt,
	.sendmsg	= xsk_sendmsg,
	.recvmsg	= xsk_recvmsg,
	.mmap		= xsk_mmap,
};

static void xsk_destruct(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);

	if (!sock_flag(sk, SOCK_DEAD))
		return;

	if (!xp_put_pool(xs->pool))
		xdp_put_umem(xs->umem, !xs->pool);
}
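
/* Illustrative user-space entry point (a sketch): creating the socket handled
 * by xsk_create() below requires CAP_NET_RAW and always uses SOCK_RAW with
 * protocol 0:
 *
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *
 * The UMEM and rings are then configured via setsockopt() and the socket is
 * attached to a device/queue pair with bind().
 */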

static int xsk_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct xdp_sock *xs;
	struct sock *sk;

	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_RAW)
		return -ESOCKTNOSUPPORT;

	if (protocol)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
	if (!sk)
		return -ENOBUFS;

	sock->ops = &xsk_proto_ops;

	sock_init_data(sock, sk);

	sk->sk_family = PF_XDP;

	sk->sk_destruct = xsk_destruct;

	sock_set_flag(sk, SOCK_RCU_FREE);

	xs = xdp_sk(sk);
	xs->state = XSK_READY;
	mutex_init(&xs->mutex);
	spin_lock_init(&xs->rx_lock);

	INIT_LIST_HEAD(&xs->map_list);
	spin_lock_init(&xs->map_list_lock);

	mutex_lock(&net->xdp.lock);
	sk_add_node_rcu(sk, &net->xdp.list);
	mutex_unlock(&net->xdp.lock);

	sock_prot_inuse_add(net, &xsk_proto, 1);

	return 0;
}

static const struct net_proto_family xsk_family_ops = {
	.family = PF_XDP,
	.create = xsk_create,
	.owner = THIS_MODULE,
};

static struct notifier_block xsk_netdev_notifier = {
	.notifier_call = xsk_notifier,
};

static int __net_init xsk_net_init(struct net *net)
{
	mutex_init(&net->xdp.lock);
	INIT_HLIST_HEAD(&net->xdp.list);
	return 0;
}

static void __net_exit xsk_net_exit(struct net *net)
{
	WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
}

static struct pernet_operations xsk_net_ops = {
	.init = xsk_net_init,
	.exit = xsk_net_exit,
};
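
/* Bring-up order: protocol, socket family, per-netns state, netdev notifier,
 * then the per-CPU flush lists used by the XSKMAP redirect path. Each error
 * label below unwinds exactly the steps that already succeeded, in reverse.
 */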
static int __init xsk_init(void)
{
	int err, cpu;

	err = proto_register(&xsk_proto, 0 /* no slab */);
	if (err)
		goto out;

	err = sock_register(&xsk_family_ops);
	if (err)
		goto out_proto;

	err = register_pernet_subsys(&xsk_net_ops);
	if (err)
		goto out_sk;

	err = register_netdevice_notifier(&xsk_netdev_notifier);
	if (err)
		goto out_pernet;

	for_each_possible_cpu(cpu)
		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
	return 0;

out_pernet:
	unregister_pernet_subsys(&xsk_net_ops);
out_sk:
	sock_unregister(PF_XDP);
out_proto:
	proto_unregister(&xsk_proto);
out:
	return err;
}

fs_initcall(xsk_init);
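
/* There is no matching module exit: AF_XDP support can only be built into the
 * kernel (CONFIG_XDP_SOCKETS is a bool), so xsk_init() runs once at boot via
 * fs_initcall() and is never unregistered.
 */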