1c0c77d8fSBjörn Töpel // SPDX-License-Identifier: GPL-2.0 2c0c77d8fSBjörn Töpel /* XDP sockets 3c0c77d8fSBjörn Töpel * 4c0c77d8fSBjörn Töpel * AF_XDP sockets allows a channel between XDP programs and userspace 5c0c77d8fSBjörn Töpel * applications. 6c0c77d8fSBjörn Töpel * Copyright(c) 2018 Intel Corporation. 7c0c77d8fSBjörn Töpel * 8c0c77d8fSBjörn Töpel * Author(s): Björn Töpel <bjorn.topel@intel.com> 9c0c77d8fSBjörn Töpel * Magnus Karlsson <magnus.karlsson@intel.com> 10c0c77d8fSBjörn Töpel */ 11c0c77d8fSBjörn Töpel 12c0c77d8fSBjörn Töpel #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__ 13c0c77d8fSBjörn Töpel 14c0c77d8fSBjörn Töpel #include <linux/if_xdp.h> 15c0c77d8fSBjörn Töpel #include <linux/init.h> 16c0c77d8fSBjörn Töpel #include <linux/sched/mm.h> 17c0c77d8fSBjörn Töpel #include <linux/sched/signal.h> 18c0c77d8fSBjörn Töpel #include <linux/sched/task.h> 19c0c77d8fSBjörn Töpel #include <linux/socket.h> 20c0c77d8fSBjörn Töpel #include <linux/file.h> 21c0c77d8fSBjörn Töpel #include <linux/uaccess.h> 22c0c77d8fSBjörn Töpel #include <linux/net.h> 23c0c77d8fSBjörn Töpel #include <linux/netdevice.h> 24ac98d8aaSMagnus Karlsson #include <linux/rculist.h> 25a71506a4SMagnus Karlsson #include <net/xdp_sock_drv.h> 26b9b6b68eSBjörn Töpel #include <net/xdp.h> 27c0c77d8fSBjörn Töpel 28423f3832SMagnus Karlsson #include "xsk_queue.h" 29c0c77d8fSBjörn Töpel #include "xdp_umem.h" 30a36b38aaSBjörn Töpel #include "xsk.h" 31c0c77d8fSBjörn Töpel 3235fcde7fSMagnus Karlsson #define TX_BATCH_SIZE 16 3335fcde7fSMagnus Karlsson 34e312b9e7SBjörn Töpel static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); 35e312b9e7SBjörn Töpel 36fbfc504aSBjörn Töpel bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) 37fbfc504aSBjörn Töpel { 38173d3adbSBjörn Töpel return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) && 39173d3adbSBjörn Töpel READ_ONCE(xs->umem->fq); 40fbfc504aSBjörn Töpel } 41fbfc504aSBjörn Töpel 4277cd0d7bSMagnus Karlsson void xsk_set_rx_need_wakeup(struct xdp_umem *umem) 4377cd0d7bSMagnus Karlsson { 4477cd0d7bSMagnus Karlsson if (umem->need_wakeup & XDP_WAKEUP_RX) 4577cd0d7bSMagnus Karlsson return; 4677cd0d7bSMagnus Karlsson 4777cd0d7bSMagnus Karlsson umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP; 4877cd0d7bSMagnus Karlsson umem->need_wakeup |= XDP_WAKEUP_RX; 4977cd0d7bSMagnus Karlsson } 5077cd0d7bSMagnus Karlsson EXPORT_SYMBOL(xsk_set_rx_need_wakeup); 5177cd0d7bSMagnus Karlsson 5277cd0d7bSMagnus Karlsson void xsk_set_tx_need_wakeup(struct xdp_umem *umem) 5377cd0d7bSMagnus Karlsson { 5477cd0d7bSMagnus Karlsson struct xdp_sock *xs; 5577cd0d7bSMagnus Karlsson 5677cd0d7bSMagnus Karlsson if (umem->need_wakeup & XDP_WAKEUP_TX) 5777cd0d7bSMagnus Karlsson return; 5877cd0d7bSMagnus Karlsson 5977cd0d7bSMagnus Karlsson rcu_read_lock(); 60e4e5aefcSMagnus Karlsson list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { 6177cd0d7bSMagnus Karlsson xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; 6277cd0d7bSMagnus Karlsson } 6377cd0d7bSMagnus Karlsson rcu_read_unlock(); 6477cd0d7bSMagnus Karlsson 6577cd0d7bSMagnus Karlsson umem->need_wakeup |= XDP_WAKEUP_TX; 6677cd0d7bSMagnus Karlsson } 6777cd0d7bSMagnus Karlsson EXPORT_SYMBOL(xsk_set_tx_need_wakeup); 6877cd0d7bSMagnus Karlsson 6977cd0d7bSMagnus Karlsson void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) 7077cd0d7bSMagnus Karlsson { 7177cd0d7bSMagnus Karlsson if (!(umem->need_wakeup & XDP_WAKEUP_RX)) 7277cd0d7bSMagnus Karlsson return; 7377cd0d7bSMagnus Karlsson 7477cd0d7bSMagnus Karlsson umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP; 7577cd0d7bSMagnus Karlsson umem->need_wakeup &= ~XDP_WAKEUP_RX; 7677cd0d7bSMagnus Karlsson } 7777cd0d7bSMagnus Karlsson EXPORT_SYMBOL(xsk_clear_rx_need_wakeup); 7877cd0d7bSMagnus Karlsson 7977cd0d7bSMagnus Karlsson void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) 8077cd0d7bSMagnus Karlsson { 8177cd0d7bSMagnus Karlsson struct xdp_sock *xs; 8277cd0d7bSMagnus Karlsson 8377cd0d7bSMagnus Karlsson if (!(umem->need_wakeup & XDP_WAKEUP_TX)) 8477cd0d7bSMagnus Karlsson return; 8577cd0d7bSMagnus Karlsson 8677cd0d7bSMagnus Karlsson rcu_read_lock(); 87e4e5aefcSMagnus Karlsson list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { 8877cd0d7bSMagnus Karlsson xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP; 8977cd0d7bSMagnus Karlsson } 9077cd0d7bSMagnus Karlsson rcu_read_unlock(); 9177cd0d7bSMagnus Karlsson 9277cd0d7bSMagnus Karlsson umem->need_wakeup &= ~XDP_WAKEUP_TX; 9377cd0d7bSMagnus Karlsson } 9477cd0d7bSMagnus Karlsson EXPORT_SYMBOL(xsk_clear_tx_need_wakeup); 9577cd0d7bSMagnus Karlsson 9677cd0d7bSMagnus Karlsson bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) 9777cd0d7bSMagnus Karlsson { 9877cd0d7bSMagnus Karlsson return umem->flags & XDP_UMEM_USES_NEED_WAKEUP; 9977cd0d7bSMagnus Karlsson } 10077cd0d7bSMagnus Karlsson EXPORT_SYMBOL(xsk_umem_uses_need_wakeup); 10177cd0d7bSMagnus Karlsson 10226062b18SBjörn Töpel void xp_release(struct xdp_buff_xsk *xskb) 10326062b18SBjörn Töpel { 10426062b18SBjörn Töpel xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; 10526062b18SBjörn Töpel } 10626062b18SBjörn Töpel 10726062b18SBjörn Töpel static u64 xp_get_handle(struct xdp_buff_xsk *xskb) 10826062b18SBjörn Töpel { 10926062b18SBjörn Töpel u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; 11026062b18SBjörn Töpel 11126062b18SBjörn Töpel offset += xskb->pool->headroom; 11226062b18SBjörn Töpel if (!xskb->pool->unaligned) 11326062b18SBjörn Töpel return xskb->orig_addr + offset; 11426062b18SBjörn Töpel return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); 11526062b18SBjörn Töpel } 11626062b18SBjörn Töpel 1172b43470aSBjörn Töpel static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) 118c05cd364SKevin Laatz { 1192b43470aSBjörn Töpel struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); 1202b43470aSBjörn Töpel u64 addr; 1212b43470aSBjörn Töpel int err; 122c05cd364SKevin Laatz 1232b43470aSBjörn Töpel addr = xp_get_handle(xskb); 1242b43470aSBjörn Töpel err = xskq_prod_reserve_desc(xs->rx, addr, len); 1252b43470aSBjörn Töpel if (err) { 1268aa5a335SCiara Loftus xs->rx_queue_full++; 1272b43470aSBjörn Töpel return err; 1282b43470aSBjörn Töpel } 129c05cd364SKevin Laatz 1302b43470aSBjörn Töpel xp_release(xskb); 1312b43470aSBjörn Töpel return 0; 1322b43470aSBjörn Töpel } 133c05cd364SKevin Laatz 1342b43470aSBjörn Töpel static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len) 1352b43470aSBjörn Töpel { 1362b43470aSBjörn Töpel void *from_buf, *to_buf; 1372b43470aSBjörn Töpel u32 metalen; 1382b43470aSBjörn Töpel 1392b43470aSBjörn Töpel if (unlikely(xdp_data_meta_unsupported(from))) { 1402b43470aSBjörn Töpel from_buf = from->data; 1412b43470aSBjörn Töpel to_buf = to->data; 1422b43470aSBjörn Töpel metalen = 0; 1432b43470aSBjörn Töpel } else { 1442b43470aSBjörn Töpel from_buf = from->data_meta; 1452b43470aSBjörn Töpel metalen = from->data - from->data_meta; 1462b43470aSBjörn Töpel to_buf = to->data - metalen; 147c05cd364SKevin Laatz } 148c05cd364SKevin Laatz 149c05cd364SKevin Laatz memcpy(to_buf, from_buf, len + metalen); 150c05cd364SKevin Laatz } 151c05cd364SKevin Laatz 1522b43470aSBjörn Töpel static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len, 1532b43470aSBjörn Töpel bool explicit_free) 154173d3adbSBjörn Töpel { 1552b43470aSBjörn Töpel struct xdp_buff *xsk_xdp; 1564e64c835SBjörn Töpel int err; 157c497176cSBjörn Töpel 1582b43470aSBjörn Töpel if (len > xsk_umem_get_rx_frame_size(xs->umem)) { 159a509a955SBjörn Töpel xs->rx_dropped++; 160c497176cSBjörn Töpel return -ENOSPC; 161a509a955SBjörn Töpel } 162c497176cSBjörn Töpel 1632b43470aSBjörn Töpel xsk_xdp = xsk_buff_alloc(xs->umem); 1642b43470aSBjörn Töpel if (!xsk_xdp) { 1652b43470aSBjörn Töpel xs->rx_dropped++; 1662b43470aSBjörn Töpel return -ENOSPC; 16718baed26SBjörn Töpel } 16818baed26SBjörn Töpel 1692b43470aSBjörn Töpel xsk_copy_xdp(xsk_xdp, xdp, len); 1702b43470aSBjörn Töpel err = __xsk_rcv_zc(xs, xsk_xdp, len); 1712b43470aSBjörn Töpel if (err) { 1722b43470aSBjörn Töpel xsk_buff_free(xsk_xdp); 1732b43470aSBjörn Töpel return err; 1742b43470aSBjörn Töpel } 1752b43470aSBjörn Töpel if (explicit_free) 176173d3adbSBjörn Töpel xdp_return_buff(xdp); 177173d3adbSBjörn Töpel return 0; 178173d3adbSBjörn Töpel } 179173d3adbSBjörn Töpel 18042fddcc7SBjörn Töpel static bool xsk_is_bound(struct xdp_sock *xs) 18142fddcc7SBjörn Töpel { 18242fddcc7SBjörn Töpel if (READ_ONCE(xs->state) == XSK_BOUND) { 18342fddcc7SBjörn Töpel /* Matches smp_wmb() in bind(). */ 18442fddcc7SBjörn Töpel smp_rmb(); 18542fddcc7SBjörn Töpel return true; 18642fddcc7SBjörn Töpel } 18742fddcc7SBjörn Töpel return false; 18842fddcc7SBjörn Töpel } 18942fddcc7SBjörn Töpel 1902b43470aSBjörn Töpel static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, 1912b43470aSBjörn Töpel bool explicit_free) 192c497176cSBjörn Töpel { 193173d3adbSBjörn Töpel u32 len; 194c497176cSBjörn Töpel 19542fddcc7SBjörn Töpel if (!xsk_is_bound(xs)) 19642fddcc7SBjörn Töpel return -EINVAL; 19742fddcc7SBjörn Töpel 198173d3adbSBjörn Töpel if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) 199173d3adbSBjörn Töpel return -EINVAL; 200c497176cSBjörn Töpel 201173d3adbSBjörn Töpel len = xdp->data_end - xdp->data; 202173d3adbSBjörn Töpel 2030807892eSBjörn Töpel return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ? 2042b43470aSBjörn Töpel __xsk_rcv_zc(xs, xdp, len) : 2052b43470aSBjörn Töpel __xsk_rcv(xs, xdp, len, explicit_free); 206c497176cSBjörn Töpel } 207c497176cSBjörn Töpel 208d817991cSBjörn Töpel static void xsk_flush(struct xdp_sock *xs) 209c497176cSBjörn Töpel { 21059e35e55SMagnus Karlsson xskq_prod_submit(xs->rx); 21130744a68SMagnus Karlsson __xskq_cons_release(xs->umem->fq); 21243a825afSBjörn Töpel sock_def_readable(&xs->sk); 213c497176cSBjörn Töpel } 214c497176cSBjörn Töpel 215c497176cSBjörn Töpel int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) 216c497176cSBjörn Töpel { 217c497176cSBjörn Töpel int err; 218c497176cSBjörn Töpel 219bf0bdd13SIlya Maximets spin_lock_bh(&xs->rx_lock); 2202b43470aSBjörn Töpel err = xsk_rcv(xs, xdp, false); 2212b43470aSBjörn Töpel xsk_flush(xs); 222bf0bdd13SIlya Maximets spin_unlock_bh(&xs->rx_lock); 223c497176cSBjörn Töpel return err; 224c497176cSBjörn Töpel } 225c497176cSBjörn Töpel 226e312b9e7SBjörn Töpel int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) 227d817991cSBjörn Töpel { 228e312b9e7SBjörn Töpel struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); 229d817991cSBjörn Töpel int err; 230d817991cSBjörn Töpel 2312b43470aSBjörn Töpel err = xsk_rcv(xs, xdp, true); 232d817991cSBjörn Töpel if (err) 233d817991cSBjörn Töpel return err; 234d817991cSBjörn Töpel 235d817991cSBjörn Töpel if (!xs->flush_node.prev) 236d817991cSBjörn Töpel list_add(&xs->flush_node, flush_list); 237d817991cSBjörn Töpel 238d817991cSBjörn Töpel return 0; 239d817991cSBjörn Töpel } 240d817991cSBjörn Töpel 241e312b9e7SBjörn Töpel void __xsk_map_flush(void) 242d817991cSBjörn Töpel { 243e312b9e7SBjörn Töpel struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); 244d817991cSBjörn Töpel struct xdp_sock *xs, *tmp; 245d817991cSBjörn Töpel 246d817991cSBjörn Töpel list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { 247d817991cSBjörn Töpel xsk_flush(xs); 248d817991cSBjörn Töpel __list_del_clearprev(&xs->flush_node); 249d817991cSBjörn Töpel } 250d817991cSBjörn Töpel } 251d817991cSBjörn Töpel 252ac98d8aaSMagnus Karlsson void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) 253ac98d8aaSMagnus Karlsson { 25459e35e55SMagnus Karlsson xskq_prod_submit_n(umem->cq, nb_entries); 255ac98d8aaSMagnus Karlsson } 256ac98d8aaSMagnus Karlsson EXPORT_SYMBOL(xsk_umem_complete_tx); 257ac98d8aaSMagnus Karlsson 258ac98d8aaSMagnus Karlsson void xsk_umem_consume_tx_done(struct xdp_umem *umem) 259ac98d8aaSMagnus Karlsson { 260ac98d8aaSMagnus Karlsson struct xdp_sock *xs; 261ac98d8aaSMagnus Karlsson 262ac98d8aaSMagnus Karlsson rcu_read_lock(); 263e4e5aefcSMagnus Karlsson list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { 26430744a68SMagnus Karlsson __xskq_cons_release(xs->tx); 265ac98d8aaSMagnus Karlsson xs->sk.sk_write_space(&xs->sk); 266ac98d8aaSMagnus Karlsson } 267ac98d8aaSMagnus Karlsson rcu_read_unlock(); 268ac98d8aaSMagnus Karlsson } 269ac98d8aaSMagnus Karlsson EXPORT_SYMBOL(xsk_umem_consume_tx_done); 270ac98d8aaSMagnus Karlsson 2714bce4e5cSMaxim Mikityanskiy bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) 272ac98d8aaSMagnus Karlsson { 273ac98d8aaSMagnus Karlsson struct xdp_sock *xs; 274ac98d8aaSMagnus Karlsson 275ac98d8aaSMagnus Karlsson rcu_read_lock(); 276e4e5aefcSMagnus Karlsson list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { 2778aa5a335SCiara Loftus if (!xskq_cons_peek_desc(xs->tx, desc, umem)) { 2788aa5a335SCiara Loftus xs->tx->queue_empty_descs++; 279ac98d8aaSMagnus Karlsson continue; 2808aa5a335SCiara Loftus } 281ac98d8aaSMagnus Karlsson 2820a05861fSTobias Klauser /* This is the backpressure mechanism for the Tx path. 28315d8c916SMagnus Karlsson * Reserve space in the completion queue and only proceed 28415d8c916SMagnus Karlsson * if there is space in it. This avoids having to implement 28515d8c916SMagnus Karlsson * any buffering in the Tx path. 28615d8c916SMagnus Karlsson */ 28759e35e55SMagnus Karlsson if (xskq_prod_reserve_addr(umem->cq, desc->addr)) 288ac98d8aaSMagnus Karlsson goto out; 289ac98d8aaSMagnus Karlsson 290c5ed924bSMagnus Karlsson xskq_cons_release(xs->tx); 291ac98d8aaSMagnus Karlsson rcu_read_unlock(); 292ac98d8aaSMagnus Karlsson return true; 293ac98d8aaSMagnus Karlsson } 294ac98d8aaSMagnus Karlsson 295ac98d8aaSMagnus Karlsson out: 296ac98d8aaSMagnus Karlsson rcu_read_unlock(); 297ac98d8aaSMagnus Karlsson return false; 298ac98d8aaSMagnus Karlsson } 299ac98d8aaSMagnus Karlsson EXPORT_SYMBOL(xsk_umem_consume_tx); 300ac98d8aaSMagnus Karlsson 30106870682SMaxim Mikityanskiy static int xsk_wakeup(struct xdp_sock *xs, u8 flags) 302ac98d8aaSMagnus Karlsson { 303ac98d8aaSMagnus Karlsson struct net_device *dev = xs->dev; 30406870682SMaxim Mikityanskiy int err; 305ac98d8aaSMagnus Karlsson 30606870682SMaxim Mikityanskiy rcu_read_lock(); 30706870682SMaxim Mikityanskiy err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); 30806870682SMaxim Mikityanskiy rcu_read_unlock(); 30906870682SMaxim Mikityanskiy 31006870682SMaxim Mikityanskiy return err; 31106870682SMaxim Mikityanskiy } 31206870682SMaxim Mikityanskiy 31306870682SMaxim Mikityanskiy static int xsk_zc_xmit(struct xdp_sock *xs) 31406870682SMaxim Mikityanskiy { 31506870682SMaxim Mikityanskiy return xsk_wakeup(xs, XDP_WAKEUP_TX); 316ac98d8aaSMagnus Karlsson } 317ac98d8aaSMagnus Karlsson 31835fcde7fSMagnus Karlsson static void xsk_destruct_skb(struct sk_buff *skb) 31935fcde7fSMagnus Karlsson { 320bbff2f32SBjörn Töpel u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg; 32135fcde7fSMagnus Karlsson struct xdp_sock *xs = xdp_sk(skb->sk); 322a9744f7cSMagnus Karlsson unsigned long flags; 32335fcde7fSMagnus Karlsson 324a9744f7cSMagnus Karlsson spin_lock_irqsave(&xs->tx_completion_lock, flags); 32559e35e55SMagnus Karlsson xskq_prod_submit_addr(xs->umem->cq, addr); 326a9744f7cSMagnus Karlsson spin_unlock_irqrestore(&xs->tx_completion_lock, flags); 32735fcde7fSMagnus Karlsson 32835fcde7fSMagnus Karlsson sock_wfree(skb); 32935fcde7fSMagnus Karlsson } 33035fcde7fSMagnus Karlsson 331df551058SMagnus Karlsson static int xsk_generic_xmit(struct sock *sk) 33235fcde7fSMagnus Karlsson { 33335fcde7fSMagnus Karlsson struct xdp_sock *xs = xdp_sk(sk); 334df551058SMagnus Karlsson u32 max_batch = TX_BATCH_SIZE; 33535fcde7fSMagnus Karlsson bool sent_frame = false; 33635fcde7fSMagnus Karlsson struct xdp_desc desc; 33735fcde7fSMagnus Karlsson struct sk_buff *skb; 33835fcde7fSMagnus Karlsson int err = 0; 33935fcde7fSMagnus Karlsson 34035fcde7fSMagnus Karlsson mutex_lock(&xs->mutex); 34135fcde7fSMagnus Karlsson 34267571640SIlya Maximets if (xs->queue_id >= xs->dev->real_num_tx_queues) 34367571640SIlya Maximets goto out; 34467571640SIlya Maximets 345c5ed924bSMagnus Karlsson while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) { 34635fcde7fSMagnus Karlsson char *buffer; 347bbff2f32SBjörn Töpel u64 addr; 348bbff2f32SBjörn Töpel u32 len; 34935fcde7fSMagnus Karlsson 35035fcde7fSMagnus Karlsson if (max_batch-- == 0) { 35135fcde7fSMagnus Karlsson err = -EAGAIN; 35235fcde7fSMagnus Karlsson goto out; 35335fcde7fSMagnus Karlsson } 35435fcde7fSMagnus Karlsson 35509210c4bSMagnus Karlsson len = desc.len; 356ac98d8aaSMagnus Karlsson skb = sock_alloc_send_skb(sk, len, 1, &err); 357aa2cad06SLi RongQing if (unlikely(!skb)) 35835fcde7fSMagnus Karlsson goto out; 35935fcde7fSMagnus Karlsson 36035fcde7fSMagnus Karlsson skb_put(skb, len); 361bbff2f32SBjörn Töpel addr = desc.addr; 3622b43470aSBjörn Töpel buffer = xsk_buff_raw_get_data(xs->umem, addr); 36335fcde7fSMagnus Karlsson err = skb_store_bits(skb, 0, buffer, len); 3640a05861fSTobias Klauser /* This is the backpressure mechanism for the Tx path. 36515d8c916SMagnus Karlsson * Reserve space in the completion queue and only proceed 36615d8c916SMagnus Karlsson * if there is space in it. This avoids having to implement 36715d8c916SMagnus Karlsson * any buffering in the Tx path. 36815d8c916SMagnus Karlsson */ 36959e35e55SMagnus Karlsson if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) { 37035fcde7fSMagnus Karlsson kfree_skb(skb); 37135fcde7fSMagnus Karlsson goto out; 37235fcde7fSMagnus Karlsson } 37335fcde7fSMagnus Karlsson 37435fcde7fSMagnus Karlsson skb->dev = xs->dev; 37535fcde7fSMagnus Karlsson skb->priority = sk->sk_priority; 37635fcde7fSMagnus Karlsson skb->mark = sk->sk_mark; 377c05cd364SKevin Laatz skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; 37835fcde7fSMagnus Karlsson skb->destructor = xsk_destruct_skb; 37935fcde7fSMagnus Karlsson 380*642e450bSMagnus Karlsson /* Hinder dev_direct_xmit from freeing the packet and 381*642e450bSMagnus Karlsson * therefore completing it in the destructor 382*642e450bSMagnus Karlsson */ 383*642e450bSMagnus Karlsson refcount_inc(&skb->users); 38435fcde7fSMagnus Karlsson err = dev_direct_xmit(skb, xs->queue_id); 385*642e450bSMagnus Karlsson if (err == NETDEV_TX_BUSY) { 386*642e450bSMagnus Karlsson /* Tell user-space to retry the send */ 387*642e450bSMagnus Karlsson skb->destructor = sock_wfree; 388*642e450bSMagnus Karlsson /* Free skb without triggering the perf drop trace */ 389*642e450bSMagnus Karlsson consume_skb(skb); 390*642e450bSMagnus Karlsson err = -EAGAIN; 391*642e450bSMagnus Karlsson goto out; 392*642e450bSMagnus Karlsson } 393*642e450bSMagnus Karlsson 394c5ed924bSMagnus Karlsson xskq_cons_release(xs->tx); 39535fcde7fSMagnus Karlsson /* Ignore NET_XMIT_CN as packet might have been sent */ 396*642e450bSMagnus Karlsson if (err == NET_XMIT_DROP) { 397fe588685SMagnus Karlsson /* SKB completed but not sent */ 398*642e450bSMagnus Karlsson kfree_skb(skb); 399fe588685SMagnus Karlsson err = -EBUSY; 40035fcde7fSMagnus Karlsson goto out; 40135fcde7fSMagnus Karlsson } 40235fcde7fSMagnus Karlsson 403*642e450bSMagnus Karlsson consume_skb(skb); 40435fcde7fSMagnus Karlsson sent_frame = true; 40535fcde7fSMagnus Karlsson } 40635fcde7fSMagnus Karlsson 4078aa5a335SCiara Loftus xs->tx->queue_empty_descs++; 4088aa5a335SCiara Loftus 40935fcde7fSMagnus Karlsson out: 41035fcde7fSMagnus Karlsson if (sent_frame) 41135fcde7fSMagnus Karlsson sk->sk_write_space(sk); 41235fcde7fSMagnus Karlsson 41335fcde7fSMagnus Karlsson mutex_unlock(&xs->mutex); 41435fcde7fSMagnus Karlsson return err; 41535fcde7fSMagnus Karlsson } 41635fcde7fSMagnus Karlsson 417df551058SMagnus Karlsson static int __xsk_sendmsg(struct sock *sk) 418df551058SMagnus Karlsson { 419df551058SMagnus Karlsson struct xdp_sock *xs = xdp_sk(sk); 420df551058SMagnus Karlsson 421df551058SMagnus Karlsson if (unlikely(!(xs->dev->flags & IFF_UP))) 422df551058SMagnus Karlsson return -ENETDOWN; 423df551058SMagnus Karlsson if (unlikely(!xs->tx)) 424df551058SMagnus Karlsson return -ENOBUFS; 425df551058SMagnus Karlsson 426df551058SMagnus Karlsson return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk); 427df551058SMagnus Karlsson } 428df551058SMagnus Karlsson 42935fcde7fSMagnus Karlsson static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 43035fcde7fSMagnus Karlsson { 431ac98d8aaSMagnus Karlsson bool need_wait = !(m->msg_flags & MSG_DONTWAIT); 43235fcde7fSMagnus Karlsson struct sock *sk = sock->sk; 43335fcde7fSMagnus Karlsson struct xdp_sock *xs = xdp_sk(sk); 43435fcde7fSMagnus Karlsson 43542fddcc7SBjörn Töpel if (unlikely(!xsk_is_bound(xs))) 43635fcde7fSMagnus Karlsson return -ENXIO; 437df551058SMagnus Karlsson if (unlikely(need_wait)) 438ac98d8aaSMagnus Karlsson return -EOPNOTSUPP; 43935fcde7fSMagnus Karlsson 440df551058SMagnus Karlsson return __xsk_sendmsg(sk); 44135fcde7fSMagnus Karlsson } 44235fcde7fSMagnus Karlsson 4435d946c5aSLuc Van Oostenryck static __poll_t xsk_poll(struct file *file, struct socket *sock, 444a11e1d43SLinus Torvalds struct poll_table_struct *wait) 445c497176cSBjörn Töpel { 4465d946c5aSLuc Van Oostenryck __poll_t mask = datagram_poll(file, sock, wait); 447df551058SMagnus Karlsson struct sock *sk = sock->sk; 448df551058SMagnus Karlsson struct xdp_sock *xs = xdp_sk(sk); 44942fddcc7SBjörn Töpel struct xdp_umem *umem; 45042fddcc7SBjörn Töpel 45142fddcc7SBjörn Töpel if (unlikely(!xsk_is_bound(xs))) 45242fddcc7SBjörn Töpel return mask; 45342fddcc7SBjörn Töpel 45442fddcc7SBjörn Töpel umem = xs->umem; 45577cd0d7bSMagnus Karlsson 456df551058SMagnus Karlsson if (umem->need_wakeup) { 45706870682SMaxim Mikityanskiy if (xs->zc) 45806870682SMaxim Mikityanskiy xsk_wakeup(xs, umem->need_wakeup); 459df551058SMagnus Karlsson else 460df551058SMagnus Karlsson /* Poll needs to drive Tx also in copy mode */ 461df551058SMagnus Karlsson __xsk_sendmsg(sk); 462df551058SMagnus Karlsson } 463c497176cSBjörn Töpel 46459e35e55SMagnus Karlsson if (xs->rx && !xskq_prod_is_empty(xs->rx)) 4655d946c5aSLuc Van Oostenryck mask |= EPOLLIN | EPOLLRDNORM; 466c5ed924bSMagnus Karlsson if (xs->tx && !xskq_cons_is_full(xs->tx)) 4675d946c5aSLuc Van Oostenryck mask |= EPOLLOUT | EPOLLWRNORM; 468c497176cSBjörn Töpel 469c497176cSBjörn Töpel return mask; 470c497176cSBjörn Töpel } 471c497176cSBjörn Töpel 472b9b6b68eSBjörn Töpel static int xsk_init_queue(u32 entries, struct xsk_queue **queue, 473b9b6b68eSBjörn Töpel bool umem_queue) 474423f3832SMagnus Karlsson { 475423f3832SMagnus Karlsson struct xsk_queue *q; 476423f3832SMagnus Karlsson 477423f3832SMagnus Karlsson if (entries == 0 || *queue || !is_power_of_2(entries)) 478423f3832SMagnus Karlsson return -EINVAL; 479423f3832SMagnus Karlsson 480b9b6b68eSBjörn Töpel q = xskq_create(entries, umem_queue); 481423f3832SMagnus Karlsson if (!q) 482423f3832SMagnus Karlsson return -ENOMEM; 483423f3832SMagnus Karlsson 48437b07693SBjörn Töpel /* Make sure queue is ready before it can be seen by others */ 48537b07693SBjörn Töpel smp_wmb(); 48694a99763SBjörn Töpel WRITE_ONCE(*queue, q); 487423f3832SMagnus Karlsson return 0; 488423f3832SMagnus Karlsson } 489423f3832SMagnus Karlsson 490455302d1SIlya Maximets static void xsk_unbind_dev(struct xdp_sock *xs) 491455302d1SIlya Maximets { 492455302d1SIlya Maximets struct net_device *dev = xs->dev; 493455302d1SIlya Maximets 49442fddcc7SBjörn Töpel if (xs->state != XSK_BOUND) 495455302d1SIlya Maximets return; 49642fddcc7SBjörn Töpel WRITE_ONCE(xs->state, XSK_UNBOUND); 497455302d1SIlya Maximets 498455302d1SIlya Maximets /* Wait for driver to stop using the xdp socket. */ 499455302d1SIlya Maximets xdp_del_sk_umem(xs->umem, xs); 500455302d1SIlya Maximets xs->dev = NULL; 501455302d1SIlya Maximets synchronize_net(); 502455302d1SIlya Maximets dev_put(dev); 503455302d1SIlya Maximets } 504455302d1SIlya Maximets 5050402acd6SBjörn Töpel static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs, 5060402acd6SBjörn Töpel struct xdp_sock ***map_entry) 5070402acd6SBjörn Töpel { 5080402acd6SBjörn Töpel struct xsk_map *map = NULL; 5090402acd6SBjörn Töpel struct xsk_map_node *node; 5100402acd6SBjörn Töpel 5110402acd6SBjörn Töpel *map_entry = NULL; 5120402acd6SBjörn Töpel 5130402acd6SBjörn Töpel spin_lock_bh(&xs->map_list_lock); 5140402acd6SBjörn Töpel node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node, 5150402acd6SBjörn Töpel node); 5160402acd6SBjörn Töpel if (node) { 5170402acd6SBjörn Töpel WARN_ON(xsk_map_inc(node->map)); 5180402acd6SBjörn Töpel map = node->map; 5190402acd6SBjörn Töpel *map_entry = node->map_entry; 5200402acd6SBjörn Töpel } 5210402acd6SBjörn Töpel spin_unlock_bh(&xs->map_list_lock); 5220402acd6SBjörn Töpel return map; 5230402acd6SBjörn Töpel } 5240402acd6SBjörn Töpel 5250402acd6SBjörn Töpel static void xsk_delete_from_maps(struct xdp_sock *xs) 5260402acd6SBjörn Töpel { 5270402acd6SBjörn Töpel /* This function removes the current XDP socket from all the 5280402acd6SBjörn Töpel * maps it resides in. We need to take extra care here, due to 5290402acd6SBjörn Töpel * the two locks involved. Each map has a lock synchronizing 5300402acd6SBjörn Töpel * updates to the entries, and each socket has a lock that 5310402acd6SBjörn Töpel * synchronizes access to the list of maps (map_list). For 5320402acd6SBjörn Töpel * deadlock avoidance the locks need to be taken in the order 5330402acd6SBjörn Töpel * "map lock"->"socket map list lock". We start off by 5340402acd6SBjörn Töpel * accessing the socket map list, and take a reference to the 5350402acd6SBjörn Töpel * map to guarantee existence between the 5360402acd6SBjörn Töpel * xsk_get_map_list_entry() and xsk_map_try_sock_delete() 5370402acd6SBjörn Töpel * calls. Then we ask the map to remove the socket, which 5380402acd6SBjörn Töpel * tries to remove the socket from the map. Note that there 5390402acd6SBjörn Töpel * might be updates to the map between 5400402acd6SBjörn Töpel * xsk_get_map_list_entry() and xsk_map_try_sock_delete(). 5410402acd6SBjörn Töpel */ 5420402acd6SBjörn Töpel struct xdp_sock **map_entry = NULL; 5430402acd6SBjörn Töpel struct xsk_map *map; 5440402acd6SBjörn Töpel 5450402acd6SBjörn Töpel while ((map = xsk_get_map_list_entry(xs, &map_entry))) { 5460402acd6SBjörn Töpel xsk_map_try_sock_delete(map, xs, map_entry); 5470402acd6SBjörn Töpel xsk_map_put(map); 5480402acd6SBjörn Töpel } 5490402acd6SBjörn Töpel } 5500402acd6SBjörn Töpel 551c0c77d8fSBjörn Töpel static int xsk_release(struct socket *sock) 552c0c77d8fSBjörn Töpel { 553c0c77d8fSBjörn Töpel struct sock *sk = sock->sk; 554965a9909SMagnus Karlsson struct xdp_sock *xs = xdp_sk(sk); 555c0c77d8fSBjörn Töpel struct net *net; 556c0c77d8fSBjörn Töpel 557c0c77d8fSBjörn Töpel if (!sk) 558c0c77d8fSBjörn Töpel return 0; 559c0c77d8fSBjörn Töpel 560c0c77d8fSBjörn Töpel net = sock_net(sk); 561c0c77d8fSBjörn Töpel 5621d0dc069SBjörn Töpel mutex_lock(&net->xdp.lock); 5631d0dc069SBjörn Töpel sk_del_node_init_rcu(sk); 5641d0dc069SBjörn Töpel mutex_unlock(&net->xdp.lock); 5651d0dc069SBjörn Töpel 566c0c77d8fSBjörn Töpel local_bh_disable(); 567c0c77d8fSBjörn Töpel sock_prot_inuse_add(net, sk->sk_prot, -1); 568c0c77d8fSBjörn Töpel local_bh_enable(); 569c0c77d8fSBjörn Töpel 5700402acd6SBjörn Töpel xsk_delete_from_maps(xs); 57142fddcc7SBjörn Töpel mutex_lock(&xs->mutex); 572455302d1SIlya Maximets xsk_unbind_dev(xs); 57342fddcc7SBjörn Töpel mutex_unlock(&xs->mutex); 574965a9909SMagnus Karlsson 575541d7fddSBjörn Töpel xskq_destroy(xs->rx); 576541d7fddSBjörn Töpel xskq_destroy(xs->tx); 577541d7fddSBjörn Töpel 578c0c77d8fSBjörn Töpel sock_orphan(sk); 579c0c77d8fSBjörn Töpel sock->sk = NULL; 580c0c77d8fSBjörn Töpel 581c0c77d8fSBjörn Töpel sk_refcnt_debug_release(sk); 582c0c77d8fSBjörn Töpel sock_put(sk); 583c0c77d8fSBjörn Töpel 584c0c77d8fSBjörn Töpel return 0; 585c0c77d8fSBjörn Töpel } 586c0c77d8fSBjörn Töpel 587965a9909SMagnus Karlsson static struct socket *xsk_lookup_xsk_from_fd(int fd) 588965a9909SMagnus Karlsson { 589965a9909SMagnus Karlsson struct socket *sock; 590965a9909SMagnus Karlsson int err; 591965a9909SMagnus Karlsson 592965a9909SMagnus Karlsson sock = sockfd_lookup(fd, &err); 593965a9909SMagnus Karlsson if (!sock) 594965a9909SMagnus Karlsson return ERR_PTR(-ENOTSOCK); 595965a9909SMagnus Karlsson 596965a9909SMagnus Karlsson if (sock->sk->sk_family != PF_XDP) { 597965a9909SMagnus Karlsson sockfd_put(sock); 598965a9909SMagnus Karlsson return ERR_PTR(-ENOPROTOOPT); 599965a9909SMagnus Karlsson } 600965a9909SMagnus Karlsson 601965a9909SMagnus Karlsson return sock; 602965a9909SMagnus Karlsson } 603965a9909SMagnus Karlsson 604965a9909SMagnus Karlsson static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) 605965a9909SMagnus Karlsson { 606965a9909SMagnus Karlsson struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; 607965a9909SMagnus Karlsson struct sock *sk = sock->sk; 608965a9909SMagnus Karlsson struct xdp_sock *xs = xdp_sk(sk); 609959b71dbSBjörn Töpel struct net_device *dev; 610173d3adbSBjörn Töpel u32 flags, qid; 611965a9909SMagnus Karlsson int err = 0; 612965a9909SMagnus Karlsson 613965a9909SMagnus Karlsson if (addr_len < sizeof(struct sockaddr_xdp)) 614965a9909SMagnus Karlsson return -EINVAL; 615965a9909SMagnus Karlsson if (sxdp->sxdp_family != AF_XDP) 616965a9909SMagnus Karlsson return -EINVAL; 617965a9909SMagnus Karlsson 618f54ba391SBjörn Töpel flags = sxdp->sxdp_flags; 61977cd0d7bSMagnus Karlsson if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY | 62077cd0d7bSMagnus Karlsson XDP_USE_NEED_WAKEUP)) 621f54ba391SBjörn Töpel return -EINVAL; 622f54ba391SBjörn Töpel 6235464c3a0SIlya Maximets rtnl_lock(); 624965a9909SMagnus Karlsson mutex_lock(&xs->mutex); 625455302d1SIlya Maximets if (xs->state != XSK_READY) { 626959b71dbSBjörn Töpel err = -EBUSY; 627959b71dbSBjörn Töpel goto out_release; 628959b71dbSBjörn Töpel } 629959b71dbSBjörn Töpel 630965a9909SMagnus Karlsson dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex); 631965a9909SMagnus Karlsson if (!dev) { 632965a9909SMagnus Karlsson err = -ENODEV; 633965a9909SMagnus Karlsson goto out_release; 634965a9909SMagnus Karlsson } 635965a9909SMagnus Karlsson 636f6145903SMagnus Karlsson if (!xs->rx && !xs->tx) { 637965a9909SMagnus Karlsson err = -EINVAL; 638965a9909SMagnus Karlsson goto out_unlock; 639965a9909SMagnus Karlsson } 640965a9909SMagnus Karlsson 641173d3adbSBjörn Töpel qid = sxdp->sxdp_queue_id; 642173d3adbSBjörn Töpel 643173d3adbSBjörn Töpel if (flags & XDP_SHARED_UMEM) { 644965a9909SMagnus Karlsson struct xdp_sock *umem_xs; 645965a9909SMagnus Karlsson struct socket *sock; 646965a9909SMagnus Karlsson 64777cd0d7bSMagnus Karlsson if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) || 64877cd0d7bSMagnus Karlsson (flags & XDP_USE_NEED_WAKEUP)) { 649173d3adbSBjörn Töpel /* Cannot specify flags for shared sockets. */ 650173d3adbSBjörn Töpel err = -EINVAL; 651173d3adbSBjörn Töpel goto out_unlock; 652173d3adbSBjörn Töpel } 653173d3adbSBjörn Töpel 654965a9909SMagnus Karlsson if (xs->umem) { 655965a9909SMagnus Karlsson /* We have already our own. */ 656965a9909SMagnus Karlsson err = -EINVAL; 657965a9909SMagnus Karlsson goto out_unlock; 658965a9909SMagnus Karlsson } 659965a9909SMagnus Karlsson 660965a9909SMagnus Karlsson sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd); 661965a9909SMagnus Karlsson if (IS_ERR(sock)) { 662965a9909SMagnus Karlsson err = PTR_ERR(sock); 663965a9909SMagnus Karlsson goto out_unlock; 664965a9909SMagnus Karlsson } 665965a9909SMagnus Karlsson 666965a9909SMagnus Karlsson umem_xs = xdp_sk(sock->sk); 66742fddcc7SBjörn Töpel if (!xsk_is_bound(umem_xs)) { 668965a9909SMagnus Karlsson err = -EBADF; 669965a9909SMagnus Karlsson sockfd_put(sock); 670965a9909SMagnus Karlsson goto out_unlock; 67142fddcc7SBjörn Töpel } 67242fddcc7SBjörn Töpel if (umem_xs->dev != dev || umem_xs->queue_id != qid) { 673965a9909SMagnus Karlsson err = -EINVAL; 674965a9909SMagnus Karlsson sockfd_put(sock); 675965a9909SMagnus Karlsson goto out_unlock; 676965a9909SMagnus Karlsson } 677965a9909SMagnus Karlsson 678965a9909SMagnus Karlsson xdp_get_umem(umem_xs->umem); 6799764f4b3SBjörn Töpel WRITE_ONCE(xs->umem, umem_xs->umem); 680965a9909SMagnus Karlsson sockfd_put(sock); 681965a9909SMagnus Karlsson } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { 682965a9909SMagnus Karlsson err = -EINVAL; 683965a9909SMagnus Karlsson goto out_unlock; 684c497176cSBjörn Töpel } else { 685c497176cSBjörn Töpel /* This xsk has its own umem. */ 686173d3adbSBjörn Töpel err = xdp_umem_assign_dev(xs->umem, dev, qid, flags); 687173d3adbSBjörn Töpel if (err) 688173d3adbSBjörn Töpel goto out_unlock; 689965a9909SMagnus Karlsson } 690965a9909SMagnus Karlsson 691965a9909SMagnus Karlsson xs->dev = dev; 692ac98d8aaSMagnus Karlsson xs->zc = xs->umem->zc; 693ac98d8aaSMagnus Karlsson xs->queue_id = qid; 694ac98d8aaSMagnus Karlsson xdp_add_sk_umem(xs->umem, xs); 695965a9909SMagnus Karlsson 696965a9909SMagnus Karlsson out_unlock: 69742fddcc7SBjörn Töpel if (err) { 698965a9909SMagnus Karlsson dev_put(dev); 69942fddcc7SBjörn Töpel } else { 70042fddcc7SBjörn Töpel /* Matches smp_rmb() in bind() for shared umem 70142fddcc7SBjörn Töpel * sockets, and xsk_is_bound(). 70242fddcc7SBjörn Töpel */ 70342fddcc7SBjörn Töpel smp_wmb(); 70442fddcc7SBjörn Töpel WRITE_ONCE(xs->state, XSK_BOUND); 70542fddcc7SBjörn Töpel } 706965a9909SMagnus Karlsson out_release: 707965a9909SMagnus Karlsson mutex_unlock(&xs->mutex); 7085464c3a0SIlya Maximets rtnl_unlock(); 709965a9909SMagnus Karlsson return err; 710965a9909SMagnus Karlsson } 711965a9909SMagnus Karlsson 712c05cd364SKevin Laatz struct xdp_umem_reg_v1 { 713c05cd364SKevin Laatz __u64 addr; /* Start of packet data area */ 714c05cd364SKevin Laatz __u64 len; /* Length of packet data area */ 715c05cd364SKevin Laatz __u32 chunk_size; 716c05cd364SKevin Laatz __u32 headroom; 717c05cd364SKevin Laatz }; 718c05cd364SKevin Laatz 719c0c77d8fSBjörn Töpel static int xsk_setsockopt(struct socket *sock, int level, int optname, 720a7b75c5aSChristoph Hellwig sockptr_t optval, unsigned int optlen) 721c0c77d8fSBjörn Töpel { 722c0c77d8fSBjörn Töpel struct sock *sk = sock->sk; 723c0c77d8fSBjörn Töpel struct xdp_sock *xs = xdp_sk(sk); 724c0c77d8fSBjörn Töpel int err; 725c0c77d8fSBjörn Töpel 726c0c77d8fSBjörn Töpel if (level != SOL_XDP) 727c0c77d8fSBjörn Töpel return -ENOPROTOOPT; 728c0c77d8fSBjörn Töpel 729c0c77d8fSBjörn Töpel switch (optname) { 730b9b6b68eSBjörn Töpel case XDP_RX_RING: 731f6145903SMagnus Karlsson case XDP_TX_RING: 732b9b6b68eSBjörn Töpel { 733b9b6b68eSBjörn Töpel struct xsk_queue **q; 734b9b6b68eSBjörn Töpel int entries; 735b9b6b68eSBjörn Töpel 736b9b6b68eSBjörn Töpel if (optlen < sizeof(entries)) 737b9b6b68eSBjörn Töpel return -EINVAL; 738a7b75c5aSChristoph Hellwig if (copy_from_sockptr(&entries, optval, sizeof(entries))) 739b9b6b68eSBjörn Töpel return -EFAULT; 740b9b6b68eSBjörn Töpel 741b9b6b68eSBjörn Töpel mutex_lock(&xs->mutex); 742455302d1SIlya Maximets if (xs->state != XSK_READY) { 743455302d1SIlya Maximets mutex_unlock(&xs->mutex); 744455302d1SIlya Maximets return -EBUSY; 745455302d1SIlya Maximets } 746f6145903SMagnus Karlsson q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx; 747b9b6b68eSBjörn Töpel err = xsk_init_queue(entries, q, false); 74877cd0d7bSMagnus Karlsson if (!err && optname == XDP_TX_RING) 74977cd0d7bSMagnus Karlsson /* Tx needs to be explicitly woken up the first time */ 75077cd0d7bSMagnus Karlsson xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; 751b9b6b68eSBjörn Töpel mutex_unlock(&xs->mutex); 752b9b6b68eSBjörn Töpel return err; 753b9b6b68eSBjörn Töpel } 754c0c77d8fSBjörn Töpel case XDP_UMEM_REG: 755c0c77d8fSBjörn Töpel { 756c05cd364SKevin Laatz size_t mr_size = sizeof(struct xdp_umem_reg); 757c05cd364SKevin Laatz struct xdp_umem_reg mr = {}; 758c0c77d8fSBjörn Töpel struct xdp_umem *umem; 759c0c77d8fSBjörn Töpel 760c05cd364SKevin Laatz if (optlen < sizeof(struct xdp_umem_reg_v1)) 761c05cd364SKevin Laatz return -EINVAL; 762c05cd364SKevin Laatz else if (optlen < sizeof(mr)) 763c05cd364SKevin Laatz mr_size = sizeof(struct xdp_umem_reg_v1); 764c05cd364SKevin Laatz 765a7b75c5aSChristoph Hellwig if (copy_from_sockptr(&mr, optval, mr_size)) 766c0c77d8fSBjörn Töpel return -EFAULT; 767c0c77d8fSBjörn Töpel 768c0c77d8fSBjörn Töpel mutex_lock(&xs->mutex); 769455302d1SIlya Maximets if (xs->state != XSK_READY || xs->umem) { 770c0c77d8fSBjörn Töpel mutex_unlock(&xs->mutex); 771a49049eaSBjörn Töpel return -EBUSY; 772a49049eaSBjörn Töpel } 773a49049eaSBjörn Töpel 774a49049eaSBjörn Töpel umem = xdp_umem_create(&mr); 775a49049eaSBjörn Töpel if (IS_ERR(umem)) { 776a49049eaSBjörn Töpel mutex_unlock(&xs->mutex); 777a49049eaSBjörn Töpel return PTR_ERR(umem); 778c0c77d8fSBjörn Töpel } 779c0c77d8fSBjörn Töpel 780c0c77d8fSBjörn Töpel /* Make sure umem is ready before it can be seen by others */ 781c0c77d8fSBjörn Töpel smp_wmb(); 7829764f4b3SBjörn Töpel WRITE_ONCE(xs->umem, umem); 783c0c77d8fSBjörn Töpel mutex_unlock(&xs->mutex); 784c0c77d8fSBjörn Töpel return 0; 785c0c77d8fSBjörn Töpel } 786423f3832SMagnus Karlsson case XDP_UMEM_FILL_RING: 787fe230832SMagnus Karlsson case XDP_UMEM_COMPLETION_RING: 788423f3832SMagnus Karlsson { 789423f3832SMagnus Karlsson struct xsk_queue **q; 790423f3832SMagnus Karlsson int entries; 791423f3832SMagnus Karlsson 792a7b75c5aSChristoph Hellwig if (copy_from_sockptr(&entries, optval, sizeof(entries))) 793423f3832SMagnus Karlsson return -EFAULT; 794423f3832SMagnus Karlsson 795423f3832SMagnus Karlsson mutex_lock(&xs->mutex); 796455302d1SIlya Maximets if (xs->state != XSK_READY) { 797455302d1SIlya Maximets mutex_unlock(&xs->mutex); 798455302d1SIlya Maximets return -EBUSY; 799455302d1SIlya Maximets } 800a49049eaSBjörn Töpel if (!xs->umem) { 801a49049eaSBjörn Töpel mutex_unlock(&xs->mutex); 802a49049eaSBjörn Töpel return -EINVAL; 803a49049eaSBjörn Töpel } 804a49049eaSBjörn Töpel 805fe230832SMagnus Karlsson q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq : 806fe230832SMagnus Karlsson &xs->umem->cq; 807b9b6b68eSBjörn Töpel err = xsk_init_queue(entries, q, true); 8082b43470aSBjörn Töpel if (optname == XDP_UMEM_FILL_RING) 8092b43470aSBjörn Töpel xp_set_fq(xs->umem->pool, *q); 810423f3832SMagnus Karlsson mutex_unlock(&xs->mutex); 811423f3832SMagnus Karlsson return err; 812423f3832SMagnus Karlsson } 813c0c77d8fSBjörn Töpel default: 814c0c77d8fSBjörn Töpel break; 815c0c77d8fSBjörn Töpel } 816c0c77d8fSBjörn Töpel 817c0c77d8fSBjörn Töpel return -ENOPROTOOPT; 818c0c77d8fSBjörn Töpel } 819c0c77d8fSBjörn Töpel 82077cd0d7bSMagnus Karlsson static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring) 82177cd0d7bSMagnus Karlsson { 82277cd0d7bSMagnus Karlsson ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); 82377cd0d7bSMagnus Karlsson ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); 82477cd0d7bSMagnus Karlsson ring->desc = offsetof(struct xdp_rxtx_ring, desc); 82577cd0d7bSMagnus Karlsson } 82677cd0d7bSMagnus Karlsson 82777cd0d7bSMagnus Karlsson static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring) 82877cd0d7bSMagnus Karlsson { 82977cd0d7bSMagnus Karlsson ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer); 83077cd0d7bSMagnus Karlsson ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); 83177cd0d7bSMagnus Karlsson ring->desc = offsetof(struct xdp_umem_ring, desc); 83277cd0d7bSMagnus Karlsson } 83377cd0d7bSMagnus Karlsson 8348aa5a335SCiara Loftus struct xdp_statistics_v1 { 8358aa5a335SCiara Loftus __u64 rx_dropped; 8368aa5a335SCiara Loftus __u64 rx_invalid_descs; 8378aa5a335SCiara Loftus __u64 tx_invalid_descs; 8388aa5a335SCiara Loftus }; 8398aa5a335SCiara Loftus 840af75d9e0SMagnus Karlsson static int xsk_getsockopt(struct socket *sock, int level, int optname, 841af75d9e0SMagnus Karlsson char __user *optval, int __user *optlen) 842af75d9e0SMagnus Karlsson { 843af75d9e0SMagnus Karlsson struct sock *sk = sock->sk; 844af75d9e0SMagnus Karlsson struct xdp_sock *xs = xdp_sk(sk); 845af75d9e0SMagnus Karlsson int len; 846af75d9e0SMagnus Karlsson 847af75d9e0SMagnus Karlsson if (level != SOL_XDP) 848af75d9e0SMagnus Karlsson return -ENOPROTOOPT; 849af75d9e0SMagnus Karlsson 850af75d9e0SMagnus Karlsson if (get_user(len, optlen)) 851af75d9e0SMagnus Karlsson return -EFAULT; 852af75d9e0SMagnus Karlsson if (len < 0) 853af75d9e0SMagnus Karlsson return -EINVAL; 854af75d9e0SMagnus Karlsson 855af75d9e0SMagnus Karlsson switch (optname) { 856af75d9e0SMagnus Karlsson case XDP_STATISTICS: 857af75d9e0SMagnus Karlsson { 8583c4f850eSPeilin Ye struct xdp_statistics stats = {}; 8598aa5a335SCiara Loftus bool extra_stats = true; 8608aa5a335SCiara Loftus size_t stats_size; 861af75d9e0SMagnus Karlsson 8628aa5a335SCiara Loftus if (len < sizeof(struct xdp_statistics_v1)) { 863af75d9e0SMagnus Karlsson return -EINVAL; 8648aa5a335SCiara Loftus } else if (len < sizeof(stats)) { 8658aa5a335SCiara Loftus extra_stats = false; 8668aa5a335SCiara Loftus stats_size = sizeof(struct xdp_statistics_v1); 8678aa5a335SCiara Loftus } else { 8688aa5a335SCiara Loftus stats_size = sizeof(stats); 8698aa5a335SCiara Loftus } 870af75d9e0SMagnus Karlsson 871af75d9e0SMagnus Karlsson mutex_lock(&xs->mutex); 872af75d9e0SMagnus Karlsson stats.rx_dropped = xs->rx_dropped; 8738aa5a335SCiara Loftus if (extra_stats) { 8748aa5a335SCiara Loftus stats.rx_ring_full = xs->rx_queue_full; 8758aa5a335SCiara Loftus stats.rx_fill_ring_empty_descs = 8768aa5a335SCiara Loftus xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0; 8778aa5a335SCiara Loftus stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx); 8788aa5a335SCiara Loftus } else { 8798aa5a335SCiara Loftus stats.rx_dropped += xs->rx_queue_full; 8808aa5a335SCiara Loftus } 881af75d9e0SMagnus Karlsson stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx); 882af75d9e0SMagnus Karlsson stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx); 883af75d9e0SMagnus Karlsson mutex_unlock(&xs->mutex); 884af75d9e0SMagnus Karlsson 8858aa5a335SCiara Loftus if (copy_to_user(optval, &stats, stats_size)) 886af75d9e0SMagnus Karlsson return -EFAULT; 8878aa5a335SCiara Loftus if (put_user(stats_size, optlen)) 888af75d9e0SMagnus Karlsson return -EFAULT; 889af75d9e0SMagnus Karlsson 890af75d9e0SMagnus Karlsson return 0; 891af75d9e0SMagnus Karlsson } 892b3a9e0beSBjörn Töpel case XDP_MMAP_OFFSETS: 893b3a9e0beSBjörn Töpel { 894b3a9e0beSBjörn Töpel struct xdp_mmap_offsets off; 89577cd0d7bSMagnus Karlsson struct xdp_mmap_offsets_v1 off_v1; 89677cd0d7bSMagnus Karlsson bool flags_supported = true; 89777cd0d7bSMagnus Karlsson void *to_copy; 898b3a9e0beSBjörn Töpel 89977cd0d7bSMagnus Karlsson if (len < sizeof(off_v1)) 900b3a9e0beSBjörn Töpel return -EINVAL; 90177cd0d7bSMagnus Karlsson else if (len < sizeof(off)) 90277cd0d7bSMagnus Karlsson flags_supported = false; 903b3a9e0beSBjörn Töpel 90477cd0d7bSMagnus Karlsson if (flags_supported) { 90577cd0d7bSMagnus Karlsson /* xdp_ring_offset is identical to xdp_ring_offset_v1 90677cd0d7bSMagnus Karlsson * except for the flags field added to the end. 90777cd0d7bSMagnus Karlsson */ 90877cd0d7bSMagnus Karlsson xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) 90977cd0d7bSMagnus Karlsson &off.rx); 91077cd0d7bSMagnus Karlsson xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) 91177cd0d7bSMagnus Karlsson &off.tx); 91277cd0d7bSMagnus Karlsson xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) 91377cd0d7bSMagnus Karlsson &off.fr); 91477cd0d7bSMagnus Karlsson xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) 91577cd0d7bSMagnus Karlsson &off.cr); 91677cd0d7bSMagnus Karlsson off.rx.flags = offsetof(struct xdp_rxtx_ring, 91777cd0d7bSMagnus Karlsson ptrs.flags); 91877cd0d7bSMagnus Karlsson off.tx.flags = offsetof(struct xdp_rxtx_ring, 91977cd0d7bSMagnus Karlsson ptrs.flags); 92077cd0d7bSMagnus Karlsson off.fr.flags = offsetof(struct xdp_umem_ring, 92177cd0d7bSMagnus Karlsson ptrs.flags); 92277cd0d7bSMagnus Karlsson off.cr.flags = offsetof(struct xdp_umem_ring, 92377cd0d7bSMagnus Karlsson ptrs.flags); 924b3a9e0beSBjörn Töpel 925b3a9e0beSBjörn Töpel len = sizeof(off); 92677cd0d7bSMagnus Karlsson to_copy = &off; 92777cd0d7bSMagnus Karlsson } else { 92877cd0d7bSMagnus Karlsson xsk_enter_rxtx_offsets(&off_v1.rx); 92977cd0d7bSMagnus Karlsson xsk_enter_rxtx_offsets(&off_v1.tx); 93077cd0d7bSMagnus Karlsson xsk_enter_umem_offsets(&off_v1.fr); 93177cd0d7bSMagnus Karlsson xsk_enter_umem_offsets(&off_v1.cr); 93277cd0d7bSMagnus Karlsson 93377cd0d7bSMagnus Karlsson len = sizeof(off_v1); 93477cd0d7bSMagnus Karlsson to_copy = &off_v1; 93577cd0d7bSMagnus Karlsson } 93677cd0d7bSMagnus Karlsson 93777cd0d7bSMagnus Karlsson if (copy_to_user(optval, to_copy, len)) 938b3a9e0beSBjörn Töpel return -EFAULT; 939b3a9e0beSBjörn Töpel if (put_user(len, optlen)) 940b3a9e0beSBjörn Töpel return -EFAULT; 941b3a9e0beSBjörn Töpel 942b3a9e0beSBjörn Töpel return 0; 943b3a9e0beSBjörn Töpel } 9442640d3c8SMaxim Mikityanskiy case XDP_OPTIONS: 9452640d3c8SMaxim Mikityanskiy { 9462640d3c8SMaxim Mikityanskiy struct xdp_options opts = {}; 9472640d3c8SMaxim Mikityanskiy 9482640d3c8SMaxim Mikityanskiy if (len < sizeof(opts)) 9492640d3c8SMaxim Mikityanskiy return -EINVAL; 9502640d3c8SMaxim Mikityanskiy 9512640d3c8SMaxim Mikityanskiy mutex_lock(&xs->mutex); 9522640d3c8SMaxim Mikityanskiy if (xs->zc) 9532640d3c8SMaxim Mikityanskiy opts.flags |= XDP_OPTIONS_ZEROCOPY; 9542640d3c8SMaxim Mikityanskiy mutex_unlock(&xs->mutex); 9552640d3c8SMaxim Mikityanskiy 9562640d3c8SMaxim Mikityanskiy len = sizeof(opts); 9572640d3c8SMaxim Mikityanskiy if (copy_to_user(optval, &opts, len)) 9582640d3c8SMaxim Mikityanskiy return -EFAULT; 9592640d3c8SMaxim Mikityanskiy if (put_user(len, optlen)) 9602640d3c8SMaxim Mikityanskiy return -EFAULT; 9612640d3c8SMaxim Mikityanskiy 9622640d3c8SMaxim Mikityanskiy return 0; 9632640d3c8SMaxim Mikityanskiy } 964af75d9e0SMagnus Karlsson default: 965af75d9e0SMagnus Karlsson break; 966af75d9e0SMagnus Karlsson } 967af75d9e0SMagnus Karlsson 968af75d9e0SMagnus Karlsson return -EOPNOTSUPP; 969af75d9e0SMagnus Karlsson } 970af75d9e0SMagnus Karlsson 971423f3832SMagnus Karlsson static int xsk_mmap(struct file *file, struct socket *sock, 972423f3832SMagnus Karlsson struct vm_area_struct *vma) 973423f3832SMagnus Karlsson { 974a5a16e43SGeert Uytterhoeven loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; 975423f3832SMagnus Karlsson unsigned long size = vma->vm_end - vma->vm_start; 976423f3832SMagnus Karlsson struct xdp_sock *xs = xdp_sk(sock->sk); 977423f3832SMagnus Karlsson struct xsk_queue *q = NULL; 97837b07693SBjörn Töpel struct xdp_umem *umem; 979423f3832SMagnus Karlsson unsigned long pfn; 980423f3832SMagnus Karlsson struct page *qpg; 981423f3832SMagnus Karlsson 98242fddcc7SBjörn Töpel if (READ_ONCE(xs->state) != XSK_READY) 983455302d1SIlya Maximets return -EBUSY; 984455302d1SIlya Maximets 985b9b6b68eSBjörn Töpel if (offset == XDP_PGOFF_RX_RING) { 98637b07693SBjörn Töpel q = READ_ONCE(xs->rx); 987f6145903SMagnus Karlsson } else if (offset == XDP_PGOFF_TX_RING) { 98837b07693SBjörn Töpel q = READ_ONCE(xs->tx); 989b9b6b68eSBjörn Töpel } else { 99037b07693SBjörn Töpel umem = READ_ONCE(xs->umem); 99137b07693SBjörn Töpel if (!umem) 992423f3832SMagnus Karlsson return -EINVAL; 993423f3832SMagnus Karlsson 994e6762c8bSMagnus Karlsson /* Matches the smp_wmb() in XDP_UMEM_REG */ 995e6762c8bSMagnus Karlsson smp_rmb(); 996423f3832SMagnus Karlsson if (offset == XDP_UMEM_PGOFF_FILL_RING) 99737b07693SBjörn Töpel q = READ_ONCE(umem->fq); 998fe230832SMagnus Karlsson else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) 99937b07693SBjörn Töpel q = READ_ONCE(umem->cq); 1000b9b6b68eSBjörn Töpel } 1001423f3832SMagnus Karlsson 1002423f3832SMagnus Karlsson if (!q) 1003423f3832SMagnus Karlsson return -EINVAL; 1004423f3832SMagnus Karlsson 1005e6762c8bSMagnus Karlsson /* Matches the smp_wmb() in xsk_init_queue */ 1006e6762c8bSMagnus Karlsson smp_rmb(); 1007423f3832SMagnus Karlsson qpg = virt_to_head_page(q->ring); 1008a50b854eSMatthew Wilcox (Oracle) if (size > page_size(qpg)) 1009423f3832SMagnus Karlsson return -EINVAL; 1010423f3832SMagnus Karlsson 1011423f3832SMagnus Karlsson pfn = virt_to_phys(q->ring) >> PAGE_SHIFT; 1012423f3832SMagnus Karlsson return remap_pfn_range(vma, vma->vm_start, pfn, 1013423f3832SMagnus Karlsson size, vma->vm_page_prot); 1014423f3832SMagnus Karlsson } 1015423f3832SMagnus Karlsson 1016455302d1SIlya Maximets static int xsk_notifier(struct notifier_block *this, 1017455302d1SIlya Maximets unsigned long msg, void *ptr) 1018455302d1SIlya Maximets { 1019455302d1SIlya Maximets struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1020455302d1SIlya Maximets struct net *net = dev_net(dev); 1021455302d1SIlya Maximets struct sock *sk; 1022455302d1SIlya Maximets 1023455302d1SIlya Maximets switch (msg) { 1024455302d1SIlya Maximets case NETDEV_UNREGISTER: 1025455302d1SIlya Maximets mutex_lock(&net->xdp.lock); 1026455302d1SIlya Maximets sk_for_each(sk, &net->xdp.list) { 1027455302d1SIlya Maximets struct xdp_sock *xs = xdp_sk(sk); 1028455302d1SIlya Maximets 1029455302d1SIlya Maximets mutex_lock(&xs->mutex); 1030455302d1SIlya Maximets if (xs->dev == dev) { 1031455302d1SIlya Maximets sk->sk_err = ENETDOWN; 1032455302d1SIlya Maximets if (!sock_flag(sk, SOCK_DEAD)) 1033455302d1SIlya Maximets sk->sk_error_report(sk); 1034455302d1SIlya Maximets 1035455302d1SIlya Maximets xsk_unbind_dev(xs); 1036455302d1SIlya Maximets 1037455302d1SIlya Maximets /* Clear device references in umem. */ 1038455302d1SIlya Maximets xdp_umem_clear_dev(xs->umem); 1039455302d1SIlya Maximets } 1040455302d1SIlya Maximets mutex_unlock(&xs->mutex); 1041455302d1SIlya Maximets } 1042455302d1SIlya Maximets mutex_unlock(&net->xdp.lock); 1043455302d1SIlya Maximets break; 1044455302d1SIlya Maximets } 1045455302d1SIlya Maximets return NOTIFY_DONE; 1046455302d1SIlya Maximets } 1047455302d1SIlya Maximets 1048c0c77d8fSBjörn Töpel static struct proto xsk_proto = { 1049c0c77d8fSBjörn Töpel .name = "XDP", 1050c0c77d8fSBjörn Töpel .owner = THIS_MODULE, 1051c0c77d8fSBjörn Töpel .obj_size = sizeof(struct xdp_sock), 1052c0c77d8fSBjörn Töpel }; 1053c0c77d8fSBjörn Töpel 1054c0c77d8fSBjörn Töpel static const struct proto_ops xsk_proto_ops = { 1055c0c77d8fSBjörn Töpel .family = PF_XDP, 1056c0c77d8fSBjörn Töpel .owner = THIS_MODULE, 1057c0c77d8fSBjörn Töpel .release = xsk_release, 1058965a9909SMagnus Karlsson .bind = xsk_bind, 1059c0c77d8fSBjörn Töpel .connect = sock_no_connect, 1060c0c77d8fSBjörn Töpel .socketpair = sock_no_socketpair, 1061c0c77d8fSBjörn Töpel .accept = sock_no_accept, 1062c0c77d8fSBjörn Töpel .getname = sock_no_getname, 1063a11e1d43SLinus Torvalds .poll = xsk_poll, 1064c0c77d8fSBjörn Töpel .ioctl = sock_no_ioctl, 1065c0c77d8fSBjörn Töpel .listen = sock_no_listen, 1066c0c77d8fSBjörn Töpel .shutdown = sock_no_shutdown, 1067c0c77d8fSBjörn Töpel .setsockopt = xsk_setsockopt, 1068af75d9e0SMagnus Karlsson .getsockopt = xsk_getsockopt, 106935fcde7fSMagnus Karlsson .sendmsg = xsk_sendmsg, 1070c0c77d8fSBjörn Töpel .recvmsg = sock_no_recvmsg, 1071423f3832SMagnus Karlsson .mmap = xsk_mmap, 1072c0c77d8fSBjörn Töpel .sendpage = sock_no_sendpage, 1073c0c77d8fSBjörn Töpel }; 1074c0c77d8fSBjörn Töpel 107511fe9262SBjörn Töpel static void xsk_destruct(struct sock *sk) 107611fe9262SBjörn Töpel { 107711fe9262SBjörn Töpel struct xdp_sock *xs = xdp_sk(sk); 107811fe9262SBjörn Töpel 107911fe9262SBjörn Töpel if (!sock_flag(sk, SOCK_DEAD)) 108011fe9262SBjörn Töpel return; 108111fe9262SBjörn Töpel 108211fe9262SBjörn Töpel xdp_put_umem(xs->umem); 108311fe9262SBjörn Töpel 108411fe9262SBjörn Töpel sk_refcnt_debug_dec(sk); 108511fe9262SBjörn Töpel } 108611fe9262SBjörn Töpel 1087c0c77d8fSBjörn Töpel static int xsk_create(struct net *net, struct socket *sock, int protocol, 1088c0c77d8fSBjörn Töpel int kern) 1089c0c77d8fSBjörn Töpel { 1090c0c77d8fSBjörn Töpel struct sock *sk; 1091c0c77d8fSBjörn Töpel struct xdp_sock *xs; 1092c0c77d8fSBjörn Töpel 1093c0c77d8fSBjörn Töpel if (!ns_capable(net->user_ns, CAP_NET_RAW)) 1094c0c77d8fSBjörn Töpel return -EPERM; 1095c0c77d8fSBjörn Töpel if (sock->type != SOCK_RAW) 1096c0c77d8fSBjörn Töpel return -ESOCKTNOSUPPORT; 1097c0c77d8fSBjörn Töpel 1098c0c77d8fSBjörn Töpel if (protocol) 1099c0c77d8fSBjörn Töpel return -EPROTONOSUPPORT; 1100c0c77d8fSBjörn Töpel 1101c0c77d8fSBjörn Töpel sock->state = SS_UNCONNECTED; 1102c0c77d8fSBjörn Töpel 1103c0c77d8fSBjörn Töpel sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern); 1104c0c77d8fSBjörn Töpel if (!sk) 1105c0c77d8fSBjörn Töpel return -ENOBUFS; 1106c0c77d8fSBjörn Töpel 1107c0c77d8fSBjörn Töpel sock->ops = &xsk_proto_ops; 1108c0c77d8fSBjörn Töpel 1109c0c77d8fSBjörn Töpel sock_init_data(sock, sk); 1110c0c77d8fSBjörn Töpel 1111c0c77d8fSBjörn Töpel sk->sk_family = PF_XDP; 1112c0c77d8fSBjörn Töpel 111311fe9262SBjörn Töpel sk->sk_destruct = xsk_destruct; 111411fe9262SBjörn Töpel sk_refcnt_debug_inc(sk); 111511fe9262SBjörn Töpel 1116cee27167SBjörn Töpel sock_set_flag(sk, SOCK_RCU_FREE); 1117cee27167SBjörn Töpel 1118c0c77d8fSBjörn Töpel xs = xdp_sk(sk); 1119455302d1SIlya Maximets xs->state = XSK_READY; 1120c0c77d8fSBjörn Töpel mutex_init(&xs->mutex); 1121bf0bdd13SIlya Maximets spin_lock_init(&xs->rx_lock); 1122a9744f7cSMagnus Karlsson spin_lock_init(&xs->tx_completion_lock); 1123c0c77d8fSBjörn Töpel 11240402acd6SBjörn Töpel INIT_LIST_HEAD(&xs->map_list); 11250402acd6SBjörn Töpel spin_lock_init(&xs->map_list_lock); 11260402acd6SBjörn Töpel 11271d0dc069SBjörn Töpel mutex_lock(&net->xdp.lock); 11281d0dc069SBjörn Töpel sk_add_node_rcu(sk, &net->xdp.list); 11291d0dc069SBjörn Töpel mutex_unlock(&net->xdp.lock); 11301d0dc069SBjörn Töpel 1131c0c77d8fSBjörn Töpel local_bh_disable(); 1132c0c77d8fSBjörn Töpel sock_prot_inuse_add(net, &xsk_proto, 1); 1133c0c77d8fSBjörn Töpel local_bh_enable(); 1134c0c77d8fSBjörn Töpel 1135c0c77d8fSBjörn Töpel return 0; 1136c0c77d8fSBjörn Töpel } 1137c0c77d8fSBjörn Töpel 1138c0c77d8fSBjörn Töpel static const struct net_proto_family xsk_family_ops = { 1139c0c77d8fSBjörn Töpel .family = PF_XDP, 1140c0c77d8fSBjörn Töpel .create = xsk_create, 1141c0c77d8fSBjörn Töpel .owner = THIS_MODULE, 1142c0c77d8fSBjörn Töpel }; 1143c0c77d8fSBjörn Töpel 1144455302d1SIlya Maximets static struct notifier_block xsk_netdev_notifier = { 1145455302d1SIlya Maximets .notifier_call = xsk_notifier, 1146455302d1SIlya Maximets }; 1147455302d1SIlya Maximets 11481d0dc069SBjörn Töpel static int __net_init xsk_net_init(struct net *net) 11491d0dc069SBjörn Töpel { 11501d0dc069SBjörn Töpel mutex_init(&net->xdp.lock); 11511d0dc069SBjörn Töpel INIT_HLIST_HEAD(&net->xdp.list); 11521d0dc069SBjörn Töpel return 0; 11531d0dc069SBjörn Töpel } 11541d0dc069SBjörn Töpel 11551d0dc069SBjörn Töpel static void __net_exit xsk_net_exit(struct net *net) 11561d0dc069SBjörn Töpel { 11571d0dc069SBjörn Töpel WARN_ON_ONCE(!hlist_empty(&net->xdp.list)); 11581d0dc069SBjörn Töpel } 11591d0dc069SBjörn Töpel 11601d0dc069SBjörn Töpel static struct pernet_operations xsk_net_ops = { 11611d0dc069SBjörn Töpel .init = xsk_net_init, 11621d0dc069SBjörn Töpel .exit = xsk_net_exit, 11631d0dc069SBjörn Töpel }; 11641d0dc069SBjörn Töpel 1165c0c77d8fSBjörn Töpel static int __init xsk_init(void) 1166c0c77d8fSBjörn Töpel { 1167e312b9e7SBjörn Töpel int err, cpu; 1168c0c77d8fSBjörn Töpel 1169c0c77d8fSBjörn Töpel err = proto_register(&xsk_proto, 0 /* no slab */); 1170c0c77d8fSBjörn Töpel if (err) 1171c0c77d8fSBjörn Töpel goto out; 1172c0c77d8fSBjörn Töpel 1173c0c77d8fSBjörn Töpel err = sock_register(&xsk_family_ops); 1174c0c77d8fSBjörn Töpel if (err) 1175c0c77d8fSBjörn Töpel goto out_proto; 1176c0c77d8fSBjörn Töpel 11771d0dc069SBjörn Töpel err = register_pernet_subsys(&xsk_net_ops); 11781d0dc069SBjörn Töpel if (err) 11791d0dc069SBjörn Töpel goto out_sk; 1180455302d1SIlya Maximets 1181455302d1SIlya Maximets err = register_netdevice_notifier(&xsk_netdev_notifier); 1182455302d1SIlya Maximets if (err) 1183455302d1SIlya Maximets goto out_pernet; 1184455302d1SIlya Maximets 1185e312b9e7SBjörn Töpel for_each_possible_cpu(cpu) 1186e312b9e7SBjörn Töpel INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu)); 1187c0c77d8fSBjörn Töpel return 0; 1188c0c77d8fSBjörn Töpel 1189455302d1SIlya Maximets out_pernet: 1190455302d1SIlya Maximets unregister_pernet_subsys(&xsk_net_ops); 11911d0dc069SBjörn Töpel out_sk: 11921d0dc069SBjörn Töpel sock_unregister(PF_XDP); 1193c0c77d8fSBjörn Töpel out_proto: 1194c0c77d8fSBjörn Töpel proto_unregister(&xsk_proto); 1195c0c77d8fSBjörn Töpel out: 1196c0c77d8fSBjörn Töpel return err; 1197c0c77d8fSBjörn Töpel } 1198c0c77d8fSBjörn Töpel 1199c0c77d8fSBjörn Töpel fs_initcall(xsk_init); 1200