xref: /linux/io_uring/napi.c (revision 415ce0ea)
18d0c12a8SStefan Roesch // SPDX-License-Identifier: GPL-2.0
28d0c12a8SStefan Roesch 
38d0c12a8SStefan Roesch #include "io_uring.h"
48d0c12a8SStefan Roesch #include "napi.h"
58d0c12a8SStefan Roesch 
68d0c12a8SStefan Roesch #ifdef CONFIG_NET_RX_BUSY_POLL
78d0c12a8SStefan Roesch 
88d0c12a8SStefan Roesch /* Timeout for cleanout of stale entries. */
98d0c12a8SStefan Roesch #define NAPI_TIMEOUT		(60 * SEC_CONVERSION)
108d0c12a8SStefan Roesch 
/* One tracked napi instance; hashed by id and linked on the ring's list. */
struct io_napi_entry {
	unsigned int		napi_id;	/* id read from sk->sk_napi_id */
	struct list_head	list;		/* link on ctx->napi_list */

	unsigned long		timeout;	/* jiffies deadline; entry is stale after this */
	struct hlist_node	node;		/* link in the ctx->napi_ht hash table */

	struct rcu_head		rcu;		/* deferred free via kfree_rcu() */
};
208d0c12a8SStefan Roesch 
io_napi_hash_find(struct hlist_head * hash_list,unsigned int napi_id)218d0c12a8SStefan Roesch static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
228d0c12a8SStefan Roesch 					       unsigned int napi_id)
238d0c12a8SStefan Roesch {
248d0c12a8SStefan Roesch 	struct io_napi_entry *e;
258d0c12a8SStefan Roesch 
268d0c12a8SStefan Roesch 	hlist_for_each_entry_rcu(e, hash_list, node) {
278d0c12a8SStefan Roesch 		if (e->napi_id != napi_id)
288d0c12a8SStefan Roesch 			continue;
298d0c12a8SStefan Roesch 		e->timeout = jiffies + NAPI_TIMEOUT;
308d0c12a8SStefan Roesch 		return e;
318d0c12a8SStefan Roesch 	}
328d0c12a8SStefan Roesch 
338d0c12a8SStefan Roesch 	return NULL;
348d0c12a8SStefan Roesch }
358d0c12a8SStefan Roesch 
__io_napi_add(struct io_ring_ctx * ctx,struct socket * sock)368d0c12a8SStefan Roesch void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
378d0c12a8SStefan Roesch {
388d0c12a8SStefan Roesch 	struct hlist_head *hash_list;
398d0c12a8SStefan Roesch 	unsigned int napi_id;
408d0c12a8SStefan Roesch 	struct sock *sk;
418d0c12a8SStefan Roesch 	struct io_napi_entry *e;
428d0c12a8SStefan Roesch 
438d0c12a8SStefan Roesch 	sk = sock->sk;
448d0c12a8SStefan Roesch 	if (!sk)
458d0c12a8SStefan Roesch 		return;
468d0c12a8SStefan Roesch 
478d0c12a8SStefan Roesch 	napi_id = READ_ONCE(sk->sk_napi_id);
488d0c12a8SStefan Roesch 
498d0c12a8SStefan Roesch 	/* Non-NAPI IDs can be rejected. */
508d0c12a8SStefan Roesch 	if (napi_id < MIN_NAPI_ID)
518d0c12a8SStefan Roesch 		return;
528d0c12a8SStefan Roesch 
538d0c12a8SStefan Roesch 	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
548d0c12a8SStefan Roesch 
558d0c12a8SStefan Roesch 	rcu_read_lock();
568d0c12a8SStefan Roesch 	e = io_napi_hash_find(hash_list, napi_id);
578d0c12a8SStefan Roesch 	if (e) {
588d0c12a8SStefan Roesch 		e->timeout = jiffies + NAPI_TIMEOUT;
598d0c12a8SStefan Roesch 		rcu_read_unlock();
608d0c12a8SStefan Roesch 		return;
618d0c12a8SStefan Roesch 	}
628d0c12a8SStefan Roesch 	rcu_read_unlock();
638d0c12a8SStefan Roesch 
648d0c12a8SStefan Roesch 	e = kmalloc(sizeof(*e), GFP_NOWAIT);
658d0c12a8SStefan Roesch 	if (!e)
668d0c12a8SStefan Roesch 		return;
678d0c12a8SStefan Roesch 
688d0c12a8SStefan Roesch 	e->napi_id = napi_id;
698d0c12a8SStefan Roesch 	e->timeout = jiffies + NAPI_TIMEOUT;
708d0c12a8SStefan Roesch 
718d0c12a8SStefan Roesch 	spin_lock(&ctx->napi_lock);
728d0c12a8SStefan Roesch 	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
738d0c12a8SStefan Roesch 		spin_unlock(&ctx->napi_lock);
748d0c12a8SStefan Roesch 		kfree(e);
758d0c12a8SStefan Roesch 		return;
768d0c12a8SStefan Roesch 	}
778d0c12a8SStefan Roesch 
788d0c12a8SStefan Roesch 	hlist_add_tail_rcu(&e->node, hash_list);
798d0c12a8SStefan Roesch 	list_add_tail(&e->list, &ctx->napi_list);
808d0c12a8SStefan Roesch 	spin_unlock(&ctx->napi_lock);
818d0c12a8SStefan Roesch }
828d0c12a8SStefan Roesch 
/* Walk the hash table and drop every entry whose stale timeout has expired. */
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			/*
			 * hash_del_rcu() leaves e->node.next intact, so the
			 * non-_safe iteration above stays valid, and the
			 * actual free is deferred past a grace period via
			 * kfree_rcu() for the benefit of concurrent readers.
			 */
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}
988d0c12a8SStefan Roesch 
/* Prune expired entries, but only when the busy loop saw a stale one. */
static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (!is_stale)
		return;

	__io_napi_remove_stale(ctx);
}
1048d0c12a8SStefan Roesch 
io_napi_busy_loop_timeout(unsigned long start_time,unsigned long bp_usec)1058d0c12a8SStefan Roesch static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
1068d0c12a8SStefan Roesch 					     unsigned long bp_usec)
1078d0c12a8SStefan Roesch {
1088d0c12a8SStefan Roesch 	if (bp_usec) {
1098d0c12a8SStefan Roesch 		unsigned long end_time = start_time + bp_usec;
1108d0c12a8SStefan Roesch 		unsigned long now = busy_loop_current_time();
1118d0c12a8SStefan Roesch 
1128d0c12a8SStefan Roesch 		return time_after(now, end_time);
1138d0c12a8SStefan Roesch 	}
1148d0c12a8SStefan Roesch 
1158d0c12a8SStefan Roesch 	return true;
1168d0c12a8SStefan Roesch }
1178d0c12a8SStefan Roesch 
io_napi_busy_loop_should_end(void * data,unsigned long start_time)1188d0c12a8SStefan Roesch static bool io_napi_busy_loop_should_end(void *data,
1198d0c12a8SStefan Roesch 					 unsigned long start_time)
1208d0c12a8SStefan Roesch {
1218d0c12a8SStefan Roesch 	struct io_wait_queue *iowq = data;
1228d0c12a8SStefan Roesch 
1238d0c12a8SStefan Roesch 	if (signal_pending(current))
1248d0c12a8SStefan Roesch 		return true;
125428f1382SJens Axboe 	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
1268d0c12a8SStefan Roesch 		return true;
1278d0c12a8SStefan Roesch 	if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
1288d0c12a8SStefan Roesch 		return true;
1298d0c12a8SStefan Roesch 
1308d0c12a8SStefan Roesch 	return false;
1318d0c12a8SStefan Roesch }
1328d0c12a8SStefan Roesch 
__io_napi_do_busy_loop(struct io_ring_ctx * ctx,void * loop_end_arg)1338d0c12a8SStefan Roesch static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
1348d0c12a8SStefan Roesch 				   void *loop_end_arg)
1358d0c12a8SStefan Roesch {
1368d0c12a8SStefan Roesch 	struct io_napi_entry *e;
1378d0c12a8SStefan Roesch 	bool (*loop_end)(void *, unsigned long) = NULL;
1388d0c12a8SStefan Roesch 	bool is_stale = false;
1398d0c12a8SStefan Roesch 
1408d0c12a8SStefan Roesch 	if (loop_end_arg)
1418d0c12a8SStefan Roesch 		loop_end = io_napi_busy_loop_should_end;
1428d0c12a8SStefan Roesch 
1438d0c12a8SStefan Roesch 	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
1448d0c12a8SStefan Roesch 		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
1458d0c12a8SStefan Roesch 				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
1468d0c12a8SStefan Roesch 
1478d0c12a8SStefan Roesch 		if (time_after(jiffies, e->timeout))
1488d0c12a8SStefan Roesch 			is_stale = true;
1498d0c12a8SStefan Roesch 	}
1508d0c12a8SStefan Roesch 
1518d0c12a8SStefan Roesch 	return is_stale;
1528d0c12a8SStefan Roesch }
1538d0c12a8SStefan Roesch 
/* Run the napi busy loop for a task about to block waiting on completions. */
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		/* non-NULL loop_end_arg makes the inner poll self-terminating */
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}
1758d0c12a8SStefan Roesch 
1768d0c12a8SStefan Roesch /*
1778d0c12a8SStefan Roesch  * io_napi_init() - Init napi settings
1788d0c12a8SStefan Roesch  * @ctx: pointer to io-uring context structure
1798d0c12a8SStefan Roesch  *
1808d0c12a8SStefan Roesch  * Init napi settings in the io-uring context.
1818d0c12a8SStefan Roesch  */
io_napi_init(struct io_ring_ctx * ctx)1828d0c12a8SStefan Roesch void io_napi_init(struct io_ring_ctx *ctx)
1838d0c12a8SStefan Roesch {
1848d0c12a8SStefan Roesch 	INIT_LIST_HEAD(&ctx->napi_list);
1858d0c12a8SStefan Roesch 	spin_lock_init(&ctx->napi_lock);
1868d0c12a8SStefan Roesch 	ctx->napi_prefer_busy_poll = false;
1878d0c12a8SStefan Roesch 	ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
1888d0c12a8SStefan Roesch }
1898d0c12a8SStefan Roesch 
1908d0c12a8SStefan Roesch /*
1918d0c12a8SStefan Roesch  * io_napi_free() - Deallocate napi
1928d0c12a8SStefan Roesch  * @ctx: pointer to io-uring context structure
1938d0c12a8SStefan Roesch  *
1948d0c12a8SStefan Roesch  * Free the napi list and the hash table in the io-uring context.
1958d0c12a8SStefan Roesch  */
io_napi_free(struct io_ring_ctx * ctx)1968d0c12a8SStefan Roesch void io_napi_free(struct io_ring_ctx *ctx)
1978d0c12a8SStefan Roesch {
1988d0c12a8SStefan Roesch 	struct io_napi_entry *e;
1998d0c12a8SStefan Roesch 	LIST_HEAD(napi_list);
2008d0c12a8SStefan Roesch 	unsigned int i;
2018d0c12a8SStefan Roesch 
2028d0c12a8SStefan Roesch 	spin_lock(&ctx->napi_lock);
2038d0c12a8SStefan Roesch 	hash_for_each(ctx->napi_ht, i, e, node) {
2048d0c12a8SStefan Roesch 		hash_del_rcu(&e->node);
2058d0c12a8SStefan Roesch 		kfree_rcu(e, rcu);
2068d0c12a8SStefan Roesch 	}
2078d0c12a8SStefan Roesch 	spin_unlock(&ctx->napi_lock);
2088d0c12a8SStefan Roesch }
2098d0c12a8SStefan Roesch 
2108d0c12a8SStefan Roesch /*
211ef1186c1SStefan Roesch  * io_napi_register() - Register napi with io-uring
212ef1186c1SStefan Roesch  * @ctx: pointer to io-uring context structure
213ef1186c1SStefan Roesch  * @arg: pointer to io_uring_napi structure
214ef1186c1SStefan Roesch  *
215ef1186c1SStefan Roesch  * Register napi in the io-uring context.
216ef1186c1SStefan Roesch  */
/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context. The previous settings are copied
 * back to user space before the new values take effect.
 *
 * Return: 0 on success, -EFAULT on bad user memory, -EINVAL on non-zero
 * padding/reserved fields.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		/* READ_ONCE() pairs with the WRITE_ONCE() updates below */
		.busy_poll_to 	  = READ_ONCE(ctx->napi_busy_poll_to),
		.prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll)
	};
	struct io_uring_napi napi;

	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
}
238ef1186c1SStefan Roesch 
239ef1186c1SStefan Roesch /*
240ef1186c1SStefan Roesch  * io_napi_unregister() - Unregister napi with io-uring
241ef1186c1SStefan Roesch  * @ctx: pointer to io-uring context structure
242ef1186c1SStefan Roesch  * @arg: pointer to io_uring_napi structure
243ef1186c1SStefan Roesch  *
244ef1186c1SStefan Roesch  * Unregister napi. If arg has been specified copy the busy poll timeout and
245ef1186c1SStefan Roesch  * prefer busy poll setting to the passed in structure.
246ef1186c1SStefan Roesch  */
/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified copy the busy poll timeout and
 * prefer busy poll setting to the passed in structure.
 *
 * Return: 0 on success, -EFAULT when @arg is given but cannot be written.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		/* READ_ONCE() pairs with the WRITE_ONCE() updates below */
		.busy_poll_to 	  = READ_ONCE(ctx->napi_busy_poll_to),
		.prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll)
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
}
262ef1186c1SStefan Roesch 
263ef1186c1SStefan Roesch /*
264*415ce0eaSJens Axboe  * __io_napi_adjust_timeout() - adjust busy loop timeout
2658d0c12a8SStefan Roesch  * @ctx: pointer to io-uring context structure
2668d0c12a8SStefan Roesch  * @iowq: pointer to io wait queue
2678d0c12a8SStefan Roesch  * @ts: pointer to timespec or NULL
2688d0c12a8SStefan Roesch  *
2698d0c12a8SStefan Roesch  * Adjust the busy loop timeout according to timespec and busy poll timeout.
270*415ce0eaSJens Axboe  * If the specified NAPI timeout is bigger than the wait timeout, then adjust
271*415ce0eaSJens Axboe  * the NAPI timeout accordingly.
2728d0c12a8SStefan Roesch  */
/*
 * __io_napi_adjust_timeout() - adjust busy loop timeout
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 * @ts: pointer to timespec or NULL
 *
 * Adjust the busy loop timeout according to timespec and busy poll timeout.
 * If the specified NAPI timeout is bigger than the wait timeout, then adjust
 * the NAPI timeout accordingly.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      struct timespec64 *ts)
{
	unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);

	if (ts) {
		struct timespec64 poll_to_ts;

		/* napi_busy_poll_to is in usec; scale to ns for comparison */
		poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
		if (timespec64_compare(ts, &poll_to_ts) < 0) {
			s64 poll_to_ns = timespec64_to_ns(ts);
			if (poll_to_ns > 0) {
				/* round the shorter wait time up to whole usecs */
				u64 val = poll_to_ns + 999;
				do_div(val, (s64) 1000);
				poll_to = val;
			}
		}
	}

	iowq->napi_busy_poll_to = poll_to;
}
2948d0c12a8SStefan Roesch 
2958d0c12a8SStefan Roesch /*
2968d0c12a8SStefan Roesch  * __io_napi_busy_loop() - execute busy poll loop
2978d0c12a8SStefan Roesch  * @ctx: pointer to io-uring context structure
2988d0c12a8SStefan Roesch  * @iowq: pointer to io wait queue
2998d0c12a8SStefan Roesch  *
3008d0c12a8SStefan Roesch  * Execute the busy poll loop and merge the spliced off list.
3018d0c12a8SStefan Roesch  */
__io_napi_busy_loop(struct io_ring_ctx * ctx,struct io_wait_queue * iowq)3028d0c12a8SStefan Roesch void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
3038d0c12a8SStefan Roesch {
3048d0c12a8SStefan Roesch 	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
3058d0c12a8SStefan Roesch 
306b4ccc4ddSJens Axboe 	if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled)
3078d0c12a8SStefan Roesch 		io_napi_blocking_busy_loop(ctx, iowq);
3088d0c12a8SStefan Roesch }
3098d0c12a8SStefan Roesch 
310ff183d42SStefan Roesch /*
311ff183d42SStefan Roesch  * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
312ff183d42SStefan Roesch  * @ctx: pointer to io-uring context structure
313ff183d42SStefan Roesch  *
314ff183d42SStefan Roesch  * Splice of the napi list and execute the napi busy poll loop.
315ff183d42SStefan Roesch  */
io_napi_sqpoll_busy_poll(struct io_ring_ctx * ctx)316ff183d42SStefan Roesch int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
317ff183d42SStefan Roesch {
318ff183d42SStefan Roesch 	LIST_HEAD(napi_list);
319ff183d42SStefan Roesch 	bool is_stale = false;
320ff183d42SStefan Roesch 
321ff183d42SStefan Roesch 	if (!READ_ONCE(ctx->napi_busy_poll_to))
322ff183d42SStefan Roesch 		return 0;
323ff183d42SStefan Roesch 	if (list_empty_careful(&ctx->napi_list))
324ff183d42SStefan Roesch 		return 0;
325ff183d42SStefan Roesch 
326ff183d42SStefan Roesch 	rcu_read_lock();
327ff183d42SStefan Roesch 	is_stale = __io_napi_do_busy_loop(ctx, NULL);
328ff183d42SStefan Roesch 	rcu_read_unlock();
329ff183d42SStefan Roesch 
330ff183d42SStefan Roesch 	io_napi_remove_stale(ctx, is_stale);
331ff183d42SStefan Roesch 	return 1;
332ff183d42SStefan Roesch }
333ff183d42SStefan Roesch 
3348d0c12a8SStefan Roesch #endif
335