xref: /linux/include/linux/ptr_ring.h (revision f86fd32d)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  *	Definitions for the 'struct ptr_ring' datastructure.
4  *
5  *	Author:
6  *		Michael S. Tsirkin <mst@redhat.com>
7  *
8  *	Copyright (C) 2016 Red Hat, Inc.
9  *
10  *	This is a limited-size FIFO maintaining pointers in FIFO order, with
11  *	one CPU producing entries and another consuming entries from a FIFO.
12  *
13  *	This implementation tries to minimize cache-contention when there is a
14  *	single producer and a single consumer CPU.
15  */
16 
17 #ifndef _LINUX_PTR_RING_H
18 #define _LINUX_PTR_RING_H 1
19 
20 #ifdef __KERNEL__
21 #include <linux/spinlock.h>
22 #include <linux/cache.h>
23 #include <linux/types.h>
24 #include <linux/compiler.h>
25 #include <linux/slab.h>
26 #include <linux/mm.h>
27 #include <asm/errno.h>
28 #endif
29 
30 struct ptr_ring {
31 	int producer ____cacheline_aligned_in_smp;
32 	spinlock_t producer_lock;
33 	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
34 	int consumer_tail; /* next entry to invalidate */
35 	spinlock_t consumer_lock;
36 	/* Shared consumer/producer data */
37 	/* Read-only by both the producer and the consumer */
38 	int size ____cacheline_aligned_in_smp; /* max entries in queue */
39 	int batch; /* number of entries to consume in a batch */
40 	void **queue;
41 };
42 
43 /* Note: callers invoking this in a loop must use a compiler barrier,
44  * for example cpu_relax().
45  *
46  * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
47  * see e.g. ptr_ring_full.
48  */
49 static inline bool __ptr_ring_full(struct ptr_ring *r)
50 {
51 	return r->queue[r->producer];
52 }
53 
54 static inline bool ptr_ring_full(struct ptr_ring *r)
55 {
56 	bool ret;
57 
58 	spin_lock(&r->producer_lock);
59 	ret = __ptr_ring_full(r);
60 	spin_unlock(&r->producer_lock);
61 
62 	return ret;
63 }
64 
65 static inline bool ptr_ring_full_irq(struct ptr_ring *r)
66 {
67 	bool ret;
68 
69 	spin_lock_irq(&r->producer_lock);
70 	ret = __ptr_ring_full(r);
71 	spin_unlock_irq(&r->producer_lock);
72 
73 	return ret;
74 }
75 
76 static inline bool ptr_ring_full_any(struct ptr_ring *r)
77 {
78 	unsigned long flags;
79 	bool ret;
80 
81 	spin_lock_irqsave(&r->producer_lock, flags);
82 	ret = __ptr_ring_full(r);
83 	spin_unlock_irqrestore(&r->producer_lock, flags);
84 
85 	return ret;
86 }
87 
88 static inline bool ptr_ring_full_bh(struct ptr_ring *r)
89 {
90 	bool ret;
91 
92 	spin_lock_bh(&r->producer_lock);
93 	ret = __ptr_ring_full(r);
94 	spin_unlock_bh(&r->producer_lock);
95 
96 	return ret;
97 }
98 
99 /* Note: callers invoking this in a loop must use a compiler barrier,
100  * for example cpu_relax(). Callers must hold producer_lock.
101  * Callers are responsible for making sure pointer that is being queued
102  * points to a valid data.
103  */
104 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
105 {
106 	if (unlikely(!r->size) || r->queue[r->producer])
107 		return -ENOSPC;
108 
109 	/* Make sure the pointer we are storing points to a valid data. */
110 	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
111 	smp_wmb();
112 
113 	WRITE_ONCE(r->queue[r->producer++], ptr);
114 	if (unlikely(r->producer >= r->size))
115 		r->producer = 0;
116 	return 0;
117 }
118 
119 /*
120  * Note: resize (below) nests producer lock within consumer lock, so if you
121  * consume in interrupt or BH context, you must disable interrupts/BH when
122  * calling this.
123  */
124 static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
125 {
126 	int ret;
127 
128 	spin_lock(&r->producer_lock);
129 	ret = __ptr_ring_produce(r, ptr);
130 	spin_unlock(&r->producer_lock);
131 
132 	return ret;
133 }
134 
135 static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
136 {
137 	int ret;
138 
139 	spin_lock_irq(&r->producer_lock);
140 	ret = __ptr_ring_produce(r, ptr);
141 	spin_unlock_irq(&r->producer_lock);
142 
143 	return ret;
144 }
145 
146 static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
147 {
148 	unsigned long flags;
149 	int ret;
150 
151 	spin_lock_irqsave(&r->producer_lock, flags);
152 	ret = __ptr_ring_produce(r, ptr);
153 	spin_unlock_irqrestore(&r->producer_lock, flags);
154 
155 	return ret;
156 }
157 
158 static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
159 {
160 	int ret;
161 
162 	spin_lock_bh(&r->producer_lock);
163 	ret = __ptr_ring_produce(r, ptr);
164 	spin_unlock_bh(&r->producer_lock);
165 
166 	return ret;
167 }
168 
169 static inline void *__ptr_ring_peek(struct ptr_ring *r)
170 {
171 	if (likely(r->size))
172 		return READ_ONCE(r->queue[r->consumer_head]);
173 	return NULL;
174 }
175 
176 /*
177  * Test ring empty status without taking any locks.
178  *
179  * NB: This is only safe to call if ring is never resized.
180  *
181  * However, if some other CPU consumes ring entries at the same time, the value
182  * returned is not guaranteed to be correct.
183  *
184  * In this case - to avoid incorrectly detecting the ring
185  * as empty - the CPU consuming the ring entries is responsible
186  * for either consuming all ring entries until the ring is empty,
187  * or synchronizing with some other CPU and causing it to
188  * re-test __ptr_ring_empty and/or consume the ring enteries
189  * after the synchronization point.
190  *
191  * Note: callers invoking this in a loop must use a compiler barrier,
192  * for example cpu_relax().
193  */
194 static inline bool __ptr_ring_empty(struct ptr_ring *r)
195 {
196 	if (likely(r->size))
197 		return !r->queue[READ_ONCE(r->consumer_head)];
198 	return true;
199 }
200 
201 static inline bool ptr_ring_empty(struct ptr_ring *r)
202 {
203 	bool ret;
204 
205 	spin_lock(&r->consumer_lock);
206 	ret = __ptr_ring_empty(r);
207 	spin_unlock(&r->consumer_lock);
208 
209 	return ret;
210 }
211 
212 static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
213 {
214 	bool ret;
215 
216 	spin_lock_irq(&r->consumer_lock);
217 	ret = __ptr_ring_empty(r);
218 	spin_unlock_irq(&r->consumer_lock);
219 
220 	return ret;
221 }
222 
223 static inline bool ptr_ring_empty_any(struct ptr_ring *r)
224 {
225 	unsigned long flags;
226 	bool ret;
227 
228 	spin_lock_irqsave(&r->consumer_lock, flags);
229 	ret = __ptr_ring_empty(r);
230 	spin_unlock_irqrestore(&r->consumer_lock, flags);
231 
232 	return ret;
233 }
234 
235 static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
236 {
237 	bool ret;
238 
239 	spin_lock_bh(&r->consumer_lock);
240 	ret = __ptr_ring_empty(r);
241 	spin_unlock_bh(&r->consumer_lock);
242 
243 	return ret;
244 }
245 
246 /* Must only be called after __ptr_ring_peek returned !NULL */
247 static inline void __ptr_ring_discard_one(struct ptr_ring *r)
248 {
249 	/* Fundamentally, what we want to do is update consumer
250 	 * index and zero out the entry so producer can reuse it.
251 	 * Doing it naively at each consume would be as simple as:
252 	 *       consumer = r->consumer;
253 	 *       r->queue[consumer++] = NULL;
254 	 *       if (unlikely(consumer >= r->size))
255 	 *               consumer = 0;
256 	 *       r->consumer = consumer;
257 	 * but that is suboptimal when the ring is full as producer is writing
258 	 * out new entries in the same cache line.  Defer these updates until a
259 	 * batch of entries has been consumed.
260 	 */
261 	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
262 	 * to work correctly.
263 	 */
264 	int consumer_head = r->consumer_head;
265 	int head = consumer_head++;
266 
267 	/* Once we have processed enough entries invalidate them in
268 	 * the ring all at once so producer can reuse their space in the ring.
269 	 * We also do this when we reach end of the ring - not mandatory
270 	 * but helps keep the implementation simple.
271 	 */
272 	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
273 		     consumer_head >= r->size)) {
274 		/* Zero out entries in the reverse order: this way we touch the
275 		 * cache line that producer might currently be reading the last;
276 		 * producer won't make progress and touch other cache lines
277 		 * besides the first one until we write out all entries.
278 		 */
279 		while (likely(head >= r->consumer_tail))
280 			r->queue[head--] = NULL;
281 		r->consumer_tail = consumer_head;
282 	}
283 	if (unlikely(consumer_head >= r->size)) {
284 		consumer_head = 0;
285 		r->consumer_tail = 0;
286 	}
287 	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
288 	WRITE_ONCE(r->consumer_head, consumer_head);
289 }
290 
/*
 * Remove and return the entry at consumer_head, or return NULL when
 * __ptr_ring_peek finds nothing there.
 */
static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	/*
	 * The READ_ONCE in __ptr_ring_peek guarantees that anyone
	 * accessing data through the pointer is up to date. Pairs
	 * with smp_wmb in __ptr_ring_produce.
	 */
	void *entry = __ptr_ring_peek(r);

	if (!entry)
		return NULL;

	__ptr_ring_discard_one(r);
	return entry;
}
305 
/*
 * Consume up to @n entries into @array, stopping early when
 * __ptr_ring_consume returns NULL.  Returns the number of entries stored.
 */
static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	int count = 0;

	while (count < n) {
		void *entry = __ptr_ring_consume(r);

		if (!entry)
			break;
		array[count++] = entry;
	}

	return count;
}
321 
322 /*
323  * Note: resize (below) nests producer lock within consumer lock, so if you
324  * call this in interrupt or BH context, you must disable interrupts/BH when
325  * producing.
326  */
327 static inline void *ptr_ring_consume(struct ptr_ring *r)
328 {
329 	void *ptr;
330 
331 	spin_lock(&r->consumer_lock);
332 	ptr = __ptr_ring_consume(r);
333 	spin_unlock(&r->consumer_lock);
334 
335 	return ptr;
336 }
337 
338 static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
339 {
340 	void *ptr;
341 
342 	spin_lock_irq(&r->consumer_lock);
343 	ptr = __ptr_ring_consume(r);
344 	spin_unlock_irq(&r->consumer_lock);
345 
346 	return ptr;
347 }
348 
349 static inline void *ptr_ring_consume_any(struct ptr_ring *r)
350 {
351 	unsigned long flags;
352 	void *ptr;
353 
354 	spin_lock_irqsave(&r->consumer_lock, flags);
355 	ptr = __ptr_ring_consume(r);
356 	spin_unlock_irqrestore(&r->consumer_lock, flags);
357 
358 	return ptr;
359 }
360 
361 static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
362 {
363 	void *ptr;
364 
365 	spin_lock_bh(&r->consumer_lock);
366 	ptr = __ptr_ring_consume(r);
367 	spin_unlock_bh(&r->consumer_lock);
368 
369 	return ptr;
370 }
371 
372 static inline int ptr_ring_consume_batched(struct ptr_ring *r,
373 					   void **array, int n)
374 {
375 	int ret;
376 
377 	spin_lock(&r->consumer_lock);
378 	ret = __ptr_ring_consume_batched(r, array, n);
379 	spin_unlock(&r->consumer_lock);
380 
381 	return ret;
382 }
383 
384 static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
385 					       void **array, int n)
386 {
387 	int ret;
388 
389 	spin_lock_irq(&r->consumer_lock);
390 	ret = __ptr_ring_consume_batched(r, array, n);
391 	spin_unlock_irq(&r->consumer_lock);
392 
393 	return ret;
394 }
395 
396 static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
397 					       void **array, int n)
398 {
399 	unsigned long flags;
400 	int ret;
401 
402 	spin_lock_irqsave(&r->consumer_lock, flags);
403 	ret = __ptr_ring_consume_batched(r, array, n);
404 	spin_unlock_irqrestore(&r->consumer_lock, flags);
405 
406 	return ret;
407 }
408 
409 static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
410 					      void **array, int n)
411 {
412 	int ret;
413 
414 	spin_lock_bh(&r->consumer_lock);
415 	ret = __ptr_ring_consume_batched(r, array, n);
416 	spin_unlock_bh(&r->consumer_lock);
417 
418 	return ret;
419 }
420 
/*
 * Call function @f on the entry returned by __ptr_ring_peek(@r), without
 * discarding that entry from the FIFO.  The entry is passed as a void
 * pointer (NULL if the ring has nothing to peek at), so @f receives it
 * converted to its own parameter type.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) \
	((f)(__ptr_ring_peek(r)))
426 
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock/spin_unlock and yield the value returned by @f.
 *
 * @r is evaluated exactly once into a local, so an argument expression
 * with side effects is safe and lock, peek and unlock are guaranteed to
 * operate on the same ring.
 */
#define PTR_RING_PEEK_CALL(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})
435 
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock_irq/spin_unlock_irq and yield the value returned by @f.
 *
 * @r is evaluated exactly once into a local, so an argument expression
 * with side effects is safe and lock, peek and unlock are guaranteed to
 * operate on the same ring.
 */
#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_irq(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock_irq(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})
444 
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock_bh/spin_unlock_bh and yield the value returned by @f.
 *
 * @r is evaluated exactly once into a local, so an argument expression
 * with side effects is safe and lock, peek and unlock are guaranteed to
 * operate on the same ring.
 */
#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_bh(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock_bh(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})
453 
/*
 * Evaluate __PTR_RING_PEEK_CALL(r, f) with consumer_lock held via
 * spin_lock_irqsave/spin_unlock_irqrestore and yield the value returned
 * by @f.
 *
 * @r is evaluated exactly once into a local, so an argument expression
 * with side effects is safe and lock, peek and unlock are guaranteed to
 * operate on the same ring.
 */
#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	unsigned long __PTR_RING_PEEK_CALL_f;\
	\
	spin_lock_irqsave(&__PTR_RING_PEEK_CALL_r->consumer_lock, \
			  __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock_irqrestore(&__PTR_RING_PEEK_CALL_r->consumer_lock, \
			       __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v; \
})
463 
464 /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
465  * documentation for vmalloc for which of them are legal.
466  */
467 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
468 {
469 	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
470 		return NULL;
471 	return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
472 }
473 
474 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
475 {
476 	r->size = size;
477 	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
478 	/* We need to set batch at least to 1 to make logic
479 	 * in __ptr_ring_discard_one work correctly.
480 	 * Batching too much (because ring is small) would cause a lot of
481 	 * burstiness. Needs tuning, for now disable batching.
482 	 */
483 	if (r->batch > r->size / 2 || !r->batch)
484 		r->batch = 1;
485 }
486 
487 static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
488 {
489 	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
490 	if (!r->queue)
491 		return -ENOMEM;
492 
493 	__ptr_ring_set_size(r, size);
494 	r->producer = r->consumer_head = r->consumer_tail = 0;
495 	spin_lock_init(&r->producer_lock);
496 	spin_lock_init(&r->consumer_lock);
497 
498 	return 0;
499 }
500 
501 /*
502  * Return entries into ring. Destroy entries that don't fit.
503  *
504  * Note: this is expected to be a rare slow path operation.
505  *
506  * Note: producer lock is nested within consumer lock, so if you
507  * resize you must make sure all uses nest correctly.
508  * In particular if you consume ring in interrupt or BH context, you must
509  * disable interrupts/BH when doing so.
510  */
511 static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
512 				      void (*destroy)(void *))
513 {
514 	unsigned long flags;
515 	int head;
516 
517 	spin_lock_irqsave(&r->consumer_lock, flags);
518 	spin_lock(&r->producer_lock);
519 
520 	if (!r->size)
521 		goto done;
522 
523 	/*
524 	 * Clean out buffered entries (for simplicity). This way following code
525 	 * can test entries for NULL and if not assume they are valid.
526 	 */
527 	head = r->consumer_head - 1;
528 	while (likely(head >= r->consumer_tail))
529 		r->queue[head--] = NULL;
530 	r->consumer_tail = r->consumer_head;
531 
532 	/*
533 	 * Go over entries in batch, start moving head back and copy entries.
534 	 * Stop when we run into previously unconsumed entries.
535 	 */
536 	while (n) {
537 		head = r->consumer_head - 1;
538 		if (head < 0)
539 			head = r->size - 1;
540 		if (r->queue[head]) {
541 			/* This batch entry will have to be destroyed. */
542 			goto done;
543 		}
544 		r->queue[head] = batch[--n];
545 		r->consumer_tail = head;
546 		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
547 		WRITE_ONCE(r->consumer_head, head);
548 	}
549 
550 done:
551 	/* Destroy all entries left in the batch. */
552 	while (n)
553 		destroy(batch[--n]);
554 	spin_unlock(&r->producer_lock);
555 	spin_unlock_irqrestore(&r->consumer_lock, flags);
556 }
557 
558 static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
559 					   int size, gfp_t gfp,
560 					   void (*destroy)(void *))
561 {
562 	int producer = 0;
563 	void **old;
564 	void *ptr;
565 
566 	while ((ptr = __ptr_ring_consume(r)))
567 		if (producer < size)
568 			queue[producer++] = ptr;
569 		else if (destroy)
570 			destroy(ptr);
571 
572 	if (producer >= size)
573 		producer = 0;
574 	__ptr_ring_set_size(r, size);
575 	r->producer = producer;
576 	r->consumer_head = 0;
577 	r->consumer_tail = 0;
578 	old = r->queue;
579 	r->queue = queue;
580 
581 	return old;
582 }
583 
584 /*
585  * Note: producer lock is nested within consumer lock, so if you
586  * resize you must make sure all uses nest correctly.
587  * In particular if you consume ring in interrupt or BH context, you must
588  * disable interrupts/BH when doing so.
589  */
590 static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
591 				  void (*destroy)(void *))
592 {
593 	unsigned long flags;
594 	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
595 	void **old;
596 
597 	if (!queue)
598 		return -ENOMEM;
599 
600 	spin_lock_irqsave(&(r)->consumer_lock, flags);
601 	spin_lock(&(r)->producer_lock);
602 
603 	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
604 
605 	spin_unlock(&(r)->producer_lock);
606 	spin_unlock_irqrestore(&(r)->consumer_lock, flags);
607 
608 	kvfree(old);
609 
610 	return 0;
611 }
612 
613 /*
614  * Note: producer lock is nested within consumer lock, so if you
615  * resize you must make sure all uses nest correctly.
616  * In particular if you consume ring in interrupt or BH context, you must
617  * disable interrupts/BH when doing so.
618  */
619 static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
620 					   unsigned int nrings,
621 					   int size,
622 					   gfp_t gfp, void (*destroy)(void *))
623 {
624 	unsigned long flags;
625 	void ***queues;
626 	int i;
627 
628 	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
629 	if (!queues)
630 		goto noqueues;
631 
632 	for (i = 0; i < nrings; ++i) {
633 		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
634 		if (!queues[i])
635 			goto nomem;
636 	}
637 
638 	for (i = 0; i < nrings; ++i) {
639 		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
640 		spin_lock(&(rings[i])->producer_lock);
641 		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
642 						  size, gfp, destroy);
643 		spin_unlock(&(rings[i])->producer_lock);
644 		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
645 	}
646 
647 	for (i = 0; i < nrings; ++i)
648 		kvfree(queues[i]);
649 
650 	kfree(queues);
651 
652 	return 0;
653 
654 nomem:
655 	while (--i >= 0)
656 		kvfree(queues[i]);
657 
658 	kfree(queues);
659 
660 noqueues:
661 	return -ENOMEM;
662 }
663 
664 static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
665 {
666 	void *ptr;
667 
668 	if (destroy)
669 		while ((ptr = ptr_ring_consume(r)))
670 			destroy(ptr);
671 	kvfree(r->queue);
672 }
673 
674 #endif /* _LINUX_PTR_RING_H  */
675