// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/workqueue.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);

unsigned long iova_rcache_range(void)
{
	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
}

static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct iova_domain *iovad;

	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);

	free_cpu_cached_iovas(cpu, iovad);
	return 0;
}

static void free_global_cached_iovas(struct iova_domain *iovad);

static struct iova *to_iova(struct rb_node *node)
{
	return rb_entry(node, struct iova, node);
}

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
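
/*
 * A minimal, hypothetical setup sketch (kept under #if 0, not built):
 * one way a caller could combine iova_cache_get(), init_iova_domain()
 * and iova_domain_init_rcaches() from this file. The function name,
 * granule and start PFN below are illustrative assumptions only;
 * SZ_4K comes from <linux/sizes.h>.
 */
#if 0
static int example_iova_domain_setup(struct iova_domain *iovad)
{
	int ret;

	/* Take a reference on the global 'struct iova' kmem_cache. */
	ret = iova_cache_get();
	if (ret)
		return ret;

	/* 4 KiB granule; start at PFN 1 so IOVA 0 is never handed out. */
	init_iova_domain(iovad, SZ_4K, 1);

	/* Optionally enable the per-CPU/depot caches for the fast paths. */
	ret = iova_domain_init_rcaches(iovad);
	if (ret)
		iova_cache_put();
	return ret;
}
#endif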

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = to_iova(iovad->cached32_node);
	if (free == cached_iova ||
	    (free->pfn_hi < iovad->dma_32bit_pfn &&
	     free->pfn_lo >= cached_iova->pfn_lo))
		iovad->cached32_node = rb_next(&free->node);

	if (free->pfn_lo < iovad->dma_32bit_pfn)
		iovad->max32_alloc_size = iovad->dma_32bit_pfn;

	cached_iova = to_iova(iovad->cached_node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
	struct rb_node *node, *next;
	/*
	 * Ideally what we'd like to judge here is whether limit_pfn is close
	 * enough to the highest-allocated IOVA that starting the allocation
	 * walk from the anchor node will be quicker than this initial work to
	 * find an exact starting point (especially if that ends up being the
	 * anchor node anyway). This is an incredibly crude approximation which
	 * only really helps the most likely case, but is at least trivially easy.
	 */
	if (limit_pfn > iovad->dma_32bit_pfn)
		return &iovad->anchor.node;

	node = iovad->rbroot.rb_node;
	while (to_iova(node)->pfn_hi < limit_pfn)
		node = node->rb_right;

search_left:
	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
		node = node->rb_left;

	if (!node->rb_left)
		return node;

	next = node->rb_left;
	while (next->rb_right) {
		next = next->rb_right;
		if (to_iova(next)->pfn_lo >= limit_pfn) {
			node = next;
			goto search_left;
		}
	}

	return node;
}

/* Insert the iova into the domain rbtree while holding the writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = to_iova(*new);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1); /* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
			struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn, retry_pfn;
	unsigned long align_mask = ~0UL;
	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

	if (size_aligned)
		align_mask <<= fls_long(size - 1);

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (limit_pfn <= iovad->dma_32bit_pfn &&
			size >= iovad->max32_alloc_size)
		goto iova32_full;

	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = to_iova(curr);
	retry_pfn = curr_iova->pfn_hi;

retry:
	do {
		high_pfn = min(high_pfn, curr_iova->pfn_lo);
		new_pfn = (high_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = to_iova(curr);
	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

	if (high_pfn < size || new_pfn < low_pfn) {
		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
			high_pfn = limit_pfn;
			low_pfn = retry_pfn + 1;
			curr = iova_find_limit(iovad, limit_pfn);
			curr_iova = to_iova(curr);
			goto retry;
		}
		iovad->max32_alloc_size = size;
		goto iova32_full;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return 0;

iova32_full:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return -ENOMEM;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

static struct iova *alloc_iova_mem(void)
{
	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}

static void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}

int iova_cache_get(void)
{
	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		int ret;

		ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
					iova_cpuhp_dead);
		if (ret) {
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't register cpuhp handler\n");
			return ret;
		}

		iova_cache = kmem_cache_create(
			"iommu_iova", sizeof(struct iova), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache) {
			cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't create iova cache\n");
			return -ENOMEM;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users) {
		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
		kmem_cache_destroy(iova_cache);
	}
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
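
/*
 * A hypothetical sketch (kept under #if 0, not built) of the slow path
 * above: allocate a size-aligned range below the 32-bit boundary and
 * release it with __free_iova(). The function name and sizes are
 * illustrative assumptions only; DMA_BIT_MASK() is from
 * <linux/dma-mapping.h>.
 */
#if 0
static void example_alloc_iova(struct iova_domain *iovad)
{
	/* Constrain the allocation to the low 32 bits of IOVA space. */
	unsigned long limit_pfn = iova_pfn(iovad, DMA_BIT_MASK(32));
	struct iova *iova;

	/* Request 8 IOVA pages; pfn_lo comes back aligned to 8 pages. */
	iova = alloc_iova(iovad, 8, limit_pfn, true);
	if (!iova)
		return;

	__free_iova(iovad, iova);
}
#endif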

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = to_iova(node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that was previously allocated
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	if (!iova) {
		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
		return;
	}
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		size = roundup_pow_of_two(size);

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		free_global_cached_iovas(iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that was previously allocated
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
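
/*
 * A hypothetical sketch (kept under #if 0, not built) of the fast-path
 * pairing above: alloc_iova_fast() rounds a non-power-of-two size up
 * internally, and passing the same unadjusted size to free_iova_fast()
 * lands the range in the matching rcache bucket. The function name and
 * sizes are illustrative assumptions only; DMA_BIT_MASK() is from
 * <linux/dma-mapping.h>.
 */
#if 0
static void example_fast_path(struct iova_domain *iovad)
{
	unsigned long limit_pfn = iova_pfn(iovad, DMA_BIT_MASK(32));
	unsigned long pfn;

	/* 3 pages are allocated as 4 (rounded up); the order matches on free. */
	pfn = alloc_iova_fast(iovad, 3, limit_pfn, true);
	if (!pfn)
		return;

	free_iova_fast(iovad, pfn, 3);
}
#endif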

static void iova_domain_free_rcaches(struct iova_domain *iovad)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					    &iovad->cpuhp_dead);
	free_iova_rcaches(iovad);
}

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	if (iovad->rcaches)
		iova_domain_free_rcaches(iovad);

	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);
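
/*
 * A hypothetical teardown sketch (kept under #if 0, not built),
 * mirroring the setup sketch earlier in this file. The function name
 * is an illustrative assumption only.
 */
#if 0
static void example_iova_domain_teardown(struct iova_domain *iovad)
{
	/* Destroy the domain, then drop the global kmem_cache reference. */
	put_iova_domain(iovad);
	iova_cache_put();
}
#endif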

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = to_iova(node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this address is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = to_iova(node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
				(pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
				break;
	}

	/* We are here either because this is the first reserved node
	 * or because we need to insert the remaining non-overlapping
	 * address range.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
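
/*
 * A hypothetical sketch (kept under #if 0, not built) of a typical
 * reserve_iova() use: carve a fixed window out of the domain so that
 * alloc_iova() never hands those addresses out. The function name and
 * the base/size of the window are illustrative assumptions only; SZ_1M
 * is from <linux/sizes.h>.
 */
#if 0
static void example_reserve_window(struct iova_domain *iovad)
{
	dma_addr_t base = 0xfee00000;	/* made-up doorbell window */
	size_t size = SZ_1M;

	reserve_iova(iovad, iova_pfn(iovad, base),
		     iova_pfn(iovad, base + size - 1));
}
#endif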

/*
 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

/*
 * As kmalloc buffer sizes are fixed to powers of 2, 127 is chosen so that
 * 'iova_magazine' is exactly 1024 bytes and no memory is wasted. Since only
 * full magazines are inserted into the depot, we don't need to waste PFN
 * capacity on a separate list head either.
 */
#define IOVA_MAG_SIZE 127

#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)

struct iova_magazine {
	union {
		unsigned long size;
		struct iova_magazine *next;
	};
	unsigned long pfns[IOVA_MAG_SIZE];
};
static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
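
/*
 * Worked size check for the comment above, assuming a 64-bit
 * unsigned long: the size/next union is one word and pfns[] holds
 * IOVA_MAG_SIZE = 127 words, so sizeof(struct iova_magazine) is
 * (1 + 127) * 8 = 1024 bytes, a power of two, which is exactly what
 * the static_assert above enforces so kmalloc() wastes no space.
 */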

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

struct iova_rcache {
	spinlock_t lock;
	unsigned int depot_size;
	struct iova_magazine *depot;
	struct iova_cpu_rcache __percpu *cpu_rcaches;
	struct iova_domain *iovad;
	struct delayed_work work;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	struct iova_magazine *mag;

	mag = kmalloc(sizeof(*mag), flags);
	if (mag)
		mag->size = 0;

	return mag;
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0 ; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		if (WARN_ON(!iova))
			continue;

		remove_iova(iovad, iova);
		free_iova_mem(iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return mag->size == IOVA_MAG_SIZE;
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return mag->size == 0;
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	mag->pfns[mag->size++] = pfn;
}

static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
{
	struct iova_magazine *mag = rcache->depot;

	rcache->depot = mag->next;
	mag->size = IOVA_MAG_SIZE;
	rcache->depot_size--;
	return mag;
}

static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
{
	mag->next = rcache->depot;
	rcache->depot = mag;
	rcache->depot_size++;
}

static void iova_depot_work_func(struct work_struct *work)
{
	struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
	struct iova_magazine *mag = NULL;
	unsigned long flags;

	spin_lock_irqsave(&rcache->lock, flags);
	if (rcache->depot_size > num_online_cpus())
		mag = iova_depot_pop(rcache);
	spin_unlock_irqrestore(&rcache->lock, flags);

	if (mag) {
		iova_magazine_free_pfns(mag, rcache->iovad);
		iova_magazine_free(mag);
		schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
	}
}

int iova_domain_init_rcaches(struct iova_domain *iovad)
{
	unsigned int cpu;
	int i, ret;

	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
				 sizeof(struct iova_rcache),
				 GFP_KERNEL);
	if (!iovad->rcaches)
		return -ENOMEM;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		struct iova_cpu_rcache *cpu_rcache;
		struct iova_rcache *rcache;

		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->iovad = iovad;
		INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
						     cache_line_size());
		if (!rcache->cpu_rcaches) {
			ret = -ENOMEM;
			goto out_err;
		}
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);

			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
				ret = -ENOMEM;
				goto out_err;
			}
		}
	}

	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					       &iovad->cpuhp_dead);
	if (ret)
		goto out_err;
	return 0;

out_err:
	free_iova_rcaches(iovad);
	return ret;
}
EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if rcache is full and we can't free
 * space, in which case free_iova_fast() (our only caller) falls back to
 * free_iova() to return the range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			iova_depot_push(rcache, cpu_rcache->loaded);
			spin_unlock(&rcache->lock);
			schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = iova_depot_pop(rcache);
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;

	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		if (!rcache->cpu_rcaches)
			break;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		cancel_delayed_work_sync(&rcache->work);
		while (rcache->depot)
			iova_magazine_free(iova_depot_pop(rcache));
	}

	kfree(iovad->rcaches);
	iovad->rcaches = NULL;
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	unsigned long flags;

	for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_irqsave(&rcache->lock, flags);
		while (rcache->depot) {
			struct iova_magazine *mag = iova_depot_pop(rcache);

			iova_magazine_free_pfns(mag, iovad);
			iova_magazine_free(mag);
		}
		spin_unlock_irqrestore(&rcache->lock, flags);
	}
}
MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");