xref: /linux/drivers/iommu/iova.c (revision 2da68a77)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2009, Intel Corporation.
4  *
5  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
6  */
7 
8 #include <linux/iova.h>
9 #include <linux/module.h>
10 #include <linux/slab.h>
11 #include <linux/smp.h>
12 #include <linux/bitops.h>
13 #include <linux/cpu.h>
14 
/*
 * The anchor node sits above the top of the usable address space. Both
 * pfn_lo and pfn_hi of the domain's embedded anchor are set to this value,
 * and free_iova_mem() compares against it so the anchor is never handed to
 * the slab allocator. The expansion is parenthesised so it stays a single
 * operand wherever the macro is used.
 */
#define IOVA_ANCHOR	(~0UL)

/*
 * Number of range-cache size classes: ranges whose order (log2 of the size
 * in pages) is 0 .. IOVA_RANGE_CACHE_MAX_SIZE - 1 are cacheable.
 */
#define IOVA_RANGE_CACHE_MAX_SIZE 6
19 
/*
 * Forward declarations of the range-cache (rcache) helpers. They are defined
 * after the allocation and free paths in this file that call them.
 */
static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
28 
29 unsigned long iova_rcache_range(void)
30 {
31 	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
32 }
33 
34 static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
35 {
36 	struct iova_domain *iovad;
37 
38 	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
39 
40 	free_cpu_cached_iovas(cpu, iovad);
41 	return 0;
42 }
43 
/* Forward declaration; alloc_iova_fast() calls this when flushing the rcache. */
static void free_global_cached_iovas(struct iova_domain *iovad);
45 
46 static struct iova *to_iova(struct rb_node *node)
47 {
48 	return rb_entry(node, struct iova, node);
49 }
50 
51 void
52 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
53 	unsigned long start_pfn)
54 {
55 	/*
56 	 * IOVA granularity will normally be equal to the smallest
57 	 * supported IOMMU page size; both *must* be capable of
58 	 * representing individual CPU pages exactly.
59 	 */
60 	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
61 
62 	spin_lock_init(&iovad->iova_rbtree_lock);
63 	iovad->rbroot = RB_ROOT;
64 	iovad->cached_node = &iovad->anchor.node;
65 	iovad->cached32_node = &iovad->anchor.node;
66 	iovad->granule = granule;
67 	iovad->start_pfn = start_pfn;
68 	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
69 	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
70 	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
71 	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
72 	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
73 }
74 EXPORT_SYMBOL_GPL(init_iova_domain);
75 
76 static struct rb_node *
77 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
78 {
79 	if (limit_pfn <= iovad->dma_32bit_pfn)
80 		return iovad->cached32_node;
81 
82 	return iovad->cached_node;
83 }
84 
85 static void
86 __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
87 {
88 	if (new->pfn_hi < iovad->dma_32bit_pfn)
89 		iovad->cached32_node = &new->node;
90 	else
91 		iovad->cached_node = &new->node;
92 }
93 
94 static void
95 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
96 {
97 	struct iova *cached_iova;
98 
99 	cached_iova = to_iova(iovad->cached32_node);
100 	if (free == cached_iova ||
101 	    (free->pfn_hi < iovad->dma_32bit_pfn &&
102 	     free->pfn_lo >= cached_iova->pfn_lo))
103 		iovad->cached32_node = rb_next(&free->node);
104 
105 	if (free->pfn_lo < iovad->dma_32bit_pfn)
106 		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
107 
108 	cached_iova = to_iova(iovad->cached_node);
109 	if (free->pfn_lo >= cached_iova->pfn_lo)
110 		iovad->cached_node = rb_next(&free->node);
111 }
112 
/*
 * Choose the rbtree node from which a backwards allocation walk bounded by
 * @limit_pfn should start. Limits above dma_32bit_pfn simply get the anchor
 * node; otherwise the tree is searched for a node lying at or above
 * @limit_pfn.
 *
 * NOTE(review): the descent below dereferences nodes without NULL checks, so
 * it relies on the tree always containing a node with pfn_hi >= limit_pfn on
 * the path taken - presumably the anchor inserted by init_iova_domain().
 */
static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
	struct rb_node *node, *next;
	/*
	 * Ideally what we'd like to judge here is whether limit_pfn is close
	 * enough to the highest-allocated IOVA that starting the allocation
	 * walk from the anchor node will be quicker than this initial work to
	 * find an exact starting point (especially if that ends up being the
	 * anchor node anyway). This is an incredibly crude approximation which
	 * only really helps the most likely case, but is at least trivially easy.
	 */
	if (limit_pfn > iovad->dma_32bit_pfn)
		return &iovad->anchor.node;

	/* From the root, go right until a node ending at or above limit_pfn. */
	node = iovad->rbroot.rb_node;
	while (to_iova(node)->pfn_hi < limit_pfn)
		node = node->rb_right;

search_left:
	/* Step left while the left child still starts at or above limit_pfn. */
	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
		node = node->rb_left;

	if (!node->rb_left)
		return node;

	/*
	 * The left child starts below limit_pfn. Follow its right-hand
	 * descendants: if one of them starts at or above limit_pfn it becomes
	 * the new candidate and the leftward search is repeated from there.
	 */
	next = node->rb_left;
	while (next->rb_right) {
		next = next->rb_right;
		if (to_iova(next)->pfn_lo >= limit_pfn) {
			node = next;
			goto search_left;
		}
	}

	return node;
}
149 
150 /* Insert the iova into domain rbtree by holding writer lock */
151 static void
152 iova_insert_rbtree(struct rb_root *root, struct iova *iova,
153 		   struct rb_node *start)
154 {
155 	struct rb_node **new, *parent = NULL;
156 
157 	new = (start) ? &start : &(root->rb_node);
158 	/* Figure out where to put new node */
159 	while (*new) {
160 		struct iova *this = to_iova(*new);
161 
162 		parent = *new;
163 
164 		if (iova->pfn_lo < this->pfn_lo)
165 			new = &((*new)->rb_left);
166 		else if (iova->pfn_lo > this->pfn_lo)
167 			new = &((*new)->rb_right);
168 		else {
169 			WARN_ON(1); /* this should not happen */
170 			return;
171 		}
172 	}
173 	/* Add new node and rebalance tree. */
174 	rb_link_node(&iova->node, parent, new);
175 	rb_insert_color(&iova->node, root);
176 }
177 
178 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
179 		unsigned long size, unsigned long limit_pfn,
180 			struct iova *new, bool size_aligned)
181 {
182 	struct rb_node *curr, *prev;
183 	struct iova *curr_iova;
184 	unsigned long flags;
185 	unsigned long new_pfn, retry_pfn;
186 	unsigned long align_mask = ~0UL;
187 	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
188 
189 	if (size_aligned)
190 		align_mask <<= fls_long(size - 1);
191 
192 	/* Walk the tree backwards */
193 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
194 	if (limit_pfn <= iovad->dma_32bit_pfn &&
195 			size >= iovad->max32_alloc_size)
196 		goto iova32_full;
197 
198 	curr = __get_cached_rbnode(iovad, limit_pfn);
199 	curr_iova = to_iova(curr);
200 	retry_pfn = curr_iova->pfn_hi + 1;
201 
202 retry:
203 	do {
204 		high_pfn = min(high_pfn, curr_iova->pfn_lo);
205 		new_pfn = (high_pfn - size) & align_mask;
206 		prev = curr;
207 		curr = rb_prev(curr);
208 		curr_iova = to_iova(curr);
209 	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
210 
211 	if (high_pfn < size || new_pfn < low_pfn) {
212 		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
213 			high_pfn = limit_pfn;
214 			low_pfn = retry_pfn;
215 			curr = iova_find_limit(iovad, limit_pfn);
216 			curr_iova = to_iova(curr);
217 			goto retry;
218 		}
219 		iovad->max32_alloc_size = size;
220 		goto iova32_full;
221 	}
222 
223 	/* pfn_lo will point to size aligned address if size_aligned is set */
224 	new->pfn_lo = new_pfn;
225 	new->pfn_hi = new->pfn_lo + size - 1;
226 
227 	/* If we have 'prev', it's a valid place to start the insertion. */
228 	iova_insert_rbtree(&iovad->rbroot, new, prev);
229 	__cached_rbnode_insert_update(iovad, new);
230 
231 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
232 	return 0;
233 
234 iova32_full:
235 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
236 	return -ENOMEM;
237 }
238 
/* Slab cache backing every struct iova handed out by alloc_iova_mem(). */
static struct kmem_cache *iova_cache;
/* Reference count of iova_cache_get() callers; modified under iova_cache_mutex. */
static unsigned int iova_cache_users;
/* Serialises creation and destruction of iova_cache and the cpuhp state. */
static DEFINE_MUTEX(iova_cache_mutex);
242 
243 static struct iova *alloc_iova_mem(void)
244 {
245 	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
246 }
247 
248 static void free_iova_mem(struct iova *iova)
249 {
250 	if (iova->pfn_lo != IOVA_ANCHOR)
251 		kmem_cache_free(iova_cache, iova);
252 }
253 
254 int iova_cache_get(void)
255 {
256 	mutex_lock(&iova_cache_mutex);
257 	if (!iova_cache_users) {
258 		int ret;
259 
260 		ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
261 					iova_cpuhp_dead);
262 		if (ret) {
263 			mutex_unlock(&iova_cache_mutex);
264 			pr_err("Couldn't register cpuhp handler\n");
265 			return ret;
266 		}
267 
268 		iova_cache = kmem_cache_create(
269 			"iommu_iova", sizeof(struct iova), 0,
270 			SLAB_HWCACHE_ALIGN, NULL);
271 		if (!iova_cache) {
272 			cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
273 			mutex_unlock(&iova_cache_mutex);
274 			pr_err("Couldn't create iova cache\n");
275 			return -ENOMEM;
276 		}
277 	}
278 
279 	iova_cache_users++;
280 	mutex_unlock(&iova_cache_mutex);
281 
282 	return 0;
283 }
284 EXPORT_SYMBOL_GPL(iova_cache_get);
285 
286 void iova_cache_put(void)
287 {
288 	mutex_lock(&iova_cache_mutex);
289 	if (WARN_ON(!iova_cache_users)) {
290 		mutex_unlock(&iova_cache_mutex);
291 		return;
292 	}
293 	iova_cache_users--;
294 	if (!iova_cache_users) {
295 		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
296 		kmem_cache_destroy(iova_cache);
297 	}
298 	mutex_unlock(&iova_cache_mutex);
299 }
300 EXPORT_SYMBOL_GPL(iova_cache_put);
301 
302 /**
303  * alloc_iova - allocates an iova
304  * @iovad: - iova domain in question
305  * @size: - size of page frames to allocate
306  * @limit_pfn: - max limit address
307  * @size_aligned: - set if size_aligned address range is required
308  * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
309  * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
310  * flag is set then the allocated address iova->pfn_lo will be naturally
311  * aligned on roundup_power_of_two(size).
312  */
313 struct iova *
314 alloc_iova(struct iova_domain *iovad, unsigned long size,
315 	unsigned long limit_pfn,
316 	bool size_aligned)
317 {
318 	struct iova *new_iova;
319 	int ret;
320 
321 	new_iova = alloc_iova_mem();
322 	if (!new_iova)
323 		return NULL;
324 
325 	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
326 			new_iova, size_aligned);
327 
328 	if (ret) {
329 		free_iova_mem(new_iova);
330 		return NULL;
331 	}
332 
333 	return new_iova;
334 }
335 EXPORT_SYMBOL_GPL(alloc_iova);
336 
337 static struct iova *
338 private_find_iova(struct iova_domain *iovad, unsigned long pfn)
339 {
340 	struct rb_node *node = iovad->rbroot.rb_node;
341 
342 	assert_spin_locked(&iovad->iova_rbtree_lock);
343 
344 	while (node) {
345 		struct iova *iova = to_iova(node);
346 
347 		if (pfn < iova->pfn_lo)
348 			node = node->rb_left;
349 		else if (pfn > iova->pfn_hi)
350 			node = node->rb_right;
351 		else
352 			return iova;	/* pfn falls within iova's range */
353 	}
354 
355 	return NULL;
356 }
357 
358 static void remove_iova(struct iova_domain *iovad, struct iova *iova)
359 {
360 	assert_spin_locked(&iovad->iova_rbtree_lock);
361 	__cached_rbnode_delete_update(iovad, iova);
362 	rb_erase(&iova->node, &iovad->rbroot);
363 }
364 
365 /**
366  * find_iova - finds an iova for a given pfn
367  * @iovad: - iova domain in question.
368  * @pfn: - page frame number
369  * This function finds and returns an iova belonging to the
370  * given domain which matches the given pfn.
371  */
372 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
373 {
374 	unsigned long flags;
375 	struct iova *iova;
376 
377 	/* Take the lock so that no other thread is manipulating the rbtree */
378 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
379 	iova = private_find_iova(iovad, pfn);
380 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
381 	return iova;
382 }
383 EXPORT_SYMBOL_GPL(find_iova);
384 
385 /**
386  * __free_iova - frees the given iova
387  * @iovad: iova domain in question.
388  * @iova: iova in question.
389  * Frees the given iova belonging to the giving domain
390  */
391 void
392 __free_iova(struct iova_domain *iovad, struct iova *iova)
393 {
394 	unsigned long flags;
395 
396 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
397 	remove_iova(iovad, iova);
398 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
399 	free_iova_mem(iova);
400 }
401 EXPORT_SYMBOL_GPL(__free_iova);
402 
403 /**
404  * free_iova - finds and frees the iova for a given pfn
405  * @iovad: - iova domain in question.
406  * @pfn: - pfn that is allocated previously
407  * This functions finds an iova for a given pfn and then
408  * frees the iova from that domain.
409  */
410 void
411 free_iova(struct iova_domain *iovad, unsigned long pfn)
412 {
413 	unsigned long flags;
414 	struct iova *iova;
415 
416 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
417 	iova = private_find_iova(iovad, pfn);
418 	if (!iova) {
419 		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
420 		return;
421 	}
422 	remove_iova(iovad, iova);
423 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
424 	free_iova_mem(iova);
425 }
426 EXPORT_SYMBOL_GPL(free_iova);
427 
428 /**
429  * alloc_iova_fast - allocates an iova from rcache
430  * @iovad: - iova domain in question
431  * @size: - size of page frames to allocate
432  * @limit_pfn: - max limit address
433  * @flush_rcache: - set to flush rcache on regular allocation failure
434  * This function tries to satisfy an iova allocation from the rcache,
435  * and falls back to regular allocation on failure. If regular allocation
436  * fails too and the flush_rcache flag is set then the rcache will be flushed.
437 */
438 unsigned long
439 alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
440 		unsigned long limit_pfn, bool flush_rcache)
441 {
442 	unsigned long iova_pfn;
443 	struct iova *new_iova;
444 
445 	/*
446 	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
447 	 * will come back to bite us badly, so we have to waste a bit of space
448 	 * rounding up anything cacheable to make sure that can't happen. The
449 	 * order of the unadjusted size will still match upon freeing.
450 	 */
451 	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
452 		size = roundup_pow_of_two(size);
453 
454 	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
455 	if (iova_pfn)
456 		return iova_pfn;
457 
458 retry:
459 	new_iova = alloc_iova(iovad, size, limit_pfn, true);
460 	if (!new_iova) {
461 		unsigned int cpu;
462 
463 		if (!flush_rcache)
464 			return 0;
465 
466 		/* Try replenishing IOVAs by flushing rcache. */
467 		flush_rcache = false;
468 		for_each_online_cpu(cpu)
469 			free_cpu_cached_iovas(cpu, iovad);
470 		free_global_cached_iovas(iovad);
471 		goto retry;
472 	}
473 
474 	return new_iova->pfn_lo;
475 }
476 EXPORT_SYMBOL_GPL(alloc_iova_fast);
477 
/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * Tries to park the range in the rcache; if the rcache does not take it,
 * the range is released through free_iova() instead.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (!iova_rcache_insert(iovad, pfn, size))
		free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
495 
/*
 * Tear down the domain's range caches: first detach the domain from the
 * CPU hotplug state, then free the rcache data structures themselves.
 */
static void iova_domain_free_rcaches(struct iova_domain *iovad)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					    &iovad->cpuhp_dead);
	free_iova_rcaches(iovad);
}
502 
503 /**
504  * put_iova_domain - destroys the iova domain
505  * @iovad: - iova domain in question.
506  * All the iova's in that domain are destroyed.
507  */
508 void put_iova_domain(struct iova_domain *iovad)
509 {
510 	struct iova *iova, *tmp;
511 
512 	if (iovad->rcaches)
513 		iova_domain_free_rcaches(iovad);
514 
515 	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
516 		free_iova_mem(iova);
517 }
518 EXPORT_SYMBOL_GPL(put_iova_domain);
519 
520 static int
521 __is_range_overlap(struct rb_node *node,
522 	unsigned long pfn_lo, unsigned long pfn_hi)
523 {
524 	struct iova *iova = to_iova(node);
525 
526 	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
527 		return 1;
528 	return 0;
529 }
530 
531 static inline struct iova *
532 alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
533 {
534 	struct iova *iova;
535 
536 	iova = alloc_iova_mem();
537 	if (iova) {
538 		iova->pfn_lo = pfn_lo;
539 		iova->pfn_hi = pfn_hi;
540 	}
541 
542 	return iova;
543 }
544 
545 static struct iova *
546 __insert_new_range(struct iova_domain *iovad,
547 	unsigned long pfn_lo, unsigned long pfn_hi)
548 {
549 	struct iova *iova;
550 
551 	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
552 	if (iova)
553 		iova_insert_rbtree(&iovad->rbroot, iova, NULL);
554 
555 	return iova;
556 }
557 
/*
 * Merge the requested range [*pfn_lo, *pfn_hi] with an existing @iova that
 * overlaps it:
 *  - if the request starts below @iova, @iova is grown downwards to the
 *    requested start;
 *  - if the request ends above @iova, *pfn_lo is moved to just past @iova,
 *    leaving the caller with only the part not yet covered.
 * *pfn_hi is read but never modified.
 */
static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}
567 
568 /**
569  * reserve_iova - reserves an iova in the given range
570  * @iovad: - iova domain pointer
571  * @pfn_lo: - lower page frame address
572  * @pfn_hi:- higher pfn adderss
573  * This function allocates reserves the address range from pfn_lo to pfn_hi so
574  * that this address is not dished out as part of alloc_iova.
575  */
576 struct iova *
577 reserve_iova(struct iova_domain *iovad,
578 	unsigned long pfn_lo, unsigned long pfn_hi)
579 {
580 	struct rb_node *node;
581 	unsigned long flags;
582 	struct iova *iova;
583 	unsigned int overlap = 0;
584 
585 	/* Don't allow nonsensical pfns */
586 	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
587 		return NULL;
588 
589 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
590 	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
591 		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
592 			iova = to_iova(node);
593 			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
594 			if ((pfn_lo >= iova->pfn_lo) &&
595 				(pfn_hi <= iova->pfn_hi))
596 				goto finish;
597 			overlap = 1;
598 
599 		} else if (overlap)
600 				break;
601 	}
602 
603 	/* We are here either because this is the first reserver node
604 	 * or need to insert remaining non overlap addr range
605 	 */
606 	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
607 finish:
608 
609 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
610 	return iova;
611 }
612 EXPORT_SYMBOL_GPL(reserve_iova);
613 
614 /*
615  * Magazine caches for IOVA ranges.  For an introduction to magazines,
616  * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
617  * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
618  * For simplicity, we use a static magazine size and don't implement the
619  * dynamic size tuning described in the paper.
620  */
621 
/*
 * As kmalloc's buffer size is fixed to a power of 2, 127 is chosen so that
 * 'struct iova_magazine' (127 pfn slots plus the 'size' word, i.e. 128
 * unsigned longs) is exactly 1024 bytes when unsigned long is 8 bytes wide,
 * so that no memory is wasted.
 */
#define IOVA_MAG_SIZE 127
#define MAX_GLOBAL_MAGS 32	/* magazines per bin */
629 
/* A fixed-capacity stack of cached IOVA pfns. */
struct iova_magazine {
	/* Number of valid entries in pfns[]; ranges from 0 to IOVA_MAG_SIZE. */
	unsigned long size;
	/* Cached pfns; only the first 'size' slots are in use. */
	unsigned long pfns[IOVA_MAG_SIZE];
};
634 
/* Per-CPU part of a range cache: two magazines guarded by one lock. */
struct iova_cpu_rcache {
	/* Taken around every access to 'loaded' and 'prev' in this file. */
	spinlock_t lock;
	/* Magazine that pushes and pops operate on. */
	struct iova_magazine *loaded;
	/* Spare magazine, swapped with 'loaded' when that is full or empty. */
	struct iova_magazine *prev;
};
640 
/* One range cache bin: a global depot of magazines plus the per-CPU caches. */
struct iova_rcache {
	/* Guards 'depot' and 'depot_size'. */
	spinlock_t lock;
	/* Number of magazines currently stored in depot[]. */
	unsigned long depot_size;
	/* Magazines handed over by CPUs whose two local magazines were full. */
	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
	/* Per-CPU caches, allocated in iova_domain_init_rcaches(). */
	struct iova_cpu_rcache __percpu *cpu_rcaches;
};
647 
648 static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
649 {
650 	return kzalloc(sizeof(struct iova_magazine), flags);
651 }
652 
/*
 * Release the magazine's memory. The pfns it holds are not returned to the
 * rbtree here; see iova_magazine_free_pfns() for that.
 */
static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}
657 
658 static void
659 iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
660 {
661 	unsigned long flags;
662 	int i;
663 
664 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
665 
666 	for (i = 0 ; i < mag->size; ++i) {
667 		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
668 
669 		if (WARN_ON(!iova))
670 			continue;
671 
672 		remove_iova(iovad, iova);
673 		free_iova_mem(iova);
674 	}
675 
676 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
677 
678 	mag->size = 0;
679 }
680 
681 static bool iova_magazine_full(struct iova_magazine *mag)
682 {
683 	return mag->size == IOVA_MAG_SIZE;
684 }
685 
686 static bool iova_magazine_empty(struct iova_magazine *mag)
687 {
688 	return mag->size == 0;
689 }
690 
691 static unsigned long iova_magazine_pop(struct iova_magazine *mag,
692 				       unsigned long limit_pfn)
693 {
694 	int i;
695 	unsigned long pfn;
696 
697 	/* Only fall back to the rbtree if we have no suitable pfns at all */
698 	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
699 		if (i == 0)
700 			return 0;
701 
702 	/* Swap it to pop it */
703 	pfn = mag->pfns[i];
704 	mag->pfns[i] = mag->pfns[--mag->size];
705 
706 	return pfn;
707 }
708 
709 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
710 {
711 	mag->pfns[mag->size++] = pfn;
712 }
713 
714 int iova_domain_init_rcaches(struct iova_domain *iovad)
715 {
716 	unsigned int cpu;
717 	int i, ret;
718 
719 	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
720 				 sizeof(struct iova_rcache),
721 				 GFP_KERNEL);
722 	if (!iovad->rcaches)
723 		return -ENOMEM;
724 
725 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
726 		struct iova_cpu_rcache *cpu_rcache;
727 		struct iova_rcache *rcache;
728 
729 		rcache = &iovad->rcaches[i];
730 		spin_lock_init(&rcache->lock);
731 		rcache->depot_size = 0;
732 		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
733 						     cache_line_size());
734 		if (!rcache->cpu_rcaches) {
735 			ret = -ENOMEM;
736 			goto out_err;
737 		}
738 		for_each_possible_cpu(cpu) {
739 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
740 
741 			spin_lock_init(&cpu_rcache->lock);
742 			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
743 			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
744 			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
745 				ret = -ENOMEM;
746 				goto out_err;
747 			}
748 		}
749 	}
750 
751 	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
752 					       &iovad->cpuhp_dead);
753 	if (ret)
754 		goto out_err;
755 	return 0;
756 
757 out_err:
758 	free_iova_rcaches(iovad);
759 	return ret;
760 }
761 EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
762 
/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if both per-CPU magazines are full and
 * no new magazine can be allocated; the caller chain (iova_rcache_insert(),
 * called from free_iova_fast()) then returns the IOVA range to the rbtree
 * via free_iova() instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		/* 'prev' still has room: make it the loaded magazine. */
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		/* Both magazines are full: replace 'loaded' with a fresh one. */
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			/*
			 * The full magazine goes to the global depot if there
			 * is space, otherwise it is emptied and freed below.
			 */
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
						cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			spin_unlock(&rcache->lock);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	/* Done after dropping the per-CPU lock: this takes the rbtree lock. */
	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}
816 
817 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
818 			       unsigned long size)
819 {
820 	unsigned int log_size = order_base_2(size);
821 
822 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
823 		return false;
824 
825 	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
826 }
827 
/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return the pfn of a cached range that does not
 * exceed 'limit_pfn' and remove it from the 'rcache'; otherwise return 0.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		/* 'loaded' is empty but 'prev' is not: swap them. */
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		/* Both are empty: trade 'loaded' for a magazine from the depot. */
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	/* The pop can still return 0 if every cached pfn exceeds limit_pfn. */
	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}
866 
867 /*
868  * Try to satisfy IOVA allocation range from rcache.  Fail if requested
869  * size is too big or the DMA limit we are given isn't satisfied by the
870  * top element in the magazine.
871  */
872 static unsigned long iova_rcache_get(struct iova_domain *iovad,
873 				     unsigned long size,
874 				     unsigned long limit_pfn)
875 {
876 	unsigned int log_size = order_base_2(size);
877 
878 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
879 		return 0;
880 
881 	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
882 }
883 
884 /*
885  * free rcache data structures.
886  */
887 static void free_iova_rcaches(struct iova_domain *iovad)
888 {
889 	struct iova_rcache *rcache;
890 	struct iova_cpu_rcache *cpu_rcache;
891 	unsigned int cpu;
892 	int i, j;
893 
894 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
895 		rcache = &iovad->rcaches[i];
896 		if (!rcache->cpu_rcaches)
897 			break;
898 		for_each_possible_cpu(cpu) {
899 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
900 			iova_magazine_free(cpu_rcache->loaded);
901 			iova_magazine_free(cpu_rcache->prev);
902 		}
903 		free_percpu(rcache->cpu_rcaches);
904 		for (j = 0; j < rcache->depot_size; ++j)
905 			iova_magazine_free(rcache->depot[j]);
906 	}
907 
908 	kfree(iovad->rcaches);
909 	iovad->rcaches = NULL;
910 }
911 
912 /*
913  * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
914  */
915 static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
916 {
917 	struct iova_cpu_rcache *cpu_rcache;
918 	struct iova_rcache *rcache;
919 	unsigned long flags;
920 	int i;
921 
922 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
923 		rcache = &iovad->rcaches[i];
924 		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
925 		spin_lock_irqsave(&cpu_rcache->lock, flags);
926 		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
927 		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
928 		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
929 	}
930 }
931 
932 /*
933  * free all the IOVA ranges of global cache
934  */
935 static void free_global_cached_iovas(struct iova_domain *iovad)
936 {
937 	struct iova_rcache *rcache;
938 	unsigned long flags;
939 	int i, j;
940 
941 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
942 		rcache = &iovad->rcaches[i];
943 		spin_lock_irqsave(&rcache->lock, flags);
944 		for (j = 0; j < rcache->depot_size; ++j) {
945 			iova_magazine_free_pfns(rcache->depot[j], iovad);
946 			iova_magazine_free(rcache->depot[j]);
947 		}
948 		rcache->depot_size = 0;
949 		spin_unlock_irqrestore(&rcache->lock, flags);
950 	}
951 }
952 MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
953 MODULE_LICENSE("GPL");
954