xref: /linux/mm/list_lru.c (revision 44f57d78)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
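
/*
 * Usage sketch (illustrative, not part of the upstream file): a cache
 * embeds a list_head in its objects and hands that to list_lru_add()
 * and list_lru_del().  "struct my_object", "my_lru" and "obj" are
 * hypothetical names; the object must be slab- or page-backed so that
 * virt_to_page() above can derive its NUMA node (and, under
 * CONFIG_MEMCG_KMEM, its memcg).
 *
 *	struct my_object {
 *		struct list_head lru;	// linked into my_lru while idle
 *	};
 *
 *	// Object became reclaimable; false means it was already queued:
 *	list_lru_add(&my_lru, &obj->lru);
 *
 *	// Object is in use again; false means someone already removed it:
 *	list_lru_del(&my_lru, &obj->lru);
 */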

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);
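
/*
 * Usage sketch (illustrative): a shrinker's ->count_objects() usually goes
 * through the list_lru_shrink_count() wrapper in <linux/list_lru.h>, which
 * forwards sc->nid and sc->memcg to list_lru_count_one().  "my_lru" is a
 * hypothetical list_lru owned by the cache.
 *
 *	static unsigned long my_count_objects(struct shrinker *shrink,
 *					      struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&my_lru, sc);
 *	}
 */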

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * Decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items.
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
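
/*
 * Usage sketch (illustrative): a walk callback runs with nlru->lock held
 * and reports what it did via the lru_status codes handled in
 * __list_lru_walk_one() above.  "struct my_object", "my_lru" and
 * object_is_busy() are hypothetical.
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *l,
 *					  spinlock_t *lock, void *arg)
 *	{
 *		struct list_head *dispose = arg;
 *		struct my_object *obj;
 *
 *		obj = container_of(item, struct my_object, lru);
 *		if (object_is_busy(obj))
 *			return LRU_SKIP;
 *
 *		// Unlink, fix l->nr_items, and collect for later freeing.
 *		list_lru_isolate_move(l, item, dispose);
 *		return LRU_REMOVED;
 *	}
 *
 *	LIST_HEAD(dispose);
 *	unsigned long nr_to_walk = 128;
 *
 *	list_lru_walk_one(&my_lru, nid, memcg, my_isolate, &dispose,
 *			  &nr_to_walk);
 *	// Free everything on "dispose" without holding the lru lock.
 */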

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
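
/*
 * Usage sketch (illustrative): a shrinker's ->scan_objects() usually goes
 * through the list_lru_shrink_walk() wrapper in <linux/list_lru.h>, which
 * forwards sc->nid, sc->memcg and sc->nr_to_scan to list_lru_walk_one().
 * "my_lru", my_isolate() and my_free() are hypothetical.
 *
 *	static unsigned long my_scan_objects(struct shrinker *shrink,
 *					     struct shrink_control *sc)
 *	{
 *		LIST_HEAD(dispose);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&my_lru, sc, my_isolate,
 *					     &dispose);
 *		// Dispose of the isolated objects outside the lru lock.
 *		while (!list_empty(&dispose)) {
 *			struct my_object *obj;
 *
 *			obj = list_first_entry(&dispose, struct my_object,
 *					       lru);
 *			list_del_init(&obj->lru);
 *			my_free(obj);
 *		}
 *		return freed;
 *	}
 */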

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because
	 * we cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	lru->memcg_aware = memcg_aware;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

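/*
 * Grow the per-memcg lru arrays of every registered list_lru so that they
 * can accommodate new_size kmemcg ids.  Called from the memcg side when
 * the id space (memcg_nr_cache_ids) is extended.
 */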
int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

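/*
 * Reparent the per-memcg lists of every registered list_lru: splice all
 * items belonging to the cgroup with kmemcg id src_idx onto dst_memcg's
 * lists.  Called from the memcg side when a cgroup's kmem state is taken
 * offline, so that its objects remain reachable by the shrinkers.
 */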
void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
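
/*
 * Usage sketch (illustrative): callers normally reach __list_lru_init()
 * through the wrappers in <linux/list_lru.h>: list_lru_init() for a plain
 * per-node LRU, or list_lru_init_memcg() for a memcg-aware one tied to a
 * preallocated/registered shrinker.  "my_lru" and "my_shrinker" are
 * hypothetical.
 *
 *	err = list_lru_init_memcg(&my_lru, &my_shrinker);
 *	if (err)
 *		return err;
 *	...
 *	list_lru_destroy(&my_lru);	// when tearing the cache down
 */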

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
659