1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * This file is part of UBIFS.
4  *
5  * Copyright (C) 2006-2008 Nokia Corporation.
6  *
7  * Authors: Artem Bityutskiy (Битюцкий Артём)
8  *          Adrian Hunter
9  */
10 
11 /*
12  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
13  * tree when Linux VM needs more RAM.
14  *
15  * We do not implement any LRU lists to find oldest znodes to free because it
16  * would add additional overhead to the file system fast paths. So the shrinker
17  * just walks the TNC tree when searching for znodes to free.
18  *
19  * If the root of a TNC sub-tree is clean and old enough, then the children are
20  * also clean and old enough. So the shrinker walks the TNC in level order and
21  * dumps entire sub-trees.
22  *
23  * The age of znodes is just the time-stamp when they were last looked at.
24  * The current shrinker first tries to evict old znodes, then young ones.
25  *
26  * Since the shrinker is global, it has to protect against races with FS
27  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
28  */
29 
30 #include "ubifs.h"
31 
32 /* List of all UBIFS file-system instances */
33 LIST_HEAD(ubifs_infos);
34 
35 /*
36  * We number each shrinker run and record the number on the ubifs_info structure
37  * so that we can easily work out which ubifs_info structures have already been
38  * done by the current run.
39  */
40 static unsigned int shrinker_run_no;
41 
42 /* Protects 'ubifs_infos' list */
43 DEFINE_SPINLOCK(ubifs_infos_lock);
44 
45 /* Global clean znode counter (for all mounted UBIFS instances) */
46 atomic_long_t ubifs_clean_zn_cnt;
47 
48 /**
49  * shrink_tnc - shrink TNC tree.
50  * @c: UBIFS file-system description object
51  * @nr: number of znodes to free
52  * @age: the age of znodes to free
53  * @contention: if any contention, this is set to %1
54  *
55  * This function traverses TNC tree and frees clean znodes. It does not free
56  * clean znodes which younger then @age. Returns number of freed znodes.
57  */
shrink_tnc(struct ubifs_info * c,int nr,int age,int * contention)58 static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
59 {
60 	int total_freed = 0;
61 	struct ubifs_znode *znode, *zprev;
62 	time64_t time = ktime_get_seconds();
63 
64 	ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
65 	ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
66 
67 	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
68 		return 0;
69 
70 	/*
71 	 * Traverse the TNC tree in levelorder manner, so that it is possible
72 	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
73 	 * children are older or of the same age.
74 	 *
75 	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
76 	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
77 	 * changed only when the 'c->tnc_mutex' is held.
78 	 */
79 	zprev = NULL;
80 	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
81 	while (znode && total_freed < nr &&
82 	       atomic_long_read(&c->clean_zn_cnt) > 0) {
83 		int freed;
84 
85 		/*
86 		 * If the znode is clean, but it is in the 'c->cnext' list, this
87 		 * means that this znode has just been written to flash as a
88 		 * part of commit and was marked clean. They will be removed
89 		 * from the list at end commit. We cannot change the list,
90 		 * because it is not protected by any mutex (design decision to
91 		 * make commit really independent and parallel to main I/O). So
92 		 * we just skip these znodes.
93 		 *
94 		 * Note, the 'clean_zn_cnt' counters are not updated until
95 		 * after the commit, so the UBIFS shrinker does not report
96 		 * the znodes which are in the 'c->cnext' list as freeable.
97 		 *
98 		 * Also note, if the root of a sub-tree is not in 'c->cnext',
99 		 * then the whole sub-tree is not in 'c->cnext' as well, so it
100 		 * is safe to dump whole sub-tree.
101 		 */
102 
103 		if (znode->cnext) {
104 			/*
105 			 * Very soon these znodes will be removed from the list
106 			 * and become freeable.
107 			 */
108 			*contention = 1;
109 		} else if (!ubifs_zn_dirty(znode) &&
110 			   abs(time - znode->time) >= age) {
111 			if (znode->parent)
112 				znode->parent->zbranch[znode->iip].znode = NULL;
113 			else
114 				c->zroot.znode = NULL;
115 
116 			freed = ubifs_destroy_tnc_subtree(c, znode);
117 			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
118 			atomic_long_sub(freed, &c->clean_zn_cnt);
119 			total_freed += freed;
120 			znode = zprev;
121 		}
122 
123 		if (unlikely(!c->zroot.znode))
124 			break;
125 
126 		zprev = znode;
127 		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
128 		cond_resched();
129 	}
130 
131 	return total_freed;
132 }
133 
134 /**
135  * shrink_tnc_trees - shrink UBIFS TNC trees.
136  * @nr: number of znodes to free
137  * @age: the age of znodes to free
138  * @contention: if any contention, this is set to %1
139  *
140  * This function walks the list of mounted UBIFS file-systems and frees clean
141  * znodes which are older than @age, until at least @nr znodes are freed.
142  * Returns the number of freed znodes.
143  */
shrink_tnc_trees(int nr,int age,int * contention)144 static int shrink_tnc_trees(int nr, int age, int *contention)
145 {
146 	struct ubifs_info *c;
147 	struct list_head *p;
148 	unsigned int run_no;
149 	int freed = 0;
150 
151 	spin_lock(&ubifs_infos_lock);
152 	do {
153 		run_no = ++shrinker_run_no;
154 	} while (run_no == 0);
155 	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
156 	p = ubifs_infos.next;
157 	while (p != &ubifs_infos) {
158 		c = list_entry(p, struct ubifs_info, infos_list);
159 		/*
160 		 * We move the ones we do to the end of the list, so we stop
161 		 * when we see one we have already done.
162 		 */
163 		if (c->shrinker_run_no == run_no)
164 			break;
165 		if (!mutex_trylock(&c->umount_mutex)) {
166 			/* Some un-mount is in progress, try next FS */
167 			*contention = 1;
168 			p = p->next;
169 			continue;
170 		}
171 		/*
172 		 * We're holding 'c->umount_mutex', so the file-system won't go
173 		 * away.
174 		 */
175 		if (!mutex_trylock(&c->tnc_mutex)) {
176 			mutex_unlock(&c->umount_mutex);
177 			*contention = 1;
178 			p = p->next;
179 			continue;
180 		}
181 		spin_unlock(&ubifs_infos_lock);
182 		/*
183 		 * OK, now we have TNC locked, the file-system cannot go away -
184 		 * it is safe to reap the cache.
185 		 */
186 		c->shrinker_run_no = run_no;
187 		freed += shrink_tnc(c, nr, age, contention);
188 		mutex_unlock(&c->tnc_mutex);
189 		spin_lock(&ubifs_infos_lock);
190 		/* Get the next list element before we move this one */
191 		p = p->next;
192 		/*
193 		 * Move this one to the end of the list to provide some
194 		 * fairness.
195 		 */
196 		list_move_tail(&c->infos_list, &ubifs_infos);
197 		mutex_unlock(&c->umount_mutex);
198 		if (freed >= nr)
199 			break;
200 	}
201 	spin_unlock(&ubifs_infos_lock);
202 	return freed;
203 }
204 
205 /**
206  * kick_a_thread - kick a background thread to start commit.
207  *
208  * This function kicks a background thread to start background commit. Returns
209  * %-1 if a thread was kicked or there is another reason to assume the memory
210  * will soon be freed or become freeable. If there are no dirty znodes, returns
211  * %0.
212  */
kick_a_thread(void)213 static int kick_a_thread(void)
214 {
215 	int i;
216 	struct ubifs_info *c;
217 
218 	/*
219 	 * Iterate over all mounted UBIFS file-systems and find out if there is
220 	 * already an ongoing commit operation there. If no, then iterate for
221 	 * the second time and initiate background commit.
222 	 */
223 	spin_lock(&ubifs_infos_lock);
224 	for (i = 0; i < 2; i++) {
225 		list_for_each_entry(c, &ubifs_infos, infos_list) {
226 			long dirty_zn_cnt;
227 
228 			if (!mutex_trylock(&c->umount_mutex)) {
229 				/*
230 				 * Some un-mount is in progress, it will
231 				 * certainly free memory, so just return.
232 				 */
233 				spin_unlock(&ubifs_infos_lock);
234 				return -1;
235 			}
236 
237 			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
238 
239 			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
240 			    c->ro_mount || c->ro_error) {
241 				mutex_unlock(&c->umount_mutex);
242 				continue;
243 			}
244 
245 			if (c->cmt_state != COMMIT_RESTING) {
246 				spin_unlock(&ubifs_infos_lock);
247 				mutex_unlock(&c->umount_mutex);
248 				return -1;
249 			}
250 
251 			if (i == 1) {
252 				list_move_tail(&c->infos_list, &ubifs_infos);
253 				spin_unlock(&ubifs_infos_lock);
254 
255 				ubifs_request_bg_commit(c);
256 				mutex_unlock(&c->umount_mutex);
257 				return -1;
258 			}
259 			mutex_unlock(&c->umount_mutex);
260 		}
261 	}
262 	spin_unlock(&ubifs_infos_lock);
263 
264 	return 0;
265 }
266 
ubifs_shrink_count(struct shrinker * shrink,struct shrink_control * sc)267 unsigned long ubifs_shrink_count(struct shrinker *shrink,
268 				 struct shrink_control *sc)
269 {
270 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
271 
272 	/*
273 	 * Due to the way UBIFS updates the clean znode counter it may
274 	 * temporarily be negative.
275 	 */
276 	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
277 }
278 
ubifs_shrink_scan(struct shrinker * shrink,struct shrink_control * sc)279 unsigned long ubifs_shrink_scan(struct shrinker *shrink,
280 				struct shrink_control *sc)
281 {
282 	unsigned long nr = sc->nr_to_scan;
283 	int contention = 0;
284 	unsigned long freed;
285 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
286 
287 	if (!clean_zn_cnt) {
288 		/*
289 		 * No clean znodes, nothing to reap. All we can do in this case
290 		 * is to kick background threads to start commit, which will
291 		 * probably make clean znodes which, in turn, will be freeable.
292 		 * And we return -1 which means will make VM call us again
293 		 * later.
294 		 */
295 		dbg_tnc("no clean znodes, kick a thread");
296 		return kick_a_thread();
297 	}
298 
299 	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
300 	if (freed >= nr)
301 		goto out;
302 
303 	dbg_tnc("not enough old znodes, try to free young ones");
304 	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
305 	if (freed >= nr)
306 		goto out;
307 
308 	dbg_tnc("not enough young znodes, free all");
309 	freed += shrink_tnc_trees(nr - freed, 0, &contention);
310 
311 	if (!freed && contention) {
312 		dbg_tnc("freed nothing, but contention");
313 		return SHRINK_STOP;
314 	}
315 
316 out:
317 	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
318 	return freed;
319 }
320