xref: /linux/fs/jffs2/gc.c (revision 52338415)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/kernel.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/slab.h>
18 #include <linux/pagemap.h>
19 #include <linux/crc32.h>
20 #include <linux/compiler.h>
21 #include <linux/stat.h>
22 #include "nodelist.h"
23 #include "compr.h"
24 
/*
 * Forward declarations for the file-local helpers that
 * jffs2_garbage_collect_pass() and jffs2_garbage_collect_live() dispatch
 * to.  Each helper collects one kind of node: a pristine node copied
 * verbatim, inode metadata, a directory entry, a deletion dirent, a hole
 * or a data node.
 */
25 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
26 					  struct jffs2_inode_cache *ic,
27 					  struct jffs2_raw_node_ref *raw);
28 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
30 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
34 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36 				      uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
38 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
39 				       uint32_t start, uint32_t end);
40 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
41 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
42 
43 /* Called with erase_completion_lock held */
44 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
45 {
46 	struct jffs2_eraseblock *ret;
47 	struct list_head *nextlist = NULL;
48 	int n = jiffies % 128;
49 
50 	/* Pick an eraseblock to garbage collect next. This is where we'll
51 	   put the clever wear-levelling algorithms. Eventually.  */
52 	/* We possibly want to favour the dirtier blocks more when the
53 	   number of free blocks is low. */
54 again:
55 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
56 		jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
57 		nextlist = &c->bad_used_list;
58 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
59 		/* Note that most of them will have gone directly to be erased.
60 		   So don't favour the erasable_list _too_ much. */
61 		jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
62 		nextlist = &c->erasable_list;
63 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
64 		/* Most of the time, pick one off the very_dirty list */
65 		jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
66 		nextlist = &c->very_dirty_list;
67 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
68 		jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
69 		nextlist = &c->dirty_list;
70 	} else if (!list_empty(&c->clean_list)) {
71 		jffs2_dbg(1, "Picking block from clean_list to GC next\n");
72 		nextlist = &c->clean_list;
73 	} else if (!list_empty(&c->dirty_list)) {
74 		jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
75 
76 		nextlist = &c->dirty_list;
77 	} else if (!list_empty(&c->very_dirty_list)) {
78 		jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
79 		nextlist = &c->very_dirty_list;
80 	} else if (!list_empty(&c->erasable_list)) {
81 		jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
82 
83 		nextlist = &c->erasable_list;
84 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
85 		/* There are blocks are wating for the wbuf sync */
86 		jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
87 		spin_unlock(&c->erase_completion_lock);
88 		jffs2_flush_wbuf_pad(c);
89 		spin_lock(&c->erase_completion_lock);
90 		goto again;
91 	} else {
92 		/* Eep. All were empty */
93 		jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
94 		return NULL;
95 	}
96 
97 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
98 	list_del(&ret->list);
99 	c->gcblock = ret;
100 	ret->gc_node = ret->first_node;
101 	if (!ret->gc_node) {
102 		pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
103 			ret->offset);
104 		BUG();
105 	}
106 
107 	/* Have we accidentally picked a clean block with wasted space ? */
108 	if (ret->wasted_size) {
109 		jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
110 			  ret->wasted_size);
111 		ret->dirty_size += ret->wasted_size;
112 		c->wasted_size -= ret->wasted_size;
113 		c->dirty_size += ret->wasted_size;
114 		ret->wasted_size = 0;
115 	}
116 
117 	return ret;
118 }
119 
120 /* jffs2_garbage_collect_pass
121  * Make a single attempt to progress GC. Move one node, and possibly
122  * start erasing one eraseblock.
 *
 * The pass runs under c->alloc_sem, which is taken interruptibly on entry
 * and released before every return.
 *
 * While c->unchecked_size is non-zero no node is moved: the pass instead
 * runs the CRC check for one inode whose state is neither CHECKEDABSENT
 * nor PRESENT and returns the result of that check.
 *
 * Returns 0 when the pass completed, when erasing pending blocks made
 * progress, or when it had to sleep waiting for an inode in state READING;
 * -EINTR if taking alloc_sem was interrupted; -EAGAIN if no block could be
 * selected but c->nr_erasing_blocks is non-zero; -EIO if no block could be
 * selected at all; -ENOSPC if the chosen node was not collected or if
 * unchecked space remains after all inodes were checked; otherwise the
 * error returned by the inode check, inode fetch or per-node helper.
123  */
124 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
125 {
126 	struct jffs2_inode_info *f;
127 	struct jffs2_inode_cache *ic;
128 	struct jffs2_eraseblock *jeb;
129 	struct jffs2_raw_node_ref *raw;
130 	uint32_t gcblock_dirty;
131 	int ret = 0, inum, nlink;
132 	int xattr = 0;
133 
134 	if (mutex_lock_interruptible(&c->alloc_sem))
135 		return -EINTR;
136 
137 
138 	for (;;) {
139 		/* We can't start doing GC until we've finished checking
140 		   the node CRCs etc. */
141 		int bucket, want_ino;
142 
143 		spin_lock(&c->erase_completion_lock);
		/* Breaking out here keeps erase_completion_lock held; the
		   code after the loop relies on that. */
144 		if (!c->unchecked_size)
145 			break;
146 		spin_unlock(&c->erase_completion_lock);
147 
148 		if (!xattr)
149 			xattr = jffs2_verify_xattr(c);
150 
151 		spin_lock(&c->inocache_lock);
152 		/* Instead of doing the inodes in numeric order, doing a lookup
153 		 * in the hash for each possible number, just walk the hash
154 		 * buckets of *existing* inodes. This means that we process
155 		 * them out-of-order, but it can be a lot faster if there's
156 		 * a sparse inode# space. Which there often is. */
157 		want_ino = c->check_ino;
158 		for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) {
159 			for (ic = c->inocache_list[bucket]; ic; ic = ic->next) {
160 				if (ic->ino < want_ino)
161 					continue;
162 
163 				if (ic->state != INO_STATE_CHECKEDABSENT &&
164 				    ic->state != INO_STATE_PRESENT)
165 					goto got_next; /* with inocache_lock held */
166 
167 				jffs2_dbg(1, "Skipping ino #%u already checked\n",
168 					  ic->ino);
169 			}
			/* Only the starting bucket is filtered by check_ino;
			   later buckets are walked from their first entry. */
170 			want_ino = 0;
171 		}
172 
173 		/* Point c->check_ino past the end of the last bucket. */
174 		c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) &
175 				~c->inocache_hashsize) - 1;
176 
177 		spin_unlock(&c->inocache_lock);
178 
179 		pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
180 			c->unchecked_size);
181 		jffs2_dbg_dump_block_lists_nolock(c);
182 		mutex_unlock(&c->alloc_sem);
183 		return -ENOSPC;
184 
185 	got_next:
186 		/* For next time round the loop, we want c->checked_ino to indicate
187 		 * the *next* one we want to check. And since we're walking the
188 		 * buckets rather than doing it sequentially, it's: */
189 		c->check_ino = ic->ino + c->inocache_hashsize;
190 
191 		if (!ic->pino_nlink) {
192 			jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
193 				  ic->ino);
194 			spin_unlock(&c->inocache_lock);
195 			jffs2_xattr_delete_inode(c, ic);
196 			continue;
197 		}
198 		switch(ic->state) {
199 		case INO_STATE_CHECKEDABSENT:
200 		case INO_STATE_PRESENT:
201 			spin_unlock(&c->inocache_lock);
202 			continue;
203 
204 		case INO_STATE_GC:
205 		case INO_STATE_CHECKING:
206 			pr_warn("Inode #%u is in state %d during CRC check phase!\n",
207 				ic->ino, ic->state);
208 			spin_unlock(&c->inocache_lock);
209 			BUG();
210 
211 		case INO_STATE_READING:
212 			/* We need to wait for it to finish, lest we move on
213 			   and trigger the BUG() above while we haven't yet
214 			   finished checking all its nodes */
215 			jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
216 				  ic->ino);
217 			/* We need to come back again for the _same_ inode. We've
218 			 made no progress in this case, but that should be OK */
219 			c->check_ino = ic->ino;
220 
221 			mutex_unlock(&c->alloc_sem);
222 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
223 			return 0;
224 
225 		default:
226 			BUG();
227 
228 		case INO_STATE_UNCHECKED:
229 			;
230 		}
		/* Only INO_STATE_UNCHECKED reaches this point: claim the
		   inode for checking before dropping inocache_lock. */
231 		ic->state = INO_STATE_CHECKING;
232 		spin_unlock(&c->inocache_lock);
233 
234 		jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
235 			  __func__, ic->ino);
236 
237 		ret = jffs2_do_crccheck_inode(c, ic);
238 		if (ret)
239 			pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
240 				ic->ino);
241 
		/* The state is set to CHECKEDABSENT whether or not the check
		   reported an error; the error is still returned. */
242 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
243 		mutex_unlock(&c->alloc_sem);
244 		return ret;
245 	}
246 
247 	/* If there are any blocks which need erasing, erase them now */
248 	if (!list_empty(&c->erase_complete_list) ||
249 	    !list_empty(&c->erase_pending_list)) {
250 		spin_unlock(&c->erase_completion_lock);
251 		mutex_unlock(&c->alloc_sem);
252 		jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
		/* A non-zero result counts as this pass's progress */
253 		if (jffs2_erase_pending_blocks(c, 1))
254 			return 0;
255 
256 		jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
		/* Re-take both locks in the order used on entry */
257 		mutex_lock(&c->alloc_sem);
258 		spin_lock(&c->erase_completion_lock);
259 	}
260 
261 	/* First, work out which block we're garbage-collecting */
262 	jeb = c->gcblock;
263 
264 	if (!jeb)
265 		jeb = jffs2_find_gc_block(c);
266 
267 	if (!jeb) {
268 		/* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
269 		if (c->nr_erasing_blocks) {
270 			spin_unlock(&c->erase_completion_lock);
271 			mutex_unlock(&c->alloc_sem);
272 			return -EAGAIN;
273 		}
274 		jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
275 		spin_unlock(&c->erase_completion_lock);
276 		mutex_unlock(&c->alloc_sem);
277 		return -EIO;
278 	}
279 
280 	jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
281 		  jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
282 	D1(if (c->nextblock)
283 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
284 
	/* Nothing live in the block: go straight to the erase step.
	   erase_completion_lock is still held, as the eraseit label expects. */
285 	if (!jeb->used_size) {
286 		mutex_unlock(&c->alloc_sem);
287 		goto eraseit;
288 	}
289 
290 	raw = jeb->gc_node;
	/* Snapshot of dirty_size; compared at test_gcnode to detect a pass
	   that collected nothing. */
291 	gcblock_dirty = jeb->dirty_size;
292 
293 	while(ref_obsolete(raw)) {
294 		jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
295 			  ref_offset(raw));
296 		raw = ref_next(raw);
297 		if (unlikely(!raw)) {
298 			pr_warn("eep. End of raw list while still supposedly nodes to GC\n");
299 			pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
300 				jeb->offset, jeb->free_size,
301 				jeb->dirty_size, jeb->used_size);
302 			jeb->gc_node = raw;
303 			spin_unlock(&c->erase_completion_lock);
304 			mutex_unlock(&c->alloc_sem);
305 			BUG();
306 		}
307 	}
308 	jeb->gc_node = raw;
309 
310 	jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
311 		  ref_offset(raw));
312 
313 	if (!raw->next_in_ino) {
314 		/* Inode-less node. Clean marker, snapshot or something like that */
315 		spin_unlock(&c->erase_completion_lock);
316 		if (ref_flags(raw) == REF_PRISTINE) {
317 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
			/* NOTE(review): the return value is not checked here */
318 			jffs2_garbage_collect_pristine(c, NULL, raw);
319 		} else {
320 			/* Just mark it obsolete */
321 			jffs2_mark_node_obsolete(c, raw);
322 		}
323 		mutex_unlock(&c->alloc_sem);
		/* erase_completion_lock was dropped above, so enter the
		   erase step through the label that re-takes it. */
324 		goto eraseit_lock;
325 	}
326 
327 	ic = jffs2_raw_ref_to_ic(raw);
328 
329 #ifdef CONFIG_JFFS2_FS_XATTR
330 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
331 	 * We can decide whether this node is inode or xattr by ic->class.     */
332 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
333 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
334 		spin_unlock(&c->erase_completion_lock);
335 
336 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
337 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
338 		} else {
339 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
340 		}
341 		goto test_gcnode;
342 	}
343 #endif
344 
345 	/* We need to hold the inocache. Either the erase_completion_lock or
346 	   the inocache_lock are sufficient; we trade down since the inocache_lock
347 	   causes less contention. */
348 	spin_lock(&c->inocache_lock);
349 
350 	spin_unlock(&c->erase_completion_lock);
351 
352 	jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
353 		  __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
354 		  ic->ino);
355 
356 	/* Three possibilities:
357 	   1. Inode is already in-core. We must iget it and do proper
358 	      updating to its fragtree, etc.
359 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
360 	      inocache to prevent a read_inode(), copy the node intact.
361 	   3. Inode is not in-core, node is not pristine. We must iget()
362 	      and take the slow path.
363 	*/
364 
365 	switch(ic->state) {
366 	case INO_STATE_CHECKEDABSENT:
367 		/* It's been checked, but it's not currently in-core.
368 		   We can just copy any pristine nodes, but have
369 		   to prevent anyone else from doing read_inode() while
370 		   we're at it, so we set the state accordingly */
371 		if (ref_flags(raw) == REF_PRISTINE)
372 			ic->state = INO_STATE_GC;
373 		else {
374 			jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
375 				  ic->ino);
376 		}
377 		break;
378 
379 	case INO_STATE_PRESENT:
380 		/* It's in-core. GC must iget() it. */
381 		break;
382 
383 	case INO_STATE_UNCHECKED:
384 	case INO_STATE_CHECKING:
385 	case INO_STATE_GC:
386 		/* Should never happen. We should have finished checking
387 		   by the time we actually start doing any GC, and since
388 		   we're holding the alloc_sem, no other garbage collection
389 		   can happen.
390 		*/
391 		pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
392 			ic->ino, ic->state);
393 		mutex_unlock(&c->alloc_sem);
394 		spin_unlock(&c->inocache_lock);
395 		BUG();
396 
397 	case INO_STATE_READING:
398 		/* Someone's currently trying to read it. We must wait for
399 		   them to finish and then go through the full iget() route
400 		   to do the GC. However, sometimes read_inode() needs to get
401 		   the alloc_sem() (for marking nodes invalid) so we must
402 		   drop the alloc_sem before sleeping. */
403 
404 		mutex_unlock(&c->alloc_sem);
405 		jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
406 			  __func__, ic->ino, ic->state);
407 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
408 		/* And because we dropped the alloc_sem we must start again from the
409 		   beginning. Ponder chance of livelock here -- we're returning success
410 		   without actually making any progress.
411 
412 		   Q: What are the chances that the inode is back in INO_STATE_READING
413 		   again by the time we next enter this function? And that this happens
414 		   enough times to cause a real delay?
415 
416 		   A: Small enough that I don't care :)
417 		*/
418 		return 0;
419 	}
420 
421 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
422 	   node intact, and we don't have to muck about with the fragtree etc.
423 	   because we know it's not in-core. If it _was_ in-core, we go through
424 	   all the iget() crap anyway */
425 
426 	if (ic->state == INO_STATE_GC) {
427 		spin_unlock(&c->inocache_lock);
428 
429 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
430 
		/* Restore the state set aside above and wake anyone waiting
		   on inocache_wq, whatever the copy returned. */
431 		spin_lock(&c->inocache_lock);
432 		ic->state = INO_STATE_CHECKEDABSENT;
433 		wake_up(&c->inocache_wq);
434 
		/* -EBADFD means the node could not be copied intact; every
		   other result (success or error) ends the pass here. */
435 		if (ret != -EBADFD) {
436 			spin_unlock(&c->inocache_lock);
437 			goto test_gcnode;
438 		}
439 
440 		/* Fall through if it wanted us to, with inocache_lock held */
441 	}
442 
443 	/* Prevent the fairly unlikely race where the gcblock is
444 	   entirely obsoleted by the final close of a file which had
445 	   the only valid nodes in the block, followed by erasure,
446 	   followed by freeing of the ic because the erased block(s)
447 	   held _all_ the nodes of that inode.... never been seen but
448 	   it's vaguely possible. */
449 
	/* Copy what is needed out of ic while inocache_lock is still held */
450 	inum = ic->ino;
451 	nlink = ic->pino_nlink;
452 	spin_unlock(&c->inocache_lock);
453 
	/* The third argument is non-zero when pino_nlink was zero */
454 	f = jffs2_gc_fetch_inode(c, inum, !nlink);
455 	if (IS_ERR(f)) {
456 		ret = PTR_ERR(f);
457 		goto release_sem;
458 	}
	/* A NULL inode is not an error: finish the pass with success */
459 	if (!f) {
460 		ret = 0;
461 		goto release_sem;
462 	}
463 
464 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
465 
466 	jffs2_gc_release_inode(c, f);
467 
468  test_gcnode:
	/* No new dirty space and the node is still live: nothing was
	   collected, so report -ENOSPC over whatever ret held. */
469 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
470 		/* Eep. This really should never happen. GC is broken */
471 		pr_err("Error garbage collecting node at %08x!\n",
472 		       ref_offset(jeb->gc_node));
473 		ret = -ENOSPC;
474 	}
475  release_sem:
476 	mutex_unlock(&c->alloc_sem);
477 
478  eraseit_lock:
479 	/* If we've finished this block, start it erasing */
480 	spin_lock(&c->erase_completion_lock);
481 
482  eraseit:
483 	if (c->gcblock && !c->gcblock->used_size) {
484 		jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
485 			  c->gcblock->offset);
486 		/* We're GC'ing an empty block? */
487 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
488 		c->gcblock = NULL;
489 		c->nr_erasing_blocks++;
490 		jffs2_garbage_collect_trigger(c);
491 	}
492 	spin_unlock(&c->erase_completion_lock);
493 
494 	return ret;
495 }
496 
497 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
498 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
499 {
500 	struct jffs2_node_frag *frag;
501 	struct jffs2_full_dnode *fn = NULL;
502 	struct jffs2_full_dirent *fd;
503 	uint32_t start = 0, end = 0, nrfrags = 0;
504 	int ret = 0;
505 
506 	mutex_lock(&f->sem);
507 
508 	/* Now we have the lock for this inode. Check that it's still the one at the head
509 	   of the list. */
510 
511 	spin_lock(&c->erase_completion_lock);
512 
513 	if (c->gcblock != jeb) {
514 		spin_unlock(&c->erase_completion_lock);
515 		jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
516 		goto upnout;
517 	}
518 	if (ref_obsolete(raw)) {
519 		spin_unlock(&c->erase_completion_lock);
520 		jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
521 		/* They'll call again */
522 		goto upnout;
523 	}
524 	spin_unlock(&c->erase_completion_lock);
525 
526 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
527 	if (f->metadata && f->metadata->raw == raw) {
528 		fn = f->metadata;
529 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
530 		goto upnout;
531 	}
532 
533 	/* FIXME. Read node and do lookup? */
534 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
535 		if (frag->node && frag->node->raw == raw) {
536 			fn = frag->node;
537 			end = frag->ofs + frag->size;
538 			if (!nrfrags++)
539 				start = frag->ofs;
540 			if (nrfrags == frag->node->frags)
541 				break; /* We've found them all */
542 		}
543 	}
544 	if (fn) {
545 		if (ref_flags(raw) == REF_PRISTINE) {
546 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
547 			if (!ret) {
548 				/* Urgh. Return it sensibly. */
549 				frag->node->raw = f->inocache->nodes;
550 			}
551 			if (ret != -EBADFD)
552 				goto upnout;
553 		}
554 		/* We found a datanode. Do the GC */
555 		if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) {
556 			/* It crosses a page boundary. Therefore, it must be a hole. */
557 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
558 		} else {
559 			/* It could still be a hole. But we GC the page this way anyway */
560 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
561 		}
562 		goto upnout;
563 	}
564 
565 	/* Wasn't a dnode. Try dirent */
566 	for (fd = f->dents; fd; fd=fd->next) {
567 		if (fd->raw == raw)
568 			break;
569 	}
570 
571 	if (fd && fd->ino) {
572 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
573 	} else if (fd) {
574 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
575 	} else {
576 		pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
577 			ref_offset(raw), f->inocache->ino);
578 		if (ref_obsolete(raw)) {
579 			pr_warn("But it's obsolete so we don't mind too much\n");
580 		} else {
581 			jffs2_dbg_dump_node(c, ref_offset(raw));
582 			BUG();
583 		}
584 	}
585  upnout:
586 	mutex_unlock(&f->sem);
587 
588 	return ret;
589 }
590 
591 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
592 					  struct jffs2_inode_cache *ic,
593 					  struct jffs2_raw_node_ref *raw)
594 {
595 	union jffs2_node_union *node;
596 	size_t retlen;
597 	int ret;
598 	uint32_t phys_ofs, alloclen;
599 	uint32_t crc, rawlen;
600 	int retried = 0;
601 
602 	jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
603 		  ref_offset(raw));
604 
605 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
606 
607 	/* Ask for a small amount of space (or the totlen if smaller) because we
608 	   don't want to force wastage of the end of a block if splitting would
609 	   work. */
610 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
611 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
612 
613 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
614 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
615 
616 	if (ret)
617 		return ret;
618 
619 	if (alloclen < rawlen) {
620 		/* Doesn't fit untouched. We'll go the old route and split it */
621 		return -EBADFD;
622 	}
623 
624 	node = kmalloc(rawlen, GFP_KERNEL);
625 	if (!node)
626 		return -ENOMEM;
627 
628 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
629 	if (!ret && retlen != rawlen)
630 		ret = -EIO;
631 	if (ret)
632 		goto out_node;
633 
634 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
635 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
636 		pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
637 			ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
638 		goto bail;
639 	}
640 
641 	switch(je16_to_cpu(node->u.nodetype)) {
642 	case JFFS2_NODETYPE_INODE:
643 		crc = crc32(0, node, sizeof(node->i)-8);
644 		if (je32_to_cpu(node->i.node_crc) != crc) {
645 			pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
646 				ref_offset(raw), je32_to_cpu(node->i.node_crc),
647 				crc);
648 			goto bail;
649 		}
650 
651 		if (je32_to_cpu(node->i.dsize)) {
652 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
653 			if (je32_to_cpu(node->i.data_crc) != crc) {
654 				pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
655 					ref_offset(raw),
656 					je32_to_cpu(node->i.data_crc), crc);
657 				goto bail;
658 			}
659 		}
660 		break;
661 
662 	case JFFS2_NODETYPE_DIRENT:
663 		crc = crc32(0, node, sizeof(node->d)-8);
664 		if (je32_to_cpu(node->d.node_crc) != crc) {
665 			pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
666 				ref_offset(raw),
667 				je32_to_cpu(node->d.node_crc), crc);
668 			goto bail;
669 		}
670 
671 		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
672 			pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
673 				ref_offset(raw));
674 			goto bail;
675 		}
676 
677 		if (node->d.nsize) {
678 			crc = crc32(0, node->d.name, node->d.nsize);
679 			if (je32_to_cpu(node->d.name_crc) != crc) {
680 				pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
681 					ref_offset(raw),
682 					je32_to_cpu(node->d.name_crc), crc);
683 				goto bail;
684 			}
685 		}
686 		break;
687 	default:
688 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
689 		if (ic) {
690 			pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
691 				ref_offset(raw), je16_to_cpu(node->u.nodetype));
692 			goto bail;
693 		}
694 	}
695 
696 	/* OK, all the CRCs are good; this node can just be copied as-is. */
697  retry:
698 	phys_ofs = write_ofs(c);
699 
700 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
701 
702 	if (ret || (retlen != rawlen)) {
703 		pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
704 			  rawlen, phys_ofs, ret, retlen);
705 		if (retlen) {
706 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
707 		} else {
708 			pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
709 				  phys_ofs);
710 		}
711 		if (!retried) {
712 			/* Try to reallocate space and retry */
713 			uint32_t dummy;
714 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
715 
716 			retried = 1;
717 
718 			jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
719 
720 			jffs2_dbg_acct_sanity_check(c,jeb);
721 			jffs2_dbg_acct_paranoia_check(c, jeb);
722 
723 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
724 						/* this is not the exact summary size of it,
725 							it is only an upper estimation */
726 
727 			if (!ret) {
728 				jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
729 					  phys_ofs);
730 
731 				jffs2_dbg_acct_sanity_check(c,jeb);
732 				jffs2_dbg_acct_paranoia_check(c, jeb);
733 
734 				goto retry;
735 			}
736 			jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
737 				  ret);
738 		}
739 
740 		if (!ret)
741 			ret = -EIO;
742 		goto out_node;
743 	}
744 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
745 
746 	jffs2_mark_node_obsolete(c, raw);
747 	jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
748 		  ref_offset(raw));
749 
750  out_node:
751 	kfree(node);
752 	return ret;
753  bail:
754 	ret = -EBADFD;
755 	goto out_node;
756 }
757 
758 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
759 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
760 {
761 	struct jffs2_full_dnode *new_fn;
762 	struct jffs2_raw_inode ri;
763 	struct jffs2_node_frag *last_frag;
764 	union jffs2_device_node dev;
765 	char *mdata = NULL;
766 	int mdatalen = 0;
767 	uint32_t alloclen, ilen;
768 	int ret;
769 
770 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
771 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
772 		/* For these, we don't actually need to read the old node */
773 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
774 		mdata = (char *)&dev;
775 		jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
776 			  __func__, mdatalen);
777 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
778 		mdatalen = fn->size;
779 		mdata = kmalloc(fn->size, GFP_KERNEL);
780 		if (!mdata) {
781 			pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
782 			return -ENOMEM;
783 		}
784 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
785 		if (ret) {
786 			pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
787 				ret);
788 			kfree(mdata);
789 			return ret;
790 		}
791 		jffs2_dbg(1, "%s(): Writing %d bites of symlink target\n",
792 			  __func__, mdatalen);
793 
794 	}
795 
796 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
797 				JFFS2_SUMMARY_INODE_SIZE);
798 	if (ret) {
799 		pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
800 			sizeof(ri) + mdatalen, ret);
801 		goto out;
802 	}
803 
804 	last_frag = frag_last(&f->fragtree);
805 	if (last_frag)
806 		/* Fetch the inode length from the fragtree rather then
807 		 * from i_size since i_size may have not been updated yet */
808 		ilen = last_frag->ofs + last_frag->size;
809 	else
810 		ilen = JFFS2_F_I_SIZE(f);
811 
812 	memset(&ri, 0, sizeof(ri));
813 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
814 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
815 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
816 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
817 
818 	ri.ino = cpu_to_je32(f->inocache->ino);
819 	ri.version = cpu_to_je32(++f->highest_version);
820 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
821 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
822 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
823 	ri.isize = cpu_to_je32(ilen);
824 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
825 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
826 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
827 	ri.offset = cpu_to_je32(0);
828 	ri.csize = cpu_to_je32(mdatalen);
829 	ri.dsize = cpu_to_je32(mdatalen);
830 	ri.compr = JFFS2_COMPR_NONE;
831 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
832 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
833 
834 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
835 
836 	if (IS_ERR(new_fn)) {
837 		pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
838 		ret = PTR_ERR(new_fn);
839 		goto out;
840 	}
841 	jffs2_mark_node_obsolete(c, fn->raw);
842 	jffs2_free_full_dnode(fn);
843 	f->metadata = new_fn;
844  out:
845 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
846 		kfree(mdata);
847 	return ret;
848 }
849 
850 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
851 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
852 {
853 	struct jffs2_full_dirent *new_fd;
854 	struct jffs2_raw_dirent rd;
855 	uint32_t alloclen;
856 	int ret;
857 
858 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
859 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
860 	rd.nsize = strlen(fd->name);
861 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
862 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
863 
864 	rd.pino = cpu_to_je32(f->inocache->ino);
865 	rd.version = cpu_to_je32(++f->highest_version);
866 	rd.ino = cpu_to_je32(fd->ino);
867 	/* If the times on this inode were set by explicit utime() they can be different,
868 	   so refrain from splatting them. */
869 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
870 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
871 	else
872 		rd.mctime = cpu_to_je32(0);
873 	rd.type = fd->type;
874 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
875 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
876 
877 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
878 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
879 	if (ret) {
880 		pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
881 			sizeof(rd)+rd.nsize, ret);
882 		return ret;
883 	}
884 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
885 
886 	if (IS_ERR(new_fd)) {
887 		pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
888 			PTR_ERR(new_fd));
889 		return PTR_ERR(new_fd);
890 	}
891 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
892 	return 0;
893 }
894 
895 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
896 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
897 {
898 	struct jffs2_full_dirent **fdp = &f->dents;
899 	int found = 0;
900 
901 	/* On a medium where we can't actually mark nodes obsolete
902 	   pernamently, such as NAND flash, we need to work out
903 	   whether this deletion dirent is still needed to actively
904 	   delete a 'real' dirent with the same name that's still
905 	   somewhere else on the flash. */
906 	if (!jffs2_can_mark_obsolete(c)) {
907 		struct jffs2_raw_dirent *rd;
908 		struct jffs2_raw_node_ref *raw;
909 		int ret;
910 		size_t retlen;
911 		int name_len = strlen(fd->name);
912 		uint32_t name_crc = crc32(0, fd->name, name_len);
913 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
914 
915 		rd = kmalloc(rawlen, GFP_KERNEL);
916 		if (!rd)
917 			return -ENOMEM;
918 
919 		/* Prevent the erase code from nicking the obsolete node refs while
920 		   we're looking at them. I really don't like this extra lock but
921 		   can't see any alternative. Suggestions on a postcard to... */
922 		mutex_lock(&c->erase_free_sem);
923 
924 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
925 
926 			cond_resched();
927 
928 			/* We only care about obsolete ones */
929 			if (!(ref_obsolete(raw)))
930 				continue;
931 
932 			/* Any dirent with the same name is going to have the same length... */
933 			if (ref_totlen(c, NULL, raw) != rawlen)
934 				continue;
935 
936 			/* Doesn't matter if there's one in the same erase block. We're going to
937 			   delete it too at the same time. */
938 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
939 				continue;
940 
941 			jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
942 				  ref_offset(raw));
943 
944 			/* This is an obsolete node belonging to the same directory, and it's of the right
945 			   length. We need to take a closer look...*/
946 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
947 			if (ret) {
948 				pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
949 					__func__, ret, ref_offset(raw));
950 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
951 				continue;
952 			}
953 			if (retlen != rawlen) {
954 				pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
955 					__func__, retlen, rawlen,
956 					ref_offset(raw));
957 				continue;
958 			}
959 
960 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
961 				continue;
962 
963 			/* If the name CRC doesn't match, skip */
964 			if (je32_to_cpu(rd->name_crc) != name_crc)
965 				continue;
966 
967 			/* If the name length doesn't match, or it's another deletion dirent, skip */
968 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
969 				continue;
970 
971 			/* OK, check the actual name now */
972 			if (memcmp(rd->name, fd->name, name_len))
973 				continue;
974 
975 			/* OK. The name really does match. There really is still an older node on
976 			   the flash which our deletion dirent obsoletes. So we have to write out
977 			   a new deletion dirent to replace it */
978 			mutex_unlock(&c->erase_free_sem);
979 
980 			jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
981 				  ref_offset(fd->raw), fd->name,
982 				  ref_offset(raw), je32_to_cpu(rd->ino));
983 			kfree(rd);
984 
985 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
986 		}
987 
988 		mutex_unlock(&c->erase_free_sem);
989 		kfree(rd);
990 	}
991 
992 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
993 	   we should update the metadata node with those times accordingly */
994 
995 	/* No need for it any more. Just mark it obsolete and remove it from the list */
996 	while (*fdp) {
997 		if ((*fdp) == fd) {
998 			found = 1;
999 			*fdp = fd->next;
1000 			break;
1001 		}
1002 		fdp = &(*fdp)->next;
1003 	}
1004 	if (!found) {
1005 		pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
1006 			fd->name, f->inocache->ino);
1007 	}
1008 	jffs2_mark_node_obsolete(c, fd->raw);
1009 	jffs2_free_full_dirent(fd);
1010 	return 0;
1011 }
1012 
1013 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1014 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1015 				      uint32_t start, uint32_t end)
1016 {
1017 	struct jffs2_raw_inode ri;
1018 	struct jffs2_node_frag *frag;
1019 	struct jffs2_full_dnode *new_fn;
1020 	uint32_t alloclen, ilen;
1021 	int ret;
1022 
1023 	jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1024 		  f->inocache->ino, start, end);
1025 
1026 	memset(&ri, 0, sizeof(ri));
1027 
1028 	if(fn->frags > 1) {
1029 		size_t readlen;
1030 		uint32_t crc;
1031 		/* It's partially obsoleted by a later write. So we have to
1032 		   write it out again with the _same_ version as before */
1033 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1034 		if (readlen != sizeof(ri) || ret) {
1035 			pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1036 				ret, readlen);
1037 			goto fill;
1038 		}
1039 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1040 			pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1041 				__func__, ref_offset(fn->raw),
1042 				je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1043 			return -EIO;
1044 		}
1045 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1046 			pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1047 				__func__, ref_offset(fn->raw),
1048 				je32_to_cpu(ri.totlen), sizeof(ri));
1049 			return -EIO;
1050 		}
1051 		crc = crc32(0, &ri, sizeof(ri)-8);
1052 		if (crc != je32_to_cpu(ri.node_crc)) {
1053 			pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1054 				__func__, ref_offset(fn->raw),
1055 				je32_to_cpu(ri.node_crc), crc);
1056 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
1057 			pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1058 				start, end, f->inocache->ino);
1059 			goto fill;
1060 		}
1061 		if (ri.compr != JFFS2_COMPR_ZERO) {
1062 			pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1063 				__func__, ref_offset(fn->raw));
1064 			pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1065 				start, end, f->inocache->ino);
1066 			goto fill;
1067 		}
1068 	} else {
1069 	fill:
1070 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1071 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1072 		ri.totlen = cpu_to_je32(sizeof(ri));
1073 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1074 
1075 		ri.ino = cpu_to_je32(f->inocache->ino);
1076 		ri.version = cpu_to_je32(++f->highest_version);
1077 		ri.offset = cpu_to_je32(start);
1078 		ri.dsize = cpu_to_je32(end - start);
1079 		ri.csize = cpu_to_je32(0);
1080 		ri.compr = JFFS2_COMPR_ZERO;
1081 	}
1082 
1083 	frag = frag_last(&f->fragtree);
1084 	if (frag)
1085 		/* Fetch the inode length from the fragtree rather then
1086 		 * from i_size since i_size may have not been updated yet */
1087 		ilen = frag->ofs + frag->size;
1088 	else
1089 		ilen = JFFS2_F_I_SIZE(f);
1090 
1091 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1092 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1093 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1094 	ri.isize = cpu_to_je32(ilen);
1095 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1096 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1097 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1098 	ri.data_crc = cpu_to_je32(0);
1099 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1100 
1101 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1102 				     JFFS2_SUMMARY_INODE_SIZE);
1103 	if (ret) {
1104 		pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1105 			sizeof(ri), ret);
1106 		return ret;
1107 	}
1108 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1109 
1110 	if (IS_ERR(new_fn)) {
1111 		pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1112 		return PTR_ERR(new_fn);
1113 	}
1114 	if (je32_to_cpu(ri.version) == f->highest_version) {
1115 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1116 		if (f->metadata) {
1117 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1118 			jffs2_free_full_dnode(f->metadata);
1119 			f->metadata = NULL;
1120 		}
1121 		return 0;
1122 	}
1123 
1124 	/*
1125 	 * We should only get here in the case where the node we are
1126 	 * replacing had more than one frag, so we kept the same version
1127 	 * number as before. (Except in case of error -- see 'goto fill;'
1128 	 * above.)
1129 	 */
1130 	D1(if(unlikely(fn->frags <= 1)) {
1131 			pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1132 				__func__, fn->frags, je32_to_cpu(ri.version),
1133 				f->highest_version, je32_to_cpu(ri.ino));
1134 	});
1135 
1136 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1137 	mark_ref_normal(new_fn->raw);
1138 
1139 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1140 	     frag; frag = frag_next(frag)) {
1141 		if (frag->ofs > fn->size + fn->ofs)
1142 			break;
1143 		if (frag->node == fn) {
1144 			frag->node = new_fn;
1145 			new_fn->frags++;
1146 			fn->frags--;
1147 		}
1148 	}
1149 	if (fn->frags) {
1150 		pr_warn("%s(): Old node still has frags!\n", __func__);
1151 		BUG();
1152 	}
1153 	if (!new_fn->frags) {
1154 		pr_warn("%s(): New node has no frags!\n", __func__);
1155 		BUG();
1156 	}
1157 
1158 	jffs2_mark_node_obsolete(c, fn->raw);
1159 	jffs2_free_full_dnode(fn);
1160 
1161 	return 0;
1162 }
1163 
1164 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1165 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1166 				       uint32_t start, uint32_t end)
1167 {
1168 	struct inode *inode = OFNI_EDONI_2SFFJ(f);
1169 	struct jffs2_full_dnode *new_fn;
1170 	struct jffs2_raw_inode ri;
1171 	uint32_t alloclen, offset, orig_end, orig_start;
1172 	int ret = 0;
1173 	unsigned char *comprbuf = NULL, *writebuf;
1174 	struct page *page;
1175 	unsigned char *pg_ptr;
1176 
1177 	memset(&ri, 0, sizeof(ri));
1178 
1179 	jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1180 		  f->inocache->ino, start, end);
1181 
1182 	orig_end = end;
1183 	orig_start = start;
1184 
1185 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1186 		/* Attempt to do some merging. But only expand to cover logically
1187 		   adjacent frags if the block containing them is already considered
1188 		   to be dirty. Otherwise we end up with GC just going round in
1189 		   circles dirtying the nodes it already wrote out, especially
1190 		   on NAND where we have small eraseblocks and hence a much higher
1191 		   chance of nodes having to be split to cross boundaries. */
1192 
1193 		struct jffs2_node_frag *frag;
1194 		uint32_t min, max;
1195 
1196 		min = start & ~(PAGE_SIZE-1);
1197 		max = min + PAGE_SIZE;
1198 
1199 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1200 
1201 		/* BUG_ON(!frag) but that'll happen anyway... */
1202 
1203 		BUG_ON(frag->ofs != start);
1204 
1205 		/* First grow down... */
1206 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1207 
1208 			/* If the previous frag doesn't even reach the beginning, there's
1209 			   excessive fragmentation. Just merge. */
1210 			if (frag->ofs > min) {
1211 				jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1212 					  frag->ofs, frag->ofs+frag->size);
1213 				start = frag->ofs;
1214 				continue;
1215 			}
1216 			/* OK. This frag holds the first byte of the page. */
1217 			if (!frag->node || !frag->node->raw) {
1218 				jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1219 					  frag->ofs, frag->ofs+frag->size);
1220 				break;
1221 			} else {
1222 
1223 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1224 				   in a block which is still considered clean? If so, don't obsolete it.
1225 				   If not, cover it anyway. */
1226 
1227 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1228 				struct jffs2_eraseblock *jeb;
1229 
1230 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1231 
1232 				if (jeb == c->gcblock) {
1233 					jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1234 						  frag->ofs,
1235 						  frag->ofs + frag->size,
1236 						  ref_offset(raw));
1237 					start = frag->ofs;
1238 					break;
1239 				}
1240 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1241 					jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1242 						  frag->ofs,
1243 						  frag->ofs + frag->size,
1244 						  jeb->offset);
1245 					break;
1246 				}
1247 
1248 				jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1249 					  frag->ofs,
1250 					  frag->ofs + frag->size,
1251 					  jeb->offset);
1252 				start = frag->ofs;
1253 				break;
1254 			}
1255 		}
1256 
1257 		/* ... then up */
1258 
1259 		/* Find last frag which is actually part of the node we're to GC. */
1260 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1261 
1262 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1263 
1264 			/* If the previous frag doesn't even reach the beginning, there's lots
1265 			   of fragmentation. Just merge. */
1266 			if (frag->ofs+frag->size < max) {
1267 				jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1268 					  frag->ofs, frag->ofs+frag->size);
1269 				end = frag->ofs + frag->size;
1270 				continue;
1271 			}
1272 
1273 			if (!frag->node || !frag->node->raw) {
1274 				jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1275 					  frag->ofs, frag->ofs+frag->size);
1276 				break;
1277 			} else {
1278 
1279 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1280 				   in a block which is still considered clean? If so, don't obsolete it.
1281 				   If not, cover it anyway. */
1282 
1283 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1284 				struct jffs2_eraseblock *jeb;
1285 
1286 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1287 
1288 				if (jeb == c->gcblock) {
1289 					jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1290 						  frag->ofs,
1291 						  frag->ofs + frag->size,
1292 						  ref_offset(raw));
1293 					end = frag->ofs + frag->size;
1294 					break;
1295 				}
1296 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1297 					jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1298 						  frag->ofs,
1299 						  frag->ofs + frag->size,
1300 						  jeb->offset);
1301 					break;
1302 				}
1303 
1304 				jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1305 					  frag->ofs,
1306 					  frag->ofs + frag->size,
1307 					  jeb->offset);
1308 				end = frag->ofs + frag->size;
1309 				break;
1310 			}
1311 		}
1312 		jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1313 			  orig_start, orig_end, start, end);
1314 
1315 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1316 		BUG_ON(end < orig_end);
1317 		BUG_ON(start > orig_start);
1318 	}
1319 
1320 	/* The rules state that we must obtain the page lock *before* f->sem, so
1321 	 * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
1322 	 * actually going to *change* so we're safe; we only allow reading.
1323 	 *
1324 	 * It is important to note that jffs2_write_begin() will ensure that its
1325 	 * page is marked Uptodate before allocating space. That means that if we
1326 	 * end up here trying to GC the *same* page that jffs2_write_begin() is
1327 	 * trying to write out, read_cache_page() will not deadlock. */
1328 	mutex_unlock(&f->sem);
1329 	page = read_cache_page(inode->i_mapping, start >> PAGE_SHIFT,
1330 			       jffs2_do_readpage_unlock, inode);
1331 	if (IS_ERR(page)) {
1332 		pr_warn("read_cache_page() returned error: %ld\n",
1333 			PTR_ERR(page));
1334 		mutex_lock(&f->sem);
1335 		return PTR_ERR(page);
1336 	}
1337 
1338 	pg_ptr = kmap(page);
1339 	mutex_lock(&f->sem);
1340 
1341 	offset = start;
1342 	while(offset < orig_end) {
1343 		uint32_t datalen;
1344 		uint32_t cdatalen;
1345 		uint16_t comprtype = JFFS2_COMPR_NONE;
1346 
1347 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1348 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1349 
1350 		if (ret) {
1351 			pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1352 				sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1353 			break;
1354 		}
1355 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1356 		datalen = end - offset;
1357 
1358 		writebuf = pg_ptr + (offset & (PAGE_SIZE -1));
1359 
1360 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1361 
1362 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1363 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1364 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1365 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1366 
1367 		ri.ino = cpu_to_je32(f->inocache->ino);
1368 		ri.version = cpu_to_je32(++f->highest_version);
1369 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1370 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1371 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1372 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1373 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1374 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1375 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1376 		ri.offset = cpu_to_je32(offset);
1377 		ri.csize = cpu_to_je32(cdatalen);
1378 		ri.dsize = cpu_to_je32(datalen);
1379 		ri.compr = comprtype & 0xff;
1380 		ri.usercompr = (comprtype >> 8) & 0xff;
1381 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1382 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1383 
1384 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1385 
1386 		jffs2_free_comprbuf(comprbuf, writebuf);
1387 
1388 		if (IS_ERR(new_fn)) {
1389 			pr_warn("Error writing new dnode: %ld\n",
1390 				PTR_ERR(new_fn));
1391 			ret = PTR_ERR(new_fn);
1392 			break;
1393 		}
1394 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1395 		offset += datalen;
1396 		if (f->metadata) {
1397 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1398 			jffs2_free_full_dnode(f->metadata);
1399 			f->metadata = NULL;
1400 		}
1401 	}
1402 
1403 	kunmap(page);
1404 	put_page(page);
1405 	return ret;
1406 }
1407