1 /* $NetBSD: chfs_gc.c,v 1.12 2021/12/07 22:13:56 andvar Exp $ */
2
3 /*-
4 * Copyright (c) 2010 Department of Software Engineering,
5 * University of Szeged, Hungary
6 * Copyright (c) 2010 Tamas Toth <ttoth@inf.u-szeged.hu>
7 * Copyright (c) 2010 Adam Hoka <ahoka@NetBSD.org>
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to The NetBSD Foundation
11 * by the Department of Software Engineering, University of Szeged, Hungary
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cprng.h>
36 #include "chfs.h"
37
38 void chfs_gc_release_inode(struct chfs_mount *,
39 struct chfs_inode *);
40 struct chfs_inode *chfs_gc_fetch_inode(struct chfs_mount *,
41 ino_t, uint32_t);
42 int chfs_check(struct chfs_mount *, struct chfs_vnode_cache *);
43 void chfs_clear_inode(struct chfs_mount *, struct chfs_inode *);
44
45
46 struct chfs_eraseblock *find_gc_block(struct chfs_mount *);
47 int chfs_gcollect_pristine(struct chfs_mount *,
48 struct chfs_eraseblock *,
49 struct chfs_vnode_cache *, struct chfs_node_ref *);
50 int chfs_gcollect_live(struct chfs_mount *,
51 struct chfs_eraseblock *, struct chfs_node_ref *,
52 struct chfs_inode *);
53 int chfs_gcollect_vnode(struct chfs_mount *, struct chfs_inode *);
54 int chfs_gcollect_dirent(struct chfs_mount *,
55 struct chfs_eraseblock *, struct chfs_inode *,
56 struct chfs_dirent *);
57 int chfs_gcollect_deletion_dirent(struct chfs_mount *,
58 struct chfs_eraseblock *, struct chfs_inode *,
59 struct chfs_dirent *);
60 int chfs_gcollect_dnode(struct chfs_mount *,
61 struct chfs_eraseblock *, struct chfs_inode *,
62 struct chfs_full_dnode *, uint32_t, uint32_t);
63
64 /*
65 * chfs_gc_trigger - wakes up GC thread, if it should run
66 * Must be called with chm_lock_mountfields held.
67 */
68 void
chfs_gc_trigger(struct chfs_mount * chmp)69 chfs_gc_trigger(struct chfs_mount *chmp)
70 {
71 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
72
73 if (gc->gcth_running &&
74 chfs_gc_thread_should_wake(chmp)) {
75 cv_signal(&gc->gcth_wakeup);
76 }
77 }
78
79
80 /* chfs_gc_thread - garbage collector's thread */
81 void
chfs_gc_thread(void * data)82 chfs_gc_thread(void *data)
83 {
84 struct chfs_mount *chmp = data;
85 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
86
87 dbg_gc("[GC THREAD] thread started\n");
88
89 mutex_enter(&chmp->chm_lock_mountfields);
90 while (gc->gcth_running) {
91 /* we must call chfs_gc_thread_should_wake with chm_lock_mountfields
92 * held, which is a bit awkwardly done here, but we can't really
93 * do it otherway with the current design...
94 */
95 if (chfs_gc_thread_should_wake(chmp)) {
96 if (chfs_gcollect_pass(chmp) == ENOSPC) {
97 mutex_exit(&chmp->chm_lock_mountfields);
98 panic("No space for garbage collection\n");
99 /* XXX why break here? i have added a panic
100 * here to see if it gets triggered -ahoka
101 */
102 break;
103 }
104 /* XXX gcollect_pass drops the mutex */
105 }
106
107 cv_timedwait_sig(&gc->gcth_wakeup,
108 &chmp->chm_lock_mountfields, mstohz(100));
109 }
110 mutex_exit(&chmp->chm_lock_mountfields);
111
112 dbg_gc("[GC THREAD] thread stopped\n");
113 kthread_exit(0);
114 }
115
116 /* chfs_gc_thread_start - starts GC */
117 void
chfs_gc_thread_start(struct chfs_mount * chmp)118 chfs_gc_thread_start(struct chfs_mount *chmp)
119 {
120 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
121
122 cv_init(&gc->gcth_wakeup, "chfsgccv");
123
124 gc->gcth_running = true;
125 kthread_create(PRI_NONE, /*KTHREAD_MPSAFE |*/ KTHREAD_MUSTJOIN,
126 NULL, chfs_gc_thread, chmp, &gc->gcth_thread,
127 "chfsgcth");
128 }
129
130 /* chfs_gc_thread_stop - stops GC */
131 void
chfs_gc_thread_stop(struct chfs_mount * chmp)132 chfs_gc_thread_stop(struct chfs_mount *chmp)
133 {
134 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
135
136 /* check if it is actually running */
137 if (gc->gcth_running) {
138 gc->gcth_running = false;
139 } else {
140 return;
141 }
142 cv_signal(&gc->gcth_wakeup);
143 dbg_gc("[GC THREAD] stop signal sent\n");
144
145 kthread_join(gc->gcth_thread);
146 #ifdef BROKEN_KTH_JOIN
147 kpause("chfsthjoin", false, mstohz(1000), NULL);
148 #endif
149
150 cv_destroy(&gc->gcth_wakeup);
151 }
152
153 /*
154 * chfs_gc_thread_should_wake - checks if GC thread should wake up
155 * Must be called with chm_lock_mountfields held.
156 * Returns 1, if GC should wake up and 0 else.
157 */
158 int
chfs_gc_thread_should_wake(struct chfs_mount * chmp)159 chfs_gc_thread_should_wake(struct chfs_mount *chmp)
160 {
161 int nr_very_dirty = 0;
162 struct chfs_eraseblock *cheb;
163 uint32_t dirty;
164
165 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
166
167 /* Erase pending queue is not empty. */
168 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
169 dbg_gc("erase_pending\n");
170 return 1;
171 }
172
173 /* There is something unchecked in the filesystem. */
174 if (chmp->chm_unchecked_size) {
175 dbg_gc("unchecked\n");
176 return 1;
177 }
178
179 dirty = chmp->chm_dirty_size - chmp->chm_nr_erasable_blocks *
180 chmp->chm_ebh->eb_size;
181
182 /* Number of free and erasable blocks are critical. */
183 if (chmp->chm_nr_free_blocks + chmp->chm_nr_erasable_blocks <
184 chmp->chm_resv_blocks_gctrigger && (dirty > chmp->chm_nospc_dirty)) {
185 dbg_gc("free: %d + erasable: %d < resv: %d\n",
186 chmp->chm_nr_free_blocks, chmp->chm_nr_erasable_blocks,
187 chmp->chm_resv_blocks_gctrigger);
188 dbg_gc("dirty: %d > nospc_dirty: %d\n",
189 dirty, chmp->chm_nospc_dirty);
190
191 return 1;
192 }
193
194 /* There are too much very dirty blocks. */
195 TAILQ_FOREACH(cheb, &chmp->chm_very_dirty_queue, queue) {
196 nr_very_dirty++;
197 if (nr_very_dirty == chmp->chm_vdirty_blocks_gctrigger) {
198 dbg_gc("nr_very_dirty\n");
199 return 1;
200 }
201 }
202
203 /* Everything is OK, GC shouldn't run. */
204 return 0;
205 }
206
/*
 * chfs_gc_release_inode - counterpart of chfs_gc_fetch_inode
 *
 * Currently a placeholder: it only emits a debug message and does not
 * touch chmp or ip.
 */
void
chfs_gc_release_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
{

	dbg_gc("release inode\n");
}
214
215 /* chfs_gc_fetch_inode - assign the given inode to the GC */
216 struct chfs_inode *
chfs_gc_fetch_inode(struct chfs_mount * chmp,ino_t vno,uint32_t unlinked)217 chfs_gc_fetch_inode(struct chfs_mount *chmp, ino_t vno,
218 uint32_t unlinked)
219 {
220 struct vnode *vp = NULL;
221 struct chfs_vnode_cache *vc;
222 struct chfs_inode *ip;
223 dbg_gc("fetch inode %llu\n", (unsigned long long)vno);
224
225 if (unlinked) {
226 dbg_gc("unlinked\n");
227 vp = chfs_vnode_lookup(chmp, vno);
228 if (!vp) {
229 mutex_enter(&chmp->chm_lock_vnocache);
230 vc = chfs_vnode_cache_get(chmp, vno);
231 if (!vc) {
232 mutex_exit(&chmp->chm_lock_vnocache);
233 return NULL;
234 }
235 mutex_exit(&chmp->chm_lock_vnocache);
236 if (vc->state != VNO_STATE_CHECKEDABSENT) {
237 /* XXX why do we need the delay here?! */
238 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
239 cv_timedwait_sig(
240 &chmp->chm_gc_thread.gcth_wakeup,
241 &chmp->chm_lock_mountfields, mstohz(50));
242 }
243 return NULL;
244 }
245 } else {
246 dbg_gc("vnode lookup\n");
247 vp = chfs_vnode_lookup(chmp, vno);
248 }
249 dbg_gc("vp to ip\n");
250 ip = VTOI(vp);
251 KASSERT(ip);
252 vrele(vp);
253
254 return ip;
255 }
256
257 extern rb_tree_ops_t frag_rbtree_ops;
258
259 /* chfs_check - checks an inode with minimal initialization */
260 int
chfs_check(struct chfs_mount * chmp,struct chfs_vnode_cache * chvc)261 chfs_check(struct chfs_mount *chmp, struct chfs_vnode_cache *chvc)
262 {
263 KASSERT(mutex_owned(&chmp->chm_lock_vnocache));
264
265 struct chfs_inode *ip;
266 struct vnode *vp;
267 int ret;
268
269 /* Get a new inode. */
270 ip = pool_get(&chfs_inode_pool, PR_WAITOK);
271 if (!ip) {
272 return ENOMEM;
273 }
274
275 vp = kmem_zalloc(sizeof(struct vnode), KM_SLEEP);
276
277 /* Minimal initialization. */
278 ip->chvc = chvc;
279 ip->vp = vp;
280
281 vp->v_data = ip;
282
283 rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
284 TAILQ_INIT(&ip->dents);
285
286 /* Build the node. */
287 mutex_exit(&chmp->chm_lock_vnocache);
288 ret = chfs_read_inode_internal(chmp, ip);
289 mutex_enter(&chmp->chm_lock_vnocache);
290 if (!ret) {
291 chfs_clear_inode(chmp, ip);
292 }
293
294 /* Release inode. */
295 pool_put(&chfs_inode_pool, ip);
296
297 return ret;
298 }
299
300 /* chfs_clear_inode - kills a minimal inode */
301 void
chfs_clear_inode(struct chfs_mount * chmp,struct chfs_inode * ip)302 chfs_clear_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
303 {
304 KASSERT(mutex_owned(&chmp->chm_lock_vnocache));
305
306 struct chfs_dirent *fd, *tmpfd;
307 struct chfs_vnode_cache *chvc;
308 struct chfs_node_ref *nref;
309
310 chvc = ip->chvc;
311 /* shouldnt this be: */
312 //bool deleted = (chvc && !(chvc->pvno || chvc->nlink));
313 int deleted = (chvc && !(chvc->pvno | chvc->nlink));
314
315 /* Set actual state. */
316 if (chvc && chvc->state != VNO_STATE_CHECKING) {
317 chvc->state = VNO_STATE_CLEARING;
318 }
319
320 /* Remove vnode information. */
321 while (deleted && chvc->v != (struct chfs_node_ref *)chvc) {
322 nref = chvc->v;
323 chfs_remove_and_obsolete(chmp, chvc, nref, &chvc->v);
324 }
325
326 /* Destroy data. */
327 chfs_kill_fragtree(chmp, &ip->fragtree);
328
329 /* Clear dirents. */
330 TAILQ_FOREACH_SAFE(fd, &ip->dents, fds, tmpfd) {
331 chfs_free_dirent(fd);
332 }
333
334 /* Remove node from vnode cache. */
335 if (chvc && chvc->state == VNO_STATE_CHECKING) {
336 chvc->state = VNO_STATE_CHECKEDABSENT;
337 if ((struct chfs_vnode_cache *)chvc->v == chvc &&
338 (struct chfs_vnode_cache *)chvc->dirents == chvc &&
339 (struct chfs_vnode_cache *)chvc->dnode == chvc)
340 chfs_vnode_cache_remove(chmp, chvc);
341 }
342 }
343
344 /* find_gc_block - finds the next block for GC */
345 struct chfs_eraseblock *
find_gc_block(struct chfs_mount * chmp)346 find_gc_block(struct chfs_mount *chmp)
347 {
348 struct chfs_eraseblock *ret;
349 struct chfs_eraseblock_queue *nextqueue;
350
351 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
352
353 /* Get a random number. */
354 uint32_t n = cprng_fast32() % 128;
355
356 again:
357 /* Find an eraseblock queue. */
358 if (n<50 && !TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
359 dbg_gc("Picking block from erase_pending_queue to GC next\n");
360 nextqueue = &chmp->chm_erase_pending_queue;
361 } else if (n<110 && !TAILQ_EMPTY(&chmp->chm_very_dirty_queue) ) {
362 dbg_gc("Picking block from very_dirty_queue to GC next\n");
363 nextqueue = &chmp->chm_very_dirty_queue;
364 } else if (n<126 && !TAILQ_EMPTY(&chmp->chm_dirty_queue) ) {
365 dbg_gc("Picking block from dirty_queue to GC next\n");
366 nextqueue = &chmp->chm_dirty_queue;
367 } else if (!TAILQ_EMPTY(&chmp->chm_clean_queue)) {
368 dbg_gc("Picking block from clean_queue to GC next\n");
369 nextqueue = &chmp->chm_clean_queue;
370 } else if (!TAILQ_EMPTY(&chmp->chm_dirty_queue)) {
371 dbg_gc("Picking block from dirty_queue to GC next"
372 " (clean_queue was empty)\n");
373 nextqueue = &chmp->chm_dirty_queue;
374 } else if (!TAILQ_EMPTY(&chmp->chm_very_dirty_queue)) {
375 dbg_gc("Picking block from very_dirty_queue to GC next"
376 " (clean_queue and dirty_queue were empty)\n");
377 nextqueue = &chmp->chm_very_dirty_queue;
378 } else if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
379 dbg_gc("Picking block from erase_pending_queue to GC next"
380 " (clean_queue and {very_,}dirty_queue were empty)\n");
381 nextqueue = &chmp->chm_erase_pending_queue;
382 } else if (!TAILQ_EMPTY(&chmp->chm_erasable_pending_wbuf_queue)) {
383 dbg_gc("Synching wbuf in order to reuse "
384 "erasable_pendig_wbuf_queue blocks\n");
385 rw_enter(&chmp->chm_lock_wbuf, RW_WRITER);
386 chfs_flush_pending_wbuf(chmp);
387 rw_exit(&chmp->chm_lock_wbuf);
388 goto again;
389 } else {
390 dbg_gc("CHFS: no clean, dirty _or_ erasable"
391 " blocks to GC from! Where are they all?\n");
392 return NULL;
393 }
394
395 /* Get the first block of the queue. */
396 ret = TAILQ_FIRST(nextqueue);
397 if (chmp->chm_nextblock) {
398 dbg_gc("nextblock num: %u - gcblock num: %u\n",
399 chmp->chm_nextblock->lnr, ret->lnr);
400 if (ret == chmp->chm_nextblock)
401 goto again;
402 }
403 TAILQ_REMOVE(nextqueue, ret, queue);
404
405 /* Set GC block. */
406 chmp->chm_gcblock = ret;
407 /* Set GC node. */
408 ret->gc_node = ret->first_node;
409
410 if (!ret->gc_node) {
411 dbg_gc("Oops! ret->gc_node at LEB: %u is NULL\n", ret->lnr);
412 panic("CHFS BUG - one LEB's gc_node is NULL\n");
413 }
414
415 /* TODO wasted size? */
416 return ret;
417 }
418
419 /* chfs_gcollect_pass - this is the main function of GC */
420 int
chfs_gcollect_pass(struct chfs_mount * chmp)421 chfs_gcollect_pass(struct chfs_mount *chmp)
422 {
423 struct chfs_vnode_cache *vc;
424 struct chfs_eraseblock *eb;
425 struct chfs_node_ref *nref;
426 uint32_t gcblock_dirty;
427 struct chfs_inode *ip;
428 ino_t vno, pvno;
429 uint32_t nlink;
430 int ret = 0;
431
432 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
433
434 /* Check all vnodes. */
435 for (;;) {
436 mutex_enter(&chmp->chm_lock_sizes);
437
438 /* Check unchecked size. */
439 dbg_gc("unchecked size == %u\n", chmp->chm_unchecked_size);
440 if (!chmp->chm_unchecked_size)
441 break;
442
443 /* Compare vnode number to the maximum. */
444 if (chmp->chm_checked_vno > chmp->chm_max_vno) {
445 mutex_exit(&chmp->chm_lock_sizes);
446 dbg_gc("checked_vno (#%llu) > max_vno (#%llu)\n",
447 (unsigned long long)chmp->chm_checked_vno,
448 (unsigned long long)chmp->chm_max_vno);
449 return ENOSPC;
450 }
451
452 mutex_exit(&chmp->chm_lock_sizes);
453
454 mutex_enter(&chmp->chm_lock_vnocache);
455 dbg_gc("checking vno #%llu\n",
456 (unsigned long long)chmp->chm_checked_vno);
457 dbg_gc("get vnode cache\n");
458
459 /* OK, Get and check the vnode cache. */
460 vc = chfs_vnode_cache_get(chmp, chmp->chm_checked_vno++);
461
462 if (!vc) {
463 dbg_gc("!vc\n");
464 mutex_exit(&chmp->chm_lock_vnocache);
465 continue;
466 }
467
468 if ((vc->pvno | vc->nlink) == 0) {
469 dbg_gc("(pvno | nlink) == 0\n");
470 mutex_exit(&chmp->chm_lock_vnocache);
471 continue;
472 }
473
474 /* Find out the state of the vnode. */
475 dbg_gc("switch\n");
476 switch (vc->state) {
477 case VNO_STATE_CHECKEDABSENT:
478 /* FALLTHROUGH */
479 case VNO_STATE_PRESENT:
480 mutex_exit(&chmp->chm_lock_vnocache);
481 continue;
482
483 case VNO_STATE_GC:
484 /* FALLTHROUGH */
485 case VNO_STATE_CHECKING:
486 mutex_exit(&chmp->chm_lock_vnocache);
487 dbg_gc("VNO_STATE GC or CHECKING\n");
488 panic("CHFS BUG - vc state gc or checking\n");
489
490 case VNO_STATE_READING:
491 chmp->chm_checked_vno--;
492 mutex_exit(&chmp->chm_lock_vnocache);
493 /* XXX why do we need the delay here?! */
494 kpause("chvncrea", true, mstohz(50), NULL);
495
496 return 0;
497
498 default:
499 mutex_exit(&chmp->chm_lock_vnocache);
500 dbg_gc("default\n");
501 panic("CHFS BUG - vc state is other what we"
502 " checked\n");
503
504 case VNO_STATE_UNCHECKED:
505 ;
506 }
507
508 /* We found an unchecked vnode. */
509
510 vc->state = VNO_STATE_CHECKING;
511
512 /* XXX check if this is too heavy to call under
513 * chm_lock_vnocache
514 */
515 ret = chfs_check(chmp, vc);
516 vc->state = VNO_STATE_CHECKEDABSENT;
517
518 mutex_exit(&chmp->chm_lock_vnocache);
519 return ret;
520 }
521
522 /* Get GC block. */
523 eb = chmp->chm_gcblock;
524
525 if (!eb) {
526 eb = find_gc_block(chmp);
527 }
528
529 if (!eb) {
530 dbg_gc("!eb\n");
531 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
532 mutex_exit(&chmp->chm_lock_sizes);
533 return EAGAIN;
534 }
535 mutex_exit(&chmp->chm_lock_sizes);
536 return EIO;
537 }
538
539 if (!eb->used_size) {
540 dbg_gc("!eb->used_size\n");
541 goto eraseit;
542 }
543
544 /* Get GC node. */
545 nref = eb->gc_node;
546 gcblock_dirty = eb->dirty_size;
547
548 /* Find a node which wasn't obsoleted yet.
549 * Obsoleted nodes will be simply deleted after the whole block has checked. */
550 while(CHFS_REF_OBSOLETE(nref)) {
551 #ifdef DBG_MSG_GC
552 if (nref == chmp->chm_blocks[nref->nref_lnr].last_node) {
553 dbg_gc("THIS NODE IS THE LAST NODE OF ITS EB\n");
554 }
555 #endif
556 nref = node_next(nref);
557 if (!nref) {
558 eb->gc_node = nref;
559 mutex_exit(&chmp->chm_lock_sizes);
560 panic("CHFS BUG - nref is NULL)\n");
561 }
562 }
563
564 /* We found a "not obsoleted" node. */
565 eb->gc_node = nref;
566 KASSERT(nref->nref_lnr == chmp->chm_gcblock->lnr);
567
568 /* Check if node is in any chain. */
569 if (!nref->nref_next) {
570 /* This node is not in any chain. Simply collect it, or obsolete. */
571 mutex_exit(&chmp->chm_lock_sizes);
572 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
573 chfs_gcollect_pristine(chmp, eb, NULL, nref);
574 } else {
575 chfs_mark_node_obsolete(chmp, nref);
576 }
577 goto lock_size;
578 }
579
580 mutex_exit(&chmp->chm_lock_sizes);
581
582 mutex_enter(&chmp->chm_lock_vnocache);
583
584 dbg_gc("nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);
585 vc = chfs_nref_to_vc(nref);
586
587 /* Check the state of the node. */
588 dbg_gc("switch\n");
589 switch(vc->state) {
590 case VNO_STATE_CHECKEDABSENT:
591 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
592 vc->state = VNO_STATE_GC;
593 }
594 break;
595
596 case VNO_STATE_PRESENT:
597 break;
598
599 case VNO_STATE_UNCHECKED:
600 /* FALLTHROUGH */
601 case VNO_STATE_CHECKING:
602 /* FALLTHROUGH */
603 case VNO_STATE_GC:
604 mutex_exit(&chmp->chm_lock_vnocache);
605 panic("CHFS BUG - vc state unchecked,"
606 " checking or gc (vno #%llu, num #%d)\n",
607 (unsigned long long)vc->vno, vc->state);
608
609 case VNO_STATE_READING:
610 /* Node is in use at this time. */
611 mutex_exit(&chmp->chm_lock_vnocache);
612 kpause("chvncrea", true, mstohz(50), NULL);
613 return 0;
614 }
615
616 if (vc->state == VNO_STATE_GC) {
617 dbg_gc("vc->state == VNO_STATE_GC\n");
618 vc->state = VNO_STATE_CHECKEDABSENT;
619 mutex_exit(&chmp->chm_lock_vnocache);
620 ret = chfs_gcollect_pristine(chmp, eb, NULL, nref);
621
622 //TODO wake_up(&chmp->chm_vnocache_wq);
623 if (ret != EBADF)
624 goto test_gcnode;
625 mutex_enter(&chmp->chm_lock_vnocache);
626 }
627
628 /* Collect living node. */
629 vno = vc->vno;
630 pvno = vc->pvno;
631 nlink = vc->nlink;
632 mutex_exit(&chmp->chm_lock_vnocache);
633
634 ip = chfs_gc_fetch_inode(chmp, vno, !(pvno | nlink));
635
636 if (!ip) {
637 dbg_gc("!ip\n");
638 ret = 0;
639 goto lock_size;
640 }
641
642 chfs_gcollect_live(chmp, eb, nref, ip);
643
644 chfs_gc_release_inode(chmp, ip);
645
646 test_gcnode:
647 if (eb->dirty_size == gcblock_dirty &&
648 !CHFS_REF_OBSOLETE(eb->gc_node)) {
649 dbg_gc("ERROR collecting node at %u failed.\n",
650 CHFS_GET_OFS(eb->gc_node->nref_offset));
651
652 ret = ENOSPC;
653 }
654
655 lock_size:
656 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
657 mutex_enter(&chmp->chm_lock_sizes);
658 eraseit:
659 dbg_gc("eraseit\n");
660
661 if (chmp->chm_gcblock) {
662 /* This is only for debugging. */
663 dbg_gc("eb used size = %u\n", chmp->chm_gcblock->used_size);
664 dbg_gc("eb free size = %u\n", chmp->chm_gcblock->free_size);
665 dbg_gc("eb dirty size = %u\n", chmp->chm_gcblock->dirty_size);
666 dbg_gc("eb unchecked size = %u\n",
667 chmp->chm_gcblock->unchecked_size);
668 dbg_gc("eb wasted size = %u\n", chmp->chm_gcblock->wasted_size);
669
670 KASSERT(chmp->chm_gcblock->used_size + chmp->chm_gcblock->free_size +
671 chmp->chm_gcblock->dirty_size +
672 chmp->chm_gcblock->unchecked_size +
673 chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size);
674
675 }
676
677 /* Check the state of GC block. */
678 if (chmp->chm_gcblock && chmp->chm_gcblock->dirty_size +
679 chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size) {
680 dbg_gc("Block at leb #%u completely obsoleted by GC, "
681 "Moving to erase_pending_queue\n", chmp->chm_gcblock->lnr);
682 TAILQ_INSERT_TAIL(&chmp->chm_erase_pending_queue,
683 chmp->chm_gcblock, queue);
684 chmp->chm_gcblock = NULL;
685 chmp->chm_nr_erasable_blocks++;
686 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
687 ret = chfs_remap_leb(chmp);
688 }
689 }
690
691 mutex_exit(&chmp->chm_lock_sizes);
692 dbg_gc("return\n");
693 return ret;
694 }
695
696
697 /* chfs_gcollect_pristine - collects a pristine node */
698 int
chfs_gcollect_pristine(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_vnode_cache * chvc,struct chfs_node_ref * nref)699 chfs_gcollect_pristine(struct chfs_mount *chmp, struct chfs_eraseblock *cheb,
700 struct chfs_vnode_cache *chvc, struct chfs_node_ref *nref)
701 {
702 struct chfs_node_ref *newnref;
703 struct chfs_flash_node_hdr *nhdr;
704 struct chfs_flash_vnode *fvnode;
705 struct chfs_flash_dirent_node *fdirent;
706 struct chfs_flash_data_node *fdata;
707 int ret, retries = 0;
708 uint32_t ofs, crc;
709 size_t totlen = chfs_nref_len(chmp, cheb, nref);
710 char *data;
711 struct iovec vec;
712 size_t retlen;
713
714 dbg_gc("gcollect_pristine\n");
715
716 data = kmem_alloc(totlen, KM_SLEEP);
717 ofs = CHFS_GET_OFS(nref->nref_offset);
718
719 /* Read header. */
720 ret = chfs_read_leb(chmp, nref->nref_lnr, data, ofs, totlen, &retlen);
721 if (ret) {
722 dbg_gc("reading error\n");
723 goto err_out;
724 }
725 if (retlen != totlen) {
726 dbg_gc("read size error\n");
727 ret = EIO;
728 goto err_out;
729 }
730 nhdr = (struct chfs_flash_node_hdr *)data;
731
732 /* Check the header. */
733 if (le16toh(nhdr->magic) != CHFS_FS_MAGIC_BITMASK) {
734 dbg_gc("node header magic number error\n");
735 ret = EBADF;
736 goto err_out;
737 }
738 crc = crc32(0, (uint8_t *)nhdr, CHFS_NODE_HDR_SIZE - 4);
739 if (crc != le32toh(nhdr->hdr_crc)) {
740 dbg_gc("node header crc error\n");
741 ret = EBADF;
742 goto err_out;
743 }
744
745 /* Read the remaining parts. */
746 switch(le16toh(nhdr->type)) {
747 case CHFS_NODETYPE_VNODE:
748 /* vnode information node */
749 fvnode = (struct chfs_flash_vnode *)data;
750 crc = crc32(0, (uint8_t *)fvnode, sizeof(struct chfs_flash_vnode) - 4);
751 if (crc != le32toh(fvnode->node_crc)) {
752 dbg_gc("vnode crc error\n");
753 ret = EBADF;
754 goto err_out;
755 }
756 break;
757 case CHFS_NODETYPE_DIRENT:
758 /* dirent node */
759 fdirent = (struct chfs_flash_dirent_node *)data;
760 crc = crc32(0, (uint8_t *)fdirent, sizeof(struct chfs_flash_dirent_node) - 4);
761 if (crc != le32toh(fdirent->node_crc)) {
762 dbg_gc("dirent crc error\n");
763 ret = EBADF;
764 goto err_out;
765 }
766 crc = crc32(0, fdirent->name, fdirent->nsize);
767 if (crc != le32toh(fdirent->name_crc)) {
768 dbg_gc("dirent name crc error\n");
769 ret = EBADF;
770 goto err_out;
771 }
772 break;
773 case CHFS_NODETYPE_DATA:
774 /* data node */
775 fdata = (struct chfs_flash_data_node *)data;
776 crc = crc32(0, (uint8_t *)fdata, sizeof(struct chfs_flash_data_node) - 4);
777 if (crc != le32toh(fdata->node_crc)) {
778 dbg_gc("data node crc error\n");
779 ret = EBADF;
780 goto err_out;
781 }
782 break;
783 default:
784 /* unknown node */
785 if (chvc) {
786 dbg_gc("unknown node have vnode cache\n");
787 ret = EBADF;
788 goto err_out;
789 }
790 }
791 /* CRC's OK, write node to its new place */
792 retry:
793 ret = chfs_reserve_space_gc(chmp, totlen);
794 if (ret)
795 goto err_out;
796
797 newnref = chfs_alloc_node_ref(chmp->chm_nextblock);
798 if (!newnref) {
799 ret = ENOMEM;
800 goto err_out;
801 }
802
803 ofs = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
804 newnref->nref_offset = ofs;
805
806 /* write out the whole node */
807 vec.iov_base = (void *)data;
808 vec.iov_len = totlen;
809 mutex_enter(&chmp->chm_lock_sizes);
810 ret = chfs_write_wbuf(chmp, &vec, 1, ofs, &retlen);
811
812 if (ret || retlen != totlen) {
813 /* error while writing */
814 chfs_err("error while writing out to the media\n");
815 chfs_err("err: %d | size: %zu | retlen : %zu\n",
816 ret, totlen, retlen);
817
818 chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
819 if (retries) {
820 mutex_exit(&chmp->chm_lock_sizes);
821 ret = EIO;
822 goto err_out;
823 }
824
825 /* try again */
826 retries++;
827 mutex_exit(&chmp->chm_lock_sizes);
828 goto retry;
829 }
830
831 /* update vnode information */
832 mutex_exit(&chmp->chm_lock_sizes);
833 //TODO should we set free_size?
834 mutex_enter(&chmp->chm_lock_vnocache);
835 chfs_add_vnode_ref_to_vc(chmp, chvc, newnref);
836 mutex_exit(&chmp->chm_lock_vnocache);
837 ret = 0;
838 /* FALLTHROUGH */
839 err_out:
840 kmem_free(data, totlen);
841 return ret;
842 }
843
844
845 /* chfs_gcollect_live - collects a living node */
846 int
chfs_gcollect_live(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_node_ref * nref,struct chfs_inode * ip)847 chfs_gcollect_live(struct chfs_mount *chmp,
848 struct chfs_eraseblock *cheb, struct chfs_node_ref *nref,
849 struct chfs_inode *ip)
850 {
851 struct chfs_node_frag *frag;
852 struct chfs_full_dnode *fn = NULL;
853 int start = 0, end = 0, nrfrags = 0;
854 struct chfs_dirent *fd = NULL;
855 int ret = 0;
856 bool is_dirent;
857
858 dbg_gc("gcollect_live\n");
859
860 if (chmp->chm_gcblock != cheb) {
861 dbg_gc("GC block is no longer gcblock. Restart.\n");
862 goto upnout;
863 }
864
865 if (CHFS_REF_OBSOLETE(nref)) {
866 dbg_gc("node to be GC'd was obsoleted in the meantime.\n");
867 goto upnout;
868 }
869
870 /* It's a vnode? */
871 if (ip->chvc->v == nref) {
872 chfs_gcollect_vnode(chmp, ip);
873 goto upnout;
874 }
875
876 /* Find data node. */
877 dbg_gc("find full dnode\n");
878 for(frag = frag_first(&ip->fragtree);
879 frag; frag = frag_next(&ip->fragtree, frag)) {
880 if (frag->node && frag->node->nref == nref) {
881 fn = frag->node;
882 end = frag->ofs + frag->size;
883 if (!nrfrags++)
884 start = frag->ofs;
885 if (nrfrags == frag->node->frags)
886 break;
887 }
888 }
889
890 /* It's a pristine node, or dnode (or hole? XXX have we hole nodes?) */
891 if (fn) {
892 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
893 ret = chfs_gcollect_pristine(chmp,
894 cheb, ip->chvc, nref);
895 if (!ret) {
896 frag->node->nref = ip->chvc->v;
897 }
898 if (ret != EBADF)
899 goto upnout;
900 }
901 ret = chfs_gcollect_dnode(chmp, cheb, ip, fn, start, end);
902 goto upnout;
903 }
904
905 /* Is it a dirent? */
906 dbg_gc("find full dirent\n");
907 is_dirent = false;
908 TAILQ_FOREACH(fd, &ip->dents, fds) {
909 if (fd->nref == nref) {
910 is_dirent = true;
911 break;
912 }
913 }
914
915 if (is_dirent && fd->vno) {
916 /* Living dirent. */
917 ret = chfs_gcollect_dirent(chmp, cheb, ip, fd);
918 } else if (is_dirent) {
919 /* Already deleted dirent. */
920 ret = chfs_gcollect_deletion_dirent(chmp, cheb, ip, fd);
921 } else {
922 dbg_gc("Nref at leb #%u offset 0x%08x wasn't in node list"
923 " for ino #%llu\n",
924 nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset),
925 (unsigned long long)ip->ino);
926 if (CHFS_REF_OBSOLETE(nref)) {
927 dbg_gc("But it's obsolete so we don't mind"
928 " too much.\n");
929 }
930 }
931
932 upnout:
933 return ret;
934 }
935
936 /* chfs_gcollect_vnode - collects a vnode information node */
937 int
chfs_gcollect_vnode(struct chfs_mount * chmp,struct chfs_inode * ip)938 chfs_gcollect_vnode(struct chfs_mount *chmp, struct chfs_inode *ip)
939 {
940 int ret;
941 dbg_gc("gcollect_vnode\n");
942
943 /* Simply write the new vnode information to the flash
944 * with GC's space allocation */
945 ret = chfs_write_flash_vnode(chmp, ip, ALLOC_GC);
946
947 return ret;
948 }
949
950 /* chfs_gcollect_dirent - collects a dirent */
951 int
chfs_gcollect_dirent(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_inode * parent,struct chfs_dirent * fd)952 chfs_gcollect_dirent(struct chfs_mount *chmp,
953 struct chfs_eraseblock *cheb, struct chfs_inode *parent,
954 struct chfs_dirent *fd)
955 {
956 struct vnode *vnode = NULL;
957 struct chfs_inode *ip;
958 dbg_gc("gcollect_dirent\n");
959
960 /* Find vnode. */
961 vnode = chfs_vnode_lookup(chmp, fd->vno);
962
963 /* XXX maybe KASSERT or panic on this? */
964 if (vnode == NULL) {
965 return ENOENT;
966 }
967
968 ip = VTOI(vnode);
969 vrele(vnode);
970
971 /* Remove and obsolete the previous version. */
972 mutex_enter(&chmp->chm_lock_vnocache);
973 chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
974 &parent->chvc->dirents);
975 mutex_exit(&chmp->chm_lock_vnocache);
976
977 /* Write the new dirent to the flash. */
978 return chfs_write_flash_dirent(chmp,
979 parent, ip, fd, fd->vno, ALLOC_GC);
980 }
981
982 /*
983 * chfs_gcollect_deletion_dirent -
984 * collects a dirent what was marked as deleted
985 */
986 int
chfs_gcollect_deletion_dirent(struct chfs_mount * chmp,struct chfs_eraseblock * cheb,struct chfs_inode * parent,struct chfs_dirent * fd)987 chfs_gcollect_deletion_dirent(struct chfs_mount *chmp,
988 struct chfs_eraseblock *cheb, struct chfs_inode *parent,
989 struct chfs_dirent *fd)
990 {
991 struct chfs_flash_dirent_node chfdn;
992 struct chfs_node_ref *nref;
993 size_t retlen, name_len, nref_len;
994 uint32_t name_crc;
995
996 int ret;
997
998 dbg_gc("gcollect_deletion_dirent\n");
999
1000 /* Check node. */
1001 name_len = strlen(fd->name);
1002 name_crc = crc32(0, fd->name, name_len);
1003
1004 nref_len = chfs_nref_len(chmp, cheb, fd->nref);
1005
1006 /* XXX This was a noop (void)chfs_vnode_lookup(chmp, fd->vno); */
1007
1008 /* Find it in parent dirents. */
1009 for (nref = parent->chvc->dirents;
1010 nref != (void*)parent->chvc;
1011 nref = nref->nref_next) {
1012
1013 if (!CHFS_REF_OBSOLETE(nref))
1014 continue;
1015
1016 /* if node refs have different length, skip */
1017 if (chfs_nref_len(chmp, NULL, nref) != nref_len)
1018 continue;
1019
1020 if (CHFS_GET_OFS(nref->nref_offset) ==
1021 CHFS_GET_OFS(fd->nref->nref_offset)) {
1022 continue;
1023 }
1024
1025 /* read it from flash */
1026 ret = chfs_read_leb(chmp,
1027 nref->nref_lnr, (void*)&chfdn, CHFS_GET_OFS(nref->nref_offset),
1028 nref_len, &retlen);
1029
1030 if (ret) {
1031 dbg_gc("Read error: %d\n", ret);
1032 continue;
1033 }
1034
1035 if (retlen != nref_len) {
1036 dbg_gc("Error reading node:"
1037 " read: %zu instead of: %zu\n", retlen, nref_len);
1038 continue;
1039 }
1040
1041 /* if node type doesn't match, skip */
1042 if (le16toh(chfdn.type) != CHFS_NODETYPE_DIRENT)
1043 continue;
1044
1045 /* if crc doesn't match, skip */
1046 if (le32toh(chfdn.name_crc) != name_crc)
1047 continue;
1048
1049 /* if length of name different, or this is an another deletion
1050 * dirent, skip
1051 */
1052 if (chfdn.nsize != name_len || !le64toh(chfdn.vno))
1053 continue;
1054
1055 /* check actual name */
1056 if (memcmp(chfdn.name, fd->name, name_len))
1057 continue;
1058
1059 mutex_enter(&chmp->chm_lock_vnocache);
1060 chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
1061 &parent->chvc->dirents);
1062 mutex_exit(&chmp->chm_lock_vnocache);
1063 return chfs_write_flash_dirent(chmp,
1064 parent, NULL, fd, fd->vno, ALLOC_GC);
1065 }
1066
1067 /* Simply remove it from the parent dirents. */
1068 TAILQ_REMOVE(&parent->dents, fd, fds);
1069 chfs_free_dirent(fd);
1070 return 0;
1071 }
1072
1073 /* chfs_gcollect_dnode - */
1074 int
chfs_gcollect_dnode(struct chfs_mount * chmp,struct chfs_eraseblock * orig_cheb,struct chfs_inode * ip,struct chfs_full_dnode * fn,uint32_t orig_start,uint32_t orig_end)1075 chfs_gcollect_dnode(struct chfs_mount *chmp,
1076 struct chfs_eraseblock *orig_cheb, struct chfs_inode *ip,
1077 struct chfs_full_dnode *fn, uint32_t orig_start, uint32_t orig_end)
1078 {
1079 struct chfs_node_ref *nref;
1080 struct chfs_full_dnode *newfn;
1081 struct chfs_flash_data_node *fdnode;
1082 int ret = 0, retries = 0;
1083 uint32_t totlen;
1084 char *data = NULL;
1085 struct iovec vec;
1086 size_t retlen;
1087 dbg_gc("gcollect_dnode\n");
1088
1089 //TODO merge frags
1090
1091 KASSERT(orig_cheb->lnr == fn->nref->nref_lnr);
1092 totlen = chfs_nref_len(chmp, orig_cheb, fn->nref);
1093 data = kmem_alloc(totlen, KM_SLEEP);
1094
1095 /* Read the node from the flash. */
1096 ret = chfs_read_leb(chmp, fn->nref->nref_lnr, data, fn->nref->nref_offset,
1097 totlen, &retlen);
1098
1099 fdnode = (struct chfs_flash_data_node *)data;
1100 fdnode->version = htole64(++ip->chvc->highest_version);
1101 fdnode->node_crc = htole32(crc32(0, (uint8_t *)fdnode,
1102 sizeof(*fdnode) - 4));
1103
1104 vec.iov_base = (void *)data;
1105 vec.iov_len = totlen;
1106
1107 retry:
1108 /* Set the next block where we can write. */
1109 ret = chfs_reserve_space_gc(chmp, totlen);
1110 if (ret)
1111 goto out;
1112
1113 nref = chfs_alloc_node_ref(chmp->chm_nextblock);
1114 if (!nref) {
1115 ret = ENOMEM;
1116 goto out;
1117 }
1118
1119 mutex_enter(&chmp->chm_lock_sizes);
1120
1121 nref->nref_offset = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
1122 KASSERT(nref->nref_offset % 4 == 0);
1123 chfs_change_size_free(chmp, chmp->chm_nextblock, -totlen);
1124
1125 /* Write it to the writebuffer. */
1126 ret = chfs_write_wbuf(chmp, &vec, 1, nref->nref_offset, &retlen);
1127 if (ret || retlen != totlen) {
1128 /* error during writing */
1129 chfs_err("error while writing out to the media\n");
1130 chfs_err("err: %d | size: %d | retlen : %zu\n",
1131 ret, totlen, retlen);
1132 chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
1133 if (retries) {
1134 ret = EIO;
1135 mutex_exit(&chmp->chm_lock_sizes);
1136 goto out;
1137 }
1138
1139 /* try again */
1140 retries++;
1141 mutex_exit(&chmp->chm_lock_sizes);
1142 goto retry;
1143 }
1144
1145 dbg_gc("new nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);
1146
1147 chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
1148 mutex_exit(&chmp->chm_lock_sizes);
1149 KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
1150
1151 /* Set fields of the new node. */
1152 newfn = chfs_alloc_full_dnode();
1153 newfn->nref = nref;
1154 newfn->ofs = fn->ofs;
1155 newfn->size = fn->size;
1156 newfn->frags = 0;
1157
1158 mutex_enter(&chmp->chm_lock_vnocache);
1159 /* Remove every part of the old node. */
1160 chfs_remove_frags_of_node(chmp, &ip->fragtree, fn->nref);
1161 chfs_remove_and_obsolete(chmp, ip->chvc, fn->nref, &ip->chvc->dnode);
1162
1163 /* Add the new nref to inode. */
1164 chfs_add_full_dnode_to_inode(chmp, ip, newfn);
1165 chfs_add_node_to_list(chmp,
1166 ip->chvc, newfn->nref, &ip->chvc->dnode);
1167 mutex_exit(&chmp->chm_lock_vnocache);
1168
1169 out:
1170 kmem_free(data, totlen);
1171 return ret;
1172 }
1173