1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "bbpos.h"
5 #include "bkey_buf.h"
6 #include "btree_cache.h"
7 #include "btree_io.h"
8 #include "btree_iter.h"
9 #include "btree_locking.h"
10 #include "debug.h"
11 #include "errcode.h"
12 #include "error.h"
13 #include "journal.h"
14 #include "trace.h"
15
16 #include <linux/prefetch.h>
17 #include <linux/sched/mm.h>
18
/*
 * Bump the per-reason "could not free this node" counter on the btree cache.
 * Only counts when invoked on behalf of the shrinker: both shrinker_counter
 * and bc must be in scope at the expansion site.
 */
#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
do { \
	if (shrinker_counter) \
		bc->not_freed_##counter++; \
} while (0)
24
/* Stringified names of the BTREE_FLAGS() bits, in bit order; NULL-terminated */
const char * const bch2_btree_node_flags[] = {
#define x(f) #f,
	BTREE_FLAGS()
#undef x
	NULL
};
31
bch2_recalc_btree_reserve(struct bch_fs * c)32 void bch2_recalc_btree_reserve(struct bch_fs *c)
33 {
34 unsigned i, reserve = 16;
35
36 if (!c->btree_roots_known[0].b)
37 reserve += 8;
38
39 for (i = 0; i < btree_id_nr_alive(c); i++) {
40 struct btree_root *r = bch2_btree_id_root(c, i);
41
42 if (r->b)
43 reserve += min_t(unsigned, 1, r->b->c.level) * 8;
44 }
45
46 c->btree_cache.reserve = reserve;
47 }
48
btree_cache_can_free(struct btree_cache * bc)49 static inline unsigned btree_cache_can_free(struct btree_cache *bc)
50 {
51 return max_t(int, 0, bc->used - bc->reserve);
52 }
53
btree_node_to_freedlist(struct btree_cache * bc,struct btree * b)54 static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
55 {
56 if (b->c.lock.readers)
57 list_move(&b->list, &bc->freed_pcpu);
58 else
59 list_move(&b->list, &bc->freed_nonpcpu);
60 }
61
/*
 * Free a node's data buffers (but not struct btree itself) and move it to
 * the appropriate freed list. Caller must guarantee no write is in flight.
 */
static void btree_node_data_free(struct bch_fs *c, struct btree *b)
{
	struct btree_cache *bc = &c->btree_cache;

	EBUG_ON(btree_node_write_in_flight(b));

	clear_btree_node_just_written(b);

	kvfree(b->data);
	b->data = NULL;
#ifdef __KERNEL__
	kvfree(b->aux_data);
#else
	/* userspace build: aux_data was mmap()ed, not kvmalloc()ed */
	munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
	b->aux_data = NULL;

	/* node no longer counts against the cache's in-use total */
	bc->used--;

	btree_node_to_freedlist(bc, b);
}
83
bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg * arg,const void * obj)84 static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
85 const void *obj)
86 {
87 const struct btree *b = obj;
88 const u64 *v = arg->key;
89
90 return b->hash_val == *v ? 0 : 1;
91 }
92
/* Hash table of cached btree nodes, keyed by the 64-bit hash of the node key */
static const struct rhashtable_params bch_btree_cache_params = {
	.head_offset		= offsetof(struct btree, hash),
	.key_offset		= offsetof(struct btree, hash_val),
	.key_len		= sizeof(u64),
	.obj_cmpfn		= bch2_btree_cache_cmp_fn,
	.automatic_shrinking	= true,
};
100
/*
 * Allocate the data and aux_data buffers for a node; on failure nothing is
 * leaked and both pointers stay NULL. Returns 0 or -BCH_ERR_ENOMEM_*.
 */
static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
{
	BUG_ON(b->data || b->aux_data);

	b->data = kvmalloc(btree_buf_bytes(b), gfp);
	if (!b->data)
		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
#ifdef __KERNEL__
	b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp);
#else
	/* userspace build: anonymous mmap stands in for kvmalloc */
	b->aux_data = mmap(NULL, btree_aux_data_bytes(b),
			   PROT_READ|PROT_WRITE|PROT_EXEC,
			   MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
	if (b->aux_data == MAP_FAILED)
		b->aux_data = NULL;	/* normalize to the kernel-path failure value */
#endif
	if (!b->aux_data) {
		/* unwind the data allocation so the node stays consistent */
		kvfree(b->data);
		b->data = NULL;
		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
	}

	return 0;
}
125
/*
 * Allocate and minimally initialize a struct btree (no data buffers, no
 * lock init yet). Returns NULL on allocation failure.
 */
static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
{
	struct btree *b = kzalloc(sizeof(*b), gfp);

	if (!b)
		return NULL;

	bkey_btree_ptr_init(&b->key);
	INIT_LIST_HEAD(&b->list);
	INIT_LIST_HEAD(&b->write_blocked);
	b->byte_order = ilog2(c->opts.btree_node_size);

	return b;
}
140
/*
 * Allocate a fully usable cached btree node (struct + buffers + lock) and
 * put it on the freeable list. Returns NULL on allocation failure.
 */
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	b = __btree_node_mem_alloc(c, GFP_KERNEL);
	if (!b)
		return NULL;

	if (btree_node_data_alloc(c, b, GFP_KERNEL)) {
		/* lock not initialized yet, so plain kfree is sufficient */
		kfree(b);
		return NULL;
	}

	bch2_btree_lock_init(&b->c, 0);

	bc->used++;
	list_add(&b->list, &bc->freeable);
	return b;
}
161
162 /* Btree in memory cache - hash table */
163
/* Remove a node from the hash table; it must currently be hashed. */
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
	int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);

	BUG_ON(ret);

	/* Cause future lookups for this node to fail: */
	b->hash_val = 0;

	/* per-btree accounting only covers the statically-known btree IDs */
	if (b->c.btree_id < BTREE_ID_NR)
		--bc->used_by_btree[b->c.btree_id];
}
176
/*
 * Insert a node into the hash table, computing its hash from b->key.
 * Returns nonzero (without side effects) if an entry with the same key
 * already exists — callers use this to detect racing fills.
 */
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(b->hash_val);
	b->hash_val = btree_ptr_hash_val(&b->key);

	int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
						bch_btree_cache_params);
	if (!ret && b->c.btree_id < BTREE_ID_NR)
		bc->used_by_btree[b->c.btree_id]++;
	return ret;
}
188
/*
 * Set a node's level/btree id, hash it, and (on success) add it to the
 * live LRU list — all under the cache lock. Returns the insert result.
 */
int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
				unsigned level, enum btree_id id)
{
	int ret;

	/* must be set before hashing: insert accounts by btree_id */
	b->c.level	= level;
	b->c.btree_id	= id;

	mutex_lock(&bc->lock);
	ret = __bch2_btree_node_hash_insert(bc, b);
	if (!ret)
		list_add_tail(&b->list, &bc->live);
	mutex_unlock(&bc->lock);

	return ret;
}
205
/*
 * Early (pre-transaction-commit) update of a cached node's key: if the node
 * identified by @old is in the cache, rehash it under @new. Used during
 * recovery/repair paths; no-op if the node isn't cached.
 */
void bch2_btree_node_update_key_early(struct btree_trans *trans,
				      enum btree_id btree, unsigned level,
				      struct bkey_s_c old, struct bkey_i *new)
{
	struct bch_fs *c = trans->c;
	struct btree *b;
	struct bkey_buf tmp;
	int ret;

	/* need an owned copy of @old to look the node up by */
	bch2_bkey_buf_init(&tmp);
	bch2_bkey_buf_reassemble(&tmp, c, old);

	/* takes a read lock on the node on success */
	b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
	if (!IS_ERR_OR_NULL(b)) {
		mutex_lock(&c->btree_cache.lock);

		/* remove/re-insert: the hash is derived from the key */
		bch2_btree_node_hash_remove(&c->btree_cache, b);

		bkey_copy(&b->key, new);
		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
		BUG_ON(ret);

		mutex_unlock(&c->btree_cache.lock);
		six_unlock_read(&b->c.lock);
	}

	bch2_bkey_buf_exit(&tmp, c);
}
234
235 __flatten
btree_cache_find(struct btree_cache * bc,const struct bkey_i * k)236 static inline struct btree *btree_cache_find(struct btree_cache *bc,
237 const struct bkey_i *k)
238 {
239 u64 v = btree_ptr_hash_val(k);
240
241 return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
242 }
243
244 /*
245 * this version is for btree nodes that have already been freed (we're not
246 * reaping a real btree node)
247 */
/*
 * Try to make a node reclaimable: take its intent+write locks via trylock
 * and, if @flush, write out dirty contents / wait for in-flight IO first.
 *
 * Returns 0 with both locks held on success, or
 * -BCH_ERR_ENOMEM_btree_node_reclaim (locks released) if the node can't be
 * reclaimed right now. @shrinker_counter enables the not_freed_* stats.
 * Caller must hold bc->lock.
 */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
{
	struct btree_cache *bc = &c->btree_cache;
	int ret = 0;

	lockdep_assert_held(&bc->lock);

	struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);

	/* pinned ranges are tracked separately for leaf vs interior nodes */
	u64 mask = b->c.level
		? bc->pinned_nodes_interior_mask
		: bc->pinned_nodes_leaf_mask;

	/* never reclaim a node inside the pinned range for its btree */
	if ((mask & BIT_ULL(b->c.btree_id)) &&
	    bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
	    bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
		return -BCH_ERR_ENOMEM_btree_node_reclaim;

wait_on_io:
	/* unlocked peek at the flags; rechecked under lock below */
	if (b->flags & ((1U << BTREE_NODE_dirty)|
			(1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush) {
			if (btree_node_dirty(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
			else if (btree_node_read_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
			else if (btree_node_write_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
			return -BCH_ERR_ENOMEM_btree_node_reclaim;
		}

		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}

	/* trylock only: blocking here while holding bc->lock could deadlock */
	if (!six_trylock_intent(&b->c.lock)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
		return -BCH_ERR_ENOMEM_btree_node_reclaim;
	}

	if (!six_trylock_write(&b->c.lock)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
		goto out_unlock_intent;
	}

	/* recheck under lock */
	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush) {
			if (btree_node_read_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
			else if (btree_node_write_in_flight(b))
				BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
			goto out_unlock;
		}
		/* IO appeared since the first check: drop locks and wait again */
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	if (btree_node_noevict(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
		goto out_unlock;
	}
	if (btree_node_write_blocked(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
		goto out_unlock;
	}
	if (btree_node_will_make_reachable(b)) {
		BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
		goto out_unlock;
	}

	if (btree_node_dirty(b)) {
		if (!flush) {
			BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
			goto out_unlock;
		}
		/*
		 * Using the underscore version because we don't want to compact
		 * bsets after the write, since this node is about to be evicted
		 * - unless btree verify mode is enabled, since it runs out of
		 * the post write cleanup:
		 */
		if (bch2_verify_btree_ondisk)
			bch2_btree_node_write(c, b, SIX_LOCK_intent,
					      BTREE_WRITE_cache_reclaim);
		else
			__bch2_btree_node_write(c, b,
						BTREE_WRITE_cache_reclaim);

		/* write started: loop back and wait for it to finish */
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}
out:
	if (b->hash_val && !ret)
		trace_and_count(c, btree_cache_reap, c, b);
	return ret;
out_unlock:
	six_unlock_write(&b->c.lock);
out_unlock_intent:
	six_unlock_intent(&b->c.lock);
	ret = -BCH_ERR_ENOMEM_btree_node_reclaim;
	goto out;
}
356
/* Non-flushing reclaim: fail rather than write out or wait on IO. */
static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
{
	return __btree_node_reclaim(c, b, false, shrinker_counter);
}
361
/* Flushing reclaim: write out dirty contents and wait on IO if needed. */
static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, true, false);
}
366
/*
 * Shrinker scan callback: free up to sc->nr_to_scan nodes, first from the
 * freeable list, then from the live LRU. May also kick off writes of dirty
 * nodes (trigger_writes) so they become freeable on a later pass.
 * Returns the number of nodes freed.
 */
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc)
{
	struct bch_fs *c = shrink->private_data;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b, *t;
	unsigned long nr = sc->nr_to_scan;
	unsigned long can_free = 0;
	unsigned long freed = 0;
	unsigned long touched = 0;
	unsigned i, flags;
	unsigned long ret = SHRINK_STOP;
	/* if most of the cache is dirty, start writeback to make progress */
	bool trigger_writes = atomic_read(&bc->dirty) + nr >=
		bc->used * 3 / 4;

	if (bch2_btree_shrinker_disabled)
		return SHRINK_STOP;

	mutex_lock(&bc->lock);
	/* freeing may recurse into fs reclaim; forbid it while we hold bc->lock */
	flags = memalloc_nofs_save();

	/*
	 * It's _really_ critical that we don't free too many btree nodes - we
	 * have to always leave ourselves a reserve. The reserve is how we
	 * guarantee that allocating memory for a new btree node can always
	 * succeed, so that inserting keys into the btree can always succeed and
	 * IO can always make forward progress:
	 */
	can_free = btree_cache_can_free(bc);
	nr = min_t(unsigned long, nr, can_free);

	i = 0;
	list_for_each_entry_safe(b, t, &bc->freeable, list) {
		/*
		 * Leave a few nodes on the freeable list, so that a btree split
		 * won't have to hit the system allocator:
		 */
		if (++i <= 3)
			continue;

		touched++;

		if (touched >= nr)
			goto out;

		if (!btree_node_reclaim(c, b, true)) {
			btree_node_data_free(c, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);
			freed++;
			bc->freed++;
		}
	}
restart:
	list_for_each_entry_safe(b, t, &bc->live, list) {
		touched++;

		if (btree_node_accessed(b)) {
			/* second-chance LRU: clear the bit, free on next pass */
			clear_btree_node_accessed(b);
			bc->not_freed_access_bit++;
		} else if (!btree_node_reclaim(c, b, true)) {
			freed++;
			btree_node_data_free(c, b);
			bc->freed++;

			bch2_btree_node_hash_remove(bc, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);

			if (freed == nr)
				goto out_rotate;
		} else if (trigger_writes &&
			   btree_node_dirty(b) &&
			   !btree_node_will_make_reachable(b) &&
			   !btree_node_write_blocked(b) &&
			   six_trylock_read(&b->c.lock)) {
			/*
			 * Move the list head after b so the restart below
			 * resumes where we left off:
			 */
			list_move(&bc->live, &b->list);
			/* must drop bc->lock before issuing the write */
			mutex_unlock(&bc->lock);
			__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
			six_unlock_read(&b->c.lock);
			if (touched >= nr)
				goto out_nounlock;
			mutex_lock(&bc->lock);
			/* list may have changed while unlocked; start over */
			goto restart;
		}

		if (touched >= nr)
			break;
	}
out_rotate:
	/* rotate the LRU so the next scan starts after the last node touched */
	if (&t->list != &bc->live)
		list_move_tail(&bc->live, &t->list);
out:
	mutex_unlock(&bc->lock);
out_nounlock:
	ret = freed;
	memalloc_nofs_restore(flags);
	trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret);
	return ret;
}
467
bch2_btree_cache_count(struct shrinker * shrink,struct shrink_control * sc)468 static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
469 struct shrink_control *sc)
470 {
471 struct bch_fs *c = shrink->private_data;
472 struct btree_cache *bc = &c->btree_cache;
473
474 if (bch2_btree_shrinker_disabled)
475 return 0;
476
477 return btree_cache_can_free(bc);
478 }
479
/*
 * Tear down the btree cache on filesystem shutdown: gather every node
 * (verify node, roots, freeable) onto the live list, free their data,
 * then free the struct btrees themselves and the hash table.
 */
void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	unsigned i, flags;

	shrinker_free(bc->shrink);

	/* vfree() can allocate memory: */
	flags = memalloc_nofs_save();
	mutex_lock(&bc->lock);

	if (c->verify_data)
		list_move(&c->verify_data->list, &bc->live);

	kvfree(c->verify_ondisk);

	/* root nodes aren't on any cache list; add them so they get freed too */
	for (i = 0; i < btree_id_nr_alive(c); i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);

		if (r->b)
			list_add(&r->b->list, &bc->live);
	}

	list_splice(&bc->freeable, &bc->live);

	/* first pass: free data buffers; nodes end up on the freed lists */
	while (!list_empty(&bc->live)) {
		b = list_first_entry(&bc->live, struct btree, list);

		BUG_ON(btree_node_read_in_flight(b) ||
		       btree_node_write_in_flight(b));

		btree_node_data_free(c, b);
	}

	/* dirty nodes at shutdown are only acceptable after a journal error */
	BUG_ON(!bch2_journal_error(&c->journal) &&
	       atomic_read(&c->btree_cache.dirty));

	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);

	/* second pass: free the struct btrees themselves */
	while (!list_empty(&bc->freed_nonpcpu)) {
		b = list_first_entry(&bc->freed_nonpcpu, struct btree, list);
		list_del(&b->list);
		six_lock_exit(&b->c.lock);
		kfree(b);
	}

	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);

	if (bc->table_init_done)
		rhashtable_destroy(&bc->table);
}
533
/*
 * Initialize the btree cache: hash table, the reserve of preallocated
 * nodes, and the shrinker. Returns 0 or -BCH_ERR_ENOMEM_fs_btree_cache_init.
 * NOTE(review): partially-allocated state on the err path appears to rely on
 * bch2_fs_btree_cache_exit() being called by the caller — confirm.
 */
int bch2_fs_btree_cache_init(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct shrinker *shrink;
	unsigned i;
	int ret = 0;

	ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
	if (ret)
		goto err;

	bc->table_init_done = true;

	bch2_recalc_btree_reserve(c);

	/* preallocate the reserve so node allocation can't fail later */
	for (i = 0; i < bc->reserve; i++)
		if (!__bch2_btree_node_mem_alloc(c))
			goto err;

	/* freshly allocated nodes start out freeable, not live */
	list_splice_init(&bc->live, &bc->freeable);

	mutex_init(&c->verify_lock);

	shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
	if (!shrink)
		goto err;
	bc->shrink = shrink;
	shrink->count_objects	= bch2_btree_cache_count;
	shrink->scan_objects	= bch2_btree_cache_scan;
	shrink->seeks		= 4;
	shrink->private_data	= c;
	shrinker_register(shrink);

	return 0;
err:
	return -BCH_ERR_ENOMEM_fs_btree_cache_init;
}
571
/* Early init: set up the lock and lists so the cache is safe to reference. */
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
{
	mutex_init(&bc->lock);
	INIT_LIST_HEAD(&bc->live);
	INIT_LIST_HEAD(&bc->freeable);
	INIT_LIST_HEAD(&bc->freed_pcpu);
	INIT_LIST_HEAD(&bc->freed_nonpcpu);
}
580
581 /*
582 * We can only have one thread cannibalizing other cached btree nodes at a time,
583 * or we'll deadlock. We use an open coded mutex to ensure that, which a
584 * cannibalize_bucket() will take. This means every time we unlock the root of
585 * the btree, we need to release this lock if we have it held.
586 */
bch2_btree_cache_cannibalize_unlock(struct btree_trans * trans)587 void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
588 {
589 struct bch_fs *c = trans->c;
590 struct btree_cache *bc = &c->btree_cache;
591
592 if (bc->alloc_lock == current) {
593 trace_and_count(c, btree_cache_cannibalize_unlock, trans);
594 bc->alloc_lock = NULL;
595 closure_wake_up(&bc->alloc_wait);
596 }
597 }
598
/*
 * Take the (open-coded, cmpxchg-based) cannibalize lock; reentrant for the
 * current holder. With @cl NULL this is trylock semantics; otherwise @cl is
 * put on the waitlist and -BCH_ERR_btree_cache_cannibalize_lock_blocked is
 * returned so the caller can retry when woken.
 */
int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct task_struct *old;

	old = cmpxchg(&bc->alloc_lock, NULL, current);
	if (old == NULL || old == current)
		goto success;

	if (!cl) {
		trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
		return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock;
	}

	closure_wait(&bc->alloc_wait, cl);

	/* Try again, after adding ourselves to waitlist */
	old = cmpxchg(&bc->alloc_lock, NULL, current);
	if (old == NULL || old == current) {
		/* We raced */
		closure_wake_up(&bc->alloc_wait);
		goto success;
	}

	trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
	return -BCH_ERR_btree_cache_cannibalize_lock_blocked;

success:
	trace_and_count(c, btree_cache_cannibalize_lock, trans);
	return 0;
}
631
/*
 * Steal a live cached node for reuse (caller holds the cannibalize lock).
 * First tries a cheap non-flushing reclaim over the LRU (coldest first),
 * then falls back to flushing reclaim; busy-waits in the pathological case
 * where every node is intent-locked. Never returns NULL.
 */
static struct btree *btree_node_cannibalize(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	list_for_each_entry_reverse(b, &bc->live, list)
		if (!btree_node_reclaim(c, b, false))
			return b;

	while (1) {
		list_for_each_entry_reverse(b, &bc->live, list)
			if (!btree_node_write_and_reclaim(c, b))
				return b;

		/*
		 * Rare case: all nodes were intent-locked.
		 * Just busy-wait.
		 */
		WARN_ONCE(1, "btree cache cannibalize failed\n");
		cond_resched();
	}
}
654
/*
 * Allocate a btree node for use, returned intent+write locked. Tries, in
 * order: a struct from the freed lists, a fresh kzalloc, data buffers from
 * a freeable node, a fresh data allocation — and as a last resort (if this
 * task holds the cannibalize lock) steals a live node. GFP_NOWAIT attempts
 * are made with the trans locked; blocking attempts drop trans locks first.
 */
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	/* freed structs are segregated by lock type; pick the matching list */
	struct list_head *freed = pcpu_read_locks
		? &bc->freed_pcpu
		: &bc->freed_nonpcpu;
	struct btree *b, *b2;
	u64 start_time = local_clock();
	unsigned flags;

	flags = memalloc_nofs_save();
	mutex_lock(&bc->lock);

	/*
	 * We never free struct btree itself, just the memory that holds the on
	 * disk node. Check the freed list before allocating a new one:
	 */
	list_for_each_entry(b, freed, list)
		if (!btree_node_reclaim(c, b, false)) {
			/* reclaim succeeded: b is now intent+write locked */
			list_del_init(&b->list);
			goto got_node;
		}

	b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
	if (!b) {
		/* retry blocking, with trans unlocked to avoid deadlock */
		mutex_unlock(&bc->lock);
		bch2_trans_unlock(trans);
		b = __btree_node_mem_alloc(c, GFP_KERNEL);
		if (!b)
			goto err;
		mutex_lock(&bc->lock);
	}

	bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);

	/* fresh lock, uncontended: trylocks cannot fail */
	BUG_ON(!six_trylock_intent(&b->c.lock));
	BUG_ON(!six_trylock_write(&b->c.lock));
got_node:

	/*
	 * btree_free() doesn't free memory; it sticks the node on the end of
	 * the list. Check if there's any freed nodes there:
	 */
	list_for_each_entry(b2, &bc->freeable, list)
		if (!btree_node_reclaim(c, b2, false)) {
			/* take b2's buffers; b2 goes back to the freed list */
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
			goto got_mem;
		}

	mutex_unlock(&bc->lock);

	if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
		bch2_trans_unlock(trans);
		if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
			goto err;
	}

	mutex_lock(&bc->lock);
	bc->used++;
got_mem:
	mutex_unlock(&bc->lock);

	BUG_ON(btree_node_hashed(b));
	BUG_ON(btree_node_dirty(b));
	BUG_ON(btree_node_write_in_flight(b));
out:
	/* reset per-use state; node is returned intent+write locked */
	b->flags		= 0;
	b->written		= 0;
	b->nsets		= 0;
	b->sib_u64s[0]		= 0;
	b->sib_u64s[1]		= 0;
	b->whiteout_u64s	= 0;
	bch2_btree_keys_init(b);
	set_btree_node_accessed(b);

	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
			       start_time);

	memalloc_nofs_restore(flags);
	return b;
err:
	mutex_lock(&bc->lock);

	/* Try to cannibalize another cached btree node: */
	if (bc->alloc_lock == current) {
		b2 = btree_node_cannibalize(c);
		clear_btree_node_just_written(b2);
		bch2_btree_node_hash_remove(bc, b2);

		if (b) {
			/* we have a struct but no buffers: take b2's */
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
		} else {
			/* no struct either: use the cannibalized node outright */
			b = b2;
			list_del_init(&b->list);
		}

		mutex_unlock(&bc->lock);

		trace_and_count(c, btree_cache_cannibalize, trans);
		goto out;
	}

	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);
	return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc);
}
770
771 /* Slowpath, don't want it inlined into btree_iter_traverse() */
/*
 * Read a btree node into the cache. Validates @k, allocates a node, hashes
 * it, and kicks off (or waits for, if @sync) the read.
 *
 * Returns the node locked with @lock_type, NULL if we raced with another
 * fill (caller retries the cache lookup) or relocking failed after IO, or
 * an ERR_PTR (topology error, allocation failure, transaction restart).
 */
static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
						   struct btree_path *path,
						   const struct bkey_i *k,
						   enum btree_id btree_id,
						   unsigned level,
						   enum six_lock_type lock_type,
						   bool sync)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	if (unlikely(level >= BTREE_MAX_DEPTH)) {
		int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
						 level, BTREE_MAX_DEPTH);
		return ERR_PTR(ret);
	}

	if (unlikely(!bkey_is_btree_ptr(&k->k))) {
		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));

		int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}

	if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));

		int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}

	/*
	 * Parent node must be locked, else we could read in a btree node that's
	 * been freed:
	 */
	if (path && !bch2_btree_node_relock(trans, path, level + 1)) {
		trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
	}

	/* interior nodes (level != 0) get percpu reader locks */
	b = bch2_btree_node_mem_alloc(trans, level != 0);

	if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
		if (!path)
			return b;

		trans->memory_allocation_failure = true;
		trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
	}

	if (IS_ERR(b))
		return b;

	bkey_copy(&b->key, k);
	if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
		/* raced with another fill: */

		/* mark as unhashed... */
		b->hash_val = 0;

		/* ...and return the node's memory to the freeable pool */
		mutex_lock(&bc->lock);
		list_add(&b->list, &bc->freeable);
		mutex_unlock(&bc->lock);

		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		return NULL;
	}

	/* set before dropping the write lock so readers see the node as busy */
	set_btree_node_read_in_flight(b);
	six_unlock_write(&b->c.lock);

	if (path) {
		u32 seq = six_lock_seq(&b->c.lock);

		/* Unlock before doing IO: */
		six_unlock_intent(&b->c.lock);
		bch2_trans_unlock_noassert(trans);

		bch2_btree_node_read(trans, b, sync);

		if (!sync)
			return NULL;

		/* seq changed => node was relocked in between; caller retries */
		if (!six_relock_type(&b->c.lock, lock_type, seq))
			b = NULL;
	} else {
		bch2_btree_node_read(trans, b, sync);
		if (lock_type == SIX_LOCK_read)
			six_lock_downgrade(&b->c.lock);
	}

	return b;
}
872
/*
 * Report a node whose on-disk header disagrees with the pointer we reached
 * it by. Silent during early recovery passes, where repair will handle it.
 */
static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
{
	struct printbuf buf = PRINTBUF;

	if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
		return;

	prt_printf(&buf,
	       "btree node header doesn't match ptr\n"
	       "btree %s level %u\n"
	       "ptr: ",
	       bch2_btree_id_str(b->c.btree_id), b->c.level);
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

	prt_printf(&buf, "\nheader: btree %s level %llu\n"
	       "min ",
	       bch2_btree_id_str(BTREE_NODE_ID(b->data)),
	       BTREE_NODE_LEVEL(b->data));
	bch2_bpos_to_text(&buf, b->data->min_key);

	prt_printf(&buf, "\nmax ");
	bch2_bpos_to_text(&buf, b->data->max_key);

	bch2_fs_topology_error(c, "%s", buf.buf);

	printbuf_exit(&buf);
}
900
btree_check_header(struct bch_fs * c,struct btree * b)901 static inline void btree_check_header(struct bch_fs *c, struct btree *b)
902 {
903 if (b->c.btree_id != BTREE_NODE_ID(b->data) ||
904 b->c.level != BTREE_NODE_LEVEL(b->data) ||
905 !bpos_eq(b->data->max_key, b->key.k.p) ||
906 (b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
907 !bpos_eq(b->data->min_key,
908 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)))
909 btree_bad_header(c, b);
910 }
911
/*
 * Slow path of bch2_btree_node_get(): look the node up in the hash table,
 * reading it from disk if absent, and lock it with @lock_type. Retries
 * internally on races; returns the locked node or an ERR_PTR (transaction
 * restart or read error).
 */
static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
					   const struct bkey_i *k, unsigned level,
					   enum six_lock_type lock_type,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	bool need_relock = false;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);
retry:
	b = btree_cache_find(bc, k);
	if (unlikely(!b)) {
		/*
		 * We must have the parent locked to call bch2_btree_node_fill(),
		 * else we could read in a btree node from disk that's been
		 * freed:
		 */
		b = bch2_btree_node_fill(trans, path, k, path->btree_id,
					 level, lock_type, true);
		need_relock = true;

		/* We raced and found the btree node in the cache */
		if (!b)
			goto retry;

		if (IS_ERR(b))
			return b;
	} else {
		if (btree_node_read_locked(path, level + 1))
			btree_node_unlock(trans, path, level + 1);

		ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ERR_PTR(ret);

		BUG_ON(ret);

		/*
		 * The node may have been freed and reused while we waited for
		 * the lock; verify it's still the one @k points to:
		 */
		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
			     b->c.level != level ||
			     race_fault())) {
			six_unlock_type(&b->c.lock, lock_type);
			if (bch2_btree_node_relock(trans, path, level + 1))
				goto retry;

			trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
			return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
		}

		/* avoid atomic set bit if it's not needed: */
		if (!btree_node_accessed(b))
			set_btree_node_accessed(b);
	}

	if (unlikely(btree_node_read_in_flight(b))) {
		u32 seq = six_lock_seq(&b->c.lock);

		/* drop all locks before sleeping on the read */
		six_unlock_type(&b->c.lock, lock_type);
		bch2_trans_unlock(trans);
		need_relock = true;

		bch2_btree_node_wait_on_read(b);

		/*
		 * should_be_locked is not set on this path yet, so we need to
		 * relock it specifically:
		 */
		if (!six_relock_type(&b->c.lock, lock_type, seq))
			goto retry;
	}

	if (unlikely(need_relock)) {
		ret = bch2_trans_relock(trans) ?:
			bch2_btree_path_relock_intent(trans, path);
		if (ret) {
			six_unlock_type(&b->c.lock, lock_type);
			return ERR_PTR(ret);
		}
	}

	/* warm the caches the caller is about to touch */
	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return ERR_PTR(-BCH_ERR_btree_node_read_error);
	}

	EBUG_ON(b->c.btree_id != path->btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);

	return b;
}
1015
/**
 * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
 * in from disk if necessary.
 *
 * @trans:	btree transaction object
 * @path:	btree_path being traversed
 * @k:		pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
 * @level:	level of btree node being looked up (0 == leaf node)
 * @lock_type:	SIX_LOCK_read or SIX_LOCK_intent
 * @trace_ip:	ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
 *
 * The btree node will be locked read or intent, depending on @lock_type.
 *
 * Returns: btree node or ERR_PTR()
 */
struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
				  const struct bkey_i *k, unsigned level,
				  enum six_lock_type lock_type,
				  unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree *b;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);

	/* fast path: the key may cache a pointer straight to the node */
	b = btree_node_mem_ptr(k);

	/*
	 * Check b->hash_val _before_ calling btree_node_lock() - this might not
	 * be the node we want anymore, and trying to lock the wrong node could
	 * cause an unneccessary transaction restart:
	 */
	if (unlikely(!c->opts.btree_node_mem_ptr_optimization ||
		     !b ||
		     b->hash_val != btree_ptr_hash_val(k)))
		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(trans, path, level + 1);

	ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ERR_PTR(ret);

	BUG_ON(ret);

	/* recheck after locking: the node may have been freed and reused */
	if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
		     b->c.level != level ||
		     race_fault())) {
		six_unlock_type(&b->c.lock, lock_type);
		if (bch2_btree_node_relock(trans, path, level + 1))
			return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);

		trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
	}

	/* read still in flight: take the slow path, which knows how to wait */
	if (unlikely(btree_node_read_in_flight(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
	}

	/* warm the caches the caller is about to touch */
	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	/* avoid atomic set bit if it's not needed: */
	if (!btree_node_accessed(b))
		set_btree_node_accessed(b);

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return ERR_PTR(-BCH_ERR_btree_node_read_error);
	}

	EBUG_ON(b->c.btree_id != path->btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);

	return b;
}
1105
/*
 * Version of bch2_btree_node_get() for callers without a btree_path; locks are
 * taken with btree_node_lock_nopath().
 *
 * @nofill: if true and the node is not already in the in-memory cache, return
 *	    NULL instead of reading it in.
 *
 * Returns: read-locked btree node, NULL (nofill case), or ERR_PTR()
 */
struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
					 const struct bkey_i *k,
					 enum btree_id btree_id,
					 unsigned level,
					 bool nofill)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);

	/* Fast path: the bkey may cache a pointer straight to the node */
	if (c->opts.btree_node_mem_ptr_optimization) {
		b = btree_node_mem_ptr(k);
		if (b)
			goto lock_node;
	}
retry:
	b = btree_cache_find(bc, k);
	if (unlikely(!b)) {
		if (nofill)
			goto out;

		b = bch2_btree_node_fill(trans, NULL, k, btree_id,
					 level, SIX_LOCK_read, true);

		/* We raced and found the btree node in the cache */
		if (!b)
			goto retry;

		/*
		 * Fill failed: retry with the cannibalize lock held, which
		 * presumably lets the fill reclaim memory from another cached
		 * node (see bch2_btree_cache_cannibalize_lock())
		 */
		if (IS_ERR(b) &&
		    !bch2_btree_cache_cannibalize_lock(trans, NULL))
			goto retry;

		if (IS_ERR(b))
			goto out;
	} else {
lock_node:
		ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ERR_PTR(ret);

		/* Only a transaction restart is an acceptable failure here */
		BUG_ON(ret);

		/*
		 * Recheck identity now that we hold the lock: the node may
		 * have been freed and reused while we were waiting
		 */
		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
			     b->c.btree_id != btree_id ||
			     b->c.level != level)) {
			six_unlock_read(&b->c.lock);
			goto retry;
		}
	}

	/* XXX: waiting on IO with btree locks held: */
	__bch2_btree_node_wait_on_read(b);

	/* Warm the cache with the node's lookup tables before callers search it */
	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	/* avoid atomic set bit if it's not needed: */
	if (!btree_node_accessed(b))
		set_btree_node_accessed(b);

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_read(&b->c.lock);
		b = ERR_PTR(-BCH_ERR_btree_node_read_error);
		goto out;
	}

	EBUG_ON(b->c.btree_id != btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);
out:
	/* Drops the cannibalize lock if we took it above */
	bch2_btree_cache_cannibalize_unlock(trans);
	return b;
}
1189
bch2_btree_node_prefetch(struct btree_trans * trans,struct btree_path * path,const struct bkey_i * k,enum btree_id btree_id,unsigned level)1190 int bch2_btree_node_prefetch(struct btree_trans *trans,
1191 struct btree_path *path,
1192 const struct bkey_i *k,
1193 enum btree_id btree_id, unsigned level)
1194 {
1195 struct bch_fs *c = trans->c;
1196 struct btree_cache *bc = &c->btree_cache;
1197
1198 BUG_ON(path && !btree_node_locked(path, level + 1));
1199 BUG_ON(level >= BTREE_MAX_DEPTH);
1200
1201 struct btree *b = btree_cache_find(bc, k);
1202 if (b)
1203 return 0;
1204
1205 b = bch2_btree_node_fill(trans, path, k, btree_id,
1206 level, SIX_LOCK_read, false);
1207 if (!IS_ERR_OR_NULL(b))
1208 six_unlock_read(&b->c.lock);
1209 return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
1210 }
1211
/*
 * Remove the btree node pointed to by @k from the in-memory cache, first
 * writing it out if dirty.  No-op if the node isn't cached.
 */
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	b = btree_cache_find(bc, k);
	if (!b)
		return;

	/* The current root may never be evicted */
	BUG_ON(b == btree_node_root(trans->c, b));
wait_on_io:
	/* not allowed to wait on io with btree locks held: */

	/* XXX we're called from btree_gc which will be holding other btree
	 * nodes locked
	 */
	__bch2_btree_node_wait_on_read(b);
	__bch2_btree_node_wait_on_write(b);

	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
	/* Raced: the node was freed and reused while we were waiting */
	if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
		goto out;

	/*
	 * Still dirty: issue a write, then drop the locks and loop back to
	 * wait for the IO to complete
	 */
	if (btree_node_dirty(b)) {
		__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	BUG_ON(btree_node_dirty(b));

	/* Clean and idle: free its buffers and drop it from the hash table */
	mutex_lock(&bc->lock);
	btree_node_data_free(c, b);
	bch2_btree_node_hash_remove(bc, b);
	mutex_unlock(&bc->lock);
out:
	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);
}
1254
bch2_btree_id_str(enum btree_id btree)1255 const char *bch2_btree_id_str(enum btree_id btree)
1256 {
1257 return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
1258 }
1259
bch2_btree_pos_to_text(struct printbuf * out,struct bch_fs * c,const struct btree * b)1260 void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
1261 {
1262 prt_printf(out, "%s level %u/%u\n ",
1263 bch2_btree_id_str(b->c.btree_id),
1264 b->c.level,
1265 bch2_btree_id_root(c, b->c.btree_id)->level);
1266 bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
1267 }
1268
bch2_btree_node_to_text(struct printbuf * out,struct bch_fs * c,const struct btree * b)1269 void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
1270 {
1271 struct bset_stats stats;
1272
1273 memset(&stats, 0, sizeof(stats));
1274
1275 bch2_btree_keys_stats(b, &stats);
1276
1277 prt_printf(out, "l %u ", b->c.level);
1278 bch2_bpos_to_text(out, b->data->min_key);
1279 prt_printf(out, " - ");
1280 bch2_bpos_to_text(out, b->data->max_key);
1281 prt_printf(out, ":\n"
1282 " ptrs: ");
1283 bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
1284 prt_newline(out);
1285
1286 prt_printf(out,
1287 " format: ");
1288 bch2_bkey_format_to_text(out, &b->format);
1289
1290 prt_printf(out,
1291 " unpack fn len: %u\n"
1292 " bytes used %zu/%zu (%zu%% full)\n"
1293 " sib u64s: %u, %u (merge threshold %u)\n"
1294 " nr packed keys %u\n"
1295 " nr unpacked keys %u\n"
1296 " floats %zu\n"
1297 " failed unpacked %zu\n",
1298 b->unpack_fn_len,
1299 b->nr.live_u64s * sizeof(u64),
1300 btree_buf_bytes(b) - sizeof(struct btree_node),
1301 b->nr.live_u64s * 100 / btree_max_u64s(c),
1302 b->sib_u64s[0],
1303 b->sib_u64s[1],
1304 c->btree_foreground_merge_threshold,
1305 b->nr.packed_keys,
1306 b->nr.unpacked_keys,
1307 stats.floats,
1308 stats.failed);
1309 }
1310
prt_btree_cache_line(struct printbuf * out,const struct bch_fs * c,const char * label,unsigned nr)1311 static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c,
1312 const char *label, unsigned nr)
1313 {
1314 prt_printf(out, "%s\t", label);
1315 prt_human_readable_u64(out, nr * c->opts.btree_node_size);
1316 prt_printf(out, " (%u)\n", nr);
1317 }
1318
/* Dump btree cache usage and the shrinker's "not freed" counters */
void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
{
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);

	/* Align the value column via a printbuf tabstop */
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	prt_btree_cache_line(out, c, "total:", bc->used);
	prt_btree_cache_line(out, c, "nr dirty:", atomic_read(&bc->dirty));
	prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
	prt_newline(out);

	/* Per-btree breakdown of cached nodes */
	for (unsigned i = 0; i < ARRAY_SIZE(bc->used_by_btree); i++)
		prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->used_by_btree[i]);

	prt_newline(out);
	prt_printf(out, "freed:\t%u\n", bc->freed);
	/* Reasons the shrinker declined to free a node */
	prt_printf(out, "not freed:\n");
	prt_printf(out, "  dirty\t%u\n", bc->not_freed_dirty);
	prt_printf(out, "  write in flight\t%u\n", bc->not_freed_write_in_flight);
	prt_printf(out, "  read in flight\t%u\n", bc->not_freed_read_in_flight);
	prt_printf(out, "  lock intent failed\t%u\n", bc->not_freed_lock_intent);
	prt_printf(out, "  lock write failed\t%u\n", bc->not_freed_lock_write);
	prt_printf(out, "  access bit\t%u\n", bc->not_freed_access_bit);
	prt_printf(out, "  no evict failed\t%u\n", bc->not_freed_noevict);
	prt_printf(out, "  write blocked\t%u\n", bc->not_freed_write_blocked);
	prt_printf(out, "  will make reachable\t%u\n", bc->not_freed_will_make_reachable);
}
1347