xref: /dragonfly/sys/vfs/hammer/hammer_reblock.c (revision 3568afc1)
1 /*
2  * Copyright (c) 2008-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER reblocker - This code frees up fragmented physical space
36  *
37  * HAMMER only keeps track of free space on a big-block basis.  A big-block
38  * containing holes can only be freed by migrating the remaining data in
39  * that big-block into a new big-block, then freeing the big-block.
40  *
41  * This function is called from an ioctl or via the hammer support thread.
42  */
43 
44 #include "hammer.h"
45 
/*
 * Forward declarations for the file-local helpers defined later in this
 * file.  For the node reblockers and hammer_move_node(), elm is the parent
 * element pointing at the cursor's node (NULL when the node has no parent).
 */
static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
				 hammer_cursor_t cursor,
				 hammer_btree_elm_t elm);
static int hammer_reblock_data(struct hammer_ioc_reblock *reblock,
				hammer_cursor_t cursor, hammer_btree_elm_t elm);
static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
				hammer_cursor_t cursor, hammer_btree_elm_t elm);
static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
				hammer_cursor_t cursor, hammer_btree_elm_t elm);
static void hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
				hammer_node_t onode, hammer_node_t nnode);
57 
/*
 * Reblock ioctl entry point.
 *
 * Scans the B-Tree over the key range described by reblock (key_beg through
 * key_end, end inclusive) and hands every element returned by the scan to
 * hammer_reblock_helper().
 *
 * reblock->key_cur tracks the scan position.  It is updated for every
 * element visited and is the point from which the scan is restarted when
 * the loop ends with EDEADLK or EWOULDBLOCK.  Its localization is masked
 * with HAMMER_LOCALIZE_MASK again before returning.
 *
 * Returns EINVAL if the key range or free_level fails validation, ENOSPC
 * if the free space check fails 10 times over the course of the call, or
 * whatever other error terminated the scan.  ENOENT from the scan is
 * converted to 0.  EINTR is converted to 0 with HAMMER_IOC_HEAD_INTR set
 * in reblock->head.flags.
 */
int
hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
		   struct hammer_ioc_reblock *reblock)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int checkspace_count;
	int error;
	int seq;
	int slop;
	uint32_t key_end_localization;

	/*
	 * The caller may not specify PFS bits in the localization fields,
	 * they are filled in below.
	 */
	if ((reblock->key_beg.localization | reblock->key_end.localization) &
	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
		return(EINVAL);
	}
	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
		return(EINVAL);
	if (reblock->free_level < 0 ||
	    reblock->free_level > HAMMER_BIGBLOCK_SIZE)
		return(EINVAL);

	/*
	 * A fill_percentage <= 20% is considered an emergency.  free_level is
	 * inverted from fill_percentage.
	 */
	if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10)
		slop = HAMMER_CHKSPC_EMERGENCY;
	else
		slop = HAMMER_CHKSPC_REBLOCK;

	/*
	 * Ioctl caller has only set localization type to reblock.
	 * Initialize cursor key localization with ip localization.
	 */
	reblock->key_cur = reblock->key_beg;
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	if (reblock->allpfs == 0)
		reblock->key_cur.localization |= ip->obj_localization;

	/*
	 * The ending localization is built the same way, except that when
	 * allpfs is set it is extended to HAMMER_MAX_PFSID.
	 */
	key_end_localization = reblock->key_end.localization;
	key_end_localization &= HAMMER_LOCALIZE_MASK;
	if (reblock->allpfs == 0)
		key_end_localization |= ip->obj_localization;
	else
		key_end_localization |= pfs_to_lo(HAMMER_MAX_PFSID);

	checkspace_count = 0;
	seq = trans->hmp->flusher.done;
retry:
	/*
	 * (Re)start the scan.  On a retry the scan resumes from key_cur,
	 * which was last updated by the loop below.
	 */
	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		goto failed;
	}
	cursor.key_beg.localization = reblock->key_cur.localization;
	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	cursor.key_end.localization = key_end_localization;
	cursor.key_end.obj_id = reblock->key_end.obj_id;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
	cursor.flags |= HAMMER_CURSOR_BACKEND;
	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;

	/*
	 * This flag allows the btree scan code to return internal nodes,
	 * so we can reblock them in addition to the leaves.  Only specify it
	 * if we intend to reblock B-Tree nodes.
	 */
	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
		cursor.flags |= HAMMER_CURSOR_REBLOCKING;

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		/*
		 * Internal or Leaf node
		 *
		 * Record the position of the current element so a restarted
		 * scan picks up from here.
		 */
		KKASSERT(cursor.index < cursor.node->ondisk->count);
		elm = &cursor.node->ondisk->elms[cursor.index];
		reblock->key_cur.obj_id = elm->base.obj_id;
		reblock->key_cur.localization = elm->base.localization;

		/*
		 * Yield to more important tasks
		 */
		if ((error = hammer_signal_check(trans->hmp)) != 0)
			break;

		/*
		 * If there is insufficient free space it may be due to
		 * reserved big-blocks, which flushing might fix.
		 *
		 * We must force a retest in case the unlocked cursor is
		 * moved to the end of the leaf, or moved to an internal
		 * node.
		 *
		 * The current element is not processed in this case, we
		 * jump straight to the iteration step.  checkspace_count is
		 * never reset, so the limit of 10 applies to the whole call.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		if (hammer_checkspace(trans->hmp, slop)) {
			if (++checkspace_count == 10) {
				error = ENOSPC;
				break;
			}
			hammer_unlock_cursor(&cursor);
			cursor.flags |= HAMMER_CURSOR_RETEST;
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async(trans->hmp, NULL);
			goto skip;
		}

		/*
		 * Acquiring the sync_lock prevents the operation from
		 * crossing a synchronization boundary.
		 *
		 * NOTE: cursor.node may have changed on return.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 */
		hammer_sync_lock_sh(trans);
		error = hammer_reblock_helper(reblock, &cursor, elm);
		hammer_sync_unlock(trans);

		/*
		 * Wait for the flusher, with the cursor unlocked, for as
		 * long as either of these two checks keeps returning
		 * non-zero.
		 */
		while (hammer_flusher_meta_halflimit(trans->hmp) ||
		       hammer_flusher_undo_exhausted(trans, 2)) {
			hammer_unlock_cursor(&cursor);
			hammer_flusher_wait(trans->hmp, seq);
			hammer_lock_cursor(&cursor);
			seq = hammer_flusher_async_one(trans->hmp);
		}

		/*
		 * Setup for iteration, our cursor flags may be modified by
		 * other threads while we are unlocked.
		 */
		cursor.flags |= HAMMER_CURSOR_ATEDISK;

		/*
		 * We allocate data buffers, which atm we don't track
		 * dirty levels for because we allow the kernel to write
		 * them.  But if we allocate too many we can still deadlock
		 * the buffer cache.
		 *
		 * WARNING: See warnings in hammer_unlock_cursor() function.
		 *	    (The cursor's node and element may change!)
		 */
		if (bd_heatup()) {
			hammer_unlock_cursor(&cursor);
			bwillwrite(HAMMER_XBUFSIZE);
			hammer_lock_cursor(&cursor);
		}
		vm_wait_nominal();
skip:
		/*
		 * Only advance if the helper succeeded, otherwise the
		 * error terminates the loop and is sorted out below.
		 */
		if (error == 0) {
			error = hammer_btree_iterate(&cursor);
		}
	}

	/*
	 * ENOENT ends the loop but is not reported as an error.
	 */
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);

	/*
	 * EWOULDBLOCK and EDEADLK restart the scan from key_cur with a
	 * fresh cursor, the former after synchronizing the flusher.
	 */
	if (error == EWOULDBLOCK) {
		hammer_flusher_sync(trans->hmp);
		goto retry;
	}
	if (error == EDEADLK)
		goto retry;

	/*
	 * An interruption is reported through the ioctl header flags
	 * rather than as an error.
	 */
	if (error == EINTR) {
		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
		error = 0;
	}
failed:
	/*
	 * Strip everything outside HAMMER_LOCALIZE_MASK from key_cur again
	 * before handing the structure back.
	 */
	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
	return(error);
}
243 
/*
 * Reblock the data referenced by the element under the cursor and/or the
 * B-Tree node (leaf or internal) the cursor is positioned at, if necessary.
 *
 * Data is only considered when the cursor is at a leaf node.  The node
 * itself is only considered when the cursor index is 0 and the caller
 * requested HAMMER_IOC_DO_BTREE.
 *
 * elm is the element at cursor->index on entry.  The cursor is unlocked
 * and relocked while the data is being uncached; elm is reloaded from the
 * cursor afterwards and EDEADLK is returned if the element was retested,
 * shifted or changed in the meantime.
 *
 * Returns EINVAL if a leaf element is not of type RECORD (in which case
 * the cursor is not downgraded, it has not been upgraded at that point),
 * otherwise the first error encountered or 0.  The statistics counters in
 * reblock are updated along the way.
 */
static int
hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
{
	hammer_mount_t hmp;
	hammer_off_t tmp_offset;
	hammer_node_ondisk_t ondisk;
	struct hammer_btree_leaf_elm leaf;
	int error;
	int bytes;
	int cur;
	int iocflags;

	error = 0;
	hmp = cursor->trans->hmp;

	/*
	 * Reblock data.  Note that data embedded in a record is reblocked
	 * by the record reblock code.  Data processing only occurs at leaf
	 * nodes and for RECORD element types.
	 */
	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
		goto skip;
	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
		return(EINVAL);
	tmp_offset = elm->leaf.data_offset;
	if (tmp_offset == 0)
		goto skip;

	/*
	 * If reblock->vol_no is specified we only want to reblock data
	 * in that volume, but ignore everything else.
	 */
	if (reblock->vol_no != -1 &&
	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
		goto skip;

	/*
	 * Map the record type to the ioctl flag which enables reblocking
	 * of that kind of record.  Unknown record types map to 0 and are
	 * therefore never reblocked.
	 *
	 * NOTE: Localization restrictions may also have been set-up, we can't
	 *	 just set the match flags willy-nilly here.
	 */
	switch(elm->leaf.base.rec_type) {
	case HAMMER_RECTYPE_INODE:
	case HAMMER_RECTYPE_SNAPSHOT:
	case HAMMER_RECTYPE_CONFIG:
		iocflags = HAMMER_IOC_DO_INODES;
		break;
	case HAMMER_RECTYPE_EXT:
	case HAMMER_RECTYPE_FIX:
	case HAMMER_RECTYPE_PFS:
	case HAMMER_RECTYPE_DIRENTRY:
		iocflags = HAMMER_IOC_DO_DIRS;
		break;
	case HAMMER_RECTYPE_DATA:
	case HAMMER_RECTYPE_DB:
		iocflags = HAMMER_IOC_DO_DATA;
		break;
	default:
		iocflags = 0;
		break;
	}
	if (reblock->head.flags & iocflags) {
		++reblock->data_count;
		reblock->data_byte_count += elm->leaf.data_len;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			hdkprintf("D %6d/%d\n", bytes, reblock->free_level);
		/*
		 * Start data reblock if
		 * 1. there is no error
		 * 2. the data and allocator offset are not in the same
		 *    big-block, or free level threshold is 0
		 * 3. free bytes in the data's big-block is at least the
		 *    free level threshold (means if threshold is 0 then
		 *    do reblock no matter what).
		 */
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			/*
			 * This is nasty, the uncache code may have to get
			 * vnode locks and because of that we can't hold
			 * the cursor locked.
			 *
			 * A private copy of the leaf element is taken so
			 * it can be used while the cursor is unlocked and
			 * compared against the element found afterwards.
			 *
			 * WARNING: See warnings in hammer_unlock_cursor()
			 *	    function.
			 */
			leaf = elm->leaf;
			hammer_unlock_cursor(cursor);
			hammer_io_direct_uncache(hmp, &leaf);
			hammer_lock_cursor(cursor);

			/*
			 * elm may have become stale or invalid, reload it.
			 * ondisk variable is temporary only.  Note that
			 * cursor->node and thus cursor->node->ondisk may
			 * also have changed.
			 */
			ondisk = cursor->node->ondisk;
			elm = &ondisk->elms[cursor->index];
			if (cursor->flags & HAMMER_CURSOR_RETEST) {
				hkprintf("debug: retest on reblocker uncache\n");
				error = EDEADLK;
			} else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF ||
				   cursor->index >= ondisk->count) {
				hkprintf("debug: shifted on reblocker uncache\n");
				error = EDEADLK;
			} else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) {
				hkprintf("debug: changed on reblocker uncache\n");
				error = EDEADLK;
			}
			if (error == 0)
				error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				KKASSERT(cursor->index < ondisk->count);
				error = hammer_reblock_data(reblock,
							    cursor, elm);
			}
			if (error == 0) {
				++reblock->data_moves;
				reblock->data_byte_moves += elm->leaf.data_len;
			}
		}
	}

skip:
	/*
	 * Reblock a B-Tree internal or leaf node.  A leaf node is reblocked
	 * on initial entry only (element 0).  An internal node is reblocked
	 * when entered upward from its first leaf node only (also element 0,
	 * see hammer_btree_iterate() where cursor moves up and may return).
	 * Further revisits of the internal node (index > 0) are ignored.
	 */
	tmp_offset = cursor->node->node_offset;

	/*
	 * If reblock->vol_no is specified we only want to reblock nodes
	 * in that volume, but ignore everything else.
	 */
	if (reblock->vol_no != -1 &&
	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
		goto end;

	/*
	 * An error from the data pass above suppresses the node pass.
	 */
	if (cursor->index == 0 &&
	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
		++reblock->btree_count;
		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
		if (hammer_debug_general & 0x4000)
			hdkprintf("B %6d/%d\n", bytes, reblock->free_level);
		/*
		 * Start node reblock if
		 * 1. there is no error
		 * 2. the node and allocator offset are not in the same
		 *    big-block, or free level threshold is 0
		 * 3. free bytes in the node's big-block is at least the
		 *    free level threshold (means if threshold is 0 then
		 *    do reblock no matter what).
		 */
		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
		    bytes >= reblock->free_level) {
			error = hammer_cursor_upgrade(cursor);
			if (error == 0) {
				/*
				 * From here on elm is the parent's element
				 * pointing at cursor->node, or NULL if the
				 * node has no parent.
				 */
				if (cursor->parent) {
					KKASSERT(cursor->parent_index <
						 cursor->parent->ondisk->count);
					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
				} else {
					elm = NULL;
				}
				switch(cursor->node->ondisk->type) {
				case HAMMER_BTREE_TYPE_LEAF:
					error = hammer_reblock_leaf_node(
							reblock, cursor, elm);
					break;
				case HAMMER_BTREE_TYPE_INTERNAL:
					error = hammer_reblock_int_node(
							reblock, cursor, elm);
					break;
				default:
					hpanic("Illegal B-Tree node type");
				}
			}
			if (error == 0) {
				++reblock->btree_moves;
			}
		}
	}
end:
	/*
	 * Undo any cursor upgrade done above before returning to the scan.
	 */
	hammer_cursor_downgrade(cursor);
	return(error);
}
440 
441 /*
442  * Reblock a record's data.  Both the B-Tree element and record pointers
443  * to the data must be adjusted.
444  */
445 static int
446 hammer_reblock_data(struct hammer_ioc_reblock *reblock,
447 		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
448 {
449 	hammer_buffer_t data_buffer = NULL;
450 	hammer_off_t odata_offset;
451 	hammer_off_t ndata_offset;
452 	int error;
453 	void *ndata;
454 
455 	error = hammer_btree_extract_data(cursor);
456 	if (error)
457 		return (error);
458 	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
459 				  elm->leaf.base.rec_type,
460 				  &ndata_offset, &data_buffer,
461 				  0, &error);
462 	if (error)
463 		goto done;
464 	hammer_io_notmeta(data_buffer);
465 
466 	/*
467 	 * Move the data.  Note that we must invalidate any cached
468 	 * data buffer in the cursor before calling blockmap_free.
469 	 * The blockmap_free may free up the entire big-block and
470 	 * will not be able to invalidate it if the cursor is holding
471 	 * a data buffer cached in that big-block.
472 	 */
473 	hammer_modify_buffer_noundo(cursor->trans, data_buffer);
474 	bcopy(cursor->data, ndata, elm->leaf.data_len);
475 	hammer_modify_buffer_done(data_buffer);
476 	hammer_cursor_invalidate_cache(cursor);
477 
478 	hammer_blockmap_free(cursor->trans,
479 			     elm->leaf.data_offset, elm->leaf.data_len);
480 
481 	hammer_modify_node(cursor->trans, cursor->node,
482 			   &elm->leaf.data_offset, sizeof(hammer_off_t));
483 	odata_offset = elm->leaf.data_offset;
484 	elm->leaf.data_offset = ndata_offset;
485 	hammer_modify_node_done(cursor->node);
486 
487 	if (hammer_debug_general & 0x4000) {
488 		hdkprintf("%08x %016jx -> %016jx\n",
489 			(elm ? elm->base.localization : -1),
490 			(intmax_t)odata_offset,
491 			(intmax_t)ndata_offset);
492 	}
493 done:
494 	if (data_buffer)
495 		hammer_rel_buffer(data_buffer, 0);
496 	return (error);
497 }
498 
499 /*
500  * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
501  * the new copy of the leaf node.
502  *
503  * elm is a pointer to the parent element pointing at cursor.node.
504  */
505 static int
506 hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
507 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
508 {
509 	hammer_node_t onode;
510 	hammer_node_t nnode;
511 	int error;
512 
513 	/*
514 	 * Don't supply a hint when allocating the leaf.  Fills are done
515 	 * from the leaf upwards.
516 	 */
517 	onode = cursor->node;
518 	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
519 
520 	if (nnode == NULL)
521 		return (error);
522 
523 	hammer_lock_ex(&nnode->lock);
524 	hammer_modify_node_noundo(cursor->trans, nnode);
525 
526 	hammer_move_node(cursor, elm, onode, nnode);
527 
528 	/*
529 	 * Clean up.
530 	 *
531 	 * The new node replaces the current node in the cursor.  The cursor
532 	 * expects it to be locked so leave it locked.  Discard onode.
533 	 */
534 	hammer_cursor_replaced_node(onode, nnode);
535 	hammer_delete_node(cursor->trans, onode);
536 
537 	if (hammer_debug_general & 0x4000) {
538 		hdkprintf("%08x %016jx -> %016jx\n",
539 			(elm ? elm->base.localization : -1),
540 			(intmax_t)onode->node_offset,
541 			(intmax_t)nnode->node_offset);
542 	}
543 	hammer_modify_node_done(nnode);
544 	cursor->node = nnode;
545 
546 	hammer_unlock(&onode->lock);
547 	hammer_rel_node(onode);
548 
549 	return (error);
550 }
551 
552 /*
553  * Reblock a B-Tree internal node.  The parent must be adjusted to point to
554  * the new copy of the internal node, and the node's children's parent
555  * pointers must also be adjusted to point to the new copy.
556  *
557  * elm is a pointer to the parent element pointing at cursor.node.
558  */
559 static int
560 hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
561 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
562 {
563 	struct hammer_node_lock lockroot;
564 	hammer_node_t onode;
565 	hammer_node_t nnode;
566 	int error;
567 
568 	hammer_node_lock_init(&lockroot, cursor->node);
569 	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
570 	if (error)
571 		goto done;
572 
573 	/*
574 	 * Don't supply a hint when allocating the leaf.  Fills are done
575 	 * from the leaf upwards.
576 	 */
577 	onode = cursor->node;
578 	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
579 
580 	if (nnode == NULL)
581 		goto done;
582 
583 	hammer_lock_ex(&nnode->lock);
584 	hammer_modify_node_noundo(cursor->trans, nnode);
585 
586 	hammer_move_node(cursor, elm, onode, nnode);
587 
588 	/*
589 	 * Clean up.
590 	 *
591 	 * The new node replaces the current node in the cursor.  The cursor
592 	 * expects it to be locked so leave it locked.  Discard onode.
593 	 */
594 	hammer_cursor_replaced_node(onode, nnode);
595 	hammer_delete_node(cursor->trans, onode);
596 
597 	if (hammer_debug_general & 0x4000) {
598 		hdkprintf("%08x %016jx -> %016jx\n",
599 			(elm ? elm->base.localization : -1),
600 			(intmax_t)onode->node_offset,
601 			(intmax_t)nnode->node_offset);
602 	}
603 	hammer_modify_node_done(nnode);
604 	cursor->node = nnode;
605 
606 	hammer_unlock(&onode->lock);
607 	hammer_rel_node(onode);
608 
609 done:
610 	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
611 	return (error);
612 }
613 
614 /*
615  * nnode is a newly allocated node, and now elm becomes the node
616  * element within nnode's parent that represents a pointer to nnode,
617  * or nnode becomes the root node if elm does not exist.
618  */
619 static void
620 hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
621 		 hammer_node_t onode, hammer_node_t nnode)
622 {
623 	int error, i;
624 
625 	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
626 
627 	/*
628 	 * Adjust the parent's pointer to us first.
629 	 */
630 	if (elm) {
631 		/*
632 		 * We are not the root of the B-Tree
633 		 */
634 		KKASSERT(hammer_is_internal_node_elm(elm));
635 		hammer_modify_node(cursor->trans, cursor->parent,
636 				   &elm->internal.subtree_offset,
637 				   sizeof(elm->internal.subtree_offset));
638 		elm->internal.subtree_offset = nnode->node_offset;
639 		hammer_modify_node_done(cursor->parent);
640 	} else {
641 		/*
642 		 * We are the root of the B-Tree
643 		 */
644 		hammer_volume_t volume;
645 		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
646 		KKASSERT(error == 0);
647 
648 		hammer_modify_volume_field(cursor->trans, volume,
649 					   vol0_btree_root);
650 		volume->ondisk->vol0_btree_root = nnode->node_offset;
651 		hammer_modify_volume_done(volume);
652 		hammer_rel_volume(volume, 0);
653 	}
654 
655 	/*
656 	 * Now adjust our children's pointers to us
657 	 * if we are an internal node.
658 	 */
659 	if (nnode->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
660 		for (i = 0; i < nnode->ondisk->count; ++i) {
661 			error = btree_set_parent_of_child(cursor->trans, nnode,
662 					&nnode->ondisk->elms[i]);
663 			if (error)
664 				hpanic("reblock internal node: fixup problem");
665 		}
666 	}
667 }
668