xref: /dragonfly/sys/vfs/hammer/hammer_reblock.c (revision 55358b98)
1 /*
2  * Copyright (c) 2008-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER reblocker - This code frees up fragmented physical space
36  *
37  * HAMMER only keeps track of free space on a big-block basis.  A big-block
38  * containing holes can only be freed by migrating the remaining data in
39  * that big-block into a new big-block, then freeing the big-block.
40  *
41  * This function is called from an ioctl or via the hammer support thread.
42  */
43 
44 #include "hammer.h"
45 
46 static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
47 				 hammer_cursor_t cursor,
48 				 hammer_btree_elm_t elm);
49 static int hammer_reblock_data(struct hammer_ioc_reblock *reblock,
50 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
51 static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
52 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
53 static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
54 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
55 static void hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
56 				hammer_node_t onode, hammer_node_t nnode);
57 
58 int
59 hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
60 		   struct hammer_ioc_reblock *reblock)
61 {
62 	struct hammer_cursor cursor;
63 	hammer_btree_elm_t elm;
64 	int checkspace_count;
65 	int error;
66 	int seq;
67 	int slop;
68 	uint32_t key_end_localization;
69 
70 	if ((reblock->key_beg.localization | reblock->key_end.localization) &
71 	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
72 		return(EINVAL);
73 	}
74 	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
75 		return(EINVAL);
76 	if (reblock->free_level < 0 ||
77 	    reblock->free_level > HAMMER_BIGBLOCK_SIZE)
78 		return(EINVAL);
79 
80 	/*
81 	 * A fill_percentage <= 20% is considered an emergency.  free_level is
82 	 * inverted from fill_percentage.
83 	 */
84 	if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10)
85 		slop = HAMMER_CHKSPC_EMERGENCY;
86 	else
87 		slop = HAMMER_CHKSPC_REBLOCK;
88 
89 	/*
90 	 * Ioctl caller has only set localization type to reblock.
91 	 * Initialize cursor key localization with ip localization.
92 	 */
93 	reblock->key_cur = reblock->key_beg;
94 	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
95 	if (reblock->allpfs == 0)
96 		reblock->key_cur.localization |= ip->obj_localization;
97 
98 	key_end_localization = reblock->key_end.localization;
99 	key_end_localization &= HAMMER_LOCALIZE_MASK;
100 	if (reblock->allpfs == 0)
101 		key_end_localization |= ip->obj_localization;
102 	else
103 		key_end_localization |= pfs_to_lo(HAMMER_MAX_PFSID);
104 
105 	checkspace_count = 0;
106 	seq = trans->hmp->flusher.done;
107 retry:
108 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
109 	if (error) {
110 		hammer_done_cursor(&cursor);
111 		goto failed;
112 	}
113 	cursor.key_beg.localization = reblock->key_cur.localization;
114 	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
115 	cursor.key_beg.key = HAMMER_MIN_KEY;
116 	cursor.key_beg.create_tid = 1;
117 	cursor.key_beg.delete_tid = 0;
118 	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
119 	cursor.key_beg.obj_type = 0;
120 
121 	cursor.key_end.localization = key_end_localization;
122 	cursor.key_end.obj_id = reblock->key_end.obj_id;
123 	cursor.key_end.key = HAMMER_MAX_KEY;
124 	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
125 	cursor.key_end.delete_tid = 0;
126 	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
127 	cursor.key_end.obj_type = 0;
128 
129 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
130 	cursor.flags |= HAMMER_CURSOR_BACKEND;
131 	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;
132 
133 	/*
134 	 * This flag allows the btree scan code to return internal nodes,
135 	 * so we can reblock them in addition to the leafs.  Only specify it
136 	 * if we intend to reblock B-Tree nodes.
137 	 */
138 	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
139 		cursor.flags |= HAMMER_CURSOR_REBLOCKING;
140 
141 	error = hammer_btree_first(&cursor);
142 	while (error == 0) {
143 		/*
144 		 * Internal or Leaf node
145 		 */
146 		KKASSERT(cursor.index < cursor.node->ondisk->count);
147 		elm = &cursor.node->ondisk->elms[cursor.index];
148 		reblock->key_cur.obj_id = elm->base.obj_id;
149 		reblock->key_cur.localization = elm->base.localization;
150 
151 		/*
152 		 * Yield to more important tasks
153 		 */
154 		if ((error = hammer_signal_check(trans->hmp)) != 0)
155 			break;
156 
157 		/*
158 		 * If there is insufficient free space it may be due to
159 		 * reserved big-blocks, which flushing might fix.
160 		 *
161 		 * We must force a retest in case the unlocked cursor is
162 		 * moved to the end of the leaf, or moved to an internal
163 		 * node.
164 		 *
165 		 * WARNING: See warnings in hammer_unlock_cursor() function.
166 		 */
167 		if (hammer_checkspace(trans->hmp, slop)) {
168 			if (++checkspace_count == 10) {
169 				error = ENOSPC;
170 				break;
171 			}
172 			hammer_unlock_cursor(&cursor);
173 			cursor.flags |= HAMMER_CURSOR_RETEST;
174 			hammer_flusher_wait(trans->hmp, seq);
175 			hammer_lock_cursor(&cursor);
176 			seq = hammer_flusher_async(trans->hmp, NULL);
177 			goto skip;
178 		}
179 
180 		/*
181 		 * Acquiring the sync_lock prevents the operation from
182 		 * crossing a synchronization boundary.
183 		 *
184 		 * NOTE: cursor.node may have changed on return.
185 		 *
186 		 * WARNING: See warnings in hammer_unlock_cursor() function.
187 		 */
188 		hammer_sync_lock_sh(trans);
189 		error = hammer_reblock_helper(reblock, &cursor, elm);
190 		hammer_sync_unlock(trans);
191 
192 		while (hammer_flusher_meta_halflimit(trans->hmp) ||
193 		       hammer_flusher_undo_exhausted(trans, 2)) {
194 			hammer_unlock_cursor(&cursor);
195 			hammer_flusher_wait(trans->hmp, seq);
196 			hammer_lock_cursor(&cursor);
197 			seq = hammer_flusher_async_one(trans->hmp);
198 		}
199 
200 		/*
201 		 * Setup for iteration, our cursor flags may be modified by
202 		 * other threads while we are unlocked.
203 		 */
204 		cursor.flags |= HAMMER_CURSOR_ATEDISK;
205 
206 		/*
207 		 * We allocate data buffers, which atm we don't track
208 		 * dirty levels for because we allow the kernel to write
209 		 * them.  But if we allocate too many we can still deadlock
210 		 * the buffer cache.
211 		 *
212 		 * WARNING: See warnings in hammer_unlock_cursor() function.
213 		 *	    (The cursor's node and element may change!)
214 		 */
215 		if (bd_heatup()) {
216 			hammer_unlock_cursor(&cursor);
217 			bwillwrite(HAMMER_XBUFSIZE);
218 			hammer_lock_cursor(&cursor);
219 		}
220 		vm_wait_nominal();
221 skip:
222 		if (error == 0) {
223 			error = hammer_btree_iterate(&cursor);
224 		}
225 	}
226 	if (error == ENOENT)
227 		error = 0;
228 	hammer_done_cursor(&cursor);
229 	if (error == EWOULDBLOCK) {
230 		hammer_flusher_sync(trans->hmp);
231 		goto retry;
232 	}
233 	if (error == EDEADLK)
234 		goto retry;
235 	if (error == EINTR) {
236 		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
237 		error = 0;
238 	}
239 failed:
240 	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
241 	return(error);
242 }
243 
244 /*
245  * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
246  *
247  * XXX We have no visibility into internal B-Tree nodes at the moment,
248  * only leaf nodes.
249  */
250 static int
251 hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
252 		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
253 {
254 	hammer_mount_t hmp;
255 	hammer_off_t tmp_offset;
256 	hammer_node_ondisk_t ondisk;
257 	struct hammer_btree_leaf_elm leaf;
258 	int error;
259 	int bytes;
260 	int cur;
261 	int iocflags;
262 
263 	error = 0;
264 	hmp = cursor->trans->hmp;
265 
266 	/*
267 	 * Reblock data.  Note that data embedded in a record is reblocked
268 	 * by the record reblock code.  Data processing only occurs at leaf
269 	 * nodes and for RECORD element types.
270 	 */
271 	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
272 		goto skip;
273 	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
274 		return(EINVAL);
275 	tmp_offset = elm->leaf.data_offset;
276 	if (tmp_offset == 0)
277 		goto skip;
278 
279 	/*
280 	 * If reblock->vol_no is specified we only want to reblock data
281 	 * in that volume, but ignore everything else.
282 	 */
283 	if (reblock->vol_no != -1 &&
284 	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
285 		goto skip;
286 
287 	/*
288 	 * NOTE: Localization restrictions may also have been set-up, we can't
289 	 *	 just set the match flags willy-nilly here.
290 	 */
291 	switch(elm->leaf.base.rec_type) {
292 	case HAMMER_RECTYPE_INODE:
293 	case HAMMER_RECTYPE_SNAPSHOT:
294 	case HAMMER_RECTYPE_CONFIG:
295 		iocflags = HAMMER_IOC_DO_INODES;
296 		break;
297 	case HAMMER_RECTYPE_EXT:
298 	case HAMMER_RECTYPE_FIX:
299 	case HAMMER_RECTYPE_PFS:
300 	case HAMMER_RECTYPE_DIRENTRY:
301 		iocflags = HAMMER_IOC_DO_DIRS;
302 		break;
303 	case HAMMER_RECTYPE_DATA:
304 	case HAMMER_RECTYPE_DB:
305 		iocflags = HAMMER_IOC_DO_DATA;
306 		break;
307 	default:
308 		iocflags = 0;
309 		break;
310 	}
311 	if (reblock->head.flags & iocflags) {
312 		++reblock->data_count;
313 		reblock->data_byte_count += elm->leaf.data_len;
314 		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
315 		if (hammer_debug_general & 0x4000)
316 			hdkprintf("D %6d/%d\n", bytes, reblock->free_level);
317 		/*
318 		 * Start data reblock if
319 		 * 1. there is no error
320 		 * 2. the data and allocator offset are not in the same
321 		 *    big-block, or free level threshold is 0
322 		 * 3. free bytes in the data's big-block is larger than
323 		 *    free level threshold (means if threshold is 0 then
324 		 *    do reblock no matter what).
325 		 */
326 		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
327 		    bytes >= reblock->free_level) {
328 			/*
329 			 * This is nasty, the uncache code may have to get
330 			 * vnode locks and because of that we can't hold
331 			 * the cursor locked.
332 			 *
333 			 * WARNING: See warnings in hammer_unlock_cursor()
334 			 *	    function.
335 			 */
336 			leaf = elm->leaf;
337 			hammer_unlock_cursor(cursor);
338 			hammer_io_direct_uncache(hmp, &leaf);
339 			hammer_lock_cursor(cursor);
340 
341 			/*
342 			 * elm may have become stale or invalid, reload it.
343 			 * ondisk variable is temporary only.  Note that
344 			 * cursor->node and thus cursor->node->ondisk may
345 			 * also changed.
346 			 */
347 			ondisk = cursor->node->ondisk;
348 			elm = &ondisk->elms[cursor->index];
349 			if (cursor->flags & HAMMER_CURSOR_RETEST) {
350 				hkprintf("debug: retest on reblocker uncache\n");
351 				error = EDEADLK;
352 			} else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF ||
353 				   cursor->index >= ondisk->count) {
354 				hkprintf("debug: shifted on reblocker uncache\n");
355 				error = EDEADLK;
356 			} else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) {
357 				hkprintf("debug: changed on reblocker uncache\n");
358 				error = EDEADLK;
359 			}
360 			if (error == 0)
361 				error = hammer_cursor_upgrade(cursor);
362 			if (error == 0) {
363 				KKASSERT(cursor->index < ondisk->count);
364 				error = hammer_reblock_data(reblock,
365 							    cursor, elm);
366 			}
367 			if (error == 0) {
368 				++reblock->data_moves;
369 				reblock->data_byte_moves += elm->leaf.data_len;
370 			}
371 		}
372 	}
373 
374 skip:
375 	/*
376 	 * Reblock a B-Tree internal or leaf node.  A leaf node is reblocked
377 	 * on initial entry only (element 0).  An internal node is reblocked
378 	 * when entered upward from its first leaf node only (also element 0,
379 	 * see hammer_btree_iterate() where cursor moves up and may return).
380 	 * Further revisits of the internal node (index > 0) are ignored.
381 	 */
382 	tmp_offset = cursor->node->node_offset;
383 
384 	/*
385 	 * If reblock->vol_no is specified we only want to reblock data
386 	 * in that volume, but ignore everything else.
387 	 */
388 	if (reblock->vol_no != -1 &&
389 	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
390 		goto end;
391 
392 	if (cursor->index == 0 &&
393 	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
394 		++reblock->btree_count;
395 		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
396 		if (hammer_debug_general & 0x4000)
397 			hdkprintf("B %6d/%d\n", bytes, reblock->free_level);
398 		/*
399 		 * Start node reblock if
400 		 * 1. there is no error
401 		 * 2. the node and allocator offset are not in the same
402 		 *    big-block, or free level threshold is 0
403 		 * 3. free bytes in the node's big-block is larger than
404 		 *    free level threshold (means if threshold is 0 then
405 		 *    do reblock no matter what).
406 		 */
407 		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
408 		    bytes >= reblock->free_level) {
409 			error = hammer_cursor_upgrade(cursor);
410 			if (error == 0) {
411 				if (cursor->parent) {
412 					KKASSERT(cursor->parent_index <
413 						 cursor->parent->ondisk->count);
414 					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
415 				} else {
416 					elm = NULL;
417 				}
418 				switch(cursor->node->ondisk->type) {
419 				case HAMMER_BTREE_TYPE_LEAF:
420 					error = hammer_reblock_leaf_node(
421 							reblock, cursor, elm);
422 					break;
423 				case HAMMER_BTREE_TYPE_INTERNAL:
424 					error = hammer_reblock_int_node(
425 							reblock, cursor, elm);
426 					break;
427 				default:
428 					hpanic("Illegal B-Tree node type");
429 				}
430 			}
431 			if (error == 0) {
432 				++reblock->btree_moves;
433 			}
434 		}
435 	}
436 end:
437 	hammer_cursor_downgrade(cursor);
438 	return(error);
439 }
440 
441 /*
442  * Reblock a record's data.  Both the B-Tree element and record pointers
443  * to the data must be adjusted.
444  */
445 static int
446 hammer_reblock_data(struct hammer_ioc_reblock *reblock,
447 		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
448 {
449 	struct hammer_buffer *data_buffer = NULL;
450 	hammer_off_t odata_offset;
451 	hammer_off_t ndata_offset;
452 	int error;
453 	void *ndata;
454 
455 	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
456 					     HAMMER_CURSOR_GET_LEAF);
457 	if (error)
458 		return (error);
459 	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
460 				  elm->leaf.base.rec_type,
461 				  &ndata_offset, &data_buffer,
462 				  0, &error);
463 	if (error)
464 		goto done;
465 	hammer_io_notmeta(data_buffer);
466 
467 	/*
468 	 * Move the data.  Note that we must invalidate any cached
469 	 * data buffer in the cursor before calling blockmap_free.
470 	 * The blockmap_free may free up the entire big-block and
471 	 * will not be able to invalidate it if the cursor is holding
472 	 * a data buffer cached in that big-block.
473 	 */
474 	hammer_modify_buffer_noundo(cursor->trans, data_buffer);
475 	bcopy(cursor->data, ndata, elm->leaf.data_len);
476 	hammer_modify_buffer_done(data_buffer);
477 	hammer_cursor_invalidate_cache(cursor);
478 
479 	hammer_blockmap_free(cursor->trans,
480 			     elm->leaf.data_offset, elm->leaf.data_len);
481 
482 	hammer_modify_node(cursor->trans, cursor->node,
483 			   &elm->leaf.data_offset, sizeof(hammer_off_t));
484 	odata_offset = elm->leaf.data_offset;
485 	elm->leaf.data_offset = ndata_offset;
486 	hammer_modify_node_done(cursor->node);
487 
488 	if (hammer_debug_general & 0x4000) {
489 		hdkprintf("%08x %016llx -> %016llx\n",
490 			(elm ? elm->base.localization : -1),
491 			(long long)odata_offset,
492 			(long long)ndata_offset);
493 	}
494 done:
495 	if (data_buffer)
496 		hammer_rel_buffer(data_buffer, 0);
497 	return (error);
498 }
499 
500 /*
501  * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
502  * the new copy of the leaf node.
503  *
504  * elm is a pointer to the parent element pointing at cursor.node.
505  */
506 static int
507 hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
508 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
509 {
510 	hammer_node_t onode;
511 	hammer_node_t nnode;
512 	int error;
513 
514 	/*
515 	 * Don't supply a hint when allocating the leaf.  Fills are done
516 	 * from the leaf upwards.
517 	 */
518 	onode = cursor->node;
519 	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
520 
521 	if (nnode == NULL)
522 		return (error);
523 
524 	hammer_lock_ex(&nnode->lock);
525 	hammer_modify_node_noundo(cursor->trans, nnode);
526 
527 	hammer_move_node(cursor, elm, onode, nnode);
528 
529 	/*
530 	 * Clean up.
531 	 *
532 	 * The new node replaces the current node in the cursor.  The cursor
533 	 * expects it to be locked so leave it locked.  Discard onode.
534 	 */
535 	hammer_cursor_replaced_node(onode, nnode);
536 	hammer_delete_node(cursor->trans, onode);
537 
538 	if (hammer_debug_general & 0x4000) {
539 		hdkprintf("%08x %016llx -> %016llx\n",
540 			(elm ? elm->base.localization : -1),
541 			(long long)onode->node_offset,
542 			(long long)nnode->node_offset);
543 	}
544 	hammer_modify_node_done(nnode);
545 	cursor->node = nnode;
546 
547 	hammer_unlock(&onode->lock);
548 	hammer_rel_node(onode);
549 
550 	return (error);
551 }
552 
553 /*
554  * Reblock a B-Tree internal node.  The parent must be adjusted to point to
555  * the new copy of the internal node, and the node's children's parent
556  * pointers must also be adjusted to point to the new copy.
557  *
558  * elm is a pointer to the parent element pointing at cursor.node.
559  */
560 static int
561 hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
562 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
563 {
564 	struct hammer_node_lock lockroot;
565 	hammer_node_t onode;
566 	hammer_node_t nnode;
567 	int error;
568 
569 	hammer_node_lock_init(&lockroot, cursor->node);
570 	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
571 	if (error)
572 		goto done;
573 
574 	/*
575 	 * Don't supply a hint when allocating the leaf.  Fills are done
576 	 * from the leaf upwards.
577 	 */
578 	onode = cursor->node;
579 	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
580 
581 	if (nnode == NULL)
582 		goto done;
583 
584 	hammer_lock_ex(&nnode->lock);
585 	hammer_modify_node_noundo(cursor->trans, nnode);
586 
587 	hammer_move_node(cursor, elm, onode, nnode);
588 
589 	/*
590 	 * Clean up.
591 	 *
592 	 * The new node replaces the current node in the cursor.  The cursor
593 	 * expects it to be locked so leave it locked.  Discard onode.
594 	 */
595 	hammer_cursor_replaced_node(onode, nnode);
596 	hammer_delete_node(cursor->trans, onode);
597 
598 	if (hammer_debug_general & 0x4000) {
599 		hdkprintf("%08x %016llx -> %016llx\n",
600 			(elm ? elm->base.localization : -1),
601 			(long long)onode->node_offset,
602 			(long long)nnode->node_offset);
603 	}
604 	hammer_modify_node_done(nnode);
605 	cursor->node = nnode;
606 
607 	hammer_unlock(&onode->lock);
608 	hammer_rel_node(onode);
609 
610 done:
611 	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
612 	return (error);
613 }
614 
615 /*
616  * nnode is a newly allocated node, and now elm becomes the node
617  * element within nnode's parent that represents a pointer to nnode,
618  * or nnode becomes the root node if elm does not exist.
619  */
620 static void
621 hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
622 		 hammer_node_t onode, hammer_node_t nnode)
623 {
624 	int error, i;
625 
626 	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
627 
628 	/*
629 	 * Adjust the parent's pointer to us first.
630 	 */
631 	if (elm) {
632 		/*
633 		 * We are not the root of the B-Tree
634 		 */
635 		KKASSERT(hammer_is_internal_node_elm(elm));
636 		hammer_modify_node(cursor->trans, cursor->parent,
637 				   &elm->internal.subtree_offset,
638 				   sizeof(elm->internal.subtree_offset));
639 		elm->internal.subtree_offset = nnode->node_offset;
640 		hammer_modify_node_done(cursor->parent);
641 	} else {
642 		/*
643 		 * We are the root of the B-Tree
644 		 */
645 		hammer_volume_t volume;
646 		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
647 		KKASSERT(error == 0);
648 
649 		hammer_modify_volume_field(cursor->trans, volume,
650 					   vol0_btree_root);
651 		volume->ondisk->vol0_btree_root = nnode->node_offset;
652 		hammer_modify_volume_done(volume);
653 		hammer_rel_volume(volume, 0);
654 	}
655 
656 	/*
657 	 * Now adjust our children's pointers to us
658 	 * if we are an internal node.
659 	 */
660 	if (nnode->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
661 		for (i = 0; i < nnode->ondisk->count; ++i) {
662 			error = btree_set_parent_of_child(cursor->trans, nnode,
663 					&nnode->ondisk->elms[i]);
664 			if (error)
665 				hpanic("reblock internal node: fixup problem");
666 		}
667 	}
668 }
669