xref: /dragonfly/sys/vfs/hammer/hammer_reblock.c (revision 279dd846)
1 /*
2  * Copyright (c) 2008-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER reblocker - This code frees up fragmented physical space
36  *
37  * HAMMER only keeps track of free space on a big-block basis.  A big-block
38  * containing holes can only be freed by migrating the remaining data in
39  * that big-block into a new big-block, then freeing the big-block.
40  *
41  * This function is called from an ioctl or via the hammer support thread.
42  */
43 
44 #include "hammer.h"
45 
46 static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
47 				 hammer_cursor_t cursor,
48 				 hammer_btree_elm_t elm);
49 static int hammer_reblock_data(struct hammer_ioc_reblock *reblock,
50 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
51 static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
52 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
53 static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
54 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
55 static void hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
56 				hammer_node_t onode, hammer_node_t nnode);
57 
58 int
59 hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
60 		   struct hammer_ioc_reblock *reblock)
61 {
62 	struct hammer_cursor cursor;
63 	hammer_btree_elm_t elm;
64 	int checkspace_count;
65 	int error;
66 	int seq;
67 	int slop;
68 	u_int32_t key_end_localization;
69 
70 	if ((reblock->key_beg.localization | reblock->key_end.localization) &
71 	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
72 		return(EINVAL);
73 	}
74 	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
75 		return(EINVAL);
76 	if (reblock->free_level < 0 ||
77 	    reblock->free_level > HAMMER_BIGBLOCK_SIZE)
78 		return(EINVAL);
79 
80 	/*
81 	 * A fill_percentage <= 20% is considered an emergency.  free_level is
82 	 * inverted from fill_percentage.
83 	 */
84 	if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10)
85 		slop = HAMMER_CHKSPC_EMERGENCY;
86 	else
87 		slop = HAMMER_CHKSPC_REBLOCK;
88 
89 	/*
90 	 * Ioctl caller has only set localization type to reblock.
91 	 * Initialize cursor key localization with ip localization.
92 	 */
93 	reblock->key_cur = reblock->key_beg;
94 	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
95 	if (reblock->allpfs == 0)
96 		reblock->key_cur.localization += ip->obj_localization;
97 
98 	key_end_localization = reblock->key_end.localization;
99 	key_end_localization &= HAMMER_LOCALIZE_MASK;
100 	if (reblock->allpfs == 0)
101 		key_end_localization += ip->obj_localization;
102 	else
103 		key_end_localization += ((HAMMER_MAX_PFS - 1) << 16);
104 
105 	checkspace_count = 0;
106 	seq = trans->hmp->flusher.done;
107 retry:
108 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
109 	if (error) {
110 		hammer_done_cursor(&cursor);
111 		goto failed;
112 	}
113 	cursor.key_beg.localization = reblock->key_cur.localization;
114 	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
115 	cursor.key_beg.key = HAMMER_MIN_KEY;
116 	cursor.key_beg.create_tid = 1;
117 	cursor.key_beg.delete_tid = 0;
118 	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
119 	cursor.key_beg.obj_type = 0;
120 
121 	cursor.key_end.localization = key_end_localization;
122 	cursor.key_end.obj_id = reblock->key_end.obj_id;
123 	cursor.key_end.key = HAMMER_MAX_KEY;
124 	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
125 	cursor.key_end.delete_tid = 0;
126 	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
127 	cursor.key_end.obj_type = 0;
128 
129 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
130 	cursor.flags |= HAMMER_CURSOR_BACKEND;
131 	cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;
132 
133 	/*
134 	 * This flag allows the btree scan code to return internal nodes,
135 	 * so we can reblock them in addition to the leafs.  Only specify it
136 	 * if we intend to reblock B-Tree nodes.
137 	 */
138 	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
139 		cursor.flags |= HAMMER_CURSOR_REBLOCKING;
140 
141 	error = hammer_btree_first(&cursor);
142 	while (error == 0) {
143 		/*
144 		 * Internal or Leaf node
145 		 */
146 		KKASSERT(cursor.index < cursor.node->ondisk->count);
147 		elm = &cursor.node->ondisk->elms[cursor.index];
148 		reblock->key_cur.obj_id = elm->base.obj_id;
149 		reblock->key_cur.localization = elm->base.localization;
150 
151 		/*
152 		 * Yield to more important tasks
153 		 */
154 		if ((error = hammer_signal_check(trans->hmp)) != 0)
155 			break;
156 
157 		/*
158 		 * If there is insufficient free space it may be due to
159 		 * reserved big-blocks, which flushing might fix.
160 		 *
161 		 * We must force a retest in case the unlocked cursor is
162 		 * moved to the end of the leaf, or moved to an internal
163 		 * node.
164 		 *
165 		 * WARNING: See warnings in hammer_unlock_cursor() function.
166 		 */
167 		if (hammer_checkspace(trans->hmp, slop)) {
168 			if (++checkspace_count == 10) {
169 				error = ENOSPC;
170 				break;
171 			}
172 			hammer_unlock_cursor(&cursor);
173 			cursor.flags |= HAMMER_CURSOR_RETEST;
174 			hammer_flusher_wait(trans->hmp, seq);
175 			hammer_lock_cursor(&cursor);
176 			seq = hammer_flusher_async(trans->hmp, NULL);
177 			goto skip;
178 		}
179 
180 		/*
181 		 * Acquiring the sync_lock prevents the operation from
182 		 * crossing a synchronization boundary.
183 		 *
184 		 * NOTE: cursor.node may have changed on return.
185 		 *
186 		 * WARNING: See warnings in hammer_unlock_cursor() function.
187 		 */
188 		hammer_sync_lock_sh(trans);
189 		error = hammer_reblock_helper(reblock, &cursor, elm);
190 		hammer_sync_unlock(trans);
191 
192 		while (hammer_flusher_meta_halflimit(trans->hmp) ||
193 		       hammer_flusher_undo_exhausted(trans, 2)) {
194 			hammer_unlock_cursor(&cursor);
195 			hammer_flusher_wait(trans->hmp, seq);
196 			hammer_lock_cursor(&cursor);
197 			seq = hammer_flusher_async_one(trans->hmp);
198 		}
199 
200 		/*
201 		 * Setup for iteration, our cursor flags may be modified by
202 		 * other threads while we are unlocked.
203 		 */
204 		cursor.flags |= HAMMER_CURSOR_ATEDISK;
205 
206 		/*
207 		 * We allocate data buffers, which atm we don't track
208 		 * dirty levels for because we allow the kernel to write
209 		 * them.  But if we allocate too many we can still deadlock
210 		 * the buffer cache.
211 		 *
212 		 * WARNING: See warnings in hammer_unlock_cursor() function.
213 		 *	    (The cursor's node and element may change!)
214 		 */
215 		if (bd_heatup()) {
216 			hammer_unlock_cursor(&cursor);
217 			bwillwrite(HAMMER_XBUFSIZE);
218 			hammer_lock_cursor(&cursor);
219 		}
220 		vm_wait_nominal();
221 skip:
222 		if (error == 0) {
223 			error = hammer_btree_iterate(&cursor);
224 		}
225 	}
226 	if (error == ENOENT)
227 		error = 0;
228 	hammer_done_cursor(&cursor);
229 	if (error == EWOULDBLOCK) {
230 		hammer_flusher_sync(trans->hmp);
231 		goto retry;
232 	}
233 	if (error == EDEADLK)
234 		goto retry;
235 	if (error == EINTR) {
236 		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
237 		error = 0;
238 	}
239 failed:
240 	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
241 	return(error);
242 }
243 
244 /*
245  * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
246  *
247  * XXX We have no visibility into internal B-Tree nodes at the moment,
248  * only leaf nodes.
249  */
250 static int
251 hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
252 		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
253 {
254 	hammer_mount_t hmp;
255 	hammer_off_t tmp_offset;
256 	hammer_node_ondisk_t ondisk;
257 	struct hammer_btree_leaf_elm leaf;
258 	int error;
259 	int bytes;
260 	int cur;
261 	int iocflags;
262 
263 	error = 0;
264 	hmp = cursor->trans->hmp;
265 
266 	/*
267 	 * Reblock data.  Note that data embedded in a record is reblocked
268 	 * by the record reblock code.  Data processing only occurs at leaf
269 	 * nodes and for RECORD element types.
270 	 */
271 	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
272 		goto skip;
273 	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
274 		return(0);
275 	tmp_offset = elm->leaf.data_offset;
276 	if (tmp_offset == 0)
277 		goto skip;
278 
279 	/*
280 	 * If reblock->vol_no is specified we only want to reblock data
281 	 * in that volume, but ignore everything else.
282 	 */
283 	if (reblock->vol_no != -1 &&
284 	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
285 		goto skip;
286 
287 	/*
288 	 * NOTE: Localization restrictions may also have been set-up, we can't
289 	 *	 just set the match flags willy-nilly here.
290 	 */
291 	switch(elm->leaf.base.rec_type) {
292 	case HAMMER_RECTYPE_INODE:
293 	case HAMMER_RECTYPE_SNAPSHOT:
294 	case HAMMER_RECTYPE_CONFIG:
295 		iocflags = HAMMER_IOC_DO_INODES;
296 		break;
297 	case HAMMER_RECTYPE_EXT:
298 	case HAMMER_RECTYPE_FIX:
299 	case HAMMER_RECTYPE_PFS:
300 	case HAMMER_RECTYPE_DIRENTRY:
301 		iocflags = HAMMER_IOC_DO_DIRS;
302 		break;
303 	case HAMMER_RECTYPE_DATA:
304 	case HAMMER_RECTYPE_DB:
305 		iocflags = HAMMER_IOC_DO_DATA;
306 		break;
307 	default:
308 		iocflags = 0;
309 		break;
310 	}
311 	if (reblock->head.flags & iocflags) {
312 		++reblock->data_count;
313 		reblock->data_byte_count += elm->leaf.data_len;
314 		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
315 		if (hammer_debug_general & 0x4000)
316 			kprintf("D %6d/%d\n", bytes, reblock->free_level);
317 		/*
318 		 * Start data reblock if
319 		 * 1. there is no error
320 		 * 2. the data and allocator offset are not in the same
321 		 *    big-block, or free level threshold is 0
322 		 * 3. free bytes in the data's big-block is larger than
323 		 *    free level threshold (means if threshold is 0 then
324 		 *    do reblock no matter what).
325 		 */
326 		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
327 		    bytes >= reblock->free_level) {
328 			/*
329 			 * This is nasty, the uncache code may have to get
330 			 * vnode locks and because of that we can't hold
331 			 * the cursor locked.
332 			 *
333 			 * WARNING: See warnings in hammer_unlock_cursor()
334 			 *	    function.
335 			 */
336 			leaf = elm->leaf;
337 			hammer_unlock_cursor(cursor);
338 			hammer_io_direct_uncache(hmp, &leaf);
339 			hammer_lock_cursor(cursor);
340 
341 			/*
342 			 * elm may have become stale or invalid, reload it.
343 			 * ondisk variable is temporary only.  Note that
344 			 * cursor->node and thus cursor->node->ondisk may
345 			 * also changed.
346 			 */
347 			ondisk = cursor->node->ondisk;
348 			elm = &ondisk->elms[cursor->index];
349 			if (cursor->flags & HAMMER_CURSOR_RETEST) {
350 				kprintf("HAMMER: debug: retest on "
351 					"reblocker uncache\n");
352 				error = EDEADLK;
353 			} else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF ||
354 				   cursor->index >= ondisk->count) {
355 				kprintf("HAMMER: debug: shifted on "
356 					"reblocker uncache\n");
357 				error = EDEADLK;
358 			} else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) {
359 				kprintf("HAMMER: debug: changed on "
360 					"reblocker uncache\n");
361 				error = EDEADLK;
362 			}
363 			if (error == 0)
364 				error = hammer_cursor_upgrade(cursor);
365 			if (error == 0) {
366 				KKASSERT(cursor->index < ondisk->count);
367 				error = hammer_reblock_data(reblock,
368 							    cursor, elm);
369 			}
370 			if (error == 0) {
371 				++reblock->data_moves;
372 				reblock->data_byte_moves += elm->leaf.data_len;
373 			}
374 		}
375 	}
376 
377 skip:
378 	/*
379 	 * Reblock a B-Tree internal or leaf node.  A leaf node is reblocked
380 	 * on initial entry only (element 0).  An internal node is reblocked
381 	 * when entered upward from its first leaf node only (also element 0,
382 	 * see hammer_btree_iterate() where cursor moves up and may return).
383 	 * Further revisits of the internal node (index > 0) are ignored.
384 	 */
385 	tmp_offset = cursor->node->node_offset;
386 
387 	/*
388 	 * If reblock->vol_no is specified we only want to reblock data
389 	 * in that volume, but ignore everything else.
390 	 */
391 	if (reblock->vol_no != -1 &&
392 	    reblock->vol_no != HAMMER_VOL_DECODE(tmp_offset))
393 		goto end;
394 
395 	if (cursor->index == 0 &&
396 	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
397 		++reblock->btree_count;
398 		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
399 		if (hammer_debug_general & 0x4000)
400 			kprintf("B %6d/%d\n", bytes, reblock->free_level);
401 		/*
402 		 * Start node reblock if
403 		 * 1. there is no error
404 		 * 2. the node and allocator offset are not in the same
405 		 *    big-block, or free level threshold is 0
406 		 * 3. free bytes in the node's big-block is larger than
407 		 *    free level threshold (means if threshold is 0 then
408 		 *    do reblock no matter what).
409 		 */
410 		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
411 		    bytes >= reblock->free_level) {
412 			error = hammer_cursor_upgrade(cursor);
413 			if (error == 0) {
414 				if (cursor->parent) {
415 					KKASSERT(cursor->parent_index <
416 						 cursor->parent->ondisk->count);
417 					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
418 				} else {
419 					elm = NULL;
420 				}
421 				switch(cursor->node->ondisk->type) {
422 				case HAMMER_BTREE_TYPE_LEAF:
423 					error = hammer_reblock_leaf_node(
424 							reblock, cursor, elm);
425 					break;
426 				case HAMMER_BTREE_TYPE_INTERNAL:
427 					error = hammer_reblock_int_node(
428 							reblock, cursor, elm);
429 					break;
430 				default:
431 					panic("Illegal B-Tree node type");
432 				}
433 			}
434 			if (error == 0) {
435 				++reblock->btree_moves;
436 			}
437 		}
438 	}
439 end:
440 	hammer_cursor_downgrade(cursor);
441 	return(error);
442 }
443 
444 /*
445  * Reblock a record's data.  Both the B-Tree element and record pointers
446  * to the data must be adjusted.
447  */
448 static int
449 hammer_reblock_data(struct hammer_ioc_reblock *reblock,
450 		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
451 {
452 	struct hammer_buffer *data_buffer = NULL;
453 	hammer_off_t odata_offset;
454 	hammer_off_t ndata_offset;
455 	int error;
456 	void *ndata;
457 
458 	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
459 					     HAMMER_CURSOR_GET_LEAF);
460 	if (error)
461 		return (error);
462 	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
463 				  elm->leaf.base.rec_type,
464 				  &ndata_offset, &data_buffer,
465 				  0, &error);
466 	if (error)
467 		goto done;
468 	hammer_io_notmeta(data_buffer);
469 
470 	/*
471 	 * Move the data.  Note that we must invalidate any cached
472 	 * data buffer in the cursor before calling blockmap_free.
473 	 * The blockmap_free may free up the entire big-block and
474 	 * will not be able to invalidate it if the cursor is holding
475 	 * a data buffer cached in that big-block.
476 	 */
477 	hammer_modify_buffer_noundo(cursor->trans, data_buffer);
478 	bcopy(cursor->data, ndata, elm->leaf.data_len);
479 	hammer_modify_buffer_done(data_buffer);
480 	hammer_cursor_invalidate_cache(cursor);
481 
482 	hammer_blockmap_free(cursor->trans,
483 			     elm->leaf.data_offset, elm->leaf.data_len);
484 
485 	hammer_modify_node(cursor->trans, cursor->node,
486 			   &elm->leaf.data_offset, sizeof(hammer_off_t));
487 	odata_offset = elm->leaf.data_offset;
488 	elm->leaf.data_offset = ndata_offset;
489 	hammer_modify_node_done(cursor->node);
490 
491 	if (hammer_debug_general & 0x4000) {
492 		kprintf("REBLOCK DATA %08x %016llx -> %016llx\n",
493 			(elm ? elm->base.localization : -1),
494 			(long long)odata_offset,
495 			(long long)ndata_offset);
496 	}
497 done:
498 	if (data_buffer)
499 		hammer_rel_buffer(data_buffer, 0);
500 	return (error);
501 }
502 
503 /*
504  * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
505  * the new copy of the leaf node.
506  *
507  * elm is a pointer to the parent element pointing at cursor.node.
508  */
509 static int
510 hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
511 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
512 {
513 	hammer_node_t onode;
514 	hammer_node_t nnode;
515 	int error;
516 
517 	/*
518 	 * Don't supply a hint when allocating the leaf.  Fills are done
519 	 * from the leaf upwards.
520 	 */
521 	onode = cursor->node;
522 	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
523 
524 	if (nnode == NULL)
525 		return (error);
526 
527 	hammer_lock_ex(&nnode->lock);
528 	hammer_modify_node_noundo(cursor->trans, nnode);
529 
530 	hammer_move_node(cursor, elm, onode, nnode);
531 
532 	/*
533 	 * Clean up.
534 	 *
535 	 * The new node replaces the current node in the cursor.  The cursor
536 	 * expects it to be locked so leave it locked.  Discard onode.
537 	 */
538 	hammer_cursor_replaced_node(onode, nnode);
539 	hammer_delete_node(cursor->trans, onode);
540 
541 	if (hammer_debug_general & 0x4000) {
542 		kprintf("REBLOCK %cNODE %08x %016llx -> %016llx\n",
543 			nnode->ondisk->type,
544 			(elm ? elm->base.localization : -1),
545 			(long long)onode->node_offset,
546 			(long long)nnode->node_offset);
547 	}
548 	hammer_modify_node_done(nnode);
549 	cursor->node = nnode;
550 
551 	hammer_unlock(&onode->lock);
552 	hammer_rel_node(onode);
553 
554 	return (error);
555 }
556 
557 /*
558  * Reblock a B-Tree internal node.  The parent must be adjusted to point to
559  * the new copy of the internal node, and the node's children's parent
560  * pointers must also be adjusted to point to the new copy.
561  *
562  * elm is a pointer to the parent element pointing at cursor.node.
563  */
564 static int
565 hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
566 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
567 {
568 	struct hammer_node_lock lockroot;
569 	hammer_node_t onode;
570 	hammer_node_t nnode;
571 	int error;
572 
573 	hammer_node_lock_init(&lockroot, cursor->node);
574 	error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL);
575 	if (error)
576 		goto done;
577 
578 	/*
579 	 * Don't supply a hint when allocating the leaf.  Fills are done
580 	 * from the leaf upwards.
581 	 */
582 	onode = cursor->node;
583 	nnode = hammer_alloc_btree(cursor->trans, 0, &error);
584 
585 	if (nnode == NULL)
586 		goto done;
587 
588 	hammer_lock_ex(&nnode->lock);
589 	hammer_modify_node_noundo(cursor->trans, nnode);
590 
591 	hammer_move_node(cursor, elm, onode, nnode);
592 
593 	/*
594 	 * Clean up.
595 	 *
596 	 * The new node replaces the current node in the cursor.  The cursor
597 	 * expects it to be locked so leave it locked.  Discard onode.
598 	 */
599 	hammer_cursor_replaced_node(onode, nnode);
600 	hammer_delete_node(cursor->trans, onode);
601 
602 	if (hammer_debug_general & 0x4000) {
603 		kprintf("REBLOCK %cNODE %08x %016llx -> %016llx\n",
604 			nnode->ondisk->type,
605 			(elm ? elm->base.localization : -1),
606 			(long long)onode->node_offset,
607 			(long long)nnode->node_offset);
608 	}
609 	hammer_modify_node_done(nnode);
610 	cursor->node = nnode;
611 
612 	hammer_unlock(&onode->lock);
613 	hammer_rel_node(onode);
614 
615 done:
616 	hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL);
617 	return (error);
618 }
619 
620 /*
621  * nnode is a newly allocated node, and now elm becomes the node
622  * element within nnode's parent that represents a pointer to nnode,
623  * or nnode becomes the root node if elm does not exist.
624  */
625 static void
626 hammer_move_node(hammer_cursor_t cursor, hammer_btree_elm_t elm,
627 		 hammer_node_t onode, hammer_node_t nnode)
628 {
629 	int error, i;
630 
631 	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
632 
633 	/*
634 	 * Adjust the parent's pointer to us first.
635 	 */
636 	if (elm) {
637 		/*
638 		 * We are not the root of the B-Tree
639 		 */
640 		hammer_modify_node(cursor->trans, cursor->parent,
641 				   &elm->internal.subtree_offset,
642 				   sizeof(elm->internal.subtree_offset));
643 		elm->internal.subtree_offset = nnode->node_offset;
644 		hammer_modify_node_done(cursor->parent);
645 	} else {
646 		/*
647 		 * We are the root of the B-Tree
648 		 */
649 		hammer_volume_t volume;
650 		volume = hammer_get_root_volume(cursor->trans->hmp, &error);
651 		KKASSERT(error == 0);
652 
653 		hammer_modify_volume_field(cursor->trans, volume,
654 					   vol0_btree_root);
655 		volume->ondisk->vol0_btree_root = nnode->node_offset;
656 		hammer_modify_volume_done(volume);
657 		hammer_rel_volume(volume, 0);
658 	}
659 
660 	/*
661 	 * Now adjust our children's pointers to us
662 	 * if we are an internal node.
663 	 */
664 	if (nnode->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
665 		for (i = 0; i < nnode->ondisk->count; ++i) {
666 			error = btree_set_parent(cursor->trans, nnode,
667 					&nnode->ondisk->elms[i]);
668 			if (error)
669 				panic("reblock internal node: fixup problem");
670 		}
671 	}
672 }
673