xref: /dragonfly/sys/vfs/hammer/hammer_reblock.c (revision 51f35c5c)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.25 2008/07/09 10:29:20 dillon Exp $
35  */
36 /*
37  * HAMMER reblocker - This code frees up fragmented physical space
38  *
39  * HAMMER only keeps track of free space on a big-block basis.  A big-block
40  * containing holes can only be freed by migrating the remaining data in
41  * that big-block into a new big-block, then freeing the big-block.
42  *
43  * This function is called from an ioctl or via the hammer support thread.
44  */
45 
46 #include "hammer.h"
47 
48 static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
49 				 hammer_cursor_t cursor,
50 				 hammer_btree_elm_t elm);
51 static int hammer_reblock_data(struct hammer_ioc_reblock *reblock,
52 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
53 static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
54 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
55 static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
56 				hammer_cursor_t cursor, hammer_btree_elm_t elm);
57 
58 int
59 hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
60 	       struct hammer_ioc_reblock *reblock)
61 {
62 	struct hammer_cursor cursor;
63 	hammer_btree_elm_t elm;
64 	int error;
65 	int checkspace_count;
66 
67 	if ((reblock->key_beg.localization | reblock->key_end.localization) &
68 	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
69 		return(EINVAL);
70 	}
71 	if (reblock->key_beg.obj_id >= reblock->key_end.obj_id)
72 		return(EINVAL);
73 	if (reblock->free_level < 0)
74 		return(EINVAL);
75 
76 	reblock->key_cur = reblock->key_beg;
77 	reblock->key_cur.localization += ip->obj_localization;
78 
79 	checkspace_count = 0;
80 retry:
81 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
82 	if (error) {
83 		hammer_done_cursor(&cursor);
84 		goto failed;
85 	}
86 	cursor.key_beg.localization = reblock->key_cur.localization;
87 	cursor.key_beg.obj_id = reblock->key_cur.obj_id;
88 	cursor.key_beg.key = HAMMER_MIN_KEY;
89 	cursor.key_beg.create_tid = 1;
90 	cursor.key_beg.delete_tid = 0;
91 	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
92 	cursor.key_beg.obj_type = 0;
93 
94 	cursor.key_end.localization = reblock->key_end.localization +
95 				      ip->obj_localization;
96 	cursor.key_end.obj_id = reblock->key_end.obj_id;
97 	cursor.key_end.key = HAMMER_MAX_KEY;
98 	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
99 	cursor.key_end.delete_tid = 0;
100 	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
101 	cursor.key_end.obj_type = 0;
102 
103 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
104 	cursor.flags |= HAMMER_CURSOR_BACKEND;
105 
106 	/*
107 	 * This flag allows the btree scan code to return internal nodes,
108 	 * so we can reblock them in addition to the leafs.  Only specify it
109 	 * if we intend to reblock B-Tree nodes.
110 	 */
111 	if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
112 		cursor.flags |= HAMMER_CURSOR_REBLOCKING;
113 
114 	error = hammer_btree_first(&cursor);
115 	while (error == 0) {
116 		/*
117 		 * Internal or Leaf node
118 		 */
119 		elm = &cursor.node->ondisk->elms[cursor.index];
120 		reblock->key_cur.obj_id = elm->base.obj_id;
121 		reblock->key_cur.localization = elm->base.localization;
122 
123 		/*
124 		 * Yield to more important tasks
125 		 */
126 		if ((error = hammer_signal_check(trans->hmp)) != 0)
127 			break;
128 		if (trans->hmp->sync_lock.wanted) {
129 			tsleep(trans, 0, "hmrslo", hz / 10);
130 		}
131 
132 		/*
133 		 * If we build up too much meta-data we have to wait for
134 		 * a flush cycle.
135 		 */
136 		if (hammer_flusher_meta_limit(trans->hmp) ||
137 		    hammer_flusher_undo_exhausted(trans, 2)) {
138 			error = EWOULDBLOCK;
139 			break;
140 		}
141 
142 		/*
143 		 * If there is insufficient free space it may be due to
144 		 * reserved bigblocks, which flushing might fix.
145 		 */
146 		if (hammer_checkspace(trans->hmp, HAMMER_CHECKSPACE_SLOP_REBLOCK)) {
147 			if (++checkspace_count == 10) {
148 				error = ENOSPC;
149 			} else {
150 				error = EWOULDBLOCK;
151 			}
152 			break;
153 		}
154 
155 		/*
156 		 * Acquiring the sync_lock prevents the operation from
157 		 * crossing a synchronization boundary.
158 		 *
159 		 * NOTE: cursor.node may have changed on return.
160 		 */
161 		hammer_sync_lock_sh(trans);
162 		error = hammer_reblock_helper(reblock, &cursor, elm);
163 		hammer_sync_unlock(trans);
164 		if (error == 0) {
165 			cursor.flags |= HAMMER_CURSOR_ATEDISK;
166 			error = hammer_btree_iterate(&cursor);
167 		}
168 
169 	}
170 	if (error == ENOENT)
171 		error = 0;
172 	hammer_done_cursor(&cursor);
173 	if (error == EWOULDBLOCK) {
174 		hammer_flusher_sync(trans->hmp);
175 		goto retry;
176 	}
177 	if (error == EDEADLK)
178 		goto retry;
179 	if (error == EINTR) {
180 		reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
181 		error = 0;
182 	}
183 failed:
184 	reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
185 	return(error);
186 }
187 
188 /*
189  * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
190  *
191  * XXX We have no visibility into internal B-Tree nodes at the moment,
192  * only leaf nodes.
193  */
194 static int
195 hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
196 		      hammer_cursor_t cursor, hammer_btree_elm_t elm)
197 {
198 	hammer_mount_t hmp;
199 	hammer_off_t tmp_offset;
200 	int error;
201 	int bytes;
202 	int cur;
203 	int iocflags;
204 
205 	error = 0;
206 	hmp = cursor->trans->hmp;
207 
208 	/*
209 	 * Reblock data.  Note that data embedded in a record is reblocked
210 	 * by the record reblock code.  Data processing only occurs at leaf
211 	 * nodes and for RECORD element types.
212 	 */
213 	if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
214 		goto skip;
215 	if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
216 		return(0);
217 	tmp_offset = elm->leaf.data_offset;
218 	if (tmp_offset == 0)
219 		goto skip;
220 	if (error)
221 		goto skip;
222 
223 	/*
224 	 * NOTE: Localization restrictions may also have been set-up, we can't
225 	 * just set the match flags willy-nilly here.
226 	 */
227 	switch(elm->leaf.base.rec_type) {
228 	case HAMMER_RECTYPE_INODE:
229 		iocflags = HAMMER_IOC_DO_INODES;
230 		break;
231 	case HAMMER_RECTYPE_EXT:
232 	case HAMMER_RECTYPE_FIX:
233 	case HAMMER_RECTYPE_PFS:
234 	case HAMMER_RECTYPE_DIRENTRY:
235 		iocflags = HAMMER_IOC_DO_DIRS;
236 		break;
237 	case HAMMER_RECTYPE_DATA:
238 	case HAMMER_RECTYPE_DB:
239 		iocflags = HAMMER_IOC_DO_DATA;
240 		break;
241 	default:
242 		iocflags = 0;
243 		break;
244 	}
245 	if (reblock->head.flags & iocflags) {
246 		++reblock->data_count;
247 		reblock->data_byte_count += elm->leaf.data_len;
248 		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
249 		if (hammer_debug_general & 0x4000)
250 			kprintf("D %6d/%d\n", bytes, reblock->free_level);
251 		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
252 		    bytes >= reblock->free_level) {
253 			hammer_io_direct_uncache(hmp, &elm->leaf);
254 			error = hammer_cursor_upgrade(cursor);
255 			if (error == 0) {
256 				error = hammer_reblock_data(reblock,
257 							    cursor, elm);
258 			}
259 			if (error == 0) {
260 				++reblock->data_moves;
261 				reblock->data_byte_moves += elm->leaf.data_len;
262 			}
263 		}
264 	}
265 
266 skip:
267 	/*
268 	 * Reblock a B-Tree internal or leaf node.
269 	 */
270 	tmp_offset = cursor->node->node_offset;
271 	if (cursor->index == 0 &&
272 	    error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
273 		++reblock->btree_count;
274 		bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
275 		if (hammer_debug_general & 0x4000)
276 			kprintf("B %6d/%d\n", bytes, reblock->free_level);
277 		if (error == 0 && (cur == 0 || reblock->free_level == 0) &&
278 		    bytes >= reblock->free_level) {
279 			error = hammer_cursor_upgrade(cursor);
280 			if (error == 0) {
281 				if (cursor->parent)
282 					elm = &cursor->parent->ondisk->elms[cursor->parent_index];
283 				else
284 					elm = NULL;
285 				switch(cursor->node->ondisk->type) {
286 				case HAMMER_BTREE_TYPE_LEAF:
287 					error = hammer_reblock_leaf_node(
288 							reblock, cursor, elm);
289 					break;
290 				case HAMMER_BTREE_TYPE_INTERNAL:
291 					error = hammer_reblock_int_node(
292 							reblock, cursor, elm);
293 					break;
294 				default:
295 					panic("Illegal B-Tree node type");
296 				}
297 			}
298 			if (error == 0) {
299 				++reblock->btree_moves;
300 			}
301 		}
302 	}
303 
304 	hammer_cursor_downgrade(cursor);
305 	return(error);
306 }
307 
308 /*
309  * Reblock a record's data.  Both the B-Tree element and record pointers
310  * to the data must be adjusted.
311  */
312 static int
313 hammer_reblock_data(struct hammer_ioc_reblock *reblock,
314 		    hammer_cursor_t cursor, hammer_btree_elm_t elm)
315 {
316 	struct hammer_buffer *data_buffer = NULL;
317 	hammer_off_t ndata_offset;
318 	int error;
319 	void *ndata;
320 
321 	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
322 					     HAMMER_CURSOR_GET_LEAF);
323 	if (error)
324 		return (error);
325 	ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
326 				  elm->leaf.base.rec_type,
327 				  &ndata_offset, &data_buffer, &error);
328 	if (error)
329 		goto done;
330 
331 	/*
332 	 * Move the data
333 	 */
334 	hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0);
335 	bcopy(cursor->data, ndata, elm->leaf.data_len);
336 	hammer_modify_buffer_done(data_buffer);
337 
338 	hammer_blockmap_free(cursor->trans,
339 			     elm->leaf.data_offset, elm->leaf.data_len);
340 
341 	hammer_modify_node(cursor->trans, cursor->node,
342 			   &elm->leaf.data_offset, sizeof(hammer_off_t));
343 	elm->leaf.data_offset = ndata_offset;
344 	hammer_modify_node_done(cursor->node);
345 
346 done:
347 	if (data_buffer)
348 		hammer_rel_buffer(data_buffer, 0);
349 	return (error);
350 }
351 
352 /*
353  * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
354  * the new copy of the leaf node.
355  *
356  * elm is a pointer to the parent element pointing at cursor.node.
357  */
358 static int
359 hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
360 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
361 {
362 	hammer_node_t onode;
363 	hammer_node_t nnode;
364 	int error;
365 
366 	onode = cursor->node;
367 	nnode = hammer_alloc_btree(cursor->trans, &error);
368 
369 	if (nnode == NULL)
370 		return (error);
371 
372 	/*
373 	 * Move the node
374 	 */
375 	hammer_lock_ex(&nnode->lock);
376 	hammer_modify_node_noundo(cursor->trans, nnode);
377 	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
378 
379 	if (elm) {
380 		/*
381 		 * We are not the root of the B-Tree
382 		 */
383 		hammer_modify_node(cursor->trans, cursor->parent,
384 				   &elm->internal.subtree_offset,
385 				   sizeof(elm->internal.subtree_offset));
386 		elm->internal.subtree_offset = nnode->node_offset;
387 		hammer_modify_node_done(cursor->parent);
388 	} else {
389 		/*
390 		 * We are the root of the B-Tree
391 		 */
392                 hammer_volume_t volume;
393 
394                 volume = hammer_get_root_volume(cursor->trans->hmp, &error);
395                 KKASSERT(error == 0);
396 
397                 hammer_modify_volume_field(cursor->trans, volume,
398 					   vol0_btree_root);
399                 volume->ondisk->vol0_btree_root = nnode->node_offset;
400                 hammer_modify_volume_done(volume);
401                 hammer_rel_volume(volume, 0);
402         }
403 
404 	hammer_cursor_replaced_node(onode, nnode);
405 	hammer_delete_node(cursor->trans, onode);
406 
407 	if (hammer_debug_general & 0x4000) {
408 		kprintf("REBLOCK LNODE %016llx -> %016llx\n",
409 			onode->node_offset, nnode->node_offset);
410 	}
411 	hammer_modify_node_done(nnode);
412 	cursor->node = nnode;
413 
414 	hammer_unlock(&onode->lock);
415 	hammer_rel_node(onode);
416 
417 	return (error);
418 }
419 
420 /*
421  * Reblock a B-Tree internal node.  The parent must be adjusted to point to
422  * the new copy of the internal node, and the node's children's parent
423  * pointers must also be adjusted to point to the new copy.
424  *
425  * elm is a pointer to the parent element pointing at cursor.node.
426  */
427 static int
428 hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
429 			 hammer_cursor_t cursor, hammer_btree_elm_t elm)
430 {
431 	hammer_node_locklist_t locklist = NULL;
432 	hammer_node_t onode;
433 	hammer_node_t nnode;
434 	int error;
435 	int i;
436 
437 	error = hammer_btree_lock_children(cursor, &locklist);
438 	if (error)
439 		goto done;
440 
441 	onode = cursor->node;
442 	nnode = hammer_alloc_btree(cursor->trans, &error);
443 
444 	if (nnode == NULL)
445 		goto done;
446 
447 	/*
448 	 * Move the node.  Adjust the parent's pointer to us first.
449 	 */
450 	hammer_lock_ex(&nnode->lock);
451 	hammer_modify_node_noundo(cursor->trans, nnode);
452 	bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
453 
454 	if (elm) {
455 		/*
456 		 * We are not the root of the B-Tree
457 		 */
458 		hammer_modify_node(cursor->trans, cursor->parent,
459 				   &elm->internal.subtree_offset,
460 				   sizeof(elm->internal.subtree_offset));
461 		elm->internal.subtree_offset = nnode->node_offset;
462 		hammer_modify_node_done(cursor->parent);
463 	} else {
464 		/*
465 		 * We are the root of the B-Tree
466 		 */
467                 hammer_volume_t volume;
468 
469                 volume = hammer_get_root_volume(cursor->trans->hmp, &error);
470                 KKASSERT(error == 0);
471 
472                 hammer_modify_volume_field(cursor->trans, volume,
473 					   vol0_btree_root);
474                 volume->ondisk->vol0_btree_root = nnode->node_offset;
475                 hammer_modify_volume_done(volume);
476                 hammer_rel_volume(volume, 0);
477         }
478 
479 	/*
480 	 * Now adjust our children's pointers to us.
481 	 */
482 	for (i = 0; i < nnode->ondisk->count; ++i) {
483 		elm = &nnode->ondisk->elms[i];
484 		error = btree_set_parent(cursor->trans, nnode, elm);
485 		if (error)
486 			panic("reblock internal node: fixup problem");
487 	}
488 
489 	/*
490 	 * Clean up.
491 	 *
492 	 * The new node replaces the current node in the cursor.  The cursor
493 	 * expects it to be locked so leave it locked.  Discard onode.
494 	 */
495 	hammer_cursor_replaced_node(onode, nnode);
496 	hammer_delete_node(cursor->trans, onode);
497 
498 	if (hammer_debug_general & 0x4000) {
499 		kprintf("REBLOCK INODE %016llx -> %016llx\n",
500 			onode->node_offset, nnode->node_offset);
501 	}
502 	hammer_modify_node_done(nnode);
503 	cursor->node = nnode;
504 
505 	hammer_unlock(&onode->lock);
506 	hammer_rel_node(onode);
507 
508 done:
509 	hammer_btree_unlock_children(&locklist);
510 	return (error);
511 }
512 
513