1 /* 2 * Copyright (c) 2008-2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * HAMMER reblocker - This code frees up fragmented physical space 36 * 37 * HAMMER only keeps track of free space on a big-block basis. A big-block 38 * containing holes can only be freed by migrating the remaining data in 39 * that big-block into a new big-block, then freeing the big-block. 40 * 41 * This function is called from an ioctl or via the hammer support thread. 42 */ 43 44 #include "hammer.h" 45 46 static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 47 hammer_cursor_t cursor, 48 hammer_btree_elm_t elm); 49 static int hammer_reblock_data(struct hammer_ioc_reblock *reblock, 50 hammer_cursor_t cursor, hammer_btree_elm_t elm); 51 static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 52 hammer_cursor_t cursor, hammer_btree_elm_t elm); 53 static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 54 hammer_cursor_t cursor, hammer_btree_elm_t elm); 55 56 int 57 hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, 58 struct hammer_ioc_reblock *reblock) 59 { 60 struct hammer_cursor cursor; 61 hammer_btree_elm_t elm; 62 int checkspace_count; 63 int error; 64 int seq; 65 int slop; 66 u_int32_t key_end_localization; 67 68 if ((reblock->key_beg.localization | reblock->key_end.localization) & 69 HAMMER_LOCALIZE_PSEUDOFS_MASK) { 70 return(EINVAL); 71 } 72 if (reblock->key_beg.obj_id >= reblock->key_end.obj_id) 73 return(EINVAL); 74 if (reblock->free_level < 0 || 75 reblock->free_level > HAMMER_BIGBLOCK_SIZE) 76 return(EINVAL); 77 78 /* 79 * A fill_percentage <= 20% is considered an emergency. free_level is 80 * inverted from fill_percentage. 81 */ 82 if (reblock->free_level >= HAMMER_BIGBLOCK_SIZE * 8 / 10) 83 slop = HAMMER_CHKSPC_EMERGENCY; 84 else 85 slop = HAMMER_CHKSPC_REBLOCK; 86 87 /* 88 * Ioctl caller has only set localization type to reblock. 89 * Initialize cursor key localization with ip localization. 90 */ 91 reblock->key_cur = reblock->key_beg; 92 reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 93 if (reblock->allpfs == 0) 94 reblock->key_cur.localization += ip->obj_localization; 95 96 key_end_localization = reblock->key_end.localization; 97 key_end_localization &= HAMMER_LOCALIZE_MASK; 98 if (reblock->allpfs == 0) 99 key_end_localization += ip->obj_localization; 100 else 101 key_end_localization += ((HAMMER_MAX_PFS - 1) << 16); 102 103 checkspace_count = 0; 104 seq = trans->hmp->flusher.done; 105 retry: 106 error = hammer_init_cursor(trans, &cursor, NULL, NULL); 107 if (error) { 108 hammer_done_cursor(&cursor); 109 goto failed; 110 } 111 cursor.key_beg.localization = reblock->key_cur.localization; 112 cursor.key_beg.obj_id = reblock->key_cur.obj_id; 113 cursor.key_beg.key = HAMMER_MIN_KEY; 114 cursor.key_beg.create_tid = 1; 115 cursor.key_beg.delete_tid = 0; 116 cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE; 117 cursor.key_beg.obj_type = 0; 118 119 cursor.key_end.localization = key_end_localization; 120 cursor.key_end.obj_id = reblock->key_end.obj_id; 121 cursor.key_end.key = HAMMER_MAX_KEY; 122 cursor.key_end.create_tid = HAMMER_MAX_TID - 1; 123 cursor.key_end.delete_tid = 0; 124 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 125 cursor.key_end.obj_type = 0; 126 127 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 128 cursor.flags |= HAMMER_CURSOR_BACKEND; 129 cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE; 130 131 /* 132 * This flag allows the btree scan code to return internal nodes, 133 * so we can reblock them in addition to the leafs. Only specify it 134 * if we intend to reblock B-Tree nodes. 135 */ 136 if (reblock->head.flags & HAMMER_IOC_DO_BTREE) 137 cursor.flags |= HAMMER_CURSOR_REBLOCKING; 138 139 error = hammer_btree_first(&cursor); 140 while (error == 0) { 141 /* 142 * Internal or Leaf node 143 */ 144 KKASSERT(cursor.index < cursor.node->ondisk->count); 145 elm = &cursor.node->ondisk->elms[cursor.index]; 146 reblock->key_cur.obj_id = elm->base.obj_id; 147 reblock->key_cur.localization = elm->base.localization; 148 149 /* 150 * Yield to more important tasks 151 */ 152 if ((error = hammer_signal_check(trans->hmp)) != 0) 153 break; 154 155 /* 156 * If there is insufficient free space it may be due to 157 * reserved bigblocks, which flushing might fix. 158 * 159 * We must force a retest in case the unlocked cursor is 160 * moved to the end of the leaf, or moved to an internal 161 * node. 162 * 163 * WARNING: See warnings in hammer_unlock_cursor() function. 164 */ 165 if (hammer_checkspace(trans->hmp, slop)) { 166 if (++checkspace_count == 10) { 167 error = ENOSPC; 168 break; 169 } 170 hammer_unlock_cursor(&cursor); 171 cursor.flags |= HAMMER_CURSOR_RETEST; 172 hammer_flusher_wait(trans->hmp, seq); 173 hammer_lock_cursor(&cursor); 174 seq = hammer_flusher_async(trans->hmp, NULL); 175 goto skip; 176 } 177 178 /* 179 * Acquiring the sync_lock prevents the operation from 180 * crossing a synchronization boundary. 181 * 182 * NOTE: cursor.node may have changed on return. 183 * 184 * WARNING: See warnings in hammer_unlock_cursor() function. 185 */ 186 hammer_sync_lock_sh(trans); 187 error = hammer_reblock_helper(reblock, &cursor, elm); 188 hammer_sync_unlock(trans); 189 190 while (hammer_flusher_meta_halflimit(trans->hmp) || 191 hammer_flusher_undo_exhausted(trans, 2)) { 192 hammer_unlock_cursor(&cursor); 193 hammer_flusher_wait(trans->hmp, seq); 194 hammer_lock_cursor(&cursor); 195 seq = hammer_flusher_async_one(trans->hmp); 196 } 197 198 /* 199 * Setup for iteration, our cursor flags may be modified by 200 * other threads while we are unlocked. 201 */ 202 cursor.flags |= HAMMER_CURSOR_ATEDISK; 203 204 /* 205 * We allocate data buffers, which atm we don't track 206 * dirty levels for because we allow the kernel to write 207 * them. But if we allocate too many we can still deadlock 208 * the buffer cache. 209 * 210 * WARNING: See warnings in hammer_unlock_cursor() function. 211 * (The cursor's node and element may change!) 212 */ 213 if (bd_heatup()) { 214 hammer_unlock_cursor(&cursor); 215 bwillwrite(HAMMER_XBUFSIZE); 216 hammer_lock_cursor(&cursor); 217 } 218 vm_wait_nominal(); 219 skip: 220 if (error == 0) { 221 error = hammer_btree_iterate(&cursor); 222 } 223 } 224 if (error == ENOENT) 225 error = 0; 226 hammer_done_cursor(&cursor); 227 if (error == EWOULDBLOCK) { 228 hammer_flusher_sync(trans->hmp); 229 goto retry; 230 } 231 if (error == EDEADLK) 232 goto retry; 233 if (error == EINTR) { 234 reblock->head.flags |= HAMMER_IOC_HEAD_INTR; 235 error = 0; 236 } 237 failed: 238 reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK; 239 return(error); 240 } 241 242 /* 243 * Reblock the B-Tree (leaf) node, record, and/or data if necessary. 244 * 245 * XXX We have no visibility into internal B-Tree nodes at the moment, 246 * only leaf nodes. 247 */ 248 static int 249 hammer_reblock_helper(struct hammer_ioc_reblock *reblock, 250 hammer_cursor_t cursor, hammer_btree_elm_t elm) 251 { 252 hammer_mount_t hmp; 253 hammer_off_t tmp_offset; 254 hammer_node_ondisk_t ondisk; 255 struct hammer_btree_leaf_elm leaf; 256 int error; 257 int bytes; 258 int cur; 259 int iocflags; 260 261 error = 0; 262 hmp = cursor->trans->hmp; 263 264 /* 265 * Reblock data. Note that data embedded in a record is reblocked 266 * by the record reblock code. Data processing only occurs at leaf 267 * nodes and for RECORD element types. 268 */ 269 if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF) 270 goto skip; 271 if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) 272 return(0); 273 tmp_offset = elm->leaf.data_offset; 274 if (tmp_offset == 0) 275 goto skip; 276 if (error) 277 goto skip; 278 279 /* 280 * NOTE: Localization restrictions may also have been set-up, we can't 281 * just set the match flags willy-nilly here. 282 */ 283 switch(elm->leaf.base.rec_type) { 284 case HAMMER_RECTYPE_INODE: 285 case HAMMER_RECTYPE_SNAPSHOT: 286 case HAMMER_RECTYPE_CONFIG: 287 iocflags = HAMMER_IOC_DO_INODES; 288 break; 289 case HAMMER_RECTYPE_EXT: 290 case HAMMER_RECTYPE_FIX: 291 case HAMMER_RECTYPE_PFS: 292 case HAMMER_RECTYPE_DIRENTRY: 293 iocflags = HAMMER_IOC_DO_DIRS; 294 break; 295 case HAMMER_RECTYPE_DATA: 296 case HAMMER_RECTYPE_DB: 297 iocflags = HAMMER_IOC_DO_DATA; 298 break; 299 default: 300 iocflags = 0; 301 break; 302 } 303 if (reblock->head.flags & iocflags) { 304 ++reblock->data_count; 305 reblock->data_byte_count += elm->leaf.data_len; 306 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 307 if (hammer_debug_general & 0x4000) 308 kprintf("D %6d/%d\n", bytes, reblock->free_level); 309 if (error == 0 && (cur == 0 || reblock->free_level == 0) && 310 bytes >= reblock->free_level) { 311 /* 312 * This is nasty, the uncache code may have to get 313 * vnode locks and because of that we can't hold 314 * the cursor locked. 315 * 316 * WARNING: See warnings in hammer_unlock_cursor() 317 * function. 318 */ 319 leaf = elm->leaf; 320 hammer_unlock_cursor(cursor); 321 hammer_io_direct_uncache(hmp, &leaf); 322 hammer_lock_cursor(cursor); 323 324 /* 325 * elm may have become stale or invalid, reload it. 326 * ondisk variable is temporary only. Note that 327 * cursor->node and thus cursor->node->ondisk may 328 * also changed. 329 */ 330 ondisk = cursor->node->ondisk; 331 elm = &ondisk->elms[cursor->index]; 332 if (cursor->flags & HAMMER_CURSOR_RETEST) { 333 kprintf("hammer: debug: retest on " 334 "reblocker uncache\n"); 335 error = EDEADLK; 336 } else if (ondisk->type != HAMMER_BTREE_TYPE_LEAF || 337 cursor->index >= ondisk->count) { 338 kprintf("hammer: debug: shifted on " 339 "reblocker uncache\n"); 340 error = EDEADLK; 341 } else if (bcmp(&elm->leaf, &leaf, sizeof(leaf))) { 342 kprintf("hammer: debug: changed on " 343 "reblocker uncache\n"); 344 error = EDEADLK; 345 } 346 if (error == 0) 347 error = hammer_cursor_upgrade(cursor); 348 if (error == 0) { 349 KKASSERT(cursor->index < ondisk->count); 350 error = hammer_reblock_data(reblock, 351 cursor, elm); 352 } 353 if (error == 0) { 354 ++reblock->data_moves; 355 reblock->data_byte_moves += elm->leaf.data_len; 356 } 357 } 358 } 359 360 skip: 361 /* 362 * Reblock a B-Tree internal or leaf node. A leaf node is reblocked 363 * on initial entry only (element 0). An internal node is reblocked 364 * when entered upward from its first leaf node only (also element 0). 365 * Further revisits of the internal node (index > 0) are ignored. 366 */ 367 tmp_offset = cursor->node->node_offset; 368 if (cursor->index == 0 && 369 error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) { 370 ++reblock->btree_count; 371 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error); 372 if (hammer_debug_general & 0x4000) 373 kprintf("B %6d/%d\n", bytes, reblock->free_level); 374 if (error == 0 && (cur == 0 || reblock->free_level == 0) && 375 bytes >= reblock->free_level) { 376 error = hammer_cursor_upgrade(cursor); 377 if (error == 0) { 378 if (cursor->parent) { 379 KKASSERT(cursor->parent_index < 380 cursor->parent->ondisk->count); 381 elm = &cursor->parent->ondisk->elms[cursor->parent_index]; 382 } else { 383 elm = NULL; 384 } 385 switch(cursor->node->ondisk->type) { 386 case HAMMER_BTREE_TYPE_LEAF: 387 error = hammer_reblock_leaf_node( 388 reblock, cursor, elm); 389 break; 390 case HAMMER_BTREE_TYPE_INTERNAL: 391 error = hammer_reblock_int_node( 392 reblock, cursor, elm); 393 break; 394 default: 395 panic("Illegal B-Tree node type"); 396 } 397 } 398 if (error == 0) { 399 ++reblock->btree_moves; 400 } 401 } 402 } 403 404 hammer_cursor_downgrade(cursor); 405 return(error); 406 } 407 408 /* 409 * Reblock a record's data. Both the B-Tree element and record pointers 410 * to the data must be adjusted. 411 */ 412 static int 413 hammer_reblock_data(struct hammer_ioc_reblock *reblock, 414 hammer_cursor_t cursor, hammer_btree_elm_t elm) 415 { 416 struct hammer_buffer *data_buffer = NULL; 417 hammer_off_t ndata_offset; 418 int error; 419 void *ndata; 420 421 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA | 422 HAMMER_CURSOR_GET_LEAF); 423 if (error) 424 return (error); 425 ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len, 426 elm->leaf.base.rec_type, 427 &ndata_offset, &data_buffer, 428 0, &error); 429 if (error) 430 goto done; 431 hammer_io_notmeta(data_buffer); 432 433 /* 434 * Move the data. Note that we must invalidate any cached 435 * data buffer in the cursor before calling blockmap_free. 436 * The blockmap_free may free up the entire big-block and 437 * will not be able to invalidate it if the cursor is holding 438 * a data buffer cached in that big block. 439 */ 440 hammer_modify_buffer_noundo(cursor->trans, data_buffer); 441 bcopy(cursor->data, ndata, elm->leaf.data_len); 442 hammer_modify_buffer_done(data_buffer); 443 hammer_cursor_invalidate_cache(cursor); 444 445 hammer_blockmap_free(cursor->trans, 446 elm->leaf.data_offset, elm->leaf.data_len); 447 448 hammer_modify_node(cursor->trans, cursor->node, 449 &elm->leaf.data_offset, sizeof(hammer_off_t)); 450 elm->leaf.data_offset = ndata_offset; 451 hammer_modify_node_done(cursor->node); 452 453 done: 454 if (data_buffer) 455 hammer_rel_buffer(data_buffer, 0); 456 return (error); 457 } 458 459 /* 460 * Reblock a B-Tree leaf node. The parent must be adjusted to point to 461 * the new copy of the leaf node. 462 * 463 * elm is a pointer to the parent element pointing at cursor.node. 464 */ 465 static int 466 hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, 467 hammer_cursor_t cursor, hammer_btree_elm_t elm) 468 { 469 hammer_node_t onode; 470 hammer_node_t nnode; 471 int error; 472 473 /* 474 * Don't supply a hint when allocating the leaf. Fills are done 475 * from the leaf upwards. 476 */ 477 onode = cursor->node; 478 nnode = hammer_alloc_btree(cursor->trans, 0, &error); 479 480 if (nnode == NULL) 481 return (error); 482 483 /* 484 * Move the node 485 */ 486 hammer_lock_ex(&nnode->lock); 487 hammer_modify_node_noundo(cursor->trans, nnode); 488 bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 489 490 if (elm) { 491 /* 492 * We are not the root of the B-Tree 493 */ 494 hammer_modify_node(cursor->trans, cursor->parent, 495 &elm->internal.subtree_offset, 496 sizeof(elm->internal.subtree_offset)); 497 elm->internal.subtree_offset = nnode->node_offset; 498 hammer_modify_node_done(cursor->parent); 499 } else { 500 /* 501 * We are the root of the B-Tree 502 */ 503 hammer_volume_t volume; 504 505 volume = hammer_get_root_volume(cursor->trans->hmp, &error); 506 KKASSERT(error == 0); 507 508 hammer_modify_volume_field(cursor->trans, volume, 509 vol0_btree_root); 510 volume->ondisk->vol0_btree_root = nnode->node_offset; 511 hammer_modify_volume_done(volume); 512 hammer_rel_volume(volume, 0); 513 } 514 515 hammer_cursor_replaced_node(onode, nnode); 516 hammer_delete_node(cursor->trans, onode); 517 518 if (hammer_debug_general & 0x4000) { 519 kprintf("REBLOCK LNODE %016llx -> %016llx\n", 520 (long long)onode->node_offset, 521 (long long)nnode->node_offset); 522 } 523 hammer_modify_node_done(nnode); 524 cursor->node = nnode; 525 526 hammer_unlock(&onode->lock); 527 hammer_rel_node(onode); 528 529 return (error); 530 } 531 532 /* 533 * Reblock a B-Tree internal node. The parent must be adjusted to point to 534 * the new copy of the internal node, and the node's children's parent 535 * pointers must also be adjusted to point to the new copy. 536 * 537 * elm is a pointer to the parent element pointing at cursor.node. 538 */ 539 static int 540 hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, 541 hammer_cursor_t cursor, hammer_btree_elm_t elm) 542 { 543 struct hammer_node_lock lockroot; 544 hammer_node_t onode; 545 hammer_node_t nnode; 546 int error; 547 int i; 548 549 hammer_node_lock_init(&lockroot, cursor->node); 550 error = hammer_btree_lock_children(cursor, 1, &lockroot, NULL); 551 if (error) 552 goto done; 553 554 onode = cursor->node; 555 nnode = hammer_alloc_btree(cursor->trans, 0, &error); 556 557 if (nnode == NULL) 558 goto done; 559 560 /* 561 * Move the node. Adjust the parent's pointer to us first. 562 */ 563 hammer_lock_ex(&nnode->lock); 564 hammer_modify_node_noundo(cursor->trans, nnode); 565 bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); 566 567 if (elm) { 568 /* 569 * We are not the root of the B-Tree 570 */ 571 hammer_modify_node(cursor->trans, cursor->parent, 572 &elm->internal.subtree_offset, 573 sizeof(elm->internal.subtree_offset)); 574 elm->internal.subtree_offset = nnode->node_offset; 575 hammer_modify_node_done(cursor->parent); 576 } else { 577 /* 578 * We are the root of the B-Tree 579 */ 580 hammer_volume_t volume; 581 582 volume = hammer_get_root_volume(cursor->trans->hmp, &error); 583 KKASSERT(error == 0); 584 585 hammer_modify_volume_field(cursor->trans, volume, 586 vol0_btree_root); 587 volume->ondisk->vol0_btree_root = nnode->node_offset; 588 hammer_modify_volume_done(volume); 589 hammer_rel_volume(volume, 0); 590 } 591 592 /* 593 * Now adjust our children's pointers to us. 594 */ 595 for (i = 0; i < nnode->ondisk->count; ++i) { 596 elm = &nnode->ondisk->elms[i]; 597 error = btree_set_parent(cursor->trans, nnode, elm); 598 if (error) 599 panic("reblock internal node: fixup problem"); 600 } 601 602 /* 603 * Clean up. 604 * 605 * The new node replaces the current node in the cursor. The cursor 606 * expects it to be locked so leave it locked. Discard onode. 607 */ 608 hammer_cursor_replaced_node(onode, nnode); 609 hammer_delete_node(cursor->trans, onode); 610 611 if (hammer_debug_general & 0x4000) { 612 kprintf("REBLOCK INODE %016llx -> %016llx\n", 613 (long long)onode->node_offset, 614 (long long)nnode->node_offset); 615 } 616 hammer_modify_node_done(nnode); 617 cursor->node = nnode; 618 619 hammer_unlock(&onode->lock); 620 hammer_rel_node(onode); 621 622 done: 623 hammer_btree_unlock_children(cursor->trans->hmp, &lockroot, NULL); 624 return (error); 625 } 626 627