/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
				    hammer_off_t base_offset, int zone,
				    struct hammer_blockmap_layer2 *layer2);
static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);

/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
	     hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
	if (res1->zone_offset < res2->zone_offset)
		return(-1);
	if (res1->zone_offset > res2->zone_offset)
		return(1);
	return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
		      int bytes, int *errorp)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	hammer_reserve_t resv;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t result_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int loops = 0;
	int offset;		/* offset within big-block */

	hmp = trans->hmp;

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

	/*
	 * Setup
	 */
	root_volume = trans->rootvol;
	*errorp = 0;
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
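	/*
	 * next_offset is a per-zone allocation rotor taken from the zone's
	 * in-memory blockmap header.  The loop below advances it past
	 * boundaries, foreign-owned big-blocks, and reserved space until
	 * the request fits, wrapping to the start of the zone at most
	 * once before giving up with ENOSPC.
	 */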
	next_offset = blockmap->next_offset;
again:
	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			result_offset = 0;
			*errorp = ENOSPC;
			goto failed;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a large-block boundary.
	 */
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}
	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	if (*errorp) {
		result_offset = 0;
		goto failed;
	}

	/*
	 * Check CRC.
	 */
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
			panic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2; skip to the next layer1 entry.
	 */
	if (offset == 0 && layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	if (*errorp) {
		result_offset = 0;
		goto failed;
	}

	/*
	 * Check CRC.  This can race another thread holding the lock
	 * and in the middle of modifying layer2.
	 */
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
			panic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Skip the layer if the zone is owned by someone other than us.
	 */
	if (layer2->zone && layer2->zone != zone) {
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		next_offset += layer2->append_off - offset;
		goto again;
	}
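	/*
	 * The zone and append_off checks above were made without holding
	 * the blockmap lock and may race other allocators; they only serve
	 * to skip obviously unusable big-blocks cheaply.  Anything that
	 * passes is re-verified under the lock below before being acted
	 * upon.
	 */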
	/*
	 * We need the lock from this point on.  We have to re-check zone
	 * ownership after acquiring the lock and also check for reservations.
	 */
	hammer_lock_ex(&hmp->blkmap_lock);

	if (layer2->zone && layer2->zone != zone) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * The bigblock might be reserved by another zone.  If it is reserved
	 * by our zone we may have to move next_offset past the append_off.
	 */
	base_off = (next_offset &
		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
		    HAMMER_ZONE_RAW_BUFFER;
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if (resv->zone != zone) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
				      ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
		if (offset < resv->append_off) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset += resv->append_off - offset;
			goto again;
		}
		++resv->refs;
	}

	/*
	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
	 * of the layer for real.  At this point we've validated any
	 * reservation that might exist and can just ignore resv.
	 */
	if (layer2->zone == 0) {
		/*
		 * Assign the bigblock to our zone
		 */
		hammer_modify_buffer(trans, buffer1,
				     layer1, sizeof(*layer1));
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1,
					   HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(buffer1);
		hammer_modify_buffer(trans, buffer2,
				     layer2, sizeof(*layer2));
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans, trans->rootvol,
					   vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
			root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	} else {
		hammer_modify_buffer(trans, buffer2,
				     layer2, sizeof(*layer2));
	}
	KKASSERT(layer2->zone == zone);

	layer2->bytes_free -= bytes;
	KKASSERT(layer2->append_off <= offset);
	layer2->append_off = offset + bytes;
	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	KKASSERT(layer2->bytes_free >= 0);

	/*
	 * We hold the blockmap lock and should be the only ones
	 * capable of modifying resv->append_off.  Track the allocation
	 * as appropriate.
	 */
	KKASSERT(bytes != 0);
	if (resv) {
		KKASSERT(resv->append_off <= offset);
		resv->append_off = offset + bytes;
		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
		hammer_blockmap_reserve_complete(hmp, resv);
	}

	/*
	 * If we are allocating from the base of a new buffer we can avoid
	 * a disk read by calling hammer_bnew_ext().
	 */
	if ((next_offset & HAMMER_BUFMASK) == 0) {
		hammer_bnew_ext(trans->hmp, next_offset, bytes,
				errorp, &buffer3);
	}
	result_offset = next_offset;
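	/*
	 * NOTE: blockmap->next_offset here is the in-memory copy in
	 * hmp->blockmap[].  hammer_modify_volume() is passed a NULL
	 * transaction, which (as elsewhere in HAMMER) marks the volume
	 * modified without generating UNDO for this update.
	 */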
	/*
	 * Process allocated result_offset
	 */
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	blockmap->next_offset = next_offset + bytes;
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->blkmap_lock);
failed:

	/*
	 * Cleanup
	 */
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);

	return(result_offset);
}

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
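/*
 * A typical reserve/finalize sequence looks roughly like this
 * (illustrative sketch only, not an exact call site):
 *
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *	... frontend issues direct write I/O against zone_off ...
 *	hammer_blockmap_finalize(trans, resv, zone_off, bytes);  (backend)
 *	hammer_blockmap_reserve_complete(hmp, resv);
 */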
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
			hammer_off_t *zone_offp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv;
	hammer_reserve_t resx;
	int loops = 0;
	int offset;

	/*
	 * Setup
	 */
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(NULL);
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

	next_offset = blockmap->next_offset;
again:
	resv = NULL;
	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			*errorp = ENOSPC;
			goto failed;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a large-block boundary.
	 */
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}
	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC.
	 */
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
			panic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2; skip to the next layer1 entry.
	 */
	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
	    layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC if not allocating into uninitialized space (which we
	 * aren't when reserving space).
	 */
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
			panic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Skip the layer if the zone is owned by someone other than us.
	 */
	if (layer2->zone && layer2->zone != zone) {
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * We need the lock from this point on.  We have to re-check zone
	 * ownership after acquiring the lock and also check for reservations.
	 */
	hammer_lock_ex(&hmp->blkmap_lock);

	if (layer2->zone && layer2->zone != zone) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * The bigblock might be reserved by another zone.  If it is reserved
	 * by our zone we may have to move next_offset past the append_off.
	 */
	base_off = (next_offset &
		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
		    HAMMER_ZONE_RAW_BUFFER;
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if (resv->zone != zone) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
				      ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
		if (offset < resv->append_off) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset += resv->append_off - offset;
			goto again;
		}
		++resv->refs;
		resx = NULL;
	} else {
		resx = kmalloc(sizeof(*resv), hmp->m_misc,
			       M_WAITOK | M_ZERO | M_USE_RESERVE);
		resx->refs = 1;
		resx->zone = zone;
		resx->zone_offset = base_off;
		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
			resx->flags |= HAMMER_RESF_LAYER2FREE;
		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
		KKASSERT(resv == NULL);
		resv = resx;
		++hammer_count_reservations;
	}
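	/*
	 * append_off mirrors layer2->append_off for the reserved
	 * big-block: allocations and reservations may only append past
	 * it, never backfill, until the reservation is torn down.
	 */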
	resv->append_off = offset + bytes;

	/*
	 * If we are not reserving a whole buffer but are at the start of
	 * a new block, call hammer_bnew() to avoid a disk read.
	 *
	 * If we are reserving a whole buffer (or more), the caller will
	 * probably use a direct read, so do nothing.
	 */
	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
		hammer_bnew(hmp, next_offset, errorp, &buffer3);
	}

	/*
	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
	 * space beyond alloc_offset is uninitialized.  alloc_offset must
	 * be big-block aligned.
	 */
	blockmap->next_offset = next_offset + bytes;
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);
	hammer_rel_volume(root_volume, 0);
	*zone_offp = next_offset;

	return(resv);
}

#if 0
/*
 * Backend function - undo a portion of a reservation.
 */
void
hammer_blockmap_reserve_undo(hammer_mount_t hmp, hammer_reserve_t resv,
			     hammer_off_t zone_offset, int bytes)
{
	resv->bytes_freed += bytes;
}

#endif

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
	hammer_off_t base_offset;
	int error;

	KKASSERT(resv->refs > 0);
	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
		 HAMMER_ZONE_RAW_BUFFER);

	/*
	 * Setting append_off to the max prevents any new allocations
	 * from occurring while we are trying to dispose of the reservation,
	 * allowing us to safely delete any related HAMMER buffers.
	 *
	 * If we are unable to clean out all related HAMMER buffers we
	 * requeue the delay.
	 */
	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
		base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
		base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
		error = hammer_del_buffers(hmp, base_offset,
					   resv->zone_offset,
					   HAMMER_LARGEBLOCK_SIZE,
					   0);
		if (error)
			hammer_reserve_setdelay(hmp, resv);
	}
	if (--resv->refs == 0) {
		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
		kfree(resv, hmp->m_misc);
		--hammer_count_reservations;
	}
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller might reset the underlying layer2 entry's append_off to 0, so
 * our covering append_off must be set to max to prevent any reallocation
 * until after the flush delays complete, not to mention proper invalidation
 * of any underlying cached blocks.
 */
static void
hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
			       int zone, struct hammer_blockmap_layer2 *layer2)
{
	hammer_reserve_t resv;

	/*
	 * Allocate the reservation if necessary.
	 *
	 * NOTE: need lock in future around resv lookup/allocation and
	 * the setdelay call, currently refs is not bumped until the call.
	 */
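	/*
	 * The RB_INSERT below fails if another thread raced us and
	 * entered a reservation for the same big-block first; the new
	 * structure is then discarded and the lookup retried.
	 */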
again:
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
	if (resv == NULL) {
		resv = kmalloc(sizeof(*resv), hmp->m_misc,
			       M_WAITOK | M_ZERO | M_USE_RESERVE);
		resv->zone = zone;
		resv->zone_offset = base_offset;
		resv->refs = 0;
		resv->append_off = HAMMER_LARGEBLOCK_SIZE;

		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
			resv->flags |= HAMMER_RESF_LAYER2FREE;
		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
			kfree(resv, hmp->m_misc);
			goto again;
		}
		++hammer_count_reservations;
	} else {
		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
			resv->flags |= HAMMER_RESF_LAYER2FREE;
	}
	hammer_reserve_setdelay(hmp, resv);
}

/*
 * Enter the reservation on the on-delay list, or move it if it
 * is already on the list.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
	if (resv->flags & HAMMER_RESF_ONDELAY) {
		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
		resv->flush_group = hmp->flusher.next + 1;
		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
	} else {
		++resv->refs;
		++hmp->rsv_fromdelay;
		resv->flags |= HAMMER_RESF_ONDELAY;
		resv->flush_group = hmp->flusher.next + 1;
		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
	}
}
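/*
 * Reverse of hammer_reserve_setdelay(): remove the reservation from the
 * delay list and drop the reference the setdelay placed on it.
 */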
void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
	resv->flags &= ~HAMMER_RESF_ONDELAY;
	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
	--hmp->rsv_fromdelay;
	hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
		     hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int error;
	int zone;

	if (bytes == 0)
		return;
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes <= HAMMER_XBUFSIZE);
	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
		  ~HAMMER_LARGEBLOCK_MASK64) == 0);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = trans->rootvol;
	error = 0;

	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto failed;
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
			panic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	if (error)
		goto failed;
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
			panic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	hammer_lock_ex(&hmp->blkmap_lock);

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Free space previously allocated via blockmap_alloc().
	 */
	KKASSERT(layer2->zone == zone);
	layer2->bytes_free += bytes;
	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

	/*
	 * If a big-block becomes entirely free we must create a covering
	 * reservation to prevent premature reuse.  Note, however, that
	 * the big-block and/or reservation may still have an append_off
	 * that allows further (non-reused) allocations.
	 *
	 * Once the reservation has been made we re-check layer2 and if
	 * the big-block is still entirely free we reset the layer2 entry.
	 * The reservation will prevent premature reuse.
	 *
	 * NOTE: hammer_buffer's are only invalidated when the reservation
	 * is completed, if the layer2 entry is still completely free at
	 * that time.  Any allocations from the reservation that may have
	 * occurred in the meantime, or active references on the reservation
	 * from new pending allocations, will prevent the invalidation from
	 * occurring.
	 */
	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
		base_off = (zone_offset &
			    (~HAMMER_LARGEBLOCK_MASK64 &
			     ~HAMMER_OFF_ZONE_MASK)) |
			    HAMMER_ZONE_RAW_BUFFER;

		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
			layer2->zone = 0;
			layer2->append_off = 0;
			hammer_modify_buffer(trans, buffer1,
					     layer1, sizeof(*layer1));
			++layer1->blocks_free;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
			hammer_modify_buffer_done(buffer1);
			hammer_modify_volume_field(trans,
					trans->rootvol,
					vol0_stat_freebigblocks);
			++root_volume->ondisk->vol0_stat_freebigblocks;
			hmp->copy_stat_freebigblocks =
				root_volume->ondisk->vol0_stat_freebigblocks;
			hammer_modify_volume_done(trans->rootvol);
		}
	}
	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
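/*
 * Unlike hammer_blockmap_alloc() this path does not consult or advance
 * the zone's next_offset rotor; the space was carved out when the
 * reservation was made and only the on-media layer1/layer2 bookkeeping
 * is brought up to date here.
 */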
int
hammer_blockmap_finalize(hammer_transaction_t trans,
			 hammer_reserve_t resv,
			 hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int error;
	int zone;
	int offset;

	if (bytes == 0)
		return(0);
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes <= HAMMER_XBUFSIZE);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = trans->rootvol;
	error = 0;

	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto failed;
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
			panic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	if (error)
		goto failed;
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
			panic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

	hammer_lock_ex(&hmp->blkmap_lock);

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Finalize some or all of the space covered by a current
	 * reservation.  An allocation in the same layer may have
	 * already assigned ownership.
	 */
	if (layer2->zone == 0) {
		hammer_modify_buffer(trans, buffer1,
				     layer1, sizeof(*layer1));
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1,
					   HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(buffer1);
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans,
				trans->rootvol,
				vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
			root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	}
	if (layer2->zone != zone)
		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
	KKASSERT(layer2->zone == zone);
	KKASSERT(bytes != 0);
	layer2->bytes_free -= bytes;
	if (resv)
		resv->flags &= ~HAMMER_RESF_LAYER2FREE;

	/*
	 * Finalizations can occur out of order, or combined with allocations.
	 * append_off must be set to the highest allocated offset.
	 */
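	/*
	 * e.g. if 1K was reserved at big-block offset 16K and 2K at 32K,
	 * and the 32K piece finalizes first, append_off jumps to 34K;
	 * the later 16K finalization must then leave it alone
	 * (illustrative numbers only).
	 */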
	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
	if (layer2->append_off < offset)
		layer2->append_off = offset;

	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	return(error);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
			int *curp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int bytes;
	int zone;

	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp) {
		*curp = 0;
		return(0);
	}
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	if (*errorp) {
		bytes = 0;
		goto failed;
	}
	KKASSERT(layer1->phys_offset);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
			panic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 *
	 * (reuse buffer, layer1 pointer becomes invalid)
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
	if (*errorp) {
		bytes = 0;
		goto failed;
	}
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
			panic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}
	KKASSERT(layer2->zone == zone);

	bytes = layer2->bytes_free;

	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
		*curp = 0;
	else
		*curp = 1;
failed:
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x0800) {
		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
			(long long)zone_offset, bytes);
	}
	return(bytes);
}

/*
 * Lookup a blockmap offset.
 */
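/*
 * In the direct-mapped scheme a zone-X offset and its zone-2 (raw
 * buffer) equivalent differ only in the zone bits, so the translation
 * itself is a simple mask-and-or; the remainder of the function only
 * sanity-checks the freemap when hammer_verify_zone is enabled.
 */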
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
		       int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t result_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv;
	int zone;

	/*
	 * Calculate the zone-2 offset.
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
			HAMMER_ZONE_RAW_BUFFER;

	/*
	 * We can actually stop here; normal blockmaps are now direct-mapped
	 * onto the freemap and so represent zone-2 addresses.
	 */
	if (hammer_verify_zone == 0) {
		*errorp = 0;
		return(result_offset);
	}

	/*
	 * Validate the allocation zone
	 */
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(0);
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(freemap->phys_offset != 0);

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	if (*errorp)
		goto failed;
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
			panic("CRC FAILED: LAYER1");
		hammer_unlock(&hmp->blkmap_lock);
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

	if (*errorp)
		goto failed;
	if (layer2->zone == 0) {
		base_off = (zone_offset &
			    (~HAMMER_LARGEBLOCK_MASK64 &
			     ~HAMMER_OFF_ZONE_MASK)) |
			    HAMMER_ZONE_RAW_BUFFER;
		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
				 base_off);
		KKASSERT(resv && resv->zone == zone);
	} else if (layer2->zone != zone) {
		panic("hammer_blockmap_lookup: bad zone %d/%d",
		      layer2->zone, zone);
	}
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		hammer_lock_ex(&hmp->blkmap_lock);
		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
			panic("CRC FAILED: LAYER2");
		hammer_unlock(&hmp->blkmap_lock);
	}

failed:
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x0800) {
		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
			(long long)zone_offset, (long long)result_offset);
	}
	return(result_offset);
}

/*
 * Check space availability
 *
 * slop is a fudge factor expressed in big-blocks; the computation below
 * charges reserved inodes, records, data bytes, delayed-reuse big-blocks,
 * and dirty buffer space against the free big-block count.
 */
int
hammer_checkspace(hammer_mount_t hmp, int slop)
{
	const int in_size = sizeof(struct hammer_inode_data) +
			    sizeof(union hammer_btree_elm);
	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
	int64_t usedbytes;

	usedbytes = hmp->rsv_inodes * in_size +
		    hmp->rsv_recs * rec_size +
		    hmp->rsv_databytes +
		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
		    ((int64_t)hidirtybufspace << 2) +
		    (slop << HAMMER_LARGEBLOCK_BITS);

	hammer_count_extra_space_used = usedbytes;	/* debugging */

	if (hmp->copy_stat_freebigblocks >=
	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
		return(0);
	}
	return (ENOSPC);
}