/*
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023 Tomohiro Kusumi <tkusumi@netbsd.org>
 * Copyright (c) 2011-2023 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
*/

#include "hammer2.h"

/*
 * breadth-first search
 */
typedef struct hammer2_chain_save {
	TAILQ_ENTRY(hammer2_chain_save) entry;
	hammer2_chain_t	*chain;
} hammer2_chain_save_t;

TAILQ_HEAD(hammer2_chain_save_list, hammer2_chain_save);
typedef struct hammer2_chain_save_list hammer2_chain_save_list_t;

typedef struct hammer2_bulkfree_info {
	hammer2_dev_t		*hmp;
	//kmem_anon_desc_t	kp;
	hammer2_off_t		sbase;		/* sub-loop iteration */
	hammer2_off_t		sstop;
	hammer2_bmap_data_t	*bmap;
	int			depth;
	long			count_10_00;	/* staged->free */
	long			count_11_10;	/* allocated->staged */
	long			count_00_11;	/* (should not happen) */
	long			count_01_11;	/* (should not happen) */
	long			count_10_11;	/* staged->allocated */
	long			count_l0cleans;
	long			count_linadjusts;
	long			count_inodes_scanned;
	long			count_dirents_scanned;
	long			count_dedup_factor;
	long			count_bytes_scanned;
	long			count_chains_scanned;
	long			count_chains_reported;
	long			bulkfree_calls;
	int			bulkfree_ticks;
	int			list_alert;
	hammer2_off_t		adj_free;
	hammer2_tid_t		mtid;
	time_t			save_time;
	hammer2_chain_save_list_t list;
	long			list_count;
	long			list_count_max;
	hammer2_chain_save_t	*backout;	/* ins pt while backing out */
	hammer2_dedup_t		*dedup;
	int			pri;
} hammer2_bulkfree_info_t;

static int h2_bulkfree_test(hammer2_bulkfree_info_t *info,
			hammer2_blockref_t *bref, int pri, int saved_error);
static uint32_t bigmask_get(hammer2_bmap_data_t *bmap);
static int bigmask_good(hammer2_bmap_data_t *bmap, uint32_t live_bigmask);

/*
 * General bulk scan function with callback.  Called with a referenced
 * but UNLOCKED parent.  The parent is returned in the same state.
 */
static
int
hammer2_bulkfree_scan(hammer2_chain_t *parent,
		  int (*func)(hammer2_bulkfree_info_t *info,
			      hammer2_blockref_t *bref),
		  hammer2_bulkfree_info_t *info)
{
	hammer2_blockref_t bref;
	hammer2_chain_t *chain;
	hammer2_chain_save_t *tail;
	hammer2_chain_save_t *save;
	int first = 1;
	int rup_error;
	int error;
	int e2;

	++info->pri;

	chain = NULL;
	rup_error = 0;
	error = 0;

	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
				   HAMMER2_RESOLVE_SHARED);

	/*
	 * End of scan if parent is a PFS
	 */
	tail = TAILQ_FIRST(&info->list);

	/*
	 * The parent was previously retrieved NODATA and thus has not
	 * tested the CRC.  Now that we have locked it normally, check
	 * for a CRC problem and skip it if we found one.  The bulk scan
	 * cannot safely traverse invalid block tables (we could end up
	 * in an endless loop or cause a panic).
	 */
	if (parent->error & HAMMER2_ERROR_CHECK) {
		error = parent->error;
		goto done;
	}

	/*
	 * Report which PFS is being scanned
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT)) {
		kprintf("hammer2_bulkfree: Scanning %s\n",
			parent->data->ipdata.filename);
	}

	/*
	 * Generally loop on the contents if we have not been flagged
	 * for abort.
	 *
	 * Remember that these chains are completely isolated from
	 * the frontend, so we can release locks temporarily without
	 * imploding.
	 */
	for (;;) {
		error |= hammer2_chain_scan(parent, &chain, &bref, &first,
					    HAMMER2_LOOKUP_NODATA |
					    HAMMER2_LOOKUP_SHARED);

		/*
		 * Handle EOF or other error at current level.  This stops
		 * the bulkfree scan.
		 */
		if (error & ~HAMMER2_ERROR_CHECK)
			break;

		/*
		 * Account for dirents before the data_off test, since most
		 * dirents do not need a data reference.
		 */
		if (bref.type == HAMMER2_BREF_TYPE_DIRENT)
			++info->count_dirents_scanned;

		/*
		 * Ignore brefs without data (typically dirents)
		 */
		if ((bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
			continue;

		/*
		 * Process bref, chain is only non-NULL if the bref
		 * might be recursable (it's possible that we sometimes get
		 * a non-NULL chain where the bref cannot be recursed).
		 *
		 * If we already ran down this tree we do not have to do it
		 * again, but we must still recover any cumulative error
		 * recorded from the time we did.
		 */
		++info->pri;
		e2 = h2_bulkfree_test(info, &bref, 1, 0);
		if (e2) {
			error |= e2 & ~HAMMER2_ERROR_EOF;
			continue;
		}

		if (bref.type == HAMMER2_BREF_TYPE_INODE)
			++info->count_inodes_scanned;

		error |= func(info, &bref);
		if (error & ~HAMMER2_ERROR_CHECK)
			break;

		/*
		 * A non-null chain is always returned if it is
		 * recursive, otherwise a non-null chain might be
		 * returned but usually is not when not recursive.
		 */
		if (chain == NULL)
			continue;

		info->count_bytes_scanned += chain->bytes;
		++info->count_chains_scanned;

		if (info->count_chains_scanned >=
		    info->count_chains_reported + 1000000 ||
		    (info->count_chains_scanned < 1000000 &&
		     info->count_chains_scanned >=
		     info->count_chains_reported + 100000)) {
			kprintf(" chains %-7ld inodes %-7ld "
				"dirents %-7ld bytes %5ldMB\n",
				info->count_chains_scanned,
				info->count_inodes_scanned,
				info->count_dirents_scanned,
				info->count_bytes_scanned / 1000000);
			info->count_chains_reported =
				info->count_chains_scanned;
		}

		/*
		 * Else check type and setup depth-first scan.
		 *
		 * Account for bytes actually read.
		 */
		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			++info->depth;
			if (chain->error & HAMMER2_ERROR_CHECK) {
				/*
				 * Cannot safely recurse chains with crc
				 * errors, even in emergency mode.
				 */
				/* NOP */
			} else if (info->depth > 16 ||
				   info->backout ||
				   (info->depth > hammer2_limit_saved_depth &&
				    info->list_count >=
				     (hammer2_limit_saved_chains >> 2)))
			{
				/*
				 * We must defer the recursion if it runs
				 * too deep or if too many saved chains are
				 * allocated.
				 *
				 * In the case of too many saved chains, we
				 * have to stop recursing ASAP to avoid an
				 * explosion of memory use since each radix
				 * level can hold 512 elements.
				 *
				 * If we had to defer at a deeper level
				 * backout is non-NULL.  We must backout
				 * completely before resuming.
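				 *
				 * For example (illustrative numbers only):
				 * if hammer2_limit_saved_chains were 400000,
				 * any recursion deeper than
				 * hammer2_limit_saved_depth is deferred once
				 * 100000 chains (the >> 2 threshold above)
				 * are already saved, and the hard cap of 16
				 * levels always forces deferral regardless.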
				 */
				if (info->list_count >
				     hammer2_limit_saved_chains &&
				    info->list_alert == 0)
				{
					kprintf("hammer2: during bulkfree, "
						"saved chains exceeded %ld "
						"at depth %d, "
						"backing off to less-efficient "
						"operation\n",
						hammer2_limit_saved_chains,
						info->depth);
					info->list_alert = 1;
				}

				/*
				 * Must be placed at head so pfsroot scan
				 * can exhaust saved elements for that pfs
				 * first.
				 *
				 * Must be placed at head for depth-first
				 * recovery when too many saved chains, to
				 * limit number of chains saved during
				 * saved-chain reruns.  The worst-case excess
				 * is (maximum_depth * 512) saved chains above
				 * the threshold.
				 *
				 * The maximum_depth generally occurs in the
				 * inode index and can be fairly deep once
				 * the radix tree becomes a bit fragmented.
				 * Nominally 100M inodes would be only 4 deep,
				 * plus a maximally sized file would be another
				 * 8 deep, but with fragmentation it can wind
				 * up being a lot more.
				 *
				 * However, when backing out, we have to place
				 * all the entries in each parent node not
				 * yet processed on the list too, and because
				 * these entries are shallower they must be
				 * placed after each other in order to maintain
				 * our depth-first processing.
				 */
				save = kmalloc(sizeof(*save), M_HAMMER2,
					       M_WAITOK | M_ZERO);
				save->chain = chain;
				hammer2_chain_ref(chain);

				if (info->backout) {
					TAILQ_INSERT_AFTER(&info->list,
							   info->backout,
							   save, entry);
				} else {
					TAILQ_INSERT_HEAD(&info->list,
							  save, entry);
				}
				info->backout = save;
				++info->list_count;
				if (info->list_count_max < info->list_count)
					info->list_count_max = info->list_count;

				/* guess */
				info->pri += 10;
			} else {
				int savepri = info->pri;

				hammer2_chain_unlock(chain);
				hammer2_chain_unlock(parent);
				info->pri = 0;
				rup_error |= hammer2_bulkfree_scan(chain,
								   func, info);
				info->pri += savepri;
				hammer2_chain_lock(parent,
						   HAMMER2_RESOLVE_ALWAYS |
						   HAMMER2_RESOLVE_SHARED);
				hammer2_chain_lock(chain,
						   HAMMER2_RESOLVE_ALWAYS |
						   HAMMER2_RESOLVE_SHARED);
			}
			--info->depth;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			break;
		default:
			/* does not recurse */
			break;
		}
		if (rup_error & HAMMER2_ERROR_ABORTED)
			break;
	}
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}

	/*
	 * If this is a PFSROOT, also re-run any deferred elements
	 * added during our scan so we can report any cumulative errors
	 * for the PFS.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT)) {
		for (;;) {
			int opri;

			save = TAILQ_FIRST(&info->list);
			if (save == tail)	/* exhaust this PFS only */
				break;

			TAILQ_REMOVE(&info->list, save, entry);
			info->backout = NULL;
			--info->list_count;
			opri = info->pri;
			info->pri = 0;
			rup_error |= hammer2_bulkfree_scan(save->chain, func, info);
			hammer2_chain_drop(save->chain);
			kfree(save, M_HAMMER2);
			info->pri = opri;
		}
	}

	error |= rup_error;

	/*
	 * Report which PFS the errors were encountered in.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) &&
	    (error & ~HAMMER2_ERROR_EOF)) {
		kprintf("hammer2_bulkfree: Encountered errors (%08x) "
			"while scanning \"%s\"\n",
			error, parent->data->ipdata.filename);
	}

	/*
	 * Save with higher pri now that we know what it is.
	 */
	h2_bulkfree_test(info, &parent->bref, info->pri + 1,
			 (error & ~HAMMER2_ERROR_EOF));

done:
	hammer2_chain_unlock(parent);

	return (error & ~HAMMER2_ERROR_EOF);
}

/*
 * Bulkfree algorithm
 *
 * Repeat {
 *	Chain flush (partial synchronization) XXX removed
 *	Scan the whole topology - build in-memory freemap (mark 11)
 *	Reconcile the in-memory freemap against the on-disk freemap.
 *		ondisk xx -> ondisk 11 (if allocated)
 *		ondisk 11 -> ondisk 10 (if free in-memory)
 *		ondisk 10 -> ondisk 00 (if free in-memory) - on next pass
 * }
 *
 * The topology scan may have to be performed multiple times to window
 * freemaps which are too large to fit in kernel memory.
 *
 * Races are handled using a double-transition (11->10, 10->00).  The bulkfree
 * scan snapshots the volume root's blockset and thus can run concurrent with
 * normal operations, as long as a full flush is made between each pass to
 * synchronize any modified chains (otherwise their blocks might be improperly
 * freed).
 *
 * Temporary memory in multiples of 32KB is required to reconstruct the leaf
 * hammer2_bmap_data blocks so they can later be compared against the live
 * freemap.  Each 32KB represents 256 x 16KB x 256 = ~1 GB of storage.
 * A 32MB save area thus represents around ~1 TB.  The temporary memory
 * allocated can be specified.  If it is not sufficient multiple topology
 * passes will be made.
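 *
 * Two-bit freemap states referenced above (as used by the transition
 * counters and by h2_bulkfree_sync_adjust() below): 00 = free,
 * 10 = staged (possibly free, armed for the next pass), 11 = allocated.
 * 01 is never generated here and is treated as an error state when seen.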
 */

/*
 * Bulkfree callback info
 */
//static void hammer2_bulkfree_thread(void *arg __unused);
static void cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size);
static int h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo,
			hammer2_blockref_t *bref);
static int h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo);
static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
			hammer2_off_t data_off, hammer2_bmap_data_t *live,
			hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base);

void
hammer2_bulkfree_init(hammer2_dev_t *hmp)
{
	/*
	hammer2_thr_create(&hmp->bfthr, NULL, hmp,
			   hmp->devrepname, -1, -1,
			   hammer2_bulkfree_thread);
	*/
}

void
hammer2_bulkfree_uninit(hammer2_dev_t *hmp)
{
	//hammer2_thr_delete(&hmp->bfthr);
}

#if 0
static void
hammer2_bulkfree_thread(void *arg)
{
	hammer2_thread_t *thr = arg;
	hammer2_ioc_bulkfree_t bfi;
	uint32_t flags;

	for (;;) {
		hammer2_thr_wait_any(thr,
				     HAMMER2_THREAD_STOP |
				     HAMMER2_THREAD_FREEZE |
				     HAMMER2_THREAD_UNFREEZE |
				     HAMMER2_THREAD_REMASTER,
				     hz * 60);

		flags = thr->flags;
		cpu_ccfence();
		if (flags & HAMMER2_THREAD_STOP)
			break;
		if (flags & HAMMER2_THREAD_FREEZE) {
			hammer2_thr_signal2(thr, HAMMER2_THREAD_FROZEN,
					    HAMMER2_THREAD_FREEZE);
			continue;
		}
		if (flags & HAMMER2_THREAD_UNFREEZE) {
			hammer2_thr_signal2(thr, 0,
					    HAMMER2_THREAD_FROZEN |
					    HAMMER2_THREAD_UNFREEZE);
			continue;
		}
		if (flags & HAMMER2_THREAD_FROZEN)
			continue;
		if (flags & HAMMER2_THREAD_REMASTER) {
			hammer2_thr_signal2(thr, 0, HAMMER2_THREAD_REMASTER);
			bzero(&bfi, sizeof(bfi));
			bfi.size = 8192 * 1024;
			/* hammer2_bulkfree_pass(thr->hmp, &bfi); */
		}
	}
	thr->td = NULL;
	hammer2_thr_signal(thr, HAMMER2_THREAD_STOPPED);
	/* structure can go invalid at this point */
}
#endif

int
hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_chain_t *vchain,
		      hammer2_ioc_bulkfree_t *bfi)
{
	hammer2_bulkfree_info_t cbinfo;
	hammer2_chain_save_t *save;
	hammer2_off_t incr;
	size_t size;
	int error;

	/*
	 * We have to clear the live dedup cache as it might have entries
	 * that are freeable as of now.  Any new entries in the dedup cache
	 * made after this point, even if they become freeable, will have
	 * previously been fully allocated and will be protected by the
	 * 2-stage bulkfree.
	 */
	hammer2_dedup_clear(hmp);

	/*
	 * Setup for free pass using the buffer size specified by the
	 * hammer2 utility, 32K-aligned.
	 */
	bzero(&cbinfo, sizeof(cbinfo));
	size = (bfi->size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
	       ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);

	/*
	 * Cap at 1/4 physical memory (hammer2 utility will not normally
	 * ever specify a buffer this big, but leave the option available).
	 */
	/*
	if (size > kmem_lim_size() * 1024 * 1024 / 4) {
		size = kmem_lim_size() * 1024 * 1024 / 4;
		kprintf("hammer2: Warning: capping bulkfree buffer at %jdM\n",
			(intmax_t)size / (1024 * 1024));
	}
	*/

#define HAMMER2_FREEMAP_SIZEDIV	\
	(HAMMER2_FREEMAP_LEVEL1_SIZE / HAMMER2_FREEMAP_LEVELN_PSIZE)

	/*
	 * Cap at the size needed to cover the whole volume to avoid
	 * making an unnecessarily large allocation.
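	 *
	 * Worked example: HAMMER2_FREEMAP_SIZEDIV is 1GB / 32KB = 32768,
	 * so a 2TB volume is fully covered by a 2TB / 32768 = 64MB bmap
	 * buffer; anything larger would simply be wasted.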
	 */
	if (size > hmp->total_size / HAMMER2_FREEMAP_SIZEDIV)
		size = howmany(hmp->total_size, HAMMER2_FREEMAP_SIZEDIV);

	/*
	 * Minimum bitmap buffer size, then align to a LEVELN_PSIZE (32K)
	 * boundary.
	 */
	if (size < 1024 * 1024)
		size = 1024 * 1024;
	size = (size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
	       ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);

	cbinfo.hmp = hmp;
	cbinfo.bmap = kmalloc(size, M_HAMMER2, M_WAITOK | M_ZERO);
	cbinfo.dedup = kmalloc(sizeof(*cbinfo.dedup) * HAMMER2_DEDUP_HEUR_SIZE,
			       M_HAMMER2, M_WAITOK | M_ZERO);

	kprintf("hammer2: bulkfree buf=%jdM\n",
		(intmax_t)size / (1024 * 1024));

	/*
	 * Normalize start point to a 1GB boundary.  We operate on a
	 * 32KB leaf bitmap boundary which represents 1GB of storage.
	 */
	cbinfo.sbase = bfi->sbase;
	if (cbinfo.sbase > hmp->total_size)
		cbinfo.sbase = hmp->total_size;
	cbinfo.sbase &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
	TAILQ_INIT(&cbinfo.list);

	cbinfo.bulkfree_ticks = ticks;

	/*
	 * Loop on a full meta-data scan as many times as required to
	 * get through all available storage.
	 */
	error = 0;
	while (cbinfo.sbase < hmp->total_size) {
		/*
		 * We have enough ram to represent (incr) bytes of storage.
		 * Each 32KB of ram represents 1GB of storage.
		 *
		 * We must also clean out our de-duplication heuristic for
		 * each (incr) bytes of storage, otherwise we wind up not
		 * scanning meta-data for later areas of storage because
		 * they had already been scanned in earlier areas of storage.
		 * Since the ranging is different, we have to restart
		 * the dedup heuristic too.
		 */
		int allmedia;

		cbinfo_bmap_init(&cbinfo, size);
		bzero(cbinfo.dedup, sizeof(*cbinfo.dedup) *
				    HAMMER2_DEDUP_HEUR_SIZE);
		cbinfo.count_inodes_scanned = 0;
		cbinfo.count_dirents_scanned = 0;
		cbinfo.count_bytes_scanned = 0;
		cbinfo.count_chains_scanned = 0;
		cbinfo.count_chains_reported = 0;

		incr = size / HAMMER2_FREEMAP_LEVELN_PSIZE *
		       HAMMER2_FREEMAP_LEVEL1_SIZE;
		if (hmp->total_size - cbinfo.sbase <= incr) {
			cbinfo.sstop = hmp->total_size;
			allmedia = 1;
		} else {
			cbinfo.sstop = cbinfo.sbase + incr;
			allmedia = 0;
		}
		kprintf("hammer2: pass %016jx-%016jx ",
			(intmax_t)cbinfo.sbase,
			(intmax_t)cbinfo.sstop);
		if (allmedia && cbinfo.sbase == 0)
			kprintf("(all media)\n");
		else if (allmedia)
			kprintf("(remaining media)\n");
		else
			kprintf("(%jdGB of media)\n",
				(intmax_t)incr / (1024L*1024*1024));

		/*
		 * Scan topology for stuff inside this range.
		 *
		 * NOTE - By not using a transaction the operation can
		 *	  run concurrent with the frontend as well as
		 *	  with flushes.
		 *
		 *	  We cannot safely set a mtid without a transaction,
		 *	  and in fact we don't want to set one anyway.  We
		 *	  want the bulkfree to be passive and not interfere
		 *	  with crash recovery.
		 */
#undef HAMMER2_BULKFREE_TRANS	/* undef - don't use transaction */
#ifdef HAMMER2_BULKFREE_TRANS
		hammer2_trans_init(hmp->spmp, 0);
		cbinfo.mtid = hammer2_trans_sub(hmp->spmp);
#else
		cbinfo.mtid = 0;
#endif
		cbinfo.pri = 0;
		error |= hammer2_bulkfree_scan(vchain,
					       h2_bulkfree_callback, &cbinfo);

		while ((save = TAILQ_FIRST(&cbinfo.list)) != NULL &&
		       (error & ~HAMMER2_ERROR_CHECK) == 0) {
			TAILQ_REMOVE(&cbinfo.list, save, entry);
			--cbinfo.list_count;
			cbinfo.pri = 0;
			cbinfo.backout = NULL;
			error |= hammer2_bulkfree_scan(save->chain,
						       h2_bulkfree_callback,
						       &cbinfo);
			hammer2_chain_drop(save->chain);
			kfree(save, M_HAMMER2);
		}
		while (save) {
			TAILQ_REMOVE(&cbinfo.list, save, entry);
			--cbinfo.list_count;
			hammer2_chain_drop(save->chain);
			kfree(save, M_HAMMER2);
			save = TAILQ_FIRST(&cbinfo.list);
		}
		cbinfo.backout = NULL;

		/*
		 * If the complete scan succeeded we can synchronize our
		 * in-memory freemap against live storage.  If an abort
		 * occurred we cannot safely synchronize our partially
		 * filled-out in-memory freemap.
		 *
		 * We still synchronize on CHECK failures.  That is, we still
		 * want bulkfree to operate even if the filesystem has defects.
		 */
		if (error & ~HAMMER2_ERROR_CHECK) {
			kprintf("bulkfree lastdrop %d %d error=0x%04x\n",
				vchain->refs, vchain->core.chain_count, error);
		} else {
			if (error & HAMMER2_ERROR_CHECK) {
				kprintf("bulkfree lastdrop %d %d "
					"(with check errors)\n",
					vchain->refs, vchain->core.chain_count);
			} else {
				kprintf("bulkfree lastdrop %d %d\n",
					vchain->refs, vchain->core.chain_count);
			}

			error = h2_bulkfree_sync(&cbinfo);

			hammer2_voldata_lock(hmp);
			hammer2_voldata_modify(hmp);
			hmp->voldata.allocator_free += cbinfo.adj_free;
			hammer2_voldata_unlock(hmp);
		}

		/*
		 * Cleanup for next loop.
		 */
#ifdef HAMMER2_BULKFREE_TRANS
		hammer2_trans_done(hmp->spmp, 0);
#endif
		if (error & ~HAMMER2_ERROR_CHECK)
			break;
		cbinfo.sbase = cbinfo.sstop;
		cbinfo.adj_free = 0;
	}
	kfree(cbinfo.bmap, M_HAMMER2);
	kfree(cbinfo.dedup, M_HAMMER2);
	cbinfo.dedup = NULL;

	bfi->sstop = cbinfo.sbase;

	incr = bfi->sstop / (hmp->total_size / 10000);
	if (incr > 10000)
		incr = 10000;

	kprintf("bulkfree pass statistics (%d.%02d%% storage processed):\n",
		(int)incr / 100,
		(int)incr % 100);

	if (error & ~HAMMER2_ERROR_CHECK) {
		kprintf("    bulkfree was aborted\n");
	} else {
		if (error & HAMMER2_ERROR_CHECK) {
			kprintf("    WARNING: bulkfree "
				"encountered CRC errors\n");
		}
		kprintf("    transition->free   %ld\n", cbinfo.count_10_00);
		kprintf("    transition->staged %ld\n", cbinfo.count_11_10);
		kprintf("    ERR(00)->allocated %ld\n", cbinfo.count_00_11);
		kprintf("    ERR(01)->allocated %ld\n", cbinfo.count_01_11);
		kprintf("    staged->allocated  %ld\n", cbinfo.count_10_11);
		kprintf("    ~4MB segs cleaned  %ld\n", cbinfo.count_l0cleans);
		kprintf("    linear adjusts     %ld\n",
			cbinfo.count_linadjusts);
		kprintf("    dedup factor       %ld\n",
			cbinfo.count_dedup_factor);
		kprintf("    max saved chains   %ld\n", cbinfo.list_count_max);
	}

	return error;
}

static void
cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size)
{
	hammer2_bmap_data_t *bmap = cbinfo->bmap;
	hammer2_key_t key = cbinfo->sbase;
	hammer2_key_t lokey;
	hammer2_key_t hikey;

	lokey = (cbinfo->hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
		~HAMMER2_SEGMASK64;
	hikey = cbinfo->hmp->total_size & ~HAMMER2_SEGMASK64;

	bzero(bmap, size);
	while (size) {
		bzero(bmap, sizeof(*bmap));
		if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX))
			lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
		if (lokey < H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64)
			lokey = H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64;
		if (key < lokey || key >= hikey) {
			memset(bmap->bitmapq, -1,
			       sizeof(bmap->bitmapq));
			bmap->avail = 0;
			bmap->linear = HAMMER2_SEGSIZE;
		} else {
			bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
		}
		size -= sizeof(*bmap);
		key += HAMMER2_FREEMAP_LEVEL0_SIZE;
		++bmap;
	}
}

static int
h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
{
	hammer2_bmap_data_t *bmap;
	hammer2_off_t data_off;
	uint16_t class;
	size_t bytes;
	int radix;

	/*
	 * Check for signal and allow yield to userland during scan.
	 */
	if (hammer2_signal_check(&cbinfo->save_time))
		return HAMMER2_ERROR_ABORTED;

	/*
	 * Deal with kernel thread cpu or I/O hogging by limiting the
	 * number of chains scanned per second to hammer2_bulkfree_tps.
	 * Ignore leaf records (DIRENT and DATA), no per-record I/O is
	 * involved for those since we don't load their data.
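	 *
	 * For example, with the usual hz=100 tick rate, once more than
	 * hammer2_bulkfree_tps chains have been processed within one
	 * second's worth of ticks, the code below sleeps out the remainder
	 * of that second before resetting the counter.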
	 */
	if (bref->type != HAMMER2_BREF_TYPE_DATA &&
	    bref->type != HAMMER2_BREF_TYPE_DIRENT) {
		++cbinfo->bulkfree_calls;
		if (cbinfo->bulkfree_calls > hammer2_bulkfree_tps) {
			int dticks = ticks - cbinfo->bulkfree_ticks;
			if (dticks < 0)
				dticks = 0;
			if (dticks < hz) {
				tsleep(&cbinfo->bulkfree_ticks, 0,
				       "h2bw", hz - dticks);
			}
			cbinfo->bulkfree_calls = 0;
			cbinfo->bulkfree_ticks = ticks;
		}
	}

	/*
	 * Calculate the data offset and determine if it is within
	 * the current freemap range being gathered.
	 */
	data_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
	if (data_off < cbinfo->sbase || data_off >= cbinfo->sstop)
		return 0;
	if (data_off < cbinfo->hmp->voldata.allocator_beg)
		return 0;
	if (data_off >= cbinfo->hmp->total_size)
		return 0;

	/*
	 * Calculate the information needed to generate the in-memory
	 * freemap record.
	 *
	 * Hammer2 does not allow allocations to cross the L1 (1GB) boundary,
	 * it's a problem if it does.  (Or L0 (4MB) for that matter).
	 */
	radix = (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	KKASSERT(radix != 0);
	bytes = (size_t)1 << radix;
	class = (bref->type << 8) | HAMMER2_PBUFRADIX;

	if (data_off + bytes > cbinfo->sstop) {
		kprintf("hammer2_bulkfree_scan: illegal 1GB boundary "
			"%016jx %016jx/%d\n",
			(intmax_t)bref->data_off,
			(intmax_t)bref->key,
			bref->keybits);
		bytes = cbinfo->sstop - data_off;	/* XXX */
	}

	/*
	 * Convert to a storage offset relative to the beginning of the
	 * storage range we are collecting.  Then lookup the level0 bmap entry.
	 */
	data_off -= cbinfo->sbase;
	bmap = cbinfo->bmap + (data_off >> HAMMER2_FREEMAP_LEVEL0_RADIX);

	/*
	 * Convert data_off to a bmap-relative value (~4MB storage range).
	 * Adjust linear, class, and avail.
	 *
	 * Hammer2 does not allow allocations to cross the L0 (4MB) boundary.
	 */
	data_off &= HAMMER2_FREEMAP_LEVEL0_MASK;
	if (data_off + bytes > HAMMER2_FREEMAP_LEVEL0_SIZE) {
		kprintf("hammer2_bulkfree_scan: illegal 4MB boundary "
			"%016jx %016jx/%d\n",
			(intmax_t)bref->data_off,
			(intmax_t)bref->key,
			bref->keybits);
		bytes = HAMMER2_FREEMAP_LEVEL0_SIZE - data_off;
	}

	if (bmap->class == 0) {
		bmap->class = class;
		bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
	}

	/*
	 * NOTE: bmap->class does not have to match class.  Classification
	 *	 is relaxed when free space is low, so some mixing can occur.
	 */
#if 0
	/*
	 * XXX removed
	 */
	if (bmap->class != class) {
		kprintf("hammer2_bulkfree_scan: illegal mixed class "
			"%016jx %016jx/%d (%04x vs %04x)\n",
			(intmax_t)bref->data_off,
			(intmax_t)bref->key,
			bref->keybits,
			class, bmap->class);
	}
#endif

	/*
	 * Just record the highest byte-granular offset for now.  Do not
	 * match against allocations which are in multiples of whole blocks.
	 *
	 * Make sure that any in-block linear offset at least covers the
	 * data range.  This can cause bmap->linear to become block-aligned.
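	 *
	 * For example (illustrative offsets): a 1KB fragment at
	 * bmap-relative offset 0x3000 pushes bmap->linear to at least
	 * 0x3400, while a whole-block (16KB multiple) allocation only
	 * bumps linear when linear currently points inside its range.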
	 */
	if (bytes & HAMMER2_FREEMAP_BLOCK_MASK) {
		if (bmap->linear < (int32_t)data_off + (int32_t)bytes)
			bmap->linear = (int32_t)data_off + (int32_t)bytes;
	} else if (bmap->linear >= (int32_t)data_off &&
		   bmap->linear < (int32_t)data_off + (int32_t)bytes) {
		bmap->linear = (int32_t)data_off + (int32_t)bytes;
	}

	/*
	 * Adjust the hammer2_bitmap_t bitmap[HAMMER2_BMAP_ELEMENTS].
	 * 64-bit entries, 2 bits per entry, to code 11.
	 *
	 * NOTE: data_off mask to 524288, shift right by 14 (radix for 16384),
	 *	 and multiply shift amount by 2 for sets of 2 bits.
	 *
	 * NOTE: The allocation can be smaller than HAMMER2_FREEMAP_BLOCK_SIZE.
	 *	 also, data_off may not be FREEMAP_BLOCK_SIZE aligned.
	 */
	while (bytes > 0) {
		hammer2_bitmap_t bmask;
		int bindex;

		bindex = (int)data_off >> (HAMMER2_FREEMAP_BLOCK_RADIX +
					   HAMMER2_BMAP_INDEX_RADIX);
		bmask = (hammer2_bitmap_t)3 <<
			((((int)data_off & HAMMER2_BMAP_INDEX_MASK) >>
			 HAMMER2_FREEMAP_BLOCK_RADIX) << 1);

		/*
		 * NOTE! The (avail) calculation is bitmap-granular.  Multiple
		 *	 sub-granular records can wind up at the same bitmap
		 *	 position.
		 */
		if ((bmap->bitmapq[bindex] & bmask) == 0) {
			if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE) {
				bmap->avail -= HAMMER2_FREEMAP_BLOCK_SIZE;
			} else {
				bmap->avail -= bytes;
			}
			bmap->bitmapq[bindex] |= bmask;
		}
		data_off += HAMMER2_FREEMAP_BLOCK_SIZE;
		if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE)
			bytes = 0;
		else
			bytes -= HAMMER2_FREEMAP_BLOCK_SIZE;
	}
	return 0;
}

/*
 * Synchronize the in-memory bitmap with the live freemap.  This is not a
 * direct copy.  Instead the bitmaps must be compared:
 *
 *	In-memory	Live-freemap
 *	   00		  11 -> 10	(do nothing if live modified)
 *			  10 -> 00	(do nothing if live modified)
 *	   11		  10 -> 11	handles race against live
 *			  ** -> 11	nominally warn of corruption
 *
 * We must also fixup the hints in HAMMER2_BREF_TYPE_FREEMAP_LEAF.
 */
static int
h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
{
	hammer2_off_t data_off;
	hammer2_key_t key;
	hammer2_key_t key_dummy;
	hammer2_bmap_data_t *bmap;
	hammer2_bmap_data_t *live;
	hammer2_chain_t *live_parent;
	hammer2_chain_t *live_chain;
	int bmapindex;
	int error;

	kprintf("hammer2_bulkfree - range ");

	if (cbinfo->sbase < cbinfo->hmp->voldata.allocator_beg)
		kprintf("%016jx-",
			(intmax_t)cbinfo->hmp->voldata.allocator_beg);
	else
		kprintf("%016jx-",
			(intmax_t)cbinfo->sbase);

	if (cbinfo->sstop > cbinfo->hmp->total_size)
		kprintf("%016jx\n",
			(intmax_t)cbinfo->hmp->total_size);
	else
		kprintf("%016jx\n",
			(intmax_t)cbinfo->sstop);

	data_off = cbinfo->sbase;
	bmap = cbinfo->bmap;

	live_parent = &cbinfo->hmp->fchain;
	hammer2_chain_ref(live_parent);
	hammer2_chain_lock(live_parent, HAMMER2_RESOLVE_ALWAYS);
	live_chain = NULL;
	error = 0;

	/*
	 * Iterate each hammer2_bmap_data_t line (128 bytes) managing
	 * 4MB of storage.
	 */
	while (data_off < cbinfo->sstop) {
		/*
		 * The freemap is not used below allocator_beg or beyond
		 * total_size.
		 */

		if (data_off < cbinfo->hmp->voldata.allocator_beg)
			goto next;
		if (data_off >= cbinfo->hmp->total_size)
			goto next;

		/*
		 * Locate the freemap leaf on the live filesystem
		 */
		key = (data_off & ~HAMMER2_FREEMAP_LEVEL1_MASK);

		if (live_chain == NULL || live_chain->bref.key != key) {
			if (live_chain) {
				hammer2_chain_unlock(live_chain);
				hammer2_chain_drop(live_chain);
			}
			live_chain = hammer2_chain_lookup(
					    &live_parent,
					    &key_dummy,
					    key,
					    key + HAMMER2_FREEMAP_LEVEL1_MASK,
					    &error,
					    HAMMER2_LOOKUP_ALWAYS);
			if (error) {
				/*
				 * NOTE: live_chain can be NULL on a lookup
				 *	 error, so report the error code we
				 *	 were handed rather than chasing
				 *	 the chain pointer.
				 */
				kprintf("hammer2_bulkfree: freemap lookup "
					"error near %016jx, error %s\n",
					(intmax_t)data_off,
					hammer2_error_str(error));
				break;
			}
		}
		if (live_chain == NULL) {
			/*
			 * XXX if we implement a full recovery mode we need
			 * to create/recreate missing freemap chains if our
			 * bmap has any allocated blocks.
			 */
			if (bmap->class &&
			    bmap->avail != HAMMER2_FREEMAP_LEVEL0_SIZE) {
				kprintf("hammer2_bulkfree: cannot locate "
					"live leaf for allocated data "
					"near %016jx\n",
					(intmax_t)data_off);
			}
			goto next;
		}
		if (live_chain->error) {
			kprintf("hammer2_bulkfree: unable to access freemap "
				"near %016jx, error %s\n",
				(intmax_t)data_off,
				hammer2_error_str(live_chain->error));
			hammer2_chain_unlock(live_chain);
			hammer2_chain_drop(live_chain);
			live_chain = NULL;
			goto next;
		}

		bmapindex = (data_off & HAMMER2_FREEMAP_LEVEL1_MASK) >>
			    HAMMER2_FREEMAP_LEVEL0_RADIX;
		live = &live_chain->data->bmdata[bmapindex];

		/*
		 * Shortcut if the bitmaps match and the live linear
		 * indicator is sane.  We can't do a perfect check of
		 * live->linear because the only real requirement is that
		 * if it is not block-aligned, that it not cover the space
		 * within its current block which overlaps one of the data
		 * ranges we scan.  We don't retain enough fine-grained
		 * data in our scan to be able to set it exactly.
		 *
		 * TODO - we could shortcut this by testing that both
		 * live->class and bmap->class are 0, and both avails are
		 * set to HAMMER2_FREEMAP_LEVEL0_SIZE (4MB).
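		 *
		 * Note that setting bit 0 of hammer2_aux_flags (tested
		 * below) disables this shortcut entirely, forcing the
		 * full comparison and adjust path for every leaf.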
		 */
		if (bcmp(live->bitmapq, bmap->bitmapq,
			 sizeof(bmap->bitmapq)) == 0 &&
		    live->linear >= bmap->linear &&
		    (hammer2_aux_flags & 1) == 0 &&
		    bigmask_good(bmap, live_chain->bref.check.freemap.bigmask))
		{
			goto next;
		}
		if (hammer2_debug & 1) {
			kprintf("live %016jx %04d.%04x (avail=%d) "
				"bigmask %08x->%08x\n",
				data_off, bmapindex, live->class, live->avail,
				live_chain->bref.check.freemap.bigmask,
				live_chain->bref.check.freemap.bigmask |
				bigmask_get(bmap));
		}

		if (hammer2_chain_modify(live_chain, cbinfo->mtid, 0, 0)) {
			kprintf("hammer2_bulkfree: unable to modify freemap "
				"at %016jx for data-block %016jx, error %s\n",
				live_chain->bref.data_off,
				(intmax_t)data_off,
				hammer2_error_str(live_chain->error));
			hammer2_chain_unlock(live_chain);
			hammer2_chain_drop(live_chain);
			live_chain = NULL;
			goto next;
		}
		live_chain->bref.check.freemap.bigmask = -1;
		cbinfo->hmp->freemap_relaxed = 0;	/* reset heuristic */
		live = &live_chain->data->bmdata[bmapindex];

		h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap,
					live_chain->bref.key +
					bmapindex *
					HAMMER2_FREEMAP_LEVEL0_SIZE);
next:
		data_off += HAMMER2_FREEMAP_LEVEL0_SIZE;
		++bmap;
	}
	if (live_chain) {
		hammer2_chain_unlock(live_chain);
		hammer2_chain_drop(live_chain);
	}
	if (live_parent) {
		hammer2_chain_unlock(live_parent);
		hammer2_chain_drop(live_parent);
	}
	return error;
}

/*
 * Merge the bulkfree bitmap against the existing bitmap.
 */
static
void
h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
			hammer2_off_t data_off, hammer2_bmap_data_t *live,
			hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base)
{
	int bindex;
	int scount;
	hammer2_off_t tmp_off;
	hammer2_bitmap_t lmask;
	hammer2_bitmap_t mmask;

	tmp_off = data_off;

	for (bindex = 0; bindex < HAMMER2_BMAP_ELEMENTS; ++bindex) {
		lmask = live->bitmapq[bindex];	/* live */
		mmask = bmap->bitmapq[bindex];	/* snapshotted bulkfree */
		if (lmask == mmask) {
			tmp_off += HAMMER2_BMAP_INDEX_SIZE;
			continue;
		}

		for (scount = 0;
		     scount < HAMMER2_BMAP_BITS_PER_ELEMENT;
		     scount += 2) {
			if ((mmask & 3) == 0) {
				/*
				 * in-memory 00		live 11 -> 10
				 *			live 10 -> 00
				 *
				 * Storage might be marked allocated or
				 * staged and must be remarked staged or
				 * free.
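				 *
				 * For example, a block staged (10) by the
				 * previous pass and still unreferenced now
				 * drops to free (00), returning one 16KB
				 * block to live->avail and adj_free.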
				 */
				switch (lmask & 3) {
				case 0:	/* 00 */
					break;
				case 1:	/* 01 */
					kprintf("hammer2_bulkfree: cannot "
						"transition m=00/l=01\n");
					break;
				case 2:	/* 10 -> 00 */
					live->bitmapq[bindex] &=
					    ~((hammer2_bitmap_t)2 << scount);
					live->avail +=
						HAMMER2_FREEMAP_BLOCK_SIZE;
					if (live->avail >
					    HAMMER2_FREEMAP_LEVEL0_SIZE) {
						live->avail =
						    HAMMER2_FREEMAP_LEVEL0_SIZE;
					}
					cbinfo->adj_free +=
						HAMMER2_FREEMAP_BLOCK_SIZE;
					++cbinfo->count_10_00;
					hammer2_io_dedup_assert(
						cbinfo->hmp,
						tmp_off |
						HAMMER2_FREEMAP_BLOCK_RADIX,
						HAMMER2_FREEMAP_BLOCK_SIZE);
					break;
				case 3:	/* 11 -> 10 */
					live->bitmapq[bindex] &=
					    ~((hammer2_bitmap_t)1 << scount);
					++cbinfo->count_11_10;
					hammer2_io_dedup_delete(
						cbinfo->hmp,
						HAMMER2_BREF_TYPE_DATA,
						tmp_off |
						HAMMER2_FREEMAP_BLOCK_RADIX,
						HAMMER2_FREEMAP_BLOCK_SIZE);
					break;
				}
			} else if ((mmask & 3) == 3) {
				/*
				 * in-memory 11		live 10 -> 11
				 *			live ** -> 11
				 *
				 * Storage might be incorrectly marked free
				 * or staged and must be remarked fully
				 * allocated.
				 */
				switch (lmask & 3) {
				case 0:	/* 00 */
					/*
					 * This case is not supposed to
					 * happen.  If it does, it means
					 * that an allocated block was
					 * thought by the filesystem to be
					 * free.
					 */
					kprintf("hammer2_bulkfree: "
						"00->11 critical freemap "
						"transition for datablock "
						"%016jx\n",
						tmp_off);
					++cbinfo->count_00_11;
					cbinfo->adj_free -=
						HAMMER2_FREEMAP_BLOCK_SIZE;
					live->avail -=
						HAMMER2_FREEMAP_BLOCK_SIZE;
					if ((int32_t)live->avail < 0)
						live->avail = 0;
					break;
				case 1:	/* 01 */
					++cbinfo->count_01_11;
					break;
				case 2:	/* 10 -> 11 */
					++cbinfo->count_10_11;
					break;
				case 3:	/* 11 */
					break;
				}
				live->bitmapq[bindex] |=
					((hammer2_bitmap_t)3 << scount);
			}
			mmask >>= 2;
			lmask >>= 2;
			tmp_off += HAMMER2_FREEMAP_BLOCK_SIZE;
		}
	}

	/*
	 * Determine if the live bitmap is completely free and reset its
	 * fields if so.  Otherwise check to see if we can reduce the linear
	 * offset.
	 */
	for (bindex = HAMMER2_BMAP_ELEMENTS - 1; bindex >= 0; --bindex) {
		if (live->bitmapq[bindex] != 0)
			break;
	}
	if (bindex < 0) {
		/*
		 * Completely empty, reset entire segment
		 */
#if 0
		kprintf("hammer2: cleanseg %016jx.%04x (%d)\n",
			alloc_base, live->class, live->avail);
#endif
		live->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
		live->class = 0;
		live->linear = 0;
		++cbinfo->count_l0cleans;
	} else if (bindex < 7) {
		/*
		 * Partially full, bitmapq[bindex] != 0.  Our bulkfree pass
		 * does not record enough information to set live->linear
		 * exactly.
		 *
		 * NOTE: Setting live->linear to a sub-block (16K) boundary
		 *	 forces the live code to iterate to the next fully
		 *	 free block.  It does NOT mean that all blocks above
		 *	 live->linear are available.
		 *
		 *	 Setting live->linear to a fragmentary (less than
		 *	 16K) boundary allows allocations to iterate within
		 *	 that sub-block.
		 */
		if (live->linear < bmap->linear &&
		    ((live->linear ^ bmap->linear) &
		     ~HAMMER2_FREEMAP_BLOCK_MASK) == 0) {
			/*
			 * If greater than but still within the same
			 * sub-block as live we can adjust linear upward.
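			 *
			 * e.g. (illustrative offsets): live->linear 0x4100
			 * and bmap->linear 0x4300 share the 16KB sub-block
			 * at 0x4000, so linear can move straight to 0x4300.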
			 */
			live->linear = bmap->linear;
			++cbinfo->count_linadjusts;
		} else {
			/*
			 * Otherwise adjust to the nearest higher or same
			 * sub-block boundary.  The live system may have
			 * bounced live->linear around so we cannot make any
			 * assumptions with regards to available fragmentary
			 * allocations.
			 */
			live->linear =
				(bmap->linear + HAMMER2_FREEMAP_BLOCK_MASK) &
				~HAMMER2_FREEMAP_BLOCK_MASK;
			++cbinfo->count_linadjusts;
		}
	} else {
		/*
		 * Completely full, effectively disable the linear iterator
		 */
		live->linear = HAMMER2_SEGSIZE;
	}

#if 0
	if (bmap->class) {
		kprintf("%016jx %04d.%04x (avail=%7d) "
			"%08x %08x %08x %08x %08x %08x %08x %08x\n",
			(intmax_t)data_off,
			(int)((data_off &
			       HAMMER2_FREEMAP_LEVEL1_MASK) >>
			      HAMMER2_FREEMAP_LEVEL0_RADIX),
			bmap->class,
			bmap->avail,
			bmap->bitmap[0], bmap->bitmap[1],
			bmap->bitmap[2], bmap->bitmap[3],
			bmap->bitmap[4], bmap->bitmap[5],
			bmap->bitmap[6], bmap->bitmap[7]);
	}
#endif
}

/*
 * BULKFREE DEDUP HEURISTIC
 *
 * WARNING! This code is SMP safe but the heuristic allows SMP collisions.
 *	    All fields must be loaded into locals and validated.
 */
static
int
h2_bulkfree_test(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref,
		 int pri, int saved_error)
{
	hammer2_dedup_t *dedup;
	int best;
	int n;
	int i;

	n = hammer2_icrc32(&bref->data_off, sizeof(bref->data_off));
	dedup = cbinfo->dedup + (n & (HAMMER2_DEDUP_HEUR_MASK & ~7));

	for (i = best = 0; i < 8; ++i) {
		if (dedup[i].data_off == bref->data_off) {
			if (dedup[i].ticks < pri)
				dedup[i].ticks = pri;
			if (pri == 1)
				cbinfo->count_dedup_factor += dedup[i].ticks;
			return (dedup[i].saved_error | HAMMER2_ERROR_EOF);
		}
		if (dedup[i].ticks < dedup[best].ticks)
			best = i;
	}
	dedup[best].data_off = bref->data_off;
	dedup[best].ticks = pri;
	dedup[best].saved_error = saved_error;

	return 0;
}

/*
 * Calculate what the bigmask should be.  bigmask is permissive, so the
 * bits returned must be set at a minimum in the live bigmask.  Other bits
 * might also be set in the live bigmask.
 */
static uint32_t
bigmask_get(hammer2_bmap_data_t *bmap)
{
	hammer2_bitmap_t mask;	/* 64-bit mask to check */
	hammer2_bitmap_t scan;
	uint32_t bigmask;
	uint32_t radix_mask;
	int iter;
	int i;
	int j;

	bigmask = 0;
	for (i = 0; i < HAMMER2_BMAP_ELEMENTS; ++i) {
		mask = bmap->bitmapq[i];

		radix_mask = 1U << HAMMER2_FREEMAP_BLOCK_RADIX;
		radix_mask |= radix_mask - 1;
		iter = 2;	/* each bitmap entry is 2 bits. 2, 4, 8... */
		while (iter <= HAMMER2_BMAP_BITS_PER_ELEMENT) {
			if (iter == HAMMER2_BMAP_BITS_PER_ELEMENT)
				scan = -1;
			else
				scan = (1LU << iter) - 1;
			j = 0;
			while (j < HAMMER2_BMAP_BITS_PER_ELEMENT) {
				/*
				 * Check if all bits are 0 (free block).
				 * If so, set the bit in bigmask for the
				 * allocation radix under test.
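				 *
				 * For example, at iter == 4 the window
				 * (scan) is 0xf; a window of four zero bits
				 * proves a free 32KB run, and radix_mask
				 * has grown to cover one more radix bit
				 * than it did for the 16KB (iter == 2) test.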
				 */
				if ((scan & mask) == 0) {
					bigmask |= radix_mask;
				}
				scan <<= iter;
				j += iter;
			}
			iter <<= 1;
			radix_mask = (radix_mask << 1) | 1;
		}
	}
	return bigmask;
}

static int
bigmask_good(hammer2_bmap_data_t *bmap, uint32_t live_bigmask)
{
	uint32_t bigmask;

	bigmask = bigmask_get(bmap);
	return ((live_bigmask & bigmask) == bigmask);
}
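
#if 0
/*
 * Illustrative sketch only, compiled out: how a caller might drive a
 * full bulkfree pass.  The bfi fields mirror their use in
 * hammer2_bulkfree_pass() above; hmp->vchain and the 8MB buffer size
 * (borrowed from the disabled thread code above) are assumptions here,
 * not a wired-in API contract.
 */
static int
h2_example_bulkfree_pass(hammer2_dev_t *hmp)
{
	hammer2_ioc_bulkfree_t bfi;

	bzero(&bfi, sizeof(bfi));
	bfi.size = 8192 * 1024;		/* 8MB in-memory bmap buffer */
	bfi.sbase = 0;			/* begin at the start of media */

	/*
	 * Scans one [sbase, sstop) window per loop iteration until the
	 * whole volume has been processed; on return bfi.sstop reports
	 * how far the pass progressed.
	 */
	return (hammer2_bulkfree_pass(hmp, &hmp->vchain, &bfi));
}
#endif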