1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Ilya Dryomov <idryomov@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer.h" 36 #include <libutil.h> 37 #include <crypto/sha2/sha2.h> 38 39 #define DEDUP_BUF (64 * 1024) 40 41 /* Sorted list of block CRCs - light version for dedup-simulate */ 42 struct sim_dedup_entry_rb_tree; 43 RB_HEAD(sim_dedup_entry_rb_tree, sim_dedup_entry) sim_dedup_tree = 44 RB_INITIALIZER(&sim_dedup_tree); 45 RB_PROTOTYPE2(sim_dedup_entry_rb_tree, sim_dedup_entry, rb_entry, 46 rb_sim_dedup_entry_compare, hammer_crc_t); 47 48 struct sim_dedup_entry { 49 hammer_crc_t crc; 50 u_int64_t ref_blks; /* number of blocks referenced */ 51 u_int64_t ref_size; /* size of data referenced */ 52 RB_ENTRY(sim_dedup_entry) rb_entry; 53 }; 54 55 /* Sorted list of HAMMER B-Tree keys */ 56 struct dedup_entry_rb_tree; 57 struct sha_dedup_entry_rb_tree; 58 59 RB_HEAD(dedup_entry_rb_tree, dedup_entry) dedup_tree = 60 RB_INITIALIZER(&dedup_tree); 61 RB_PROTOTYPE2(dedup_entry_rb_tree, dedup_entry, rb_entry, 62 rb_dedup_entry_compare, hammer_crc_t); 63 64 RB_PROTOTYPE(sha_dedup_entry_rb_tree, sha_dedup_entry, fict_entry, 65 rb_sha_dedup_entry_compare); 66 67 struct dedup_entry { 68 struct hammer_btree_leaf_elm leaf; 69 union { 70 struct { 71 u_int64_t ref_blks; 72 u_int64_t ref_size; 73 } de; 74 RB_HEAD(sha_dedup_entry_rb_tree, sha_dedup_entry) fict_root; 75 } u; 76 u_int8_t flags; 77 RB_ENTRY(dedup_entry) rb_entry; 78 }; 79 80 #define HAMMER_DEDUP_ENTRY_FICTITIOUS 0x0001 81 82 struct sha_dedup_entry { 83 struct hammer_btree_leaf_elm leaf; 84 u_int64_t ref_blks; 85 u_int64_t ref_size; 86 u_int8_t sha_hash[SHA256_DIGEST_LENGTH]; 87 RB_ENTRY(sha_dedup_entry) fict_entry; 88 }; 89 90 /* 91 * Pass2 list - contains entries that were not dedup'ed because ioctl failed 92 */ 93 STAILQ_HEAD(, pass2_dedup_entry) pass2_dedup_queue = 94 STAILQ_HEAD_INITIALIZER(pass2_dedup_queue); 95 96 struct pass2_dedup_entry { 97 struct hammer_btree_leaf_elm leaf; 98 STAILQ_ENTRY(pass2_dedup_entry) sq_entry; 99 }; 100 101 #define DEDUP_PASS2 0x0001 /* process_btree_elm() mode */ 102 103 /* PFS global ids - we deal with just one PFS at a run */ 104 int glob_fd; 105 struct hammer_ioc_pseudofs_rw glob_pfs; 106 107 /* 108 * Global accounting variables 109 * 110 * Last three don't have to be 64-bit, just to be safe.. 111 */ 112 u_int64_t dedup_alloc_size = 0; 113 u_int64_t dedup_ref_size = 0; 114 u_int64_t dedup_skipped_size = 0; 115 u_int64_t dedup_crc_failures = 0; 116 u_int64_t dedup_sha_failures = 0; 117 u_int64_t dedup_underflows = 0; 118 119 static int rb_sim_dedup_entry_compare(struct sim_dedup_entry *sim_de1, 120 struct sim_dedup_entry *sim_de2); 121 static int rb_dedup_entry_compare(struct dedup_entry *de1, 122 struct dedup_entry *de2); 123 static int rb_sha_dedup_entry_compare(struct sha_dedup_entry *sha_de1, 124 struct sha_dedup_entry *sha_de2); 125 typedef int (*scan_pfs_cb_t)(hammer_btree_leaf_elm_t scan_leaf, int flags); 126 static void scan_pfs(char *filesystem, scan_pfs_cb_t func); 127 static int collect_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags); 128 static int process_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags); 129 static int upgrade_chksum(hammer_btree_leaf_elm_t leaf, u_int8_t *sha_hash); 130 static void dump_simulated_dedup(void); 131 static void dump_real_dedup(void); 132 static void dedup_usage(int code); 133 134 RB_GENERATE2(sim_dedup_entry_rb_tree, sim_dedup_entry, rb_entry, 135 rb_sim_dedup_entry_compare, hammer_crc_t, crc); 136 RB_GENERATE2(dedup_entry_rb_tree, dedup_entry, rb_entry, 137 rb_dedup_entry_compare, hammer_crc_t, leaf.data_crc); 138 RB_GENERATE(sha_dedup_entry_rb_tree, sha_dedup_entry, fict_entry, 139 rb_sha_dedup_entry_compare); 140 141 static int 142 rb_sim_dedup_entry_compare(struct sim_dedup_entry *sim_de1, 143 struct sim_dedup_entry *sim_de2) 144 { 145 if (sim_de1->crc < sim_de2->crc) 146 return (-1); 147 if (sim_de1->crc > sim_de2->crc) 148 return (1); 149 150 return (0); 151 } 152 153 static int 154 rb_dedup_entry_compare(struct dedup_entry *de1, struct dedup_entry *de2) 155 { 156 if (de1->leaf.data_crc < de2->leaf.data_crc) 157 return (-1); 158 if (de1->leaf.data_crc > de2->leaf.data_crc) 159 return (1); 160 161 return (0); 162 } 163 164 static int 165 rb_sha_dedup_entry_compare(struct sha_dedup_entry *sha_de1, 166 struct sha_dedup_entry *sha_de2) 167 { 168 unsigned long *h1 = (unsigned long *)&sha_de1->sha_hash; 169 unsigned long *h2 = (unsigned long *)&sha_de2->sha_hash; 170 int i; 171 172 for (i = 0; i < SHA256_DIGEST_LENGTH / (int)sizeof(unsigned long); ++i) { 173 if (h1[i] < h2[i]) 174 return (-1); 175 if (h1[i] > h2[i]) 176 return (1); 177 } 178 179 return (0); 180 } 181 182 /* 183 * dedup-simulate <filesystem> 184 */ 185 void 186 hammer_cmd_dedup_simulate(char **av, int ac) 187 { 188 struct sim_dedup_entry *sim_de; 189 190 if (ac != 1) 191 dedup_usage(1); 192 193 glob_fd = getpfs(&glob_pfs, av[0]); 194 195 printf("Dedup-simulate "); 196 scan_pfs(av[0], &collect_btree_elm); 197 printf("Dedup-simulate %s succeeded\n", av[0]); 198 199 relpfs(glob_fd, &glob_pfs); 200 201 if (VerboseOpt >= 2) 202 dump_simulated_dedup(); 203 204 /* 205 * Calculate simulated dedup ratio and get rid of the tree 206 */ 207 while ((sim_de = RB_ROOT(&sim_dedup_tree)) != NULL) { 208 assert(sim_de->ref_blks != 0); 209 dedup_ref_size += sim_de->ref_size; 210 dedup_alloc_size += sim_de->ref_size / sim_de->ref_blks; 211 212 RB_REMOVE(sim_dedup_entry_rb_tree, &sim_dedup_tree, sim_de); 213 free(sim_de); 214 } 215 216 printf("Simulated dedup ratio = %.2f\n", 217 (dedup_alloc_size != 0) ? 218 (double)dedup_ref_size / dedup_alloc_size : 0); 219 } 220 221 /* 222 * dedup <filesystem> 223 */ 224 void 225 hammer_cmd_dedup(char **av, int ac) 226 { 227 struct dedup_entry *de; 228 struct sha_dedup_entry *sha_de; 229 struct pass2_dedup_entry *pass2_de; 230 char buf[8]; 231 232 if (TimeoutOpt > 0) 233 alarm(TimeoutOpt); 234 235 if (ac != 1) 236 dedup_usage(1); 237 238 STAILQ_INIT(&pass2_dedup_queue); 239 240 glob_fd = getpfs(&glob_pfs, av[0]); 241 242 printf("Dedup "); 243 scan_pfs(av[0], &process_btree_elm); 244 245 while ((pass2_de = STAILQ_FIRST(&pass2_dedup_queue)) != NULL) { 246 if (process_btree_elm(&pass2_de->leaf, DEDUP_PASS2)) 247 dedup_skipped_size -= pass2_de->leaf.data_len; 248 249 STAILQ_REMOVE_HEAD(&pass2_dedup_queue, sq_entry); 250 free(pass2_de); 251 } 252 assert(STAILQ_EMPTY(&pass2_dedup_queue)); 253 254 printf("Dedup %s succeeded\n", av[0]); 255 256 relpfs(glob_fd, &glob_pfs); 257 258 if (VerboseOpt >= 2) 259 dump_real_dedup(); 260 261 /* 262 * Calculate dedup ratio and get rid of the trees 263 */ 264 while ((de = RB_ROOT(&dedup_tree)) != NULL) { 265 if (de->flags & HAMMER_DEDUP_ENTRY_FICTITIOUS) { 266 while ((sha_de = RB_ROOT(&de->u.fict_root)) != NULL) { 267 assert(sha_de->ref_blks != 0); 268 dedup_ref_size += sha_de->ref_size; 269 dedup_alloc_size += sha_de->ref_size / sha_de->ref_blks; 270 271 RB_REMOVE(sha_dedup_entry_rb_tree, 272 &de->u.fict_root, sha_de); 273 free(sha_de); 274 } 275 assert(RB_EMPTY(&de->u.fict_root)); 276 } else { 277 assert(de->u.de.ref_blks != 0); 278 dedup_ref_size += de->u.de.ref_size; 279 dedup_alloc_size += de->u.de.ref_size / de->u.de.ref_blks; 280 } 281 282 RB_REMOVE(dedup_entry_rb_tree, &dedup_tree, de); 283 free(de); 284 } 285 assert(RB_EMPTY(&dedup_tree)); 286 287 humanize_unsigned(buf, sizeof(buf), dedup_ref_size, "B", 1024); 288 printf("Dedup ratio = %.2f\n" 289 " %8s referenced\n", 290 ((dedup_alloc_size != 0) ? 291 (double)dedup_ref_size / dedup_alloc_size : 0), 292 buf 293 ); 294 humanize_unsigned(buf, sizeof(buf), dedup_alloc_size, "B", 1024); 295 printf(" %8s allocated\n", buf); 296 humanize_unsigned(buf, sizeof(buf), dedup_skipped_size, "B", 1024); 297 printf(" %8s skipped\n", buf); 298 printf(" %8jd CRC collisions\n" 299 " %8jd SHA collisions\n" 300 " %8jd bigblock underflows\n", 301 (intmax_t)dedup_crc_failures, 302 (intmax_t)dedup_sha_failures, 303 (intmax_t)dedup_underflows 304 ); 305 } 306 307 static int 308 collect_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags __unused) 309 { 310 struct sim_dedup_entry *sim_de; 311 312 sim_de = RB_LOOKUP(sim_dedup_entry_rb_tree, &sim_dedup_tree, scan_leaf->data_crc); 313 314 if (sim_de == NULL) { 315 sim_de = calloc(sizeof(*sim_de), 1); 316 sim_de->crc = scan_leaf->data_crc; 317 RB_INSERT(sim_dedup_entry_rb_tree, &sim_dedup_tree, sim_de); 318 } 319 320 sim_de->ref_blks += 1; 321 sim_de->ref_size += scan_leaf->data_len; 322 return (1); 323 } 324 325 static __inline int 326 validate_dedup_pair(hammer_btree_leaf_elm_t p, hammer_btree_leaf_elm_t q) 327 { 328 if ((p->data_offset & HAMMER_OFF_ZONE_MASK) != 329 (q->data_offset & HAMMER_OFF_ZONE_MASK)) { 330 return (1); 331 } 332 if (p->data_len != q->data_len) { 333 return (1); 334 } 335 336 return (0); 337 } 338 339 #define DEDUP_TECH_FAILURE 1 340 #define DEDUP_CMP_FAILURE 2 341 #define DEDUP_INVALID_ZONE 3 342 #define DEDUP_UNDERFLOW 4 343 #define DEDUP_VERS_FAILURE 5 344 345 static __inline int 346 deduplicate(hammer_btree_leaf_elm_t p, hammer_btree_leaf_elm_t q) 347 { 348 struct hammer_ioc_dedup dedup; 349 350 bzero(&dedup, sizeof(dedup)); 351 352 /* 353 * If data_offset fields are the same there is no need to run ioctl, 354 * candidate is already dedup'ed. 355 */ 356 if (p->data_offset == q->data_offset) { 357 return (0); 358 } 359 360 dedup.elm1 = p->base; 361 dedup.elm2 = q->base; 362 RunningIoctl = 1; 363 if (ioctl(glob_fd, HAMMERIOC_DEDUP, &dedup) < 0) { 364 if (errno == EOPNOTSUPP) { 365 /* must be at least version 5 */ 366 return (DEDUP_VERS_FAILURE); 367 } 368 /* Technical failure - locking or w/e */ 369 return (DEDUP_TECH_FAILURE); 370 } 371 if (dedup.head.flags & HAMMER_IOC_DEDUP_CMP_FAILURE) { 372 return (DEDUP_CMP_FAILURE); 373 } 374 if (dedup.head.flags & HAMMER_IOC_DEDUP_INVALID_ZONE) { 375 return (DEDUP_INVALID_ZONE); 376 } 377 if (dedup.head.flags & HAMMER_IOC_DEDUP_UNDERFLOW) { 378 return (DEDUP_UNDERFLOW); 379 } 380 RunningIoctl = 0; 381 382 return (0); 383 } 384 385 static int 386 process_btree_elm(hammer_btree_leaf_elm_t scan_leaf, int flags) 387 { 388 struct dedup_entry *de; 389 struct sha_dedup_entry *sha_de, temp; 390 struct pass2_dedup_entry *pass2_de; 391 int error; 392 393 de = RB_LOOKUP(dedup_entry_rb_tree, &dedup_tree, scan_leaf->data_crc); 394 if (de == NULL) { 395 /* 396 * Insert new entry into CRC tree 397 */ 398 de = calloc(sizeof(*de), 1); 399 de->leaf = *scan_leaf; 400 RB_INSERT(dedup_entry_rb_tree, &dedup_tree, de); 401 goto upgrade_stats; 402 } 403 404 /* 405 * Found entry in CRC tree 406 */ 407 if (de->flags & HAMMER_DEDUP_ENTRY_FICTITIOUS) { 408 /* 409 * Entry in CRC tree is fictitious, so we already had problems 410 * with this CRC. Upgrade (compute SHA) the candidate and 411 * dive into SHA subtree. If upgrade fails insert the candidate 412 * into Pass2 list (it will be processed later). 413 */ 414 if (upgrade_chksum(scan_leaf, temp.sha_hash)) 415 goto pass2_insert; 416 417 sha_de = RB_FIND(sha_dedup_entry_rb_tree, &de->u.fict_root, &temp); 418 if (sha_de == NULL) { 419 /* 420 * Nothing in SHA subtree so far, so this is a new 421 * 'dataset'. Insert new entry into SHA subtree. 422 */ 423 sha_de = calloc(sizeof(*sha_de), 1); 424 sha_de->leaf = *scan_leaf; 425 memcpy(sha_de->sha_hash, temp.sha_hash, SHA256_DIGEST_LENGTH); 426 RB_INSERT(sha_dedup_entry_rb_tree, &de->u.fict_root, sha_de); 427 goto upgrade_stats_sha; 428 } 429 430 /* 431 * Found entry in SHA subtree, it means we have a potential 432 * dedup pair. Validate it (zones have to match and data_len 433 * field have to be the same too. If validation fails, treat 434 * it as a SHA collision (jump to sha256_failure). 435 */ 436 if (validate_dedup_pair(&sha_de->leaf, scan_leaf)) 437 goto sha256_failure; 438 439 /* 440 * We have a valid dedup pair (SHA match, validated). 441 * 442 * In case of technical failure (dedup pair was good, but 443 * ioctl failed anyways) insert the candidate into Pass2 list 444 * (we will try to dedup it after we are done with the rest of 445 * the tree). 446 * 447 * If ioctl fails because either of blocks is in the non-dedup 448 * zone (we can dedup only in LARGE_DATA and SMALL_DATA) don't 449 * bother with the candidate and terminate early. 450 * 451 * If ioctl fails because of bigblock underflow replace the 452 * leaf node that found dedup entry represents with scan_leaf. 453 */ 454 error = deduplicate(&sha_de->leaf, scan_leaf); 455 switch(error) { 456 case 0: 457 goto upgrade_stats_sha; 458 case DEDUP_TECH_FAILURE: 459 goto pass2_insert; 460 case DEDUP_CMP_FAILURE: 461 goto sha256_failure; 462 case DEDUP_INVALID_ZONE: 463 goto terminate_early; 464 case DEDUP_UNDERFLOW: 465 ++dedup_underflows; 466 sha_de->leaf = *scan_leaf; 467 memcpy(sha_de->sha_hash, temp.sha_hash, SHA256_DIGEST_LENGTH); 468 goto upgrade_stats_sha; 469 case DEDUP_VERS_FAILURE: 470 fprintf(stderr, 471 "HAMMER filesystem must be at least " 472 "version 5 to dedup\n"); 473 exit (1); 474 default: 475 fprintf(stderr, "Unknown error\n"); 476 goto terminate_early; 477 } 478 479 /* 480 * Ooh la la.. SHA-256 collision. Terminate early, there's 481 * nothing we can do here. 482 */ 483 sha256_failure: 484 ++dedup_sha_failures; 485 goto terminate_early; 486 } else { 487 /* 488 * Candidate CRC is good for now (we found an entry in CRC 489 * tree and it's not fictitious). This means we have a 490 * potential dedup pair. 491 */ 492 if (validate_dedup_pair(&de->leaf, scan_leaf)) 493 goto crc_failure; 494 495 /* 496 * We have a valid dedup pair (CRC match, validated) 497 */ 498 error = deduplicate(&de->leaf, scan_leaf); 499 switch(error) { 500 case 0: 501 goto upgrade_stats; 502 case DEDUP_TECH_FAILURE: 503 goto pass2_insert; 504 case DEDUP_CMP_FAILURE: 505 goto crc_failure; 506 case DEDUP_INVALID_ZONE: 507 goto terminate_early; 508 case DEDUP_UNDERFLOW: 509 ++dedup_underflows; 510 de->leaf = *scan_leaf; 511 goto upgrade_stats; 512 case DEDUP_VERS_FAILURE: 513 fprintf(stderr, 514 "HAMMER filesystem must be at least " 515 "version 5 to dedup\n"); 516 exit (1); 517 default: 518 fprintf(stderr, "Unknown error\n"); 519 goto terminate_early; 520 } 521 522 crc_failure: 523 /* 524 * We got a CRC collision - either ioctl failed because of 525 * the comparison failure or validation of the potential 526 * dedup pair went bad. In all cases insert both blocks 527 * into SHA subtree (this requires checksum upgrade) and mark 528 * entry that corresponds to this CRC in the CRC tree 529 * fictitious, so that all futher operations with this CRC go 530 * through SHA subtree. 531 */ 532 ++dedup_crc_failures; 533 /* 534 * Insert block that was represented by now fictitious dedup 535 * entry (create a new SHA entry and preserve stats of the 536 * old CRC one). If checksum upgrade fails insert the 537 * candidate into Pass2 list and return - keep both trees 538 * unmodified. 539 */ 540 sha_de = calloc(sizeof(*sha_de), 1); 541 sha_de->leaf = de->leaf; 542 sha_de->ref_blks = de->u.de.ref_blks; 543 sha_de->ref_size = de->u.de.ref_size; 544 if (upgrade_chksum(&sha_de->leaf, sha_de->sha_hash)) { 545 free(sha_de); 546 goto pass2_insert; 547 } 548 549 RB_INIT(&de->u.fict_root); 550 /* 551 * Here we can insert without prior checking because the tree 552 * is empty at this point 553 */ 554 RB_INSERT(sha_dedup_entry_rb_tree, &de->u.fict_root, sha_de); 555 556 /* 557 * Mark entry in CRC tree fictitious 558 */ 559 de->flags |= HAMMER_DEDUP_ENTRY_FICTITIOUS; 560 561 /* 562 * Upgrade checksum of the candidate and insert it into 563 * SHA subtree. If upgrade fails insert the candidate into 564 * Pass2 list. 565 */ 566 if (upgrade_chksum(scan_leaf, temp.sha_hash)) { 567 goto pass2_insert; 568 } 569 sha_de = RB_FIND(sha_dedup_entry_rb_tree, &de->u.fict_root, &temp); 570 if (sha_de != NULL) 571 /* There is an entry with this SHA already, but the only 572 * RB-tree element at this point is that entry we just 573 * added. We know for sure these blocks are different 574 * (this is crc_failure branch) so treat it as SHA 575 * collision. 576 */ 577 goto sha256_failure; 578 579 sha_de = calloc(sizeof(*sha_de), 1); 580 sha_de->leaf = *scan_leaf; 581 memcpy(sha_de->sha_hash, temp.sha_hash, SHA256_DIGEST_LENGTH); 582 RB_INSERT(sha_dedup_entry_rb_tree, &de->u.fict_root, sha_de); 583 goto upgrade_stats_sha; 584 } 585 586 upgrade_stats: 587 de->u.de.ref_blks += 1; 588 de->u.de.ref_size += scan_leaf->data_len; 589 return (1); 590 591 upgrade_stats_sha: 592 sha_de->ref_blks += 1; 593 sha_de->ref_size += scan_leaf->data_len; 594 return (1); 595 596 pass2_insert: 597 /* 598 * If in pass2 mode don't insert anything, fall through to 599 * terminate_early 600 */ 601 if ((flags & DEDUP_PASS2) == 0) { 602 pass2_de = calloc(sizeof(*pass2_de), 1); 603 pass2_de->leaf = *scan_leaf; 604 STAILQ_INSERT_TAIL(&pass2_dedup_queue, pass2_de, sq_entry); 605 dedup_skipped_size += scan_leaf->data_len; 606 return (1); 607 } 608 609 terminate_early: 610 /* 611 * Early termination path. Fixup stats. 612 */ 613 dedup_alloc_size += scan_leaf->data_len; 614 dedup_ref_size += scan_leaf->data_len; 615 return (0); 616 } 617 618 static int 619 upgrade_chksum(hammer_btree_leaf_elm_t leaf, u_int8_t *sha_hash) 620 { 621 struct hammer_ioc_data data; 622 char *buf = malloc(DEDUP_BUF); 623 SHA256_CTX ctx; 624 int error; 625 626 bzero(&data, sizeof(data)); 627 data.elm = leaf->base; 628 data.ubuf = buf; 629 data.size = DEDUP_BUF; 630 631 error = 0; 632 if (ioctl(glob_fd, HAMMERIOC_GET_DATA, &data) < 0) { 633 fprintf(stderr, "Get-data failed: %s\n", strerror(errno)); 634 error = 1; 635 goto done; 636 } 637 638 if (data.leaf.data_len != leaf->data_len) { 639 error = 1; 640 goto done; 641 } 642 643 if (data.leaf.base.btype == HAMMER_BTREE_TYPE_RECORD && 644 data.leaf.base.rec_type == HAMMER_RECTYPE_DATA) { 645 SHA256_Init(&ctx); 646 SHA256_Update(&ctx, (void *)buf, data.leaf.data_len); 647 SHA256_Final(sha_hash, &ctx); 648 } 649 650 done: 651 free(buf); 652 return (error); 653 } 654 655 static void 656 scan_pfs(char *filesystem, scan_pfs_cb_t func) 657 { 658 struct hammer_ioc_mirror_rw mirror; 659 hammer_ioc_mrecord_any_t mrec; 660 struct hammer_btree_leaf_elm elm; 661 char *buf = malloc(DEDUP_BUF); 662 int offset, bytes; 663 664 bzero(&mirror, sizeof(mirror)); 665 hammer_key_beg_init(&mirror.key_beg); 666 hammer_key_end_init(&mirror.key_end); 667 668 mirror.tid_beg = glob_pfs.ondisk->sync_beg_tid; 669 mirror.tid_end = glob_pfs.ondisk->sync_end_tid; 670 mirror.head.flags |= HAMMER_IOC_MIRROR_NODATA; /* we want only keys */ 671 mirror.ubuf = buf; 672 mirror.size = DEDUP_BUF; 673 mirror.pfs_id = glob_pfs.pfs_id; 674 mirror.shared_uuid = glob_pfs.ondisk->shared_uuid; 675 676 printf("%s: objspace %016jx:%04x %016jx:%04x pfs_id %d\n", 677 filesystem, 678 (uintmax_t)mirror.key_beg.obj_id, 679 mirror.key_beg.localization, 680 (uintmax_t)mirror.key_end.obj_id, 681 mirror.key_end.localization, 682 glob_pfs.pfs_id); 683 fflush(stdout); 684 685 do { 686 mirror.count = 0; 687 mirror.pfs_id = glob_pfs.pfs_id; 688 mirror.shared_uuid = glob_pfs.ondisk->shared_uuid; 689 if (ioctl(glob_fd, HAMMERIOC_MIRROR_READ, &mirror) < 0) { 690 fprintf(stderr, "Mirror-read %s failed: %s\n", 691 filesystem, strerror(errno)); 692 exit(1); 693 } 694 if (mirror.head.flags & HAMMER_IOC_HEAD_ERROR) { 695 fprintf(stderr, "Mirror-read %s fatal error %d\n", 696 filesystem, mirror.head.error); 697 exit(1); 698 } 699 if (mirror.count) { 700 offset = 0; 701 while (offset < mirror.count) { 702 mrec = (void *)((char *)buf + offset); 703 bytes = HAMMER_HEAD_DOALIGN(mrec->head.rec_size); 704 if (offset + bytes > mirror.count) { 705 fprintf(stderr, "Misaligned record\n"); 706 exit(1); 707 } 708 assert((mrec->head.type & 709 HAMMER_MRECF_TYPE_MASK) == HAMMER_MREC_TYPE_REC); 710 711 elm = mrec->rec.leaf; 712 if (elm.base.btype == HAMMER_BTREE_TYPE_RECORD && 713 elm.base.rec_type == HAMMER_RECTYPE_DATA) { 714 func(&elm, 0); 715 } 716 offset += bytes; 717 } 718 } 719 mirror.key_beg = mirror.key_cur; 720 } while (mirror.count != 0); 721 722 free(buf); 723 } 724 725 static void 726 dump_simulated_dedup(void) 727 { 728 struct sim_dedup_entry *sim_de; 729 730 printf("=== Dumping simulated dedup entries:\n"); 731 RB_FOREACH(sim_de, sim_dedup_entry_rb_tree, &sim_dedup_tree) { 732 printf("\tcrc=%08x cnt=%ju size=%ju\n", 733 sim_de->crc, 734 (intmax_t)sim_de->ref_blks, 735 (intmax_t)sim_de->ref_size); 736 } 737 printf("end of dump ===\n"); 738 } 739 740 static void 741 dump_real_dedup(void) 742 { 743 struct dedup_entry *de; 744 struct sha_dedup_entry *sha_de; 745 int i; 746 747 printf("=== Dumping dedup entries:\n"); 748 RB_FOREACH(de, dedup_entry_rb_tree, &dedup_tree) { 749 if (de->flags & HAMMER_DEDUP_ENTRY_FICTITIOUS) { 750 printf("\tcrc=%08x fictitious\n", de->leaf.data_crc); 751 752 RB_FOREACH(sha_de, sha_dedup_entry_rb_tree, &de->u.fict_root) { 753 printf("\t\tcrc=%08x cnt=%ju size=%ju\n\t" 754 "\t\tsha=", 755 sha_de->leaf.data_crc, 756 (intmax_t)sha_de->ref_blks, 757 (intmax_t)sha_de->ref_size); 758 for (i = 0; i < SHA256_DIGEST_LENGTH; ++i) 759 printf("%02x", sha_de->sha_hash[i]); 760 printf("\n"); 761 } 762 } else { 763 printf("\tcrc=%08x cnt=%ju size=%ju\n", 764 de->leaf.data_crc, 765 (intmax_t)de->u.de.ref_blks, 766 (intmax_t)de->u.de.ref_size); 767 } 768 } 769 printf("end of dump ===\n"); 770 } 771 772 static void 773 dedup_usage(int code) 774 { 775 fprintf(stderr, 776 "hammer dedup-simulate <filesystem>\n" 777 "hammer dedup <filesystem>\n" 778 ); 779 exit(code); 780 } 781