1 /* 2 * Copyright (c) 2023 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 /* 36 * hammer2 recover <devpath> <path> <destdir> 37 * 38 * Recover files from corrupted media, recover deleted files from good 39 * media, generally ignoring the data structure topology outside of the 40 * structures that hang off of inodes and directory entries. Files are 41 * validated during recovery and renamed to .corrupted when a version of 42 * a file cannot be completely recovered. 43 * 44 * This is a "try to get everything we can out of the filesystem" 45 * directive when you've done something terrible to the filesystem. 46 * The resulting <destdir> tree cannot directly replace the lost topology 47 * as many versions of the same file will be present, so filenames are 48 * suffixed. 49 * 50 * <path> may be a relative file, directory, directory path, or absolute 51 * (absolute is relative to the mount) file or directory path. 52 * 53 * For example, "hammer2 recover /dev/da0s1d /home/charlie /tmp/" will 54 * recover all possible versions of the /home/charlie sub-tree. If 55 * you said "home/charlie" instead, then it would recover the same but 56 * also include any sub-directory paths that match home/charlie, not 57 * just root paths. If you want a specific file, then e.g. ".cshrc" 58 * would recover every single .cshrc that can be found on the media. 59 * 60 * The command checks ALL PFSs and snapshots. Redundant files with the same 61 * path are ignored. You basically get everything that can possibly be 62 * recovered from the media. 
 */
#include "hammer2.h"

/*
 * All lookup structures are fixed-size power-of-2 chained hash tables.
 */
#define HTABLE_SIZE	(4*1024*1024)
#define HTABLE_MASK	(HTABLE_SIZE - 1)
#define DISPMODULO	(HTABLE_SIZE / 32768)	/* progress display interval */

#include <openssl/sha.h>

/*
 * A directory entry located during the media scan (currently only built
 * by the disabled enter_dirent() path).
 */
typedef struct dirent_entry {
	struct dirent_entry *next;
	char *filename;
	size_t len;
	hammer2_key_t inum;
	hammer2_off_t bref_off;
} dirent_entry_t;

/*
 * One validated (or untested PFS-root) inode located on the media.  Only
 * the media location and a content CRC are recorded; the inode itself is
 * re-read from media during the restoration pass to keep memory usage down.
 */
typedef struct inode_entry {
	struct inode_entry *next;
	struct inode_entry *next2;	/* secondary (ino ^ data_off) hash */
	hammer2_off_t data_off;
	hammer2_key_t inum;		/* from bref or inode meta */
	//hammer2_inode_data_t inode;	/* (removed, too expensive) */
	char *link_file_path;		/* first extracted copy, for hardlinks */
	uint32_t inode_crc;		/* guards against live-media changes */
	uint8_t type;			/* from inode meta */
	uint8_t encountered;		/* copies limit w/REPINODEDEPTH */
	uint8_t loopcheck;		/* recursion loop check */
	uint8_t unused03;
} inode_entry_t;

/*
 * One destination path (without the version-iterator suffix).  The
 * iterator numbers successive recovered versions of the same file.
 */
typedef struct topology_entry {
	struct topology_entry *next;
	char *path;
	long iterator;
} topology_entry_t;

/* negative cache entry: a bref already determined to be invalid */
typedef struct neg_entry {
	struct neg_entry *next;
	hammer2_blockref_t bref;
} neg_entry_t;

/* (topology, indirect-block data offset) pair already visited */
typedef struct topo_bref_entry {
	struct topo_bref_entry *next;
	topology_entry_t *topo;
	hammer2_off_t data_off;
} topo_bref_entry_t;

/* (topology, inode) pair already visited */
typedef struct topo_inode_entry {
	struct topo_inode_entry *next;
	topology_entry_t *topo;
	inode_entry_t *iscan;
} topo_inode_entry_t;

static dirent_entry_t **DirHash;
static inode_entry_t **InodeHash;
static inode_entry_t **InodeHash2;	/* secondary multi-variable hash */
static topology_entry_t **TopologyHash;
static neg_entry_t **NegativeHash;
static topo_bref_entry_t **TopoBRefHash;
static topo_inode_entry_t **TopoInodeHash;

/*static void resolve_topology(void);*/
static int check_filename(hammer2_blockref_t *bref,
			const char *filename, char *buf, size_t flen);
//static void enter_dirent(hammer2_blockref_t *bref, hammer2_off_t loff,
//			const char *filename, size_t flen);
static topology_entry_t *enter_topology(const char *path);
static int topology_check_duplicate_inode(topology_entry_t *topo,
			inode_entry_t *iscan);
static int topology_check_duplicate_indirect(topology_entry_t *topo,
			hammer2_blockref_t *bref);
static void enter_inode(hammer2_blockref_t *bref);
static void enter_inode_untested(hammer2_inode_data_t *ip, hammer2_off_t loff);
static inode_entry_t *find_first_inode(hammer2_key_t inum);
/*static dirent_entry_t *find_first_dirent(hammer2_key_t inum);*/
static int find_neg(hammer2_blockref_t *bref);
static void enter_neg(hammer2_blockref_t *bref);
static void dump_tree(inode_entry_t *iscan, const char *dest,
			const char *remain, int depth, int path_depth,
			int isafile);
static int dump_inum_file(inode_entry_t *iscan, hammer2_inode_data_t *inode,
			const char *path);
static int dump_inum_softlink(hammer2_inode_data_t *inode, const char *path);
static int dump_dir_data(const char *dest, const char *remain,
			hammer2_blockref_t *base, int count,
			int depth, int path_depth, int isafile);
static int dump_file_data(int wfd, hammer2_off_t fsize,
			hammer2_blockref_t *bref, int count);
static int validate_crc(hammer2_blockref_t *bref, void *data, size_t bytes);
static uint32_t hammer2_to_unix_xid(const uuid_t *uuid);
static void *hammer2_cache_read(hammer2_off_t data_off, size_t *bytesp);

/* global statistics, reported during/after the scan */
static long InodeCount;
static long TopoBRefCount;
static long TopoBRefDupCount;
static long TopoInodeCount;
static long TopoInodeDupCount;
static long NegativeCount;
static long NegativeHits;
static long MediaBytes;
static int StrictMode = 1;

#define INODES_PER_BLOCK	\
	(sizeof(union hammer2_media_data) / sizeof(hammer2_inode_data_t))
#define REPINODEDEPTH		256	/* see dump_tree() loop limiting */

/*
 * Recover the specified file.
 *
 * Basically do a raw scan of the drive image looking for directory entries
 * and inodes.  Index all inodes found, including copies, and filter
 * directory entries for the requested filename to locate inode numbers.
 *
 * All copies that are located are written to destdir with a suffix .00001,
 * .00002, etc.
 */
int
cmd_recover(const char *devpath, const char *pathname,
	    const char *destdir, int strict, int isafile)
{
	hammer2_media_data_t data;
	hammer2_volume_t *vol;
	hammer2_off_t loff;
	hammer2_off_t poff;
	size_t i;

	StrictMode = strict;
	hammer2_init_volumes(devpath, 1);
	DirHash = calloc(HTABLE_SIZE, sizeof(dirent_entry_t *));
	InodeHash = calloc(HTABLE_SIZE, sizeof(inode_entry_t *));
	InodeHash2 = calloc(HTABLE_SIZE, sizeof(inode_entry_t *));
	TopologyHash = calloc(HTABLE_SIZE, sizeof(topology_entry_t *));
	NegativeHash = calloc(HTABLE_SIZE, sizeof(neg_entry_t *));
	TopoBRefHash = calloc(HTABLE_SIZE, sizeof(topo_bref_entry_t *));
	TopoInodeHash = calloc(HTABLE_SIZE, sizeof(topo_inode_entry_t *));

	/*
	 * Media Pass
	 *
	 * Look for blockrefs that point to inodes.  The blockrefs could
	 * be bogus since we aren't validating them, but the combination
	 * of a CRC that matches the inode content is fairly robust in
	 * finding actual inodes.
	 *
	 * We also enter unvalidated inodes for inode #1 (PFS roots),
	 * because there might not be any blockrefs pointing to some of
	 * them.  We need these to be able to locate directory entries
	 * under the roots.
	 *
	 * At the moment we do not try to enter unvalidated directory
	 * entries, since this will result in a massive number of false
	 * hits
	 */
	printf("MEDIA PASS\n");

	loff = 0;
	while ((vol = hammer2_get_volume(loff)) != NULL) {
		int fd;
		int xdisp;
		hammer2_off_t vol_size;

		fd = vol->fd;
		poff = loff - vol->offset;
		vol_size = lseek(fd, 0L, SEEK_END);
		xdisp = 0;

		while (poff < vol->size) {
			if (pread(fd, &data, sizeof(data), poff) !=
			    sizeof(data))
			{
				/* try to skip possible I/O error */
				poff += sizeof(data);
				continue;
			}
			/*
			 * Treat the block as a raw array of blockrefs and
			 * test each slot; enter_inode() filters out bogus
			 * candidates.
			 */
			for (i = 0; i < HAMMER2_IND_COUNT_MAX; ++i) {
				hammer2_blockref_t *bref;
				// char filename_buf[1024+1];

				bref = &data.npdata[i];

				/*
				 * Found a possible inode
				 */
				switch(bref->type) {
				case HAMMER2_BREF_TYPE_INODE:
					/*
					 * Note: preliminary bref filter
					 *	 is inside enter_inode().
					 */
					enter_inode(bref);
					break;
				case HAMMER2_BREF_TYPE_DIRENT:
					/*
					 * Go overboard and try to index
					 * anything that looks like a
					 * directory entry.  This might find
					 * entries whose inodes are no longer
					 * available, but will also generate
					 * a lot of false files.
					 */
#if 0
					if (filename &&
					    flen != bref->embed.dirent.namlen)
					{
						/* single-file shortcut */
						break;
					}
					if (check_filename(bref, filename,
							   filename_buf,
						   bref->embed.dirent.namlen))
					{
						enter_dirent(bref,
							poff + vol->offset +
							(i *
						sizeof(hammer2_blockref_t)),
							filename_buf,
						     bref->embed.dirent.namlen);
					}
#endif
					break;
				default:
					break;
				}
			}

			/*
			 * Look for possible root inodes.  We generally can't
			 * find these by finding BREFs pointing to them because
			 * the BREFs often hang off the volume header.
			 *
			 * These "inodes" could be seriously corrupt, but if
			 * the bref tree is intact that is what we need to
			 * get top-level directory entries.
			 */
			for (i = 0; i < INODES_PER_BLOCK; ++i) {
				hammer2_inode_data_t *ip;

				ip = (void *)(data.buf + i * sizeof(*ip));
				if (ip->meta.inum == 1 &&
				    ip->meta.iparent == 0 &&
				    ip->meta.type ==
					HAMMER2_OBJTYPE_DIRECTORY &&
				    ip->meta.op_flags & HAMMER2_OPFLAG_PFSROOT)
				{
					enter_inode_untested(ip,
						poff + vol->offset +
						(i * sizeof(*ip)));
				}
			}
			poff += sizeof(data);

			MediaBytes += sizeof(data);

			/*
			 * Update progress
			 */
			if (QuietOpt == 0 &&
			    (++xdisp == DISPMODULO ||
			     poff == vol->size - sizeof(data)))
			{
				xdisp = 0;
				printf("%ld inodes scanned, "
				       "media %6.2f/%-3.2fG\r",
				       InodeCount,
				       MediaBytes / 1e9,
				       vol_size / 1e9);
				fflush(stdout);
			}
		}
		loff = vol->offset + vol->size;
	}

	/*
	 * Restoration Pass
	 *
	 * Run through available directory inodes, which allows us to locate
	 * and validate (crc check) their directory entry blockrefs and
	 * construct absolute or relative paths through a recursion.
	 *
	 * When an absolute path is obtained the search is anchored on a
	 * root inode.  When a relative path is obtained the search is
	 * unanchored and will find all matching sub-paths.  For example,
	 * if you look for ".cshrc" it will find ALL .cshrc's.  If you
	 * look for "fubar/.cshsrc" it will find ALL .cshrc's residing
	 * in a directory called fubar, however many there are.  But if
	 * you look for "/fubar/srcs" it will only find the sub-tree
	 * "/fubar/srcs" relative to PFS roots.
	 *
	 * We may not have indexed the PFS roots themselves, because they
	 * often hang off of the volume header and might not have COW'd
	 * references to them, so we use the "iparent" field in the inode
	 * to detect top-level directories under those roots.
	 */
	printf("\nInodes=%ld, Invalid_brefs=%ld, Invalid_hits=%ld\n",
	       InodeCount, NegativeCount, NegativeHits);
	printf("RESTORATION PASS\n");

	{
		int abspath = 0;
		long root_count = 0;
		long root_max = 0;

		/*
		 * Check for absolute path, else relative
		 */
		if (pathname[0] == '/') {
			abspath = 1;
			while (*pathname == '/')
				++pathname;
		}

		/*
		 * Count root inodes.  Inode number 1 always hashes to
		 * bucket 1 with the (inum ^ (inum >> 16)) hash used by
		 * enter_inode().
		 */
		{
			inode_entry_t *iscan;

			for (iscan = InodeHash[1];
			     iscan;
			     iscan = iscan->next)
			{
				if (iscan->inum == 1)
					++root_max;
			}
		}

		/*
		 * Run through all directory inodes to locate validated
		 * directory entries.  If an absolute path was specified
		 * we start at root inodes.
		 */
		for (i = 0; i < HTABLE_SIZE; ++i) {
			inode_entry_t *iscan;

			for (iscan = InodeHash[i];
			     iscan;
			     iscan = iscan->next)
			{
				/*
				 * Absolute paths always start at root inodes,
				 * otherwise we can start at any directory
				 * inode.
				 */
				if (abspath && iscan->inum != 1)
					continue;
				if (iscan->type != HAMMER2_OBJTYPE_DIRECTORY)
					continue;

				/*
				 * Progress down root inodes can be slow,
				 * so print progress for each root inode.
				 *
				 * NOTE(review): iscan->data_off is printed
				 * with %jx without a (uintmax_t) cast --
				 * confirm hammer2_off_t matches uintmax_t
				 * on all supported platforms.
				 */
				if (i == 1 && iscan->inum == 1 &&
				    QuietOpt == 0)
				{
					printf("scan roots %p 0x%016jx "
					       "(count %ld/%ld)\r",
					       iscan,
					       iscan->data_off,
					       ++root_count, root_max);
					fflush(stdout);
				}

				/*
				 * Primary match/recover recursion
				 */
				dump_tree(iscan, destdir, pathname,
					  1, 1, isafile);
			}
			if (QuietOpt == 0 &&
			    (i & (DISPMODULO - 1)) == DISPMODULO - 1)
			{
				if (i == DISPMODULO - 1)
					printf("\n");
				printf("Progress %zd/%d\r", i, HTABLE_SIZE);
				fflush(stdout);
			}
		}
		printf("\n");
	}

	printf("CLEANUP\n");
	printf("TopoBRef stats: count=%ld dups=%ld\n",
	       TopoBRefCount, TopoBRefDupCount);
	printf("TopoInode stats: count=%ld dups=%ld\n",
	       TopoInodeCount, TopoInodeDupCount);

	/*
	 * Cleanup.  InodeHash2[] chains the same structures as InodeHash[]
	 * so only one of the two tables frees the entries.
	 */
	hammer2_cleanup_volumes();

	for (i = 0; i < HTABLE_SIZE; ++i) {
		dirent_entry_t *dscan;
		inode_entry_t *iscan;
		topology_entry_t *top_scan;
		neg_entry_t *negscan;
		topo_bref_entry_t *topo_bref;
		topo_inode_entry_t *topo_inode;

		while ((dscan = DirHash[i]) != NULL) {
			DirHash[i] = dscan->next;
			free(dscan->filename);
			free(dscan);
		}

		while ((iscan = InodeHash[i]) != NULL) {
			InodeHash[i] = iscan->next;
			free(iscan);
		}
		/* InodeHash2[] indexes the same structures */

		while ((top_scan = TopologyHash[i]) != NULL) {
			TopologyHash[i] = top_scan->next;
			free(top_scan->path);
			free(top_scan);
		}

		while ((negscan = NegativeHash[i]) != NULL) {
			NegativeHash[i] = negscan->next;
			free(negscan);
		}

		while ((topo_bref = TopoBRefHash[i]) != NULL) {
			TopoBRefHash[i] = topo_bref->next;
			free(topo_bref);
		}

		while ((topo_inode = TopoInodeHash[i]) != NULL) {
			TopoInodeHash[i] = topo_inode->next;
			free(topo_inode);
		}
	}
	free(TopoInodeHash);
	free(TopoBRefHash);
	free(NegativeHash);
	free(TopologyHash);
	free(DirHash);
free(InodeHash); 503 free(InodeHash2); 504 505 return 0; 506 } 507 508 /* 509 * Check for a matching filename, Directory entries can directly-embed 510 * filenames <= 64 bytes. Otherwise the directory entry has a data 511 * reference to the location of the filename. 512 * 513 * If filename is NULL, check for a valid filename, and copy it into buf. 514 */ 515 static int 516 check_filename(hammer2_blockref_t *bref, const char *filename, char *buf, 517 size_t flen) 518 { 519 /* filename too long */ 520 if (flen > 1024) 521 return 0; 522 523 if (flen <= 64) { 524 /* 525 * Filename is embedded in bref 526 */ 527 if (buf) 528 bcopy(bref->check.buf, buf, flen); 529 buf[flen] = 0; 530 if (filename == NULL) 531 return 1; 532 if (bcmp(filename, bref->check.buf, flen) == 0) 533 return 1; 534 } else { 535 /* 536 * Filename requires media access 537 */ 538 hammer2_media_data_t data; 539 hammer2_volume_t *vol; 540 hammer2_off_t poff; 541 hammer2_off_t psize; 542 int vfd; 543 544 /* 545 * bref must represent a data reference to a 1KB block or 546 * smaller. 547 */ 548 if ((bref->data_off & 0x1F) == 0 || 549 (bref->data_off & 0x1F) > 10) 550 { 551 return 0; 552 } 553 554 /* 555 * Indirect block containing filename must be large enough 556 * to contain the filename. 557 */ 558 psize = 1 << (bref->data_off & 0x1F); 559 if (flen > psize) 560 return 0; 561 562 /* 563 * In strict mode we disallow bref's set to HAMMER2_CHECK_NONE 564 * or HAMMER2_CHECK_DISABLED. Do this check before burning 565 * time on an I/O. 
566 */ 567 if (StrictMode) { 568 if (HAMMER2_DEC_CHECK(bref->methods) == 569 HAMMER2_CHECK_NONE || 570 HAMMER2_DEC_CHECK(bref->methods) == 571 HAMMER2_CHECK_DISABLED) 572 { 573 return 0; 574 } 575 } 576 577 /* 578 * Read the data, check CRC and such 579 */ 580 vol = hammer2_get_volume(bref->data_off); 581 if (vol == NULL) 582 return 0; 583 584 vfd = vol->fd; 585 poff = (bref->data_off - vol->offset) & ~0x1FL; 586 if (pread(vfd, &data, psize, poff) != (ssize_t)psize) 587 return 0; 588 589 if (validate_crc(bref, &data, psize) == 0) 590 return 0; 591 592 if (buf) 593 bcopy(data.buf, buf, flen); 594 buf[flen] = 0; 595 if (filename == NULL) 596 return 1; 597 if (bcmp(filename, data.buf, flen) == 0) 598 return 1; 599 } 600 return 0; 601 } 602 603 #if 0 604 static void 605 enter_dirent(hammer2_blockref_t *bref, hammer2_off_t loff, 606 const char *filename, size_t flen) 607 { 608 hammer2_key_t inum = bref->embed.dirent.inum; 609 dirent_entry_t *entry; 610 uint32_t hv = (inum ^ (inum >> 16)) & HTABLE_MASK; 611 612 for (entry = DirHash[hv]; entry; entry = entry->next) { 613 if (entry->inum == inum && 614 entry->len == flen && 615 bcmp(entry->filename, filename, flen) == 0) 616 { 617 return; 618 } 619 } 620 entry = malloc(sizeof(*entry)); 621 bzero(entry, sizeof(*entry)); 622 entry->inum = inum; 623 entry->next = DirHash[hv]; 624 entry->filename = malloc(flen + 1); 625 entry->len = flen; 626 entry->bref_off = loff; 627 bcopy(filename, entry->filename, flen); 628 entry->filename[flen] = 0; 629 DirHash[hv] = entry; 630 } 631 #endif 632 633 /* 634 * Topology duplicate scan avoidance helpers. We associate inodes and 635 * indirect block data offsets, allowing us to avoid re-scanning any 636 * duplicates that we see. And there will be many due to how the COW 637 * process occurs. 
638 * 639 * For example, when a large directory is modified the content update to 640 * the directory entries will cause the directory inode to be COWd, along 641 * with whatever is holding the bref(s) blocks that have undergone 642 * adjustment. More likely than not, there will be MANY shared indirect 643 * blocks. 644 */ 645 static topology_entry_t * 646 enter_topology(const char *path) 647 { 648 topology_entry_t *topo; 649 uint32_t hv = 0; 650 size_t i; 651 652 for (i = 0; path[i]; ++i) 653 hv = (hv << 5) ^ path[i] ^ (hv >> 24); 654 hv = (hv ^ (hv >> 16)) & HTABLE_MASK; 655 for (topo = TopologyHash[hv]; topo; topo = topo->next) { 656 if (strcmp(path, topo->path) == 0) 657 return topo; 658 } 659 topo = malloc(sizeof(*topo)); 660 bzero(topo, sizeof(*topo)); 661 662 topo->next = TopologyHash[hv]; 663 TopologyHash[hv] = topo; 664 topo->path = strdup(path); 665 topo->iterator = 1; 666 667 return topo; 668 } 669 670 /* 671 * Determine if an inode at the current topology location is one that we 672 * have already dealt with. 673 */ 674 static int 675 topology_check_duplicate_inode(topology_entry_t *topo, inode_entry_t *iscan) 676 { 677 int hv = (((intptr_t)topo ^ (intptr_t)iscan) >> 6) & HTABLE_MASK; 678 topo_inode_entry_t *scan; 679 680 for (scan = TopoInodeHash[hv]; scan; scan = scan->next) { 681 if (scan->topo == topo && 682 scan->iscan == iscan) 683 { 684 ++TopoInodeDupCount; 685 return 1; 686 } 687 } 688 scan = malloc(sizeof(*scan)); 689 bzero(scan, sizeof(*scan)); 690 scan->iscan = iscan; 691 scan->topo = topo; 692 scan->next = TopoInodeHash[hv]; 693 TopoInodeHash[hv] = scan; 694 ++TopoInodeCount; 695 696 return 0; 697 } 698 699 /* 700 * Determine if an indirect block (represented by the bref) at the current 701 * topology level is one that we have already dealt with. 
 */
static int
topology_check_duplicate_indirect(topology_entry_t *topo,
				  hammer2_blockref_t *bref)
{
	int hv = ((intptr_t)topo ^ (bref->data_off >> 8)) & HTABLE_MASK;
	topo_bref_entry_t *scan;

	for (scan = TopoBRefHash[hv]; scan; scan = scan->next) {
		if (scan->topo == topo &&
		    scan->data_off == bref->data_off)
		{
			++TopoBRefDupCount;
			return 1;
		}
	}

	/* first sighting, record the (topo, data_off) pair */
	scan = malloc(sizeof(*scan));
	bzero(scan, sizeof(*scan));
	scan->data_off = bref->data_off;
	scan->topo = topo;
	scan->next = TopoBRefHash[hv];
	TopoBRefHash[hv] = scan;
	++TopoBRefCount;

	return 0;
}

/*
 * Validate and record an inode found on media.  There can be many versions
 * of the same inode number present on the media.
 */
static void
enter_inode(hammer2_blockref_t *bref)
{
	uint32_t hv;
	uint32_t hv2;
	inode_entry_t *scan;
	hammer2_inode_data_t *inode;
	size_t psize;

	hv = (bref->key ^ (bref->key >> 16)) & HTABLE_MASK;
	hv2 = (bref->key ^ (bref->key >> 16) ^ (bref->data_off >> 10)) &
	      HTABLE_MASK;

	/*
	 * Ignore duplicate inodes, use the secondary inode hash table's
	 * better spread to reduce cpu consumption (there can be many
	 * copies of the same inode so the primary hash table can have
	 * very long chains in it).
	 */
	for (scan = InodeHash2[hv2]; scan; scan = scan->next2) {
		if (bref->key == scan->inum &&
		    bref->data_off == scan->data_off)
		{
			return;
		}
	}

	/*
	 * Ignore brefs which we have already determined to be bad
	 */
	if (find_neg(bref))
		return;

	/*
	 * Validate the potential blockref.  Note that this might not be a
	 * real blockref.  Don't trust anything, really.
	 *
	 * - Must be sized for an inode block
	 * - Must be properly aligned for an inode block
	 * - Keyspace is 1 (keybits == 0), i.e. a single inode number
	 */
	if ((1 << (bref->data_off & 0x1F)) != sizeof(*inode))
		return;
	if ((bref->data_off & ~0x1FL & (sizeof(*inode) - 1)) != 0)
		return;
	if (bref->keybits != 0)
		return;
	if (bref->key == 0)
		return;

	inode = hammer2_cache_read(bref->data_off, &psize);

	/*
	 * Failure prior to I/O being performed.
	 */
	if (psize == 0)
		return;

	/*
	 * Any failures which occur after the I/O has been performed
	 * should enter the bref in the negative cache to avoid unnecessary
	 * guaranteed-to-fail reissuances of the same (bref, data_off) combo.
	 */
	if (inode == NULL) {
fail:
		enter_neg(bref);
		return;
	}

	/*
	 * The blockref looks ok but the real test is whether the
	 * inode data it references passes the CRC check.  If it
	 * does, it is highly likely that we have a valid inode.
	 */
	if (validate_crc(bref, inode, sizeof(*inode)) == 0)
		goto fail;
	if (inode->meta.inum != bref->key)
		goto fail;

	/*
	 * Record the inode.  For now we do not record the actual content
	 * of the inode because if there are more than few million of them
	 * the memory consumption can get into the dozens of gigabytes.
	 *
	 * Instead, the inode will be re-read from media in the recovery
	 * pass.
	 */
	scan = malloc(sizeof(*scan));
	bzero(scan, sizeof(*scan));

	scan->inum = bref->key;
	scan->type = inode->meta.type;
	scan->data_off = bref->data_off;
	scan->inode_crc = hammer2_icrc32(inode, sizeof(*inode));
	//scan->inode = *inode;	/* removed, too expensive */

	/* enter on both the primary (inum) and secondary hash chains */
	scan->next = InodeHash[hv];
	InodeHash[hv] = scan;
	scan->next2 = InodeHash2[hv2];
	InodeHash2[hv2] = scan;

	++InodeCount;
}

/*
 * This is used to enter possible root inodes.
Root inodes typically hang 839 * off the volume root and thus there might not be a bref reference to the 840 * many old copies of root inodes sitting around on the media. Without a 841 * bref we can't really validate that the content is ok. But we need 842 * these inodes as part of our path searches. 843 */ 844 static void 845 enter_inode_untested(hammer2_inode_data_t *ip, hammer2_off_t loff) 846 { 847 uint32_t hv; 848 uint32_t hv2; 849 inode_entry_t *scan; 850 851 hv = (ip->meta.inum ^ (ip->meta.inum >> 16)) & HTABLE_MASK; 852 hv2 = (ip->meta.inum ^ (ip->meta.inum >> 16) ^ (loff >> 10)) & 853 HTABLE_MASK; 854 855 for (scan = InodeHash2[hv2]; scan; scan = scan->next2) { 856 if (ip->meta.inum == scan->inum && 857 loff == scan->data_off) 858 { 859 return; 860 } 861 } 862 863 /* 864 * Record the inode. For now we do not record the actual content 865 * of the inode because if there are more than few million of them 866 * the memory consumption can get into the dozens of gigabytes. 867 * 868 * Instead, the inode will be re-read from media in the recovery 869 * pass. 870 */ 871 scan = malloc(sizeof(*scan)); 872 bzero(scan, sizeof(*scan)); 873 874 scan->inum = ip->meta.inum; 875 scan->type = ip->meta.type; 876 scan->data_off = loff; 877 scan->inode_crc = hammer2_icrc32(ip, sizeof(*ip)); 878 //scan->inode = *ip; /* removed, too expensive */ 879 880 scan->next = InodeHash[hv]; 881 InodeHash[hv] = scan; 882 scan->next2 = InodeHash2[hv2]; 883 InodeHash2[hv2] = scan; 884 885 ++InodeCount; 886 } 887 888 static inode_entry_t * 889 find_first_inode(hammer2_key_t inum) 890 { 891 inode_entry_t *entry; 892 uint32_t hv = (inum ^ (inum >> 16)) & HTABLE_MASK; 893 894 for (entry = InodeHash[hv]; entry; entry = entry->next) { 895 if (entry->inum == inum) 896 return entry; 897 } 898 return NULL; 899 } 900 901 /* 902 * Negative bref cache. A cache of brefs that we have determined 903 * to be invalid. Used to reduce unnecessary disk I/O. 
 *
 * NOTE: Checks must be reasonable and at least encompass checks
 *	 done in enter_inode() after it has decided to read the
 *	 block at data_off.
 *
 *	 Adding a few more match fields in addition won't hurt either.
 */
static int
find_neg(hammer2_blockref_t *bref)
{
	int hv = (bref->data_off >> 10) & HTABLE_MASK;
	neg_entry_t *neg;

	for (neg = NegativeHash[hv]; neg; neg = neg->next) {
		if (bref->data_off == neg->bref.data_off &&
		    bref->type == neg->bref.type &&
		    bref->methods == neg->bref.methods &&
		    bref->key == neg->bref.key &&
		    bcmp(&bref->check, &neg->bref.check,
			 sizeof(bref->check)) == 0)
		{
			++NegativeHits;
			return 1;
		}
	}
	return 0;
}

/*
 * Record a bref known to be invalid in the negative cache.  No duplicate
 * check is done here; find_neg() is expected to have been consulted first.
 */
static void
enter_neg(hammer2_blockref_t *bref)
{
	int hv = (bref->data_off >> 10) & HTABLE_MASK;
	neg_entry_t *neg;

	neg = malloc(sizeof(*neg));
	bzero(neg, sizeof(*neg));
	neg->next = NegativeHash[hv];
	neg->bref = *bref;
	NegativeHash[hv] = neg;
	++NegativeCount;
}

/*
 * Dump the specified inode (file or directory)
 *
 * This function recurses via dump_dir_data().
 */
static void
dump_tree(inode_entry_t *iscan, const char *dest, const char *remain,
	  int depth, int path_depth, int isafile)
{
	topology_entry_t *topo;
	hammer2_inode_data_t *inode;
	hammer2_inode_data_t inode_copy;
	struct stat st;
	size_t psize;
	char *path;

	/*
	 * Re-read the already-validated inode instead of saving it in
	 * memory from the media pass.  Even though we already validated
	 * it, the content may have changed if scanning live media, so
	 * check against a simple crc we recorded earlier.
	 */
	inode = hammer2_cache_read(iscan->data_off, &psize);
	if (psize == 0)
		return;
	if (inode == NULL || psize != sizeof(*inode))
		return;
	if (iscan->inode_crc != hammer2_icrc32(inode, sizeof(*inode)))
		return;
	/* copy out; the cache buffer may be recycled during recursion */
	inode_copy = *inode;
	inode = &inode_copy;

	/*
	 * Try to limit potential infinite loops
	 */
	if (depth > REPINODEDEPTH && iscan->encountered)
		return;

	/*
	 * Get rid of any dividing slashes
	 */
	while (*remain == '/')
		++remain;

	/*
	 * Create/lookup destination path (without the iterator), to acquire
	 * an iterator for different versions of the same file.
	 *
	 * Due to the COW mechanism, a lot of repeated snapshot-like
	 * directories may be encountered so we use the topology tree
	 * to weed out duplicates that attempt to use the same pathname.
	 *
	 * We also don't iterate copies of directories since this would
	 * create a major mess due to the many versions that might be
	 * laying around.  Directories use unextended names.
	 */
	topo = enter_topology(dest);
	if (topology_check_duplicate_inode(topo, iscan))
		return;

	switch(iscan->type) {
	case HAMMER2_OBJTYPE_DIRECTORY:
		/*
		 * If we have exhausted the path and isafile is TRUE,
		 * stop.
		 */
		if (isafile && *remain == 0)
			return;

		/*
		 * Create / make usable the target directory.  Note that
		 * it might already exist.
		 *
		 * Do not do this for the destination base directory
		 * (depth 1).
		 */
		if (depth != 1) {
			if (mkdir(dest, 0755) == 0)
				iscan->encountered = 1;
			if (stat(dest, &st) == 0) {
				if (st.st_flags)
					chflags(dest, 0);
				if ((st.st_mode & 0700) != 0700)
					chmod(dest, 0755);
			}
		}

		/*
		 * Dump directory contents (scan the directory)
		 */
		(void)dump_dir_data(dest, remain,
				    &inode->u.blockset.blockref[0],
				    HAMMER2_SET_COUNT, depth, path_depth + 1,
				    isafile);

		/*
		 * Final adjustment to directory inode (times are stored
		 * in microseconds by hammer2).
		 */
		if (depth != 1) {
			struct timeval tvs[2];

			tvs[0].tv_sec = inode->meta.atime / 1000000;
			tvs[0].tv_usec = inode->meta.atime % 1000000;
			tvs[1].tv_sec = inode->meta.mtime / 1000000;
			tvs[1].tv_usec = inode->meta.mtime % 1000000;

			/* NOTE(review): message says "futimes" but the
			 * failing call is lutimes */
			if (lutimes(dest, tvs) < 0)
				perror("futimes");
			lchown(dest,
			       hammer2_to_unix_xid(&inode->meta.uid),
			       hammer2_to_unix_xid(&inode->meta.gid));

			lchmod(dest, inode->meta.mode);
			lchflags(dest, inode->meta.uflags);
		}
		break;
	case HAMMER2_OBJTYPE_REGFILE:
		/*
		 * If no more path to match, dump the file contents
		 */
		if (*remain == 0) {
			/* versioned name: <dest>.00001, .00002, ... */
			asprintf(&path, "%s.%05ld", dest, topo->iterator);
			++topo->iterator;

			if (stat(path, &st) == 0) {
				if (st.st_flags)
					chflags(path, 0);
				if ((st.st_mode & 0600) != 0600)
					chmod(path, 0644);
			}
			iscan->encountered = 1;
			(void)dump_inum_file(iscan, inode, path);
			free(path);
		}
		break;
	case HAMMER2_OBJTYPE_SOFTLINK:
		/*
		 * If no more path to match, dump the file contents
		 */
		if (*remain == 0) {
			asprintf(&path, "%s.%05ld", dest, topo->iterator);
			++topo->iterator;

			if (stat(path, &st) == 0) {
				if (st.st_flags)
					chflags(path, 0);
				if ((st.st_mode & 0600) != 0600)
					chmod(path, 0644);
			}
			(void)dump_inum_softlink(inode, path);
			free(path);
		}
		break;
	}
}

/*
1103 * [re]create a regular file and attempt to restore the originanl perms, 1104 * modes, flags, times, uid, and gid if successful. 1105 * 1106 * If the data block recursion fails the file will be renamed 1107 * .corrupted. 1108 */ 1109 static int 1110 dump_inum_file(inode_entry_t *iscan, hammer2_inode_data_t *inode, 1111 const char *path1) 1112 { 1113 char *path2; 1114 int wfd; 1115 int res; 1116 1117 /* 1118 * If this specific inode has already been generated, try to 1119 * hardlink it instead of regenerating the same file again. 1120 */ 1121 if (iscan->link_file_path) { 1122 if (link(iscan->link_file_path, path1) == 0) 1123 return 1; 1124 chflags(iscan->link_file_path, 0); 1125 chmod(iscan->link_file_path, 0600); 1126 if (link(iscan->link_file_path, path1) == 0) { 1127 chmod(iscan->link_file_path, inode->meta.mode); 1128 chflags(iscan->link_file_path, inode->meta.uflags); 1129 return 1; 1130 } 1131 } 1132 1133 /* 1134 * Cleanup potential flags and modes to allow us to write out a 1135 * new file. 
1136 */ 1137 chflags(path1, 0); 1138 chmod(path1, 0600); 1139 wfd = open(path1, O_RDWR|O_CREAT|O_TRUNC, 0600); 1140 1141 if (inode->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) { 1142 /* 1143 * Direct data case 1144 */ 1145 if (inode->meta.size > 0 && 1146 inode->meta.size <= sizeof(inode->u.data)) 1147 { 1148 write(wfd, inode->u.data, inode->meta.size); 1149 } 1150 res = 1; 1151 } else { 1152 /* 1153 * file content, indirect blockrefs 1154 */ 1155 res = dump_file_data(wfd, inode->meta.size, 1156 &inode->u.blockset.blockref[0], 1157 HAMMER2_SET_COUNT); 1158 } 1159 1160 /* 1161 * On success, set perms, mtime, flags, etc 1162 * On failure, rename file to .corrupted 1163 */ 1164 if (res) { 1165 struct timeval tvs[2]; 1166 1167 tvs[0].tv_sec = inode->meta.atime / 1000000; 1168 tvs[0].tv_usec = inode->meta.atime % 1000000; 1169 tvs[1].tv_sec = inode->meta.mtime / 1000000; 1170 tvs[1].tv_usec = inode->meta.mtime % 1000000; 1171 1172 ftruncate(wfd, inode->meta.size); 1173 if (futimes(wfd, tvs) < 0) 1174 perror("futimes"); 1175 fchown(wfd, 1176 hammer2_to_unix_xid(&inode->meta.uid), 1177 hammer2_to_unix_xid(&inode->meta.gid)); 1178 1179 fchmod(wfd, inode->meta.mode); 1180 fchflags(wfd, inode->meta.uflags); 1181 iscan->link_file_path = strdup(path1); 1182 } else { 1183 struct timeval tvs[2]; 1184 1185 tvs[0].tv_sec = inode->meta.atime / 1000000; 1186 tvs[0].tv_usec = inode->meta.atime % 1000000; 1187 tvs[1].tv_sec = inode->meta.mtime / 1000000; 1188 tvs[1].tv_usec = inode->meta.mtime % 1000000; 1189 1190 ftruncate(wfd, inode->meta.size); 1191 if (futimes(wfd, tvs) < 0) 1192 perror("futimes"); 1193 fchown(wfd, 1194 hammer2_to_unix_xid(&inode->meta.uid), 1195 hammer2_to_unix_xid(&inode->meta.gid)); 1196 1197 asprintf(&path2, "%s.corrupted", path1); 1198 rename(path1, path2); 1199 iscan->link_file_path = path2; 1200 } 1201 1202 /* 1203 * Cleanup 1204 */ 1205 close(wfd); 1206 1207 return res; 1208 } 1209 1210 /* 1211 * TODO XXX 1212 */ 1213 static int 1214 
dump_inum_softlink(hammer2_inode_data_t *inode __unused, 1215 const char *path __unused) 1216 { 1217 return 0; 1218 } 1219 1220 /* 1221 * Scan the directory for a match against the next component in 1222 * the (remain) path. 1223 * 1224 * This function is part of the dump_tree() recursion mechanism. 1225 */ 1226 static int 1227 dump_dir_data(const char *dest, const char *remain, 1228 hammer2_blockref_t *base, int count, 1229 int depth, int path_depth, int isafile) 1230 { 1231 hammer2_media_data_t data; 1232 hammer2_volume_t *vol; 1233 hammer2_off_t poff; 1234 size_t psize; 1235 int res = 1; 1236 int rtmp; 1237 int n; 1238 size_t flen; 1239 1240 /* 1241 * Calculate length of next path component to match against. 1242 * A length of zero matches against the entire remaining 1243 * directory sub-tree. 1244 */ 1245 for (flen = 0; remain[flen] && remain[flen] != '/'; ++flen) 1246 ; 1247 1248 /* 1249 * Scan the brefs associated with the directory 1250 */ 1251 for (n = 0; n < count; ++n) { 1252 hammer2_blockref_t *bref; 1253 char filename_buf[1024+1]; 1254 topology_entry_t *topo; 1255 1256 bref = &base[n]; 1257 1258 if (bref->type == HAMMER2_BREF_TYPE_EMPTY) 1259 continue; 1260 1261 vol = NULL; 1262 poff = 0; 1263 psize = 0; 1264 if (bref->data_off & 0x1F) { 1265 vol = hammer2_get_volume(bref->data_off); 1266 if (vol == NULL) { 1267 res = 0; 1268 continue; 1269 } 1270 poff = (bref->data_off - vol->offset) & ~0x1FL; 1271 psize = 1 << (bref->data_off & 0x1F); 1272 if (psize > sizeof(data)) { 1273 res = 0; 1274 continue; 1275 } 1276 } 1277 1278 switch(bref->type) { 1279 case HAMMER2_BREF_TYPE_DIRENT: 1280 /* 1281 * Match the path element or match all directory 1282 * entries if we have exhausted (remain). 1283 * 1284 * Locate all matching inodes and continue the 1285 * traversal. 1286 * 1287 * Avoid traversal loops by recording path_depth 1288 * on the way down and clearing it on the way back 1289 * up. 
1290 */ 1291 if ((flen == 0 && 1292 check_filename(bref, NULL, filename_buf, 1293 bref->embed.dirent.namlen)) || 1294 (flen && 1295 flen == bref->embed.dirent.namlen && 1296 check_filename(bref, remain, filename_buf, flen))) 1297 { 1298 inode_entry_t *iscan; 1299 hammer2_key_t inum; 1300 char *path; 1301 1302 inum = bref->embed.dirent.inum; 1303 1304 asprintf(&path, "%s/%s", dest, filename_buf); 1305 iscan = find_first_inode(inum); 1306 while (iscan) { 1307 if (iscan->inum == inum && 1308 iscan->loopcheck == 0 && 1309 iscan->type == 1310 bref->embed.dirent.type) 1311 { 1312 iscan->loopcheck = 1; 1313 dump_tree(iscan, path, 1314 remain + flen, 1315 depth + 1, 1316 path_depth, 1317 isafile); 1318 1319 /* 1320 * Clear loop check 1321 */ 1322 iscan->loopcheck = 0; 1323 } 1324 iscan = iscan->next; 1325 } 1326 free(path); 1327 } 1328 break; 1329 case HAMMER2_BREF_TYPE_INDIRECT: 1330 if (psize == 0) { 1331 res = 0; 1332 break; 1333 } 1334 1335 /* 1336 * Due to COW operations, even if the inode is 1337 * replicated, some of the indirect brefs might 1338 * still be shared and allow us to reject duplicate 1339 * scans. 1340 */ 1341 topo = enter_topology(dest); 1342 if (topology_check_duplicate_indirect(topo, bref)) 1343 break; 1344 1345 if (pread(vol->fd, &data, psize, poff) != 1346 (ssize_t)psize) 1347 { 1348 res = 0; 1349 break; 1350 } 1351 1352 if (validate_crc(bref, &data, psize) == 0) { 1353 res = 0; 1354 break; 1355 } 1356 1357 rtmp = dump_dir_data(dest, remain, 1358 &data.npdata[0], 1359 psize / sizeof(hammer2_blockref_t), 1360 depth, path_depth, isafile); 1361 if (res) 1362 res = rtmp; 1363 break; 1364 } 1365 } 1366 return res; 1367 } 1368 1369 /* 1370 * Dumps the data records for an inode to the target file (wfd), returns 1371 * TRUE on success, FALSE if corruption was detected. 
1372 */ 1373 static int 1374 dump_file_data(int wfd, hammer2_off_t fsize, 1375 hammer2_blockref_t *base, int count) 1376 { 1377 hammer2_media_data_t data; 1378 hammer2_blockref_t *bref; 1379 hammer2_volume_t *vol; 1380 hammer2_off_t poff; 1381 hammer2_off_t psize; 1382 hammer2_off_t nsize; 1383 int res = 1; 1384 int rtmp; 1385 int n; 1386 1387 for (n = 0; n < count; ++n) { 1388 char *dptr; 1389 int res2; 1390 1391 bref = &base[n]; 1392 1393 if (bref->type == HAMMER2_BREF_TYPE_EMPTY || 1394 bref->data_off == 0) 1395 { 1396 continue; 1397 } 1398 vol = hammer2_get_volume(bref->data_off); 1399 if (vol == NULL) 1400 continue; 1401 if (bref->type == HAMMER2_BREF_TYPE_EMPTY || 1402 bref->data_off == 0) 1403 { 1404 continue; 1405 } 1406 1407 poff = (bref->data_off - vol->offset) & ~0x1FL; 1408 psize = 1 << (bref->data_off & 0x1F); 1409 if (psize > sizeof(data)) { 1410 res = 0; 1411 continue; 1412 } 1413 1414 if (pread(vol->fd, &data, psize, poff) != (ssize_t)psize) { 1415 res = 0; 1416 continue; 1417 } 1418 1419 if (validate_crc(bref, &data, psize) == 0) { 1420 res = 0; 1421 continue; 1422 } 1423 1424 switch(bref->type) { 1425 case HAMMER2_BREF_TYPE_DATA: 1426 dptr = (void *)&data; 1427 1428 nsize = 1L << bref->keybits; 1429 if (nsize > sizeof(data)) { 1430 res = 0; 1431 break; 1432 } 1433 1434 switch (HAMMER2_DEC_COMP(bref->methods)) { 1435 case HAMMER2_COMP_LZ4: 1436 dptr = hammer2_decompress_LZ4(dptr, psize, 1437 nsize, &res2); 1438 if (res) 1439 res = res2; 1440 psize = nsize; 1441 break; 1442 case HAMMER2_COMP_ZLIB: 1443 dptr = hammer2_decompress_ZLIB(dptr, psize, 1444 nsize, &res2); 1445 if (res) 1446 res = res2; 1447 psize = nsize; 1448 break; 1449 case HAMMER2_COMP_NONE: 1450 default: 1451 /* leave in current form */ 1452 break; 1453 } 1454 1455 if (bref->key + psize > fsize) 1456 pwrite(wfd, dptr, fsize - bref->key, 1457 bref->key); 1458 else 1459 pwrite(wfd, dptr, psize, bref->key); 1460 break; 1461 case HAMMER2_BREF_TYPE_INDIRECT: 1462 rtmp = dump_file_data(wfd, 
fsize, 1463 &data.npdata[0], 1464 psize / sizeof(hammer2_blockref_t)); 1465 if (res) 1466 res = rtmp; 1467 break; 1468 } 1469 } 1470 return res; 1471 } 1472 1473 /* 1474 * Validate the bref data target. The recovery scan will often attempt to 1475 * validate invalid elements, so don't spew errors to stderr on failure. 1476 */ 1477 static int 1478 validate_crc(hammer2_blockref_t *bref, void *data, size_t bytes) 1479 { 1480 uint32_t cv; 1481 uint64_t cv64; 1482 SHA256_CTX hash_ctx; 1483 int success = 0; 1484 union { 1485 uint8_t digest[SHA256_DIGEST_LENGTH]; 1486 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 1487 } u; 1488 1489 switch(HAMMER2_DEC_CHECK(bref->methods)) { 1490 case HAMMER2_CHECK_NONE: 1491 if (StrictMode == 0) 1492 success = 1; 1493 break; 1494 case HAMMER2_CHECK_DISABLED: 1495 if (StrictMode == 0) 1496 success = 1; 1497 break; 1498 case HAMMER2_CHECK_ISCSI32: 1499 cv = hammer2_icrc32(data, bytes); 1500 if (bref->check.iscsi32.value == cv) { 1501 success = 1; 1502 } 1503 break; 1504 case HAMMER2_CHECK_XXHASH64: 1505 cv64 = XXH64(data, bytes, XXH_HAMMER2_SEED); 1506 if (bref->check.xxhash64.value == cv64) { 1507 success = 1; 1508 } 1509 break; 1510 case HAMMER2_CHECK_SHA192: 1511 SHA256_Init(&hash_ctx); 1512 SHA256_Update(&hash_ctx, data, bytes); 1513 SHA256_Final(u.digest, &hash_ctx); 1514 u.digest64[2] ^= u.digest64[3]; 1515 if (memcmp(u.digest, bref->check.sha192.data, 1516 sizeof(bref->check.sha192.data)) == 0) 1517 { 1518 success = 1; 1519 } 1520 break; 1521 case HAMMER2_CHECK_FREEMAP: 1522 cv = hammer2_icrc32(data, bytes); 1523 if (bref->check.freemap.icrc32 == cv) { 1524 success = 1; 1525 } 1526 break; 1527 } 1528 return success; 1529 } 1530 1531 /* 1532 * Convert a hammer2 uuid to a uid or gid. 1533 */ 1534 static uint32_t 1535 hammer2_to_unix_xid(const uuid_t *uuid) 1536 { 1537 return(*(const uint32_t *)&uuid->node[2]); 1538 } 1539 1540 /* 1541 * Read from disk image, with caching to improve performance. 
1542 * 1543 * Use a very simple LRU algo with 16 entries, linearly checked. 1544 */ 1545 #define SDCCOUNT 16 1546 #define SDCMASK (SDCCOUNT - 1) 1547 1548 typedef struct sdccache { 1549 char buf[HAMMER2_PBUFSIZE]; 1550 hammer2_off_t offset; 1551 hammer2_volume_t *vol; 1552 int64_t last; 1553 } sdccache_t; 1554 1555 static sdccache_t SDCCache[SDCCOUNT]; 1556 static int64_t SDCLast; 1557 1558 static void * 1559 hammer2_cache_read(hammer2_off_t data_off, size_t *bytesp) 1560 { 1561 hammer2_off_t poff; 1562 hammer2_off_t pbase; 1563 hammer2_volume_t *vol; 1564 sdccache_t *sdc; 1565 sdccache_t *sdc_worst; 1566 int i; 1567 1568 *bytesp = 0; 1569 1570 /* 1571 * Translate logical offset to volume and physical offset. 1572 * Return NULL with *bytesp set to 0 to indicate pre-I/O 1573 * sanity check failure. 1574 */ 1575 vol = hammer2_get_volume(data_off); 1576 if (vol == NULL) 1577 return NULL; 1578 poff = (data_off - vol->offset) & ~0x1FL; 1579 *bytesp = 1 << (data_off & 0x1F); 1580 1581 pbase = poff & ~HAMMER2_PBUFMASK64; 1582 1583 /* 1584 * Must not straddle two full-sized hammer2 blocks 1585 */ 1586 if ((poff ^ (poff + *bytesp - 1)) & ~HAMMER2_PBUFMASK64) { 1587 *bytesp = 0; 1588 return NULL; 1589 } 1590 1591 /* 1592 * I/Os are powers of 2 in size and must be aligned to at least 1593 * the I/O size. 1594 */ 1595 if (poff & ~0x1FL & (*bytesp - 1)) { 1596 *bytesp = 0; 1597 return NULL; 1598 } 1599 1600 /* 1601 * LRU match lookup 1602 */ 1603 sdc_worst = NULL; 1604 for (i = 0; i < SDCCOUNT; ++i) { 1605 sdc = &SDCCache[i]; 1606 1607 if (sdc->vol == vol && sdc->offset == pbase) { 1608 sdc->last = ++SDCLast; 1609 return (&sdc->buf[poff - pbase]); 1610 } 1611 if (sdc_worst == NULL || sdc_worst->last > sdc->last) 1612 sdc_worst = sdc; 1613 } 1614 1615 /* 1616 * Fallback to I/O if not found, using oldest entry 1617 * 1618 * On failure we leave (*bytesp) intact to indicate that an I/O 1619 * was attempted. 
1620 */ 1621 sdc = sdc_worst; 1622 sdc->vol = vol; 1623 sdc->offset = pbase; 1624 sdc->last = ++SDCLast; 1625 1626 if (pread(vol->fd, sdc->buf, HAMMER2_PBUFSIZE, pbase) != 1627 HAMMER2_PBUFSIZE) 1628 { 1629 sdc->offset = (hammer2_off_t)-1; 1630 sdc->last = 0; 1631 return NULL; 1632 } 1633 return (&sdc->buf[poff - pbase]); 1634 } 1635