1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer.h" 36 37 struct recover_dict { 38 struct recover_dict *next; 39 struct recover_dict *parent; 40 int64_t obj_id; 41 uint8_t obj_type; 42 uint8_t flags; 43 uint16_t pfs_id; 44 int64_t size; 45 char *name; 46 }; 47 48 #define DICTF_MADEDIR 0x01 49 #define DICTF_MADEFILE 0x02 50 #define DICTF_PARENT 0x04 /* parent attached for real */ 51 #define DICTF_TRAVERSED 0x80 52 53 static void recover_top(char *ptr, hammer_off_t offset); 54 static void recover_elm(hammer_btree_leaf_elm_t leaf); 55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id); 56 static char *recover_path(struct recover_dict *dict); 57 static void sanitize_string(char *str); 58 59 static const char *TargetDir; 60 static int CachedFd = -1; 61 static char *CachedPath; 62 63 void 64 hammer_cmd_recover(const char *target_dir) 65 { 66 struct buffer_info *data_buffer; 67 struct volume_info *volume; 68 hammer_off_t off; 69 hammer_off_t off_end; 70 char *ptr; 71 72 AssertOnFailure = 0; 73 TargetDir = target_dir; 74 75 if (mkdir(TargetDir, 0777) == -1) { 76 if (errno != EEXIST) { 77 perror("mkdir"); 78 exit(1); 79 } 80 } 81 82 printf("Running raw scan of HAMMER image, recovering to %s\n", 83 TargetDir); 84 85 data_buffer = NULL; 86 TAILQ_FOREACH(volume, &VolList, entry) { 87 check_volume(volume); 88 printf("Scanning volume %d size %s\n", 89 volume->vol_no, sizetostr(volume->size)); 90 off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 91 off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk); 92 while (off < off_end) { 93 ptr = get_buffer_data(off, &data_buffer, 0); 94 if (ptr) 95 recover_top(ptr, off); 96 off += HAMMER_BUFSIZE; 97 } 98 } 99 rel_buffer(data_buffer); 100 101 if (CachedPath) { 102 free(CachedPath); 103 close(CachedFd); 104 CachedPath = NULL; 105 CachedFd = -1; 106 } 107 108 AssertOnFailure = 1; 109 } 110 111 /* 112 * Top level recovery processor. Assume the data is a B-Tree node. 113 * If the CRC is good we attempt to process the node, building the 114 * object space and creating the dictionary as we go. 115 */ 116 static void 117 recover_top(char *ptr, hammer_off_t offset) 118 { 119 struct hammer_node_ondisk *node; 120 hammer_btree_elm_t elm; 121 int maxcount; 122 int i; 123 int isnode; 124 char buf[HAMMER_BTREE_LEAF_ELMS + 1]; 125 126 for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) { 127 isnode = (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == node->crc); 128 maxcount = hammer_node_max_elements(node->type); 129 130 if (DebugOpt) { 131 for (i = 0; i < node->count && i < maxcount; ++i) 132 buf[i] = hammer_elm_btype(&node->elms[i]); 133 buf[i] = '\0'; 134 if (!isnode && DebugOpt > 1) 135 printf("%016jx -\n", offset); 136 if (isnode) 137 printf("%016jx %c %d %s\n", 138 offset, node->type, node->count, buf); 139 } 140 offset += sizeof(*node); 141 142 if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) { 143 for (i = 0; i < node->count && i < maxcount; ++i) { 144 elm = &node->elms[i]; 145 if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD) 146 continue; 147 recover_elm(&elm->leaf); 148 } 149 } 150 } 151 } 152 153 static void 154 recover_elm(hammer_btree_leaf_elm_t leaf) 155 { 156 struct buffer_info *data_buffer = NULL; 157 struct recover_dict *dict; 158 struct recover_dict *dict2; 159 hammer_data_ondisk_t ondisk; 160 hammer_off_t data_offset; 161 struct stat st; 162 int chunk; 163 int len; 164 int zfill; 165 int64_t file_offset; 166 uint16_t pfs_id; 167 size_t nlen; 168 int fd; 169 char *name; 170 char *path1; 171 char *path2; 172 173 /* 174 * Ignore deleted records 175 */ 176 if (leaf->delete_ts) 177 return; 178 if ((data_offset = leaf->data_offset) != 0) 179 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 180 else 181 ondisk = NULL; 182 if (ondisk == NULL) 183 goto done; 184 185 len = leaf->data_len; 186 chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK); 187 if (chunk > len) 188 chunk = len; 189 190 if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk) 191 goto done; 192 193 pfs_id = lo_to_pfs(leaf->base.localization); 194 195 dict = get_dict(leaf->base.obj_id, pfs_id); 196 197 switch(leaf->base.rec_type) { 198 case HAMMER_RECTYPE_INODE: 199 /* 200 * We found an inode which also tells us where the file 201 * or directory is in the directory hierarchy. 202 */ 203 if (VerboseOpt) { 204 printf("file %016jx:%05d inode found\n", 205 (uintmax_t)leaf->base.obj_id, pfs_id); 206 } 207 path1 = recover_path(dict); 208 209 /* 210 * Attach the inode to its parent. This isn't strictly 211 * necessary because the information is also in the 212 * directory entries, but if we do not find the directory 213 * entry this ensures that the files will still be 214 * reasonably well organized in their proper directories. 215 */ 216 if ((dict->flags & DICTF_PARENT) == 0 && 217 dict->obj_id != HAMMER_OBJID_ROOT && 218 ondisk->inode.parent_obj_id != 0) { 219 dict->flags |= DICTF_PARENT; 220 dict->parent = get_dict(ondisk->inode.parent_obj_id, 221 pfs_id); 222 if (dict->parent && 223 (dict->parent->flags & DICTF_MADEDIR) == 0) { 224 dict->parent->flags |= DICTF_MADEDIR; 225 path2 = recover_path(dict->parent); 226 printf("mkdir %s\n", path2); 227 mkdir(path2, 0777); 228 free(path2); 229 path2 = NULL; 230 } 231 } 232 if (dict->obj_type == 0) 233 dict->obj_type = ondisk->inode.obj_type; 234 dict->size = ondisk->inode.size; 235 path2 = recover_path(dict); 236 237 if (lstat(path1, &st) == 0) { 238 if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 239 truncate(path1, dict->size); 240 /* chmod(path1, 0666); */ 241 } 242 if (strcmp(path1, path2)) { 243 printf("Rename %s -> %s\n", path1, path2); 244 rename(path1, path2); 245 } 246 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 247 printf("mkinode (file) %s\n", path2); 248 fd = open(path2, O_RDWR|O_CREAT, 0666); 249 if (fd > 0) 250 close(fd); 251 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) { 252 printf("mkinode (dir) %s\n", path2); 253 mkdir(path2, 0777); 254 dict->flags |= DICTF_MADEDIR; 255 } 256 free(path1); 257 free(path2); 258 break; 259 case HAMMER_RECTYPE_DATA: 260 /* 261 * File record data 262 */ 263 if (leaf->base.obj_id == 0) 264 break; 265 if (VerboseOpt) { 266 printf("file %016jx:%05d data %016jx,%d\n", 267 (uintmax_t)leaf->base.obj_id, 268 pfs_id, 269 (uintmax_t)leaf->base.key - len, 270 len); 271 } 272 273 /* 274 * Update the dictionary entry 275 */ 276 if (dict->obj_type == 0) 277 dict->obj_type = HAMMER_OBJTYPE_REGFILE; 278 279 /* 280 * If the parent directory has not been created we 281 * have to create it (typically a PFS%05d) 282 */ 283 if (dict->parent && 284 (dict->parent->flags & DICTF_MADEDIR) == 0) { 285 dict->parent->flags |= DICTF_MADEDIR; 286 path2 = recover_path(dict->parent); 287 printf("mkdir %s\n", path2); 288 mkdir(path2, 0777); 289 free(path2); 290 path2 = NULL; 291 } 292 293 /* 294 * Create the file if necessary, report file creations 295 */ 296 path1 = recover_path(dict); 297 if (CachedPath && strcmp(CachedPath, path1) == 0) { 298 fd = CachedFd; 299 } else { 300 fd = open(path1, O_CREAT|O_RDWR, 0666); 301 } 302 if (fd < 0) { 303 printf("Unable to create %s: %s\n", 304 path1, strerror(errno)); 305 free(path1); 306 break; 307 } 308 if ((dict->flags & DICTF_MADEFILE) == 0) { 309 dict->flags |= DICTF_MADEFILE; 310 printf("mkfile %s\n", path1); 311 } 312 313 /* 314 * And write the record. A HAMMER data block is aligned 315 * and may contain trailing zeros after the file EOF. The 316 * inode record is required to get the actual file size. 317 * 318 * However, when the inode record is not available 319 * we can do a sparse write and that will get it right 320 * most of the time even if the inode record is never 321 * found. 322 */ 323 file_offset = (int64_t)leaf->base.key - len; 324 lseek(fd, (off_t)file_offset, SEEK_SET); 325 while (len) { 326 if (dict->size == -1) { 327 for (zfill = chunk - 1; zfill >= 0; --zfill) { 328 if (((char *)ondisk)[zfill]) 329 break; 330 } 331 ++zfill; 332 } else { 333 zfill = chunk; 334 } 335 336 if (zfill) 337 write(fd, ondisk, zfill); 338 if (zfill < chunk) 339 lseek(fd, chunk - zfill, SEEK_CUR); 340 341 len -= chunk; 342 data_offset += chunk; 343 file_offset += chunk; 344 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 345 if (ondisk == NULL) 346 break; 347 chunk = HAMMER_BUFSIZE - 348 ((int)data_offset & HAMMER_BUFMASK); 349 if (chunk > len) 350 chunk = len; 351 } 352 if (dict->size >= 0 && file_offset > dict->size) { 353 ftruncate(fd, dict->size); 354 /* fchmod(fd, 0666); */ 355 } 356 357 if (fd == CachedFd) { 358 free(path1); 359 } else if (CachedPath) { 360 free(CachedPath); 361 close(CachedFd); 362 CachedPath = path1; 363 CachedFd = fd; 364 } else { 365 CachedPath = path1; 366 CachedFd = fd; 367 } 368 break; 369 case HAMMER_RECTYPE_DIRENTRY: 370 nlen = len - offsetof(struct hammer_direntry_data, name[0]); 371 if ((int)nlen < 0) /* illegal length */ 372 break; 373 if (ondisk->entry.obj_id == 0 || 374 ondisk->entry.obj_id == HAMMER_OBJID_ROOT) 375 break; 376 name = malloc(nlen + 1); 377 bcopy(ondisk->entry.name, name, nlen); 378 name[nlen] = 0; 379 sanitize_string(name); 380 381 /* 382 * We can't deal with hardlinks so if the object already 383 * has a name assigned to it we just keep using that name. 384 */ 385 dict2 = get_dict(ondisk->entry.obj_id, pfs_id); 386 path1 = recover_path(dict2); 387 388 if (dict2->name == NULL) 389 dict2->name = name; 390 else 391 free(name); 392 393 /* 394 * Attach dict2 to its directory (dict), create the 395 * directory (dict) if necessary. We must ensure 396 * that the directory entry exists in order to be 397 * able to properly rename() the file without creating 398 * a namespace conflict. 399 */ 400 if ((dict2->flags & DICTF_PARENT) == 0) { 401 dict2->flags |= DICTF_PARENT; 402 dict2->parent = dict; 403 if ((dict->flags & DICTF_MADEDIR) == 0) { 404 dict->flags |= DICTF_MADEDIR; 405 path2 = recover_path(dict); 406 printf("mkdir %s\n", path2); 407 mkdir(path2, 0777); 408 free(path2); 409 path2 = NULL; 410 } 411 } 412 path2 = recover_path(dict2); 413 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) { 414 printf("Rename %s -> %s\n", path1, path2); 415 rename(path1, path2); 416 } 417 free(path1); 418 free(path2); 419 420 printf("dir %016jx:%05d entry %016jx \"%s\"\n", 421 (uintmax_t)leaf->base.obj_id, 422 pfs_id, 423 (uintmax_t)ondisk->entry.obj_id, 424 name); 425 break; 426 default: 427 /* 428 * Ignore any other record types 429 */ 430 break; 431 } 432 done: 433 rel_buffer(data_buffer); 434 } 435 436 #define RD_HSIZE 32768 437 #define RD_HMASK (RD_HSIZE - 1) 438 439 struct recover_dict *RDHash[RD_HSIZE]; 440 441 static 442 struct recover_dict * 443 get_dict(int64_t obj_id, uint16_t pfs_id) 444 { 445 struct recover_dict *dict; 446 int i; 447 448 if (obj_id == 0) 449 return(NULL); 450 451 i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK; 452 for (dict = RDHash[i]; dict; dict = dict->next) { 453 if (dict->obj_id == obj_id && 454 dict->pfs_id == pfs_id) { 455 break; 456 } 457 } 458 if (dict == NULL) { 459 dict = malloc(sizeof(*dict)); 460 bzero(dict, sizeof(*dict)); 461 dict->obj_id = obj_id; 462 dict->pfs_id = pfs_id; 463 dict->next = RDHash[i]; 464 dict->size = -1; 465 RDHash[i] = dict; 466 467 /* 468 * Always connect dangling dictionary entries to object 1 469 * (the root of the PFS). 470 * 471 * DICTF_PARENT will not be set until we know what the 472 * real parent directory object is. 473 */ 474 if (dict->obj_id != HAMMER_OBJID_ROOT) 475 dict->parent = get_dict(1, pfs_id); 476 } 477 return(dict); 478 } 479 480 struct path_info { 481 enum { PI_FIGURE, PI_LOAD } state; 482 uint16_t pfs_id; 483 char *base; 484 char *next; 485 int len; 486 }; 487 488 static void recover_path_helper(struct recover_dict *, struct path_info *); 489 490 static 491 char * 492 recover_path(struct recover_dict *dict) 493 { 494 struct path_info info; 495 496 bzero(&info, sizeof(info)); 497 info.pfs_id = dict->pfs_id; 498 info.state = PI_FIGURE; 499 recover_path_helper(dict, &info); 500 info.base = malloc(info.len); 501 info.next = info.base; 502 info.state = PI_LOAD; 503 recover_path_helper(dict, &info); 504 505 return(info.base); 506 } 507 508 static 509 void 510 recover_path_helper(struct recover_dict *dict, struct path_info *info) 511 { 512 /* 513 * Calculate path element length 514 */ 515 dict->flags |= DICTF_TRAVERSED; 516 517 switch(info->state) { 518 case PI_FIGURE: 519 if (dict->obj_id == HAMMER_OBJID_ROOT) 520 info->len += 8; 521 else if (dict->name) 522 info->len += strlen(dict->name); 523 else 524 info->len += 6 + 16; 525 ++info->len; 526 527 if (dict->parent && 528 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 529 recover_path_helper(dict->parent, info); 530 } else { 531 info->len += strlen(TargetDir) + 1; 532 } 533 break; 534 case PI_LOAD: 535 if (dict->parent && 536 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 537 recover_path_helper(dict->parent, info); 538 } else { 539 strcpy(info->next, TargetDir); 540 info->next += strlen(info->next); 541 } 542 543 *info->next++ = '/'; 544 if (dict->obj_id == HAMMER_OBJID_ROOT) { 545 snprintf(info->next, 8+1, "PFS%05d", info->pfs_id); 546 } else if (dict->name) { 547 strcpy(info->next, dict->name); 548 } else { 549 snprintf(info->next, 6+16+1, "obj_0x%016jx", 550 (uintmax_t)dict->obj_id); 551 } 552 info->next += strlen(info->next); 553 break; 554 } 555 dict->flags &= ~DICTF_TRAVERSED; 556 } 557 558 static 559 void 560 sanitize_string(char *str) 561 { 562 while (*str) { 563 if (!isprint(*str)) 564 *str = 'x'; 565 ++str; 566 } 567 } 568