1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer.h" 36 37 struct recover_dict { 38 struct recover_dict *next; 39 struct recover_dict *parent; 40 int64_t obj_id; 41 uint8_t obj_type; 42 uint8_t flags; 43 uint16_t llid; 44 int64_t size; 45 char *name; 46 }; 47 48 #define DICTF_MADEDIR 0x01 49 #define DICTF_MADEFILE 0x02 50 #define DICTF_PARENT 0x04 /* parent attached for real */ 51 #define DICTF_TRAVERSED 0x80 52 53 static void recover_top(char *ptr); 54 static void recover_elm(hammer_btree_leaf_elm_t leaf); 55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t llid); 56 static char *recover_path(struct recover_dict *dict); 57 static void sanitize_string(char *str); 58 59 static const char *TargetDir; 60 static int CachedFd = -1; 61 static char *CachedPath; 62 63 void 64 hammer_cmd_recover(const char *target_dir) 65 { 66 struct buffer_info *data_buffer; 67 struct volume_info *scan; 68 struct volume_info *volume; 69 hammer_off_t off; 70 hammer_off_t off_end; 71 char *ptr; 72 73 AssertOnFailure = 0; 74 TargetDir = target_dir; 75 76 printf("Running raw scan of HAMMER image, recovering to %s\n", 77 TargetDir); 78 mkdir(TargetDir, 0777); 79 80 data_buffer = NULL; 81 TAILQ_FOREACH(scan, &VolList, entry) { 82 volume = get_volume(scan->vol_no); 83 84 off = HAMMER_ZONE_RAW_BUFFER + 0; 85 off |= HAMMER_VOL_ENCODE(volume->vol_no); 86 off_end = off + (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg); 87 while (off < off_end) { 88 ptr = get_buffer_data(off, &data_buffer, 0); 89 if (ptr) { 90 recover_top(ptr); 91 off += HAMMER_BUFSIZE; 92 } 93 } 94 } 95 if (data_buffer) 96 rel_buffer(data_buffer); 97 98 if (CachedPath) { 99 free(CachedPath); 100 close(CachedFd); 101 CachedPath = NULL; 102 CachedFd = -1; 103 } 104 105 AssertOnFailure = 1; 106 } 107 108 /* 109 * Top level recovery processor. Assume the data is a B-Tree node. 110 * If the CRC is good we attempt to process the node, building the 111 * object space and creating the dictionary as we go. 112 */ 113 static void 114 recover_top(char *ptr) 115 { 116 struct hammer_node_ondisk *node; 117 hammer_btree_elm_t elm; 118 int maxcount; 119 int i; 120 121 for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) { 122 if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == 123 node->crc && 124 node->type == HAMMER_BTREE_TYPE_LEAF) { 125 /* 126 * Scan elements 127 */ 128 maxcount = HAMMER_BTREE_LEAF_ELMS; 129 for (i = 0; i < node->count && i < maxcount; ++i) { 130 elm = &node->elms[i]; 131 if (elm->base.btype != 'R') 132 continue; 133 recover_elm(&elm->leaf); 134 } 135 } 136 } 137 } 138 139 static void 140 recover_elm(hammer_btree_leaf_elm_t leaf) 141 { 142 struct buffer_info *data_buffer = NULL; 143 struct recover_dict *dict; 144 struct recover_dict *dict2; 145 hammer_data_ondisk_t ondisk; 146 hammer_off_t data_offset; 147 struct stat st; 148 int chunk; 149 int len; 150 int zfill; 151 int64_t file_offset; 152 uint16_t llid; 153 size_t nlen; 154 int fd; 155 char *name; 156 char *path1; 157 char *path2; 158 159 /* 160 * Ignore deleted records 161 */ 162 if (leaf->delete_ts) 163 return; 164 if ((data_offset = leaf->data_offset) != 0) 165 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 166 else 167 ondisk = NULL; 168 if (ondisk == NULL) 169 goto done; 170 171 len = leaf->data_len; 172 chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK); 173 if (chunk > len) 174 chunk = len; 175 176 if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk) 177 goto done; 178 179 llid = leaf->base.localization >> 16; 180 181 dict = get_dict(leaf->base.obj_id, llid); 182 183 switch(leaf->base.rec_type) { 184 case HAMMER_RECTYPE_INODE: 185 /* 186 * We found an inode which also tells us where the file 187 * or directory is in the directory hierarchy. 188 */ 189 if (VerboseOpt) { 190 printf("file %016jx:%05d inode found\n", 191 (uintmax_t)leaf->base.obj_id, llid); 192 } 193 path1 = recover_path(dict); 194 195 /* 196 * Attach the inode to its parent. This isn't strictly 197 * necessary because the information is also in the 198 * directory entries, but if we do not find the directory 199 * entry this ensures that the files will still be 200 * reasonably well organized in their proper directories. 201 */ 202 if ((dict->flags & DICTF_PARENT) == 0 && 203 dict->obj_id != 1 && ondisk->inode.parent_obj_id != 0) { 204 dict->flags |= DICTF_PARENT; 205 dict->parent = get_dict(ondisk->inode.parent_obj_id, 206 llid); 207 if (dict->parent && 208 (dict->parent->flags & DICTF_MADEDIR) == 0) { 209 dict->parent->flags |= DICTF_MADEDIR; 210 path2 = recover_path(dict->parent); 211 printf("mkdir %s\n", path2); 212 mkdir(path2, 0777); 213 free(path2); 214 path2 = NULL; 215 } 216 } 217 if (dict->obj_type == 0) 218 dict->obj_type = ondisk->inode.obj_type; 219 dict->size = ondisk->inode.size; 220 path2 = recover_path(dict); 221 222 if (lstat(path1, &st) == 0) { 223 if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 224 truncate(path1, dict->size); 225 /* chmod(path1, 0666); */ 226 } 227 if (strcmp(path1, path2)) { 228 printf("Rename %s -> %s\n", path1, path2); 229 rename(path1, path2); 230 } 231 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 232 printf("mkinode (file) %s\n", path2); 233 fd = open(path2, O_RDWR|O_CREAT, 0666); 234 if (fd > 0) 235 close(fd); 236 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) { 237 printf("mkinode (dir) %s\n", path2); 238 mkdir(path2, 0777); 239 dict->flags |= DICTF_MADEDIR; 240 } 241 free(path1); 242 free(path2); 243 break; 244 case HAMMER_RECTYPE_DATA: 245 /* 246 * File record data 247 */ 248 if (leaf->base.obj_id == 0) 249 break; 250 if (VerboseOpt) { 251 printf("file %016jx:%05d data %016jx,%d\n", 252 (uintmax_t)leaf->base.obj_id, 253 llid, 254 (uintmax_t)leaf->base.key - len, 255 len); 256 } 257 258 /* 259 * Update the dictionary entry 260 */ 261 if (dict->obj_type == 0) 262 dict->obj_type = HAMMER_OBJTYPE_REGFILE; 263 264 /* 265 * If the parent directory has not been created we 266 * have to create it (typically a PFS%05d) 267 */ 268 if (dict->parent && 269 (dict->parent->flags & DICTF_MADEDIR) == 0) { 270 dict->parent->flags |= DICTF_MADEDIR; 271 path2 = recover_path(dict->parent); 272 printf("mkdir %s\n", path2); 273 mkdir(path2, 0777); 274 free(path2); 275 path2 = NULL; 276 } 277 278 /* 279 * Create the file if necessary, report file creations 280 */ 281 path1 = recover_path(dict); 282 if (CachedPath && strcmp(CachedPath, path1) == 0) { 283 fd = CachedFd; 284 } else { 285 fd = open(path1, O_CREAT|O_RDWR, 0666); 286 } 287 if (fd < 0) { 288 printf("Unable to create %s: %s\n", 289 path1, strerror(errno)); 290 free(path1); 291 break; 292 } 293 if ((dict->flags & DICTF_MADEFILE) == 0) { 294 dict->flags |= DICTF_MADEFILE; 295 printf("mkfile %s\n", path1); 296 } 297 298 /* 299 * And write the record. A HAMMER data block is aligned 300 * and may contain trailing zeros after the file EOF. The 301 * inode record is required to get the actual file size. 302 * 303 * However, when the inode record is not available 304 * we can do a sparse write and that will get it right 305 * most of the time even if the inode record is never 306 * found. 307 */ 308 file_offset = (int64_t)leaf->base.key - len; 309 lseek(fd, (off_t)file_offset, SEEK_SET); 310 while (len) { 311 if (dict->size == -1) { 312 for (zfill = chunk - 1; zfill >= 0; --zfill) { 313 if (((char *)ondisk)[zfill]) 314 break; 315 } 316 ++zfill; 317 } else { 318 zfill = chunk; 319 } 320 321 if (zfill) 322 write(fd, ondisk, zfill); 323 if (zfill < chunk) 324 lseek(fd, chunk - zfill, SEEK_CUR); 325 326 len -= chunk; 327 data_offset += chunk; 328 file_offset += chunk; 329 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 330 if (ondisk == NULL) 331 break; 332 chunk = HAMMER_BUFSIZE - 333 ((int)data_offset & HAMMER_BUFMASK); 334 if (chunk > len) 335 chunk = len; 336 } 337 if (dict->size >= 0 && file_offset > dict->size) { 338 ftruncate(fd, dict->size); 339 /* fchmod(fd, 0666); */ 340 } 341 342 if (fd == CachedFd) { 343 free(path1); 344 } else if (CachedPath) { 345 free(CachedPath); 346 close(CachedFd); 347 CachedPath = path1; 348 CachedFd = fd; 349 } else { 350 CachedPath = path1; 351 CachedFd = fd; 352 } 353 break; 354 case HAMMER_RECTYPE_DIRENTRY: 355 nlen = len - offsetof(struct hammer_entry_data, name[0]); 356 if ((int)nlen < 0) /* illegal length */ 357 break; 358 if (ondisk->entry.obj_id == 0 || ondisk->entry.obj_id == 1) 359 break; 360 name = malloc(nlen + 1); 361 bcopy(ondisk->entry.name, name, nlen); 362 name[nlen] = 0; 363 sanitize_string(name); 364 365 /* 366 * We can't deal with hardlinks so if the object already 367 * has a name assigned to it we just keep using that name. 368 */ 369 dict2 = get_dict(ondisk->entry.obj_id, llid); 370 path1 = recover_path(dict2); 371 372 if (dict2->name == NULL) 373 dict2->name = name; 374 else 375 free(name); 376 377 /* 378 * Attach dict2 to its directory (dict), create the 379 * directory (dict) if necessary. We must ensure 380 * that the directory entry exists in order to be 381 * able to properly rename() the file without creating 382 * a namespace conflict. 383 */ 384 if ((dict2->flags & DICTF_PARENT) == 0) { 385 dict2->flags |= DICTF_PARENT; 386 dict2->parent = dict; 387 if ((dict->flags & DICTF_MADEDIR) == 0) { 388 dict->flags |= DICTF_MADEDIR; 389 path2 = recover_path(dict); 390 printf("mkdir %s\n", path2); 391 mkdir(path2, 0777); 392 free(path2); 393 path2 = NULL; 394 } 395 } 396 path2 = recover_path(dict2); 397 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) { 398 printf("Rename %s -> %s\n", path1, path2); 399 rename(path1, path2); 400 } 401 free(path1); 402 free(path2); 403 404 printf("dir %016jx:%05d entry %016jx \"%s\"\n", 405 (uintmax_t)leaf->base.obj_id, 406 llid, 407 (uintmax_t)ondisk->entry.obj_id, 408 name); 409 break; 410 default: 411 /* 412 * Ignore any other record types 413 */ 414 break; 415 } 416 done: 417 if (data_buffer) 418 rel_buffer(data_buffer); 419 } 420 421 #define RD_HSIZE 32768 422 #define RD_HMASK (RD_HSIZE - 1) 423 424 struct recover_dict *RDHash[RD_HSIZE]; 425 426 static 427 struct recover_dict * 428 get_dict(int64_t obj_id, uint16_t llid) 429 { 430 struct recover_dict *dict; 431 int i; 432 433 if (obj_id == 0) 434 return(NULL); 435 436 i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK; 437 for (dict = RDHash[i]; dict; dict = dict->next) { 438 if (dict->obj_id == obj_id && 439 dict->llid == llid) { 440 break; 441 } 442 } 443 if (dict == NULL) { 444 dict = malloc(sizeof(*dict)); 445 bzero(dict, sizeof(*dict)); 446 dict->obj_id = obj_id; 447 dict->llid = llid; 448 dict->next = RDHash[i]; 449 dict->size = -1; 450 RDHash[i] = dict; 451 452 /* 453 * Always connect dangling dictionary entries to object 1 454 * (the root of the PFS). 455 * 456 * DICTF_PARENT will not be set until we know what the 457 * real parent directory object is. 458 */ 459 if (dict->obj_id != 1) 460 dict->parent = get_dict(1, llid); 461 } 462 return(dict); 463 } 464 465 struct path_info { 466 enum { PI_FIGURE, PI_LOAD } state; 467 uint16_t llid; 468 char *base; 469 char *next; 470 int len; 471 }; 472 473 static void recover_path_helper(struct recover_dict *, struct path_info *); 474 475 static 476 char * 477 recover_path(struct recover_dict *dict) 478 { 479 struct path_info info; 480 481 bzero(&info, sizeof(info)); 482 info.llid = dict->llid; 483 info.state = PI_FIGURE; 484 recover_path_helper(dict, &info); 485 info.base = malloc(info.len); 486 info.next = info.base; 487 info.state = PI_LOAD; 488 recover_path_helper(dict, &info); 489 490 return(info.base); 491 } 492 493 static 494 void 495 recover_path_helper(struct recover_dict *dict, struct path_info *info) 496 { 497 /* 498 * Calculate path element length 499 */ 500 dict->flags |= DICTF_TRAVERSED; 501 502 switch(info->state) { 503 case PI_FIGURE: 504 if (dict->obj_id == 1) 505 info->len += 8; 506 else if (dict->name) 507 info->len += strlen(dict->name); 508 else 509 info->len += 6 + 16; 510 ++info->len; 511 512 if (dict->parent && 513 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 514 recover_path_helper(dict->parent, info); 515 } else { 516 info->len += strlen(TargetDir) + 1; 517 } 518 break; 519 case PI_LOAD: 520 if (dict->parent && 521 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 522 recover_path_helper(dict->parent, info); 523 } else { 524 strcpy(info->next, TargetDir); 525 info->next += strlen(info->next); 526 } 527 528 *info->next++ = '/'; 529 if (dict->obj_id == 1) { 530 snprintf(info->next, 8+1, "PFS%05d", info->llid); 531 } else if (dict->name) { 532 strcpy(info->next, dict->name); 533 } else { 534 snprintf(info->next, 6+16+1, "obj_0x%016jx", 535 (uintmax_t)dict->obj_id); 536 } 537 info->next += strlen(info->next); 538 break; 539 } 540 dict->flags &= ~DICTF_TRAVERSED; 541 } 542 543 static 544 void 545 sanitize_string(char *str) 546 { 547 while (*str) { 548 if (!isprint(*str)) 549 *str = 'x'; 550 ++str; 551 } 552 } 553