1 /*
2 * Copyright (c) 2023 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35 /*
36 * hammer2 recover <devpath> <path> <destdir>
37 *
38 * Recover files from corrupted media, recover deleted files from good
39 * media, generally ignoring the data structure topology outside of the
40 * structures that hang off of inodes and directory entries. Files are
41 * validated during recovery and renamed to .corrupted when a version of
42 * a file cannot be completely recovered.
43 *
44 * This is a "try to get everything we can out of the filesystem"
45 * directive when you've done something terrible to the filesystem.
46 * The resulting <destdir> tree cannot directly replace the lost topology
47 * as many versions of the same file will be present, so filenames are
48 * suffixed.
49 *
50 * <path> may be a relative file, directory, directory path, or absolute
51 * (absolute is relative to the mount) file or directory path.
52 *
53 * For example, "hammer2 recover /dev/da0s1d /home/charlie /tmp/" will
54 * recover all possible versions of the /home/charlie sub-tree. If
55 * you said "home/charlie" instead, then it would recover the same but
56 * also include any sub-directory paths that match home/charlie, not
57 * just root paths. If you want a specific file, then e.g. ".cshrc"
58 * would recover every single .cshrc that can be found on the media.
59 *
60 * The command checks ALL PFSs and snapshots. Redundant files with the same
61 * path are ignored. You basically get everything that can possibly be
62 * recovered from the media.
63 */
64 #include "hammer2.h"
65
#define HTABLE_SIZE	(4*1024*1024)	/* entries per hash table (power of 2) */
#define HTABLE_MASK	(HTABLE_SIZE - 1)	/* index mask (requires power-of-2 size) */
#define DISPMODULO	(HTABLE_SIZE / 32768)	/* progress-display update interval */
69
70 #include <openssl/sha.h>
71
/*
 * A directory entry located during the media scan: the filename plus the
 * inode number it references and the media offset of the referencing bref.
 * Chained into DirHash[], keyed on the inode number.
 */
typedef struct dirent_entry {
	struct dirent_entry *next;	/* DirHash chain link */
	char *filename;			/* malloc'd, NUL-terminated */
	size_t len;			/* filename length in bytes */
	hammer2_key_t inum;		/* inode number the entry references */
	hammer2_off_t bref_off;		/* media offset of the dirent bref */
} dirent_entry_t;
79
/*
 * A validated (or, for PFS roots, untested) inode located during the media
 * scan.  Only a summary is kept; the inode content is re-read from media in
 * the restoration pass and re-checked against inode_crc, bounding memory use
 * when millions of inode copies are present.
 *
 * Chained into both InodeHash[] (by inum) and InodeHash2[] (by a secondary
 * (inum ^ data_off) hash with better spread for duplicate suppression).
 */
typedef struct inode_entry {
	struct inode_entry *next;
	struct inode_entry *next2;	/* secondary (ino ^ data_off) hash */
	hammer2_off_t data_off;		/* media offset of inode data */
	hammer2_key_t inum;		/* from bref or inode meta */
	//hammer2_inode_data_t inode;	/* (removed, too expensive) */
	char *link_file_path;		/* path of first extracted copy, for hardlinking */
	uint32_t inode_crc;		/* icrc32 of inode content at scan time */
	uint8_t type;			/* from inode meta */
	uint8_t encountered;		/* copies limit w/REPINODEDEPTH */
	uint8_t loopcheck;		/* recursion loop check */
	uint8_t unused03;
} inode_entry_t;
93
/*
 * One destination pathname in the recovery output tree.  The iterator
 * supplies the .00001/.00002 suffix used to disambiguate multiple
 * recovered versions of the same file.  Chained into TopologyHash[].
 */
typedef struct topology_entry {
	struct topology_entry *next;
	char *path;		/* strdup'd destination path (no suffix) */
	long iterator;		/* next version suffix, starts at 1 */
} topology_entry_t;
99
/*
 * Negative-cache entry: a full copy of a bref already determined to be
 * invalid, so repeated sightings skip the (expensive) media I/O.
 */
typedef struct neg_entry {
	struct neg_entry *next;
	hammer2_blockref_t bref;
} neg_entry_t;
104
/*
 * (topology, indirect-block data offset) pair already scanned; used to
 * avoid re-walking COW-shared indirect blocks at the same path.
 */
typedef struct topo_bref_entry {
	struct topo_bref_entry *next;
	topology_entry_t *topo;
	hammer2_off_t data_off;
} topo_bref_entry_t;
110
/*
 * (topology, inode) pair already processed; used to avoid re-dumping the
 * same inode at the same destination path.
 */
typedef struct topo_inode_entry {
	struct topo_inode_entry *next;
	topology_entry_t *topo;
	inode_entry_t *iscan;
} topo_inode_entry_t;
116
/*
 * Global hash tables, each HTABLE_SIZE buckets, allocated in cmd_recover()
 * and torn down at the end of the run.
 */
static dirent_entry_t **DirHash;
static inode_entry_t **InodeHash;
static inode_entry_t **InodeHash2;	/* secondary multi-variable hash */
static topology_entry_t **TopologyHash;
static neg_entry_t **NegativeHash;
static topo_bref_entry_t **TopoBRefHash;
static topo_inode_entry_t **TopoInodeHash;

/*
 * Forward declarations (file-local helpers).
 */
/*static void resolve_topology(void);*/
static int check_filename(hammer2_blockref_t *bref,
			const char *filename, char *buf, size_t flen);
//static void enter_dirent(hammer2_blockref_t *bref, hammer2_off_t loff,
//			const char *filename, size_t flen);
static topology_entry_t *enter_topology(const char *path);
static int topology_check_duplicate_inode(topology_entry_t *topo,
			inode_entry_t *iscan);
static int topology_check_duplicate_indirect(topology_entry_t *topo,
			hammer2_blockref_t *bref);
static void enter_inode(hammer2_blockref_t *bref);
static void enter_inode_untested(hammer2_inode_data_t *ip, hammer2_off_t loff);
static inode_entry_t *find_first_inode(hammer2_key_t inum);
/*static dirent_entry_t *find_first_dirent(hammer2_key_t inum);*/
static int find_neg(hammer2_blockref_t *bref);
static void enter_neg(hammer2_blockref_t *bref);
static void dump_tree(inode_entry_t *iscan, const char *dest,
			const char *remain, int depth, int path_depth,
			int isafile);
static int dump_inum_file(inode_entry_t *iscan, hammer2_inode_data_t *inode,
			const char *path);
static int dump_inum_softlink(hammer2_inode_data_t *inode, const char *path);
static int dump_dir_data(const char *dest, const char *remain,
			hammer2_blockref_t *base, int count,
			int depth, int path_depth, int isafile);
static int dump_file_data(int wfd, hammer2_off_t fsize,
			hammer2_blockref_t *bref, int count);
static int validate_crc(hammer2_blockref_t *bref, void *data, size_t bytes);
static uint32_t hammer2_to_unix_xid(const uuid_t *uuid);
static void *hammer2_cache_read(hammer2_off_t data_off, size_t *bytesp);

/*
 * Run statistics, reported on stdout during/after the passes.
 */
static long InodeCount;
static long TopoBRefCount;
static long TopoBRefDupCount;
static long TopoInodeCount;
static long TopoInodeDupCount;
static long NegativeCount;
static long NegativeHits;
static long MediaBytes;
static int StrictMode = 1;	/* reject CHECK_NONE/DISABLED brefs when set */

/* number of inode-sized records that fit in one media block */
#define INODES_PER_BLOCK	\
	(sizeof(union hammer2_media_data) / sizeof(hammer2_inode_data_t))
/* beyond this recursion depth, only dump inodes not yet encountered */
#define REPINODEDEPTH		256
169
170 /*
171 * Recover the specified file.
172 *
173 * Basically do a raw scan of the drive image looking for directory entries
174 * and inodes. Index all inodes found, including copies, and filter
175 * directory entries for the requested filename to locate inode numbers.
176 *
177 * All copies that are located are written to destdir with a suffix .00001,
178 * .00002, etc.
179 */
int
cmd_recover(const char *devpath, const char *pathname,
	    const char *destdir, int strict, int isafile)
{
	hammer2_media_data_t data;
	hammer2_volume_t *vol;
	hammer2_off_t loff;
	hammer2_off_t poff;
	size_t i;

	StrictMode = strict;
	hammer2_init_volumes(devpath, 1);

	/*
	 * NOTE(review): these calloc() results are not checked; each table
	 * is HTABLE_SIZE pointers, so a failed allocation would fault on
	 * first use -- TODO confirm intended OOM policy.
	 */
	DirHash = calloc(HTABLE_SIZE, sizeof(dirent_entry_t *));
	InodeHash = calloc(HTABLE_SIZE, sizeof(inode_entry_t *));
	InodeHash2 = calloc(HTABLE_SIZE, sizeof(inode_entry_t *));
	TopologyHash = calloc(HTABLE_SIZE, sizeof(topology_entry_t *));
	NegativeHash = calloc(HTABLE_SIZE, sizeof(neg_entry_t *));
	TopoBRefHash = calloc(HTABLE_SIZE, sizeof(topo_bref_entry_t *));
	TopoInodeHash = calloc(HTABLE_SIZE, sizeof(topo_inode_entry_t *));

	/*
	 * Media Pass
	 *
	 * Look for blockrefs that point to inodes.  The blockrefs could
	 * be bogus since we aren't validating them, but the combination
	 * of a CRC that matches the inode content is fairly robust in
	 * finding actual inodes.
	 *
	 * We also enter unvalidated inodes for inode #1 (PFS roots),
	 * because there might not be any blockrefs pointing to some of
	 * them.  We need these to be able to locate directory entries
	 * under the roots.
	 *
	 * At the moment we do not try to enter unvalidated directory
	 * entries, since this will result in a massive number of false
	 * hits.
	 */
	printf("MEDIA PASS\n");

	loff = 0;
	while ((vol = hammer2_get_volume(loff)) != NULL) {
		int fd;
		int xdisp;
		hammer2_off_t vol_size;

		fd = vol->fd;
		poff = loff - vol->offset;
		vol_size = lseek(fd, 0L, SEEK_END);
		xdisp = 0;

		/*
		 * Raw linear scan of the volume in sizeof(data) chunks,
		 * skipping over unreadable blocks rather than aborting.
		 */
		while (poff < vol->size) {
			if (pread(fd, &data, sizeof(data), poff) !=
			    sizeof(data))
			{
				/* try to skip possible I/O error */
				poff += sizeof(data);
				continue;
			}

			/*
			 * Interpret the block as an array of blockrefs and
			 * index anything that might be an inode.  The real
			 * filtering happens inside enter_inode().
			 */
			for (i = 0; i < HAMMER2_IND_COUNT_MAX; ++i) {
				hammer2_blockref_t *bref;
				// char filename_buf[1024+1];

				bref = &data.npdata[i];

				/*
				 * Found a possible inode
				 */
				switch(bref->type) {
				case HAMMER2_BREF_TYPE_INODE:
					/*
					 * Note: preliminary bref filter
					 * is inside enter_inode().
					 */
					enter_inode(bref);
					break;
				case HAMMER2_BREF_TYPE_DIRENT:
					/*
					 * Go overboard and try to index
					 * anything that looks like a
					 * directory entry.  This might find
					 * entries whose inodes are no longer
					 * available, but will also generate
					 * a lot of false files.
					 */
#if 0
					if (filename &&
					    flen != bref->embed.dirent.namlen)
					{
						/* single-file shortcut */
						break;
					}
					if (check_filename(bref, filename,
					    filename_buf,
					    bref->embed.dirent.namlen))
					{
						enter_dirent(bref,
						    poff + vol->offset +
						    (i *
						    sizeof(hammer2_blockref_t)),
						    filename_buf,
						    bref->embed.dirent.namlen);
					}
#endif
					break;
				default:
					break;
				}
			}

			/*
			 * Look for possible root inodes.  We generally can't
			 * find these by finding BREFs pointing to them because
			 * the BREFs often hang off the volume header.
			 *
			 * These "inodes" could be seriously corrupt, but if
			 * the bref tree is intact that is what we need to
			 * get top-level directory entries.
			 */
			for (i = 0; i < INODES_PER_BLOCK; ++i) {
				hammer2_inode_data_t *ip;

				ip = (void *)(data.buf + i * sizeof(*ip));
				if (ip->meta.inum == 1 &&
				    ip->meta.iparent == 0 &&
				    ip->meta.type ==
					HAMMER2_OBJTYPE_DIRECTORY &&
				    ip->meta.op_flags & HAMMER2_OPFLAG_PFSROOT)
				{
					enter_inode_untested(ip,
						poff + vol->offset +
						(i * sizeof(*ip)));
				}
			}
			poff += sizeof(data);

			MediaBytes += sizeof(data);

			/*
			 * Update progress
			 */
			if (QuietOpt == 0 &&
			    (++xdisp == DISPMODULO ||
			     poff == vol->size - sizeof(data)))
			{
				xdisp = 0;
				printf("%ld inodes scanned, "
				       "media %6.2f/%-3.2fG\r",
				       InodeCount,
				       MediaBytes / 1e9,
				       vol_size / 1e9);
				fflush(stdout);
			}
		}
		/* advance to the next volume in the set */
		loff = vol->offset + vol->size;
	}

	/*
	 * Restoration Pass
	 *
	 * Run through available directory inodes, which allows us to locate
	 * and validate (crc check) their directory entry blockrefs and
	 * construct absolute or relative paths through a recursion.
	 *
	 * When an absolute path is obtained the search is anchored on a
	 * root inode.  When a relative path is obtained the search is
	 * unanchored and will find all matching sub-paths.  For example,
	 * if you look for ".cshrc" it will find ALL .cshrc's.  If you
	 * look for "fubar/.cshsrc" it will find ALL .cshrc's residing
	 * in a directory called fubar, however many there are.  But if
	 * you look for "/fubar/srcs" it will only find the sub-tree
	 * "/fubar/srcs" relative to PFS roots.
	 *
	 * We may not have indexed the PFS roots themselves, because they
	 * often hang off of the volume header and might not have COW'd
	 * references to them, so we use the "iparent" field in the inode
	 * to detect top-level directories under those roots.
	 */
	printf("\nInodes=%ld, Invalid_brefs=%ld, Invalid_hits=%ld\n",
	       InodeCount, NegativeCount, NegativeHits);
	printf("RESTORATION PASS\n");

	{
		int abspath = 0;
		long root_count = 0;
		long root_max = 0;

		/*
		 * Check for absolute path, else relative.  Strip leading
		 * slashes either way.
		 */
		if (pathname[0] == '/') {
			abspath = 1;
			while (*pathname == '/')
				++pathname;
		}

		/*
		 * Count root inodes (inum 1 hashes to bucket 1 since
		 * hv = inum ^ (inum >> 16) masked)
		 */
		{
			inode_entry_t *iscan;

			for (iscan = InodeHash[1];
			     iscan;
			     iscan = iscan->next)
			{
				if (iscan->inum == 1)
					++root_max;
			}
		}

		/*
		 * Run through all directory inodes to locate validated
		 * directory entries.  If an absolute path was specified
		 * we start at root inodes.
		 */
		for (i = 0; i < HTABLE_SIZE; ++i) {
			inode_entry_t *iscan;

			for (iscan = InodeHash[i];
			     iscan;
			     iscan = iscan->next)
			{
				/*
				 * Absolute paths always start at root inodes,
				 * otherwise we can start at any directory
				 * inode.
				 */
				if (abspath && iscan->inum != 1)
					continue;
				if (iscan->type != HAMMER2_OBJTYPE_DIRECTORY)
					continue;

				/*
				 * Progress down root inodes can be slow,
				 * so print progress for each root inode.
				 */
				if (i == 1 && iscan->inum == 1 &&
				    QuietOpt == 0)
				{
					printf("scan roots %p 0x%016jx "
					       "(count %ld/%ld)\r",
					       iscan,
					       iscan->data_off,
					       ++root_count, root_max);
					fflush(stdout);
				}

				/*
				 * Primary match/recover recursion
				 */
				dump_tree(iscan, destdir, pathname,
					  1, 1, isafile);
			}
			if (QuietOpt == 0 &&
			    (i & (DISPMODULO - 1)) == DISPMODULO - 1)
			{
				if (i == DISPMODULO - 1)
					printf("\n");
				printf("Progress %zd/%d\r", i, HTABLE_SIZE);
				fflush(stdout);
			}
		}
		printf("\n");
	}

	printf("CLEANUP\n");
	printf("TopoBRef stats: count=%ld dups=%ld\n",
	       TopoBRefCount, TopoBRefDupCount);
	printf("TopoInode stats: count=%ld dups=%ld\n",
	       TopoInodeCount, TopoInodeDupCount);

	/*
	 * Cleanup: release volumes, then drain and free every hash table.
	 */
	hammer2_cleanup_volumes();

	for (i = 0; i < HTABLE_SIZE; ++i) {
		dirent_entry_t *dscan;
		inode_entry_t *iscan;
		topology_entry_t *top_scan;
		neg_entry_t *negscan;
		topo_bref_entry_t *topo_bref;
		topo_inode_entry_t *topo_inode;

		while ((dscan = DirHash[i]) != NULL) {
			DirHash[i] = dscan->next;
			free(dscan->filename);
			free(dscan);
		}

		while ((iscan = InodeHash[i]) != NULL) {
			InodeHash[i] = iscan->next;
			free(iscan);
		}
		/* InodeHash2[] indexes the same structures */

		while ((top_scan = TopologyHash[i]) != NULL) {
			TopologyHash[i] = top_scan->next;
			free(top_scan->path);
			free(top_scan);
		}

		while ((negscan = NegativeHash[i]) != NULL) {
			NegativeHash[i] = negscan->next;
			free(negscan);
		}

		while ((topo_bref = TopoBRefHash[i]) != NULL) {
			TopoBRefHash[i] = topo_bref->next;
			free(topo_bref);
		}

		while ((topo_inode = TopoInodeHash[i]) != NULL) {
			TopoInodeHash[i] = topo_inode->next;
			free(topo_inode);
		}
	}
	free(TopoInodeHash);
	free(TopoBRefHash);
	free(NegativeHash);
	free(TopologyHash);
	free(DirHash);
	free(InodeHash);
	free(InodeHash2);

	return 0;
}
507
508 /*
509 * Check for a matching filename, Directory entries can directly-embed
510 * filenames <= 64 bytes. Otherwise the directory entry has a data
511 * reference to the location of the filename.
512 *
513 * If filename is NULL, check for a valid filename, and copy it into buf.
514 */
515 static int
check_filename(hammer2_blockref_t * bref,const char * filename,char * buf,size_t flen)516 check_filename(hammer2_blockref_t *bref, const char *filename, char *buf,
517 size_t flen)
518 {
519 /* filename too long */
520 if (flen > 1024)
521 return 0;
522
523 if (flen <= 64) {
524 /*
525 * Filename is embedded in bref
526 */
527 if (buf)
528 bcopy(bref->check.buf, buf, flen);
529 buf[flen] = 0;
530 if (filename == NULL)
531 return 1;
532 if (bcmp(filename, bref->check.buf, flen) == 0)
533 return 1;
534 } else {
535 /*
536 * Filename requires media access
537 */
538 hammer2_media_data_t data;
539 hammer2_volume_t *vol;
540 hammer2_off_t poff;
541 hammer2_off_t psize;
542 int vfd;
543
544 /*
545 * bref must represent a data reference to a 1KB block or
546 * smaller.
547 */
548 if ((bref->data_off & 0x1F) == 0 ||
549 (bref->data_off & 0x1F) > 10)
550 {
551 return 0;
552 }
553
554 /*
555 * Indirect block containing filename must be large enough
556 * to contain the filename.
557 */
558 psize = 1 << (bref->data_off & 0x1F);
559 if (flen > psize)
560 return 0;
561
562 /*
563 * In strict mode we disallow bref's set to HAMMER2_CHECK_NONE
564 * or HAMMER2_CHECK_DISABLED. Do this check before burning
565 * time on an I/O.
566 */
567 if (StrictMode) {
568 if (HAMMER2_DEC_CHECK(bref->methods) ==
569 HAMMER2_CHECK_NONE ||
570 HAMMER2_DEC_CHECK(bref->methods) ==
571 HAMMER2_CHECK_DISABLED)
572 {
573 return 0;
574 }
575 }
576
577 /*
578 * Read the data, check CRC and such
579 */
580 vol = hammer2_get_volume(bref->data_off);
581 if (vol == NULL)
582 return 0;
583
584 vfd = vol->fd;
585 poff = (bref->data_off - vol->offset) & ~0x1FL;
586 if (pread(vfd, &data, psize, poff) != (ssize_t)psize)
587 return 0;
588
589 if (validate_crc(bref, &data, psize) == 0)
590 return 0;
591
592 if (buf)
593 bcopy(data.buf, buf, flen);
594 buf[flen] = 0;
595 if (filename == NULL)
596 return 1;
597 if (bcmp(filename, data.buf, flen) == 0)
598 return 1;
599 }
600 return 0;
601 }
602
#if 0
/*
 * Record a directory entry found on media, de-duplicating exact
 * (inum, filename) repeats via DirHash[].  Currently compiled out;
 * see the matching #if 0 in the media pass of cmd_recover().
 */
static void
enter_dirent(hammer2_blockref_t *bref, hammer2_off_t loff,
	     const char *filename, size_t flen)
{
	hammer2_key_t inum = bref->embed.dirent.inum;
	dirent_entry_t *entry;
	uint32_t hv = (inum ^ (inum >> 16)) & HTABLE_MASK;

	/* ignore exact duplicates */
	for (entry = DirHash[hv]; entry; entry = entry->next) {
		if (entry->inum == inum &&
		    entry->len == flen &&
		    bcmp(entry->filename, filename, flen) == 0)
		{
			return;
		}
	}
	entry = malloc(sizeof(*entry));
	bzero(entry, sizeof(*entry));
	entry->inum = inum;
	entry->next = DirHash[hv];
	entry->filename = malloc(flen + 1);
	entry->len = flen;
	entry->bref_off = loff;
	bcopy(filename, entry->filename, flen);
	entry->filename[flen] = 0;
	DirHash[hv] = entry;
}
#endif
632
633 /*
634 * Topology duplicate scan avoidance helpers. We associate inodes and
635 * indirect block data offsets, allowing us to avoid re-scanning any
636 * duplicates that we see. And there will be many due to how the COW
637 * process occurs.
638 *
639 * For example, when a large directory is modified the content update to
640 * the directory entries will cause the directory inode to be COWd, along
641 * with whatever is holding the bref(s) blocks that have undergone
642 * adjustment. More likely than not, there will be MANY shared indirect
643 * blocks.
644 */
645 static topology_entry_t *
enter_topology(const char * path)646 enter_topology(const char *path)
647 {
648 topology_entry_t *topo;
649 uint32_t hv = 0;
650 size_t i;
651
652 for (i = 0; path[i]; ++i)
653 hv = (hv << 5) ^ path[i] ^ (hv >> 24);
654 hv = (hv ^ (hv >> 16)) & HTABLE_MASK;
655 for (topo = TopologyHash[hv]; topo; topo = topo->next) {
656 if (strcmp(path, topo->path) == 0)
657 return topo;
658 }
659 topo = malloc(sizeof(*topo));
660 bzero(topo, sizeof(*topo));
661
662 topo->next = TopologyHash[hv];
663 TopologyHash[hv] = topo;
664 topo->path = strdup(path);
665 topo->iterator = 1;
666
667 return topo;
668 }
669
670 /*
671 * Determine if an inode at the current topology location is one that we
672 * have already dealt with.
673 */
674 static int
topology_check_duplicate_inode(topology_entry_t * topo,inode_entry_t * iscan)675 topology_check_duplicate_inode(topology_entry_t *topo, inode_entry_t *iscan)
676 {
677 int hv = (((intptr_t)topo ^ (intptr_t)iscan) >> 6) & HTABLE_MASK;
678 topo_inode_entry_t *scan;
679
680 for (scan = TopoInodeHash[hv]; scan; scan = scan->next) {
681 if (scan->topo == topo &&
682 scan->iscan == iscan)
683 {
684 ++TopoInodeDupCount;
685 return 1;
686 }
687 }
688 scan = malloc(sizeof(*scan));
689 bzero(scan, sizeof(*scan));
690 scan->iscan = iscan;
691 scan->topo = topo;
692 scan->next = TopoInodeHash[hv];
693 TopoInodeHash[hv] = scan;
694 ++TopoInodeCount;
695
696 return 0;
697 }
698
699 /*
700 * Determine if an indirect block (represented by the bref) at the current
701 * topology level is one that we have already dealt with.
702 */
703 static int
topology_check_duplicate_indirect(topology_entry_t * topo,hammer2_blockref_t * bref)704 topology_check_duplicate_indirect(topology_entry_t *topo,
705 hammer2_blockref_t *bref)
706 {
707 int hv = ((intptr_t)topo ^ (bref->data_off >> 8)) & HTABLE_MASK;
708 topo_bref_entry_t *scan;
709
710 for (scan = TopoBRefHash[hv]; scan; scan = scan->next) {
711 if (scan->topo == topo &&
712 scan->data_off == bref->data_off)
713 {
714 ++TopoBRefDupCount;
715 return 1;
716 }
717 }
718 scan = malloc(sizeof(*scan));
719 bzero(scan, sizeof(*scan));
720 scan->data_off = bref->data_off;
721 scan->topo = topo;
722 scan->next = TopoBRefHash[hv];
723 TopoBRefHash[hv] = scan;
724 ++TopoBRefCount;
725
726 return 0;
727 }
728
729 /*
 * Validate and record an inode found on media.  There can be many versions
731 * of the same inode number present on the media.
732 */
static void
enter_inode(hammer2_blockref_t *bref)
{
	uint32_t hv;
	uint32_t hv2;
	inode_entry_t *scan;
	hammer2_inode_data_t *inode;
	size_t psize;

	hv = (bref->key ^ (bref->key >> 16)) & HTABLE_MASK;
	hv2 = (bref->key ^ (bref->key >> 16) ^ (bref->data_off >> 10)) &
	      HTABLE_MASK;

	/*
	 * Ignore duplicate inodes, use the secondary inode hash table's
	 * better spread to reduce cpu consumption (there can be many
	 * copies of the same inode so the primary hash table can have
	 * very long chains in it).
	 */
	for (scan = InodeHash2[hv2]; scan; scan = scan->next2) {
		if (bref->key == scan->inum &&
		    bref->data_off == scan->data_off)
		{
			return;
		}
	}

	/*
	 * Ignore brefs which we have already determined to be bad
	 */
	if (find_neg(bref))
		return;

	/*
	 * Validate the potential blockref.  Note that this might not be a
	 * real blockref.  Don't trust anything, really.
	 *
	 * - Must be sized for an inode block
	 * - Must be properly aligned for an inode block
	 * - Keyspace is 1 (keybits == 0), i.e. a single inode number
	 */
	if ((1 << (bref->data_off & 0x1F)) != sizeof(*inode))
		return;
	if ((bref->data_off & ~0x1FL & (sizeof(*inode) - 1)) != 0)
		return;
	if (bref->keybits != 0)
		return;
	if (bref->key == 0)
		return;

	inode = hammer2_cache_read(bref->data_off, &psize);

	/*
	 * Failure prior to I/O being performed.
	 */
	if (psize == 0)
		return;

	/*
	 * Any failures which occur after the I/O has been performed
	 * should enter the bref in the negative cache to avoid unnecessary
	 * guaranteed-to-fail reissuances of the same (bref, data_off) combo.
	 */
	if (inode == NULL) {
fail:
		enter_neg(bref);
		return;
	}

	/*
	 * The blockref looks ok but the real test is whether the
	 * inode data it references passes the CRC check.  If it
	 * does, it is highly likely that we have a valid inode.
	 */
	if (validate_crc(bref, inode, sizeof(*inode)) == 0)
		goto fail;
	if (inode->meta.inum != bref->key)
		goto fail;

	/*
	 * Record the inode.  For now we do not record the actual content
	 * of the inode because if there are more than few million of them
	 * the memory consumption can get into the dozens of gigabytes.
	 *
	 * Instead, the inode will be re-read from media in the recovery
	 * pass.
	 */
	scan = malloc(sizeof(*scan));
	bzero(scan, sizeof(*scan));

	scan->inum = bref->key;
	scan->type = inode->meta.type;
	scan->data_off = bref->data_off;
	scan->inode_crc = hammer2_icrc32(inode, sizeof(*inode));
	//scan->inode = *inode;	/* removed, too expensive */

	/* chain into both the primary and the secondary hash table */
	scan->next = InodeHash[hv];
	InodeHash[hv] = scan;
	scan->next2 = InodeHash2[hv2];
	InodeHash2[hv2] = scan;

	++InodeCount;
}
836
837 /*
838 * This is used to enter possible root inodes. Root inodes typically hang
839 * off the volume root and thus there might not be a bref reference to the
840 * many old copies of root inodes sitting around on the media. Without a
841 * bref we can't really validate that the content is ok. But we need
842 * these inodes as part of our path searches.
843 */
844 static void
enter_inode_untested(hammer2_inode_data_t * ip,hammer2_off_t loff)845 enter_inode_untested(hammer2_inode_data_t *ip, hammer2_off_t loff)
846 {
847 uint32_t hv;
848 uint32_t hv2;
849 inode_entry_t *scan;
850
851 hv = (ip->meta.inum ^ (ip->meta.inum >> 16)) & HTABLE_MASK;
852 hv2 = (ip->meta.inum ^ (ip->meta.inum >> 16) ^ (loff >> 10)) &
853 HTABLE_MASK;
854
855 for (scan = InodeHash2[hv2]; scan; scan = scan->next2) {
856 if (ip->meta.inum == scan->inum &&
857 loff == scan->data_off)
858 {
859 return;
860 }
861 }
862
863 /*
864 * Record the inode. For now we do not record the actual content
865 * of the inode because if there are more than few million of them
866 * the memory consumption can get into the dozens of gigabytes.
867 *
868 * Instead, the inode will be re-read from media in the recovery
869 * pass.
870 */
871 scan = malloc(sizeof(*scan));
872 bzero(scan, sizeof(*scan));
873
874 scan->inum = ip->meta.inum;
875 scan->type = ip->meta.type;
876 scan->data_off = loff;
877 scan->inode_crc = hammer2_icrc32(ip, sizeof(*ip));
878 //scan->inode = *ip; /* removed, too expensive */
879
880 scan->next = InodeHash[hv];
881 InodeHash[hv] = scan;
882 scan->next2 = InodeHash2[hv2];
883 InodeHash2[hv2] = scan;
884
885 ++InodeCount;
886 }
887
888 static inode_entry_t *
find_first_inode(hammer2_key_t inum)889 find_first_inode(hammer2_key_t inum)
890 {
891 inode_entry_t *entry;
892 uint32_t hv = (inum ^ (inum >> 16)) & HTABLE_MASK;
893
894 for (entry = InodeHash[hv]; entry; entry = entry->next) {
895 if (entry->inum == inum)
896 return entry;
897 }
898 return NULL;
899 }
900
901 /*
902 * Negative bref cache. A cache of brefs that we have determined
903 * to be invalid. Used to reduce unnecessary disk I/O.
904 *
905 * NOTE: Checks must be reasonable and at least encompass checks
906 * done in enter_inode() after it has decided to read the
907 * block at data_off.
908 *
909 * Adding a few more match fields in addition won't hurt either.
910 */
911 static int
find_neg(hammer2_blockref_t * bref)912 find_neg(hammer2_blockref_t *bref)
913 {
914 int hv = (bref->data_off >> 10) & HTABLE_MASK;
915 neg_entry_t *neg;
916
917 for (neg = NegativeHash[hv]; neg; neg = neg->next) {
918 if (bref->data_off == neg->bref.data_off &&
919 bref->type == neg->bref.type &&
920 bref->methods == neg->bref.methods &&
921 bref->key == neg->bref.key &&
922 bcmp(&bref->check, &neg->bref.check,
923 sizeof(bref->check)) == 0)
924 {
925 ++NegativeHits;
926 return 1;
927 }
928 }
929 return 0;
930 }
931
932 static void
enter_neg(hammer2_blockref_t * bref)933 enter_neg(hammer2_blockref_t *bref)
934 {
935 int hv = (bref->data_off >> 10) & HTABLE_MASK;
936 neg_entry_t *neg;
937
938 neg = malloc(sizeof(*neg));
939 bzero(neg, sizeof(*neg));
940 neg->next = NegativeHash[hv];
941 neg->bref = *bref;
942 NegativeHash[hv] = neg;
943 ++NegativeCount;
944 }
945
946 /*
947 * Dump the specified inode (file or directory)
948 *
949 * This function recurses via dump_dir_data().
950 */
static void
dump_tree(inode_entry_t *iscan, const char *dest, const char *remain,
	  int depth, int path_depth, int isafile)
{
	topology_entry_t *topo;
	hammer2_inode_data_t *inode;
	hammer2_inode_data_t inode_copy;
	struct stat st;
	size_t psize;
	char *path;

	/*
	 * Re-read the already-validated inode instead of saving it in
	 * memory from the media pass.  Even though we already validated
	 * it, the content may have changed if scanning live media, so
	 * check against a simple crc we recorded earlier.
	 */
	inode = hammer2_cache_read(iscan->data_off, &psize);
	if (psize == 0)
		return;
	if (inode == NULL || psize != sizeof(*inode))
		return;
	if (iscan->inode_crc != hammer2_icrc32(inode, sizeof(*inode)))
		return;
	/* copy out of the cache buffer before recursing can evict it */
	inode_copy = *inode;
	inode = &inode_copy;

	/*
	 * Try to limit potential infinite loops
	 */
	if (depth > REPINODEDEPTH && iscan->encountered)
		return;

	/*
	 * Get rid of any dividing slashes
	 */
	while (*remain == '/')
		++remain;

	/*
	 * Create/lookup destination path (without the iterator), to acquire
	 * an iterator for different versions of the same file.
	 *
	 * Due to the COW mechanism, a lot of repeated snapshot-like
	 * directories may be encountered so we use the topology tree
	 * to weed out duplicates that attempt to use the same pathname.
	 *
	 * We also don't iterate copies of directories since this would
	 * create a major mess due to the many versions that might be
	 * laying around.  Directories use unextended names.
	 */
	topo = enter_topology(dest);
	if (topology_check_duplicate_inode(topo, iscan))
		return;

	switch(iscan->type) {
	case HAMMER2_OBJTYPE_DIRECTORY:
		/*
		 * If we have exhausted the path and isafile is TRUE,
		 * stop.
		 */
		if (isafile && *remain == 0)
			return;

		/*
		 * Create / make usable the target directory.  Note that
		 * it might already exist.
		 *
		 * Do not do this for the destination base directory
		 * (depth 1).
		 */
		if (depth != 1) {
			if (mkdir(dest, 0755) == 0)
				iscan->encountered = 1;
			if (stat(dest, &st) == 0) {
				if (st.st_flags)
					chflags(dest, 0);
				if ((st.st_mode & 0700) != 0700)
					chmod(dest, 0755);
			}
		}

		/*
		 * Dump directory contents (scan the directory)
		 */
		(void)dump_dir_data(dest, remain,
				    &inode->u.blockset.blockref[0],
				    HAMMER2_SET_COUNT, depth, path_depth + 1,
				    isafile);

		/*
		 * Final adjustment to directory inode: restore times,
		 * ownership, mode, and flags after the contents are in.
		 */
		if (depth != 1) {
			struct timeval tvs[2];

			tvs[0].tv_sec = inode->meta.atime / 1000000;
			tvs[0].tv_usec = inode->meta.atime % 1000000;
			tvs[1].tv_sec = inode->meta.mtime / 1000000;
			tvs[1].tv_usec = inode->meta.mtime % 1000000;

			/* NOTE(review): message says "futimes" but the
			 * call is lutimes() */
			if (lutimes(dest, tvs) < 0)
				perror("futimes");
			lchown(dest,
			       hammer2_to_unix_xid(&inode->meta.uid),
			       hammer2_to_unix_xid(&inode->meta.gid));

			lchmod(dest, inode->meta.mode);
			lchflags(dest, inode->meta.uflags);
		}
		break;
	case HAMMER2_OBJTYPE_REGFILE:
		/*
		 * If no more path to match, dump the file contents under a
		 * versioned name (".00001", ".00002", ...).
		 */
		if (*remain == 0) {
			/* NOTE(review): asprintf() result unchecked --
			 * path is NULL on allocation failure */
			asprintf(&path, "%s.%05ld", dest, topo->iterator);
			++topo->iterator;

			if (stat(path, &st) == 0) {
				if (st.st_flags)
					chflags(path, 0);
				if ((st.st_mode & 0600) != 0600)
					chmod(path, 0644);
			}
			iscan->encountered = 1;
			(void)dump_inum_file(iscan, inode, path);
			free(path);
		}
		break;
	case HAMMER2_OBJTYPE_SOFTLINK:
		/*
		 * If no more path to match, dump the link contents under a
		 * versioned name.
		 */
		if (*remain == 0) {
			asprintf(&path, "%s.%05ld", dest, topo->iterator);
			++topo->iterator;

			if (stat(path, &st) == 0) {
				if (st.st_flags)
					chflags(path, 0);
				if ((st.st_mode & 0600) != 0600)
					chmod(path, 0644);
			}
			(void)dump_inum_softlink(inode, path);
			free(path);
		}
		break;
	/* other object types are intentionally ignored (no default case) */
	}
}
1101
1102 /*
1103 * [re]create a regular file and attempt to restore the originanl perms,
1104 * modes, flags, times, uid, and gid if successful.
1105 *
1106 * If the data block recursion fails the file will be renamed
1107 * .corrupted.
1108 */
1109 static int
dump_inum_file(inode_entry_t * iscan,hammer2_inode_data_t * inode,const char * path1)1110 dump_inum_file(inode_entry_t *iscan, hammer2_inode_data_t *inode,
1111 const char *path1)
1112 {
1113 char *path2;
1114 int wfd;
1115 int res;
1116
1117 /*
1118 * If this specific inode has already been generated, try to
1119 * hardlink it instead of regenerating the same file again.
1120 */
1121 if (iscan->link_file_path) {
1122 if (link(iscan->link_file_path, path1) == 0)
1123 return 1;
1124 chflags(iscan->link_file_path, 0);
1125 chmod(iscan->link_file_path, 0600);
1126 if (link(iscan->link_file_path, path1) == 0) {
1127 chmod(iscan->link_file_path, inode->meta.mode);
1128 chflags(iscan->link_file_path, inode->meta.uflags);
1129 return 1;
1130 }
1131 }
1132
1133 /*
1134 * Cleanup potential flags and modes to allow us to write out a
1135 * new file.
1136 */
1137 chflags(path1, 0);
1138 chmod(path1, 0600);
1139 wfd = open(path1, O_RDWR|O_CREAT|O_TRUNC, 0600);
1140
1141 if (inode->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
1142 /*
1143 * Direct data case
1144 */
1145 if (inode->meta.size > 0 &&
1146 inode->meta.size <= sizeof(inode->u.data))
1147 {
1148 write(wfd, inode->u.data, inode->meta.size);
1149 }
1150 res = 1;
1151 } else {
1152 /*
1153 * file content, indirect blockrefs
1154 */
1155 res = dump_file_data(wfd, inode->meta.size,
1156 &inode->u.blockset.blockref[0],
1157 HAMMER2_SET_COUNT);
1158 }
1159
1160 /*
1161 * On success, set perms, mtime, flags, etc
1162 * On failure, rename file to .corrupted
1163 */
1164 if (res) {
1165 struct timeval tvs[2];
1166
1167 tvs[0].tv_sec = inode->meta.atime / 1000000;
1168 tvs[0].tv_usec = inode->meta.atime % 1000000;
1169 tvs[1].tv_sec = inode->meta.mtime / 1000000;
1170 tvs[1].tv_usec = inode->meta.mtime % 1000000;
1171
1172 ftruncate(wfd, inode->meta.size);
1173 if (futimes(wfd, tvs) < 0)
1174 perror("futimes");
1175 fchown(wfd,
1176 hammer2_to_unix_xid(&inode->meta.uid),
1177 hammer2_to_unix_xid(&inode->meta.gid));
1178
1179 fchmod(wfd, inode->meta.mode);
1180 fchflags(wfd, inode->meta.uflags);
1181 iscan->link_file_path = strdup(path1);
1182 } else {
1183 struct timeval tvs[2];
1184
1185 tvs[0].tv_sec = inode->meta.atime / 1000000;
1186 tvs[0].tv_usec = inode->meta.atime % 1000000;
1187 tvs[1].tv_sec = inode->meta.mtime / 1000000;
1188 tvs[1].tv_usec = inode->meta.mtime % 1000000;
1189
1190 ftruncate(wfd, inode->meta.size);
1191 if (futimes(wfd, tvs) < 0)
1192 perror("futimes");
1193 fchown(wfd,
1194 hammer2_to_unix_xid(&inode->meta.uid),
1195 hammer2_to_unix_xid(&inode->meta.gid));
1196
1197 asprintf(&path2, "%s.corrupted", path1);
1198 rename(path1, path2);
1199 iscan->link_file_path = path2;
1200 }
1201
1202 /*
1203 * Cleanup
1204 */
1205 close(wfd);
1206
1207 return res;
1208 }
1209
1210 /*
1211 * TODO XXX
1212 */
1213 static int
dump_inum_softlink(hammer2_inode_data_t * inode __unused,const char * path __unused)1214 dump_inum_softlink(hammer2_inode_data_t *inode __unused,
1215 const char *path __unused)
1216 {
1217 return 0;
1218 }
1219
1220 /*
1221 * Scan the directory for a match against the next component in
1222 * the (remain) path.
1223 *
1224 * This function is part of the dump_tree() recursion mechanism.
1225 */
1226 static int
dump_dir_data(const char * dest,const char * remain,hammer2_blockref_t * base,int count,int depth,int path_depth,int isafile)1227 dump_dir_data(const char *dest, const char *remain,
1228 hammer2_blockref_t *base, int count,
1229 int depth, int path_depth, int isafile)
1230 {
1231 hammer2_media_data_t data;
1232 hammer2_volume_t *vol;
1233 hammer2_off_t poff;
1234 size_t psize;
1235 int res = 1;
1236 int rtmp;
1237 int n;
1238 size_t flen;
1239
1240 /*
1241 * Calculate length of next path component to match against.
1242 * A length of zero matches against the entire remaining
1243 * directory sub-tree.
1244 */
1245 for (flen = 0; remain[flen] && remain[flen] != '/'; ++flen)
1246 ;
1247
1248 /*
1249 * Scan the brefs associated with the directory
1250 */
1251 for (n = 0; n < count; ++n) {
1252 hammer2_blockref_t *bref;
1253 char filename_buf[1024+1];
1254 topology_entry_t *topo;
1255
1256 bref = &base[n];
1257
1258 if (bref->type == HAMMER2_BREF_TYPE_EMPTY)
1259 continue;
1260
1261 vol = NULL;
1262 poff = 0;
1263 psize = 0;
1264 if (bref->data_off & 0x1F) {
1265 vol = hammer2_get_volume(bref->data_off);
1266 if (vol == NULL) {
1267 res = 0;
1268 continue;
1269 }
1270 poff = (bref->data_off - vol->offset) & ~0x1FL;
1271 psize = 1 << (bref->data_off & 0x1F);
1272 if (psize > sizeof(data)) {
1273 res = 0;
1274 continue;
1275 }
1276 }
1277
1278 switch(bref->type) {
1279 case HAMMER2_BREF_TYPE_DIRENT:
1280 /*
1281 * Match the path element or match all directory
1282 * entries if we have exhausted (remain).
1283 *
1284 * Locate all matching inodes and continue the
1285 * traversal.
1286 *
1287 * Avoid traversal loops by recording path_depth
1288 * on the way down and clearing it on the way back
1289 * up.
1290 */
1291 if ((flen == 0 &&
1292 check_filename(bref, NULL, filename_buf,
1293 bref->embed.dirent.namlen)) ||
1294 (flen &&
1295 flen == bref->embed.dirent.namlen &&
1296 check_filename(bref, remain, filename_buf, flen)))
1297 {
1298 inode_entry_t *iscan;
1299 hammer2_key_t inum;
1300 char *path;
1301
1302 inum = bref->embed.dirent.inum;
1303
1304 asprintf(&path, "%s/%s", dest, filename_buf);
1305 iscan = find_first_inode(inum);
1306 while (iscan) {
1307 if (iscan->inum == inum &&
1308 iscan->loopcheck == 0 &&
1309 iscan->type ==
1310 bref->embed.dirent.type)
1311 {
1312 iscan->loopcheck = 1;
1313 dump_tree(iscan, path,
1314 remain + flen,
1315 depth + 1,
1316 path_depth,
1317 isafile);
1318
1319 /*
1320 * Clear loop check
1321 */
1322 iscan->loopcheck = 0;
1323 }
1324 iscan = iscan->next;
1325 }
1326 free(path);
1327 }
1328 break;
1329 case HAMMER2_BREF_TYPE_INDIRECT:
1330 if (psize == 0) {
1331 res = 0;
1332 break;
1333 }
1334
1335 /*
1336 * Due to COW operations, even if the inode is
1337 * replicated, some of the indirect brefs might
1338 * still be shared and allow us to reject duplicate
1339 * scans.
1340 */
1341 topo = enter_topology(dest);
1342 if (topology_check_duplicate_indirect(topo, bref))
1343 break;
1344
1345 if (pread(vol->fd, &data, psize, poff) !=
1346 (ssize_t)psize)
1347 {
1348 res = 0;
1349 break;
1350 }
1351
1352 if (validate_crc(bref, &data, psize) == 0) {
1353 res = 0;
1354 break;
1355 }
1356
1357 rtmp = dump_dir_data(dest, remain,
1358 &data.npdata[0],
1359 psize / sizeof(hammer2_blockref_t),
1360 depth, path_depth, isafile);
1361 if (res)
1362 res = rtmp;
1363 break;
1364 }
1365 }
1366 return res;
1367 }
1368
1369 /*
1370 * Dumps the data records for an inode to the target file (wfd), returns
1371 * TRUE on success, FALSE if corruption was detected.
1372 */
1373 static int
dump_file_data(int wfd,hammer2_off_t fsize,hammer2_blockref_t * base,int count)1374 dump_file_data(int wfd, hammer2_off_t fsize,
1375 hammer2_blockref_t *base, int count)
1376 {
1377 hammer2_media_data_t data;
1378 hammer2_blockref_t *bref;
1379 hammer2_volume_t *vol;
1380 hammer2_off_t poff;
1381 hammer2_off_t psize;
1382 hammer2_off_t nsize;
1383 int res = 1;
1384 int rtmp;
1385 int n;
1386
1387 for (n = 0; n < count; ++n) {
1388 char *dptr;
1389 int res2;
1390
1391 bref = &base[n];
1392
1393 if (bref->type == HAMMER2_BREF_TYPE_EMPTY ||
1394 bref->data_off == 0)
1395 {
1396 continue;
1397 }
1398 vol = hammer2_get_volume(bref->data_off);
1399 if (vol == NULL)
1400 continue;
1401 if (bref->type == HAMMER2_BREF_TYPE_EMPTY ||
1402 bref->data_off == 0)
1403 {
1404 continue;
1405 }
1406
1407 poff = (bref->data_off - vol->offset) & ~0x1FL;
1408 psize = 1 << (bref->data_off & 0x1F);
1409 if (psize > sizeof(data)) {
1410 res = 0;
1411 continue;
1412 }
1413
1414 if (pread(vol->fd, &data, psize, poff) != (ssize_t)psize) {
1415 res = 0;
1416 continue;
1417 }
1418
1419 if (validate_crc(bref, &data, psize) == 0) {
1420 res = 0;
1421 continue;
1422 }
1423
1424 switch(bref->type) {
1425 case HAMMER2_BREF_TYPE_DATA:
1426 dptr = (void *)&data;
1427
1428 nsize = 1L << bref->keybits;
1429 if (nsize > sizeof(data)) {
1430 res = 0;
1431 break;
1432 }
1433
1434 switch (HAMMER2_DEC_COMP(bref->methods)) {
1435 case HAMMER2_COMP_LZ4:
1436 dptr = hammer2_decompress_LZ4(dptr, psize,
1437 nsize, &res2);
1438 if (res)
1439 res = res2;
1440 psize = nsize;
1441 break;
1442 case HAMMER2_COMP_ZLIB:
1443 dptr = hammer2_decompress_ZLIB(dptr, psize,
1444 nsize, &res2);
1445 if (res)
1446 res = res2;
1447 psize = nsize;
1448 break;
1449 case HAMMER2_COMP_NONE:
1450 default:
1451 /* leave in current form */
1452 break;
1453 }
1454
1455 if (bref->key + psize > fsize)
1456 pwrite(wfd, dptr, fsize - bref->key,
1457 bref->key);
1458 else
1459 pwrite(wfd, dptr, psize, bref->key);
1460 break;
1461 case HAMMER2_BREF_TYPE_INDIRECT:
1462 rtmp = dump_file_data(wfd, fsize,
1463 &data.npdata[0],
1464 psize / sizeof(hammer2_blockref_t));
1465 if (res)
1466 res = rtmp;
1467 break;
1468 }
1469 }
1470 return res;
1471 }
1472
1473 /*
1474 * Validate the bref data target. The recovery scan will often attempt to
1475 * validate invalid elements, so don't spew errors to stderr on failure.
1476 */
1477 static int
validate_crc(hammer2_blockref_t * bref,void * data,size_t bytes)1478 validate_crc(hammer2_blockref_t *bref, void *data, size_t bytes)
1479 {
1480 uint32_t cv;
1481 uint64_t cv64;
1482 SHA256_CTX hash_ctx;
1483 int success = 0;
1484 union {
1485 uint8_t digest[SHA256_DIGEST_LENGTH];
1486 uint64_t digest64[SHA256_DIGEST_LENGTH/8];
1487 } u;
1488
1489 switch(HAMMER2_DEC_CHECK(bref->methods)) {
1490 case HAMMER2_CHECK_NONE:
1491 if (StrictMode == 0)
1492 success = 1;
1493 break;
1494 case HAMMER2_CHECK_DISABLED:
1495 if (StrictMode == 0)
1496 success = 1;
1497 break;
1498 case HAMMER2_CHECK_ISCSI32:
1499 cv = hammer2_icrc32(data, bytes);
1500 if (bref->check.iscsi32.value == cv) {
1501 success = 1;
1502 }
1503 break;
1504 case HAMMER2_CHECK_XXHASH64:
1505 cv64 = XXH64(data, bytes, XXH_HAMMER2_SEED);
1506 if (bref->check.xxhash64.value == cv64) {
1507 success = 1;
1508 }
1509 break;
1510 case HAMMER2_CHECK_SHA192:
1511 SHA256_Init(&hash_ctx);
1512 SHA256_Update(&hash_ctx, data, bytes);
1513 SHA256_Final(u.digest, &hash_ctx);
1514 u.digest64[2] ^= u.digest64[3];
1515 if (memcmp(u.digest, bref->check.sha192.data,
1516 sizeof(bref->check.sha192.data)) == 0)
1517 {
1518 success = 1;
1519 }
1520 break;
1521 case HAMMER2_CHECK_FREEMAP:
1522 cv = hammer2_icrc32(data, bytes);
1523 if (bref->check.freemap.icrc32 == cv) {
1524 success = 1;
1525 }
1526 break;
1527 }
1528 return success;
1529 }
1530
1531 /*
1532 * Convert a hammer2 uuid to a uid or gid.
1533 */
1534 static uint32_t
hammer2_to_unix_xid(const uuid_t * uuid)1535 hammer2_to_unix_xid(const uuid_t *uuid)
1536 {
1537 return(*(const uint32_t *)&uuid->node[2]);
1538 }
1539
/*
 * Read from disk image, with caching to improve performance.
 *
 * Use a very simple LRU algo with 16 entries, linearly checked.
 */
#define SDCCOUNT 16		/* number of cache entries (power of 2) */
#define SDCMASK (SDCCOUNT - 1)

/*
 * One cache slot: a full-sized hammer2 physical buffer plus the
 * (vol, offset) key it was read from and an LRU stamp.
 */
typedef struct sdccache {
	char buf[HAMMER2_PBUFSIZE];	/* cached media data */
	hammer2_off_t offset;		/* physical base offset, -1 if invalid */
	hammer2_volume_t *vol;		/* volume the buffer was read from */
	int64_t last;			/* LRU stamp; lowest value = oldest */
} sdccache_t;

static sdccache_t SDCCache[SDCCOUNT];	/* the cache itself */
static int64_t SDCLast;			/* monotonic LRU counter */
1557
1558 static void *
hammer2_cache_read(hammer2_off_t data_off,size_t * bytesp)1559 hammer2_cache_read(hammer2_off_t data_off, size_t *bytesp)
1560 {
1561 hammer2_off_t poff;
1562 hammer2_off_t pbase;
1563 hammer2_volume_t *vol;
1564 sdccache_t *sdc;
1565 sdccache_t *sdc_worst;
1566 int i;
1567
1568 *bytesp = 0;
1569
1570 /*
1571 * Translate logical offset to volume and physical offset.
1572 * Return NULL with *bytesp set to 0 to indicate pre-I/O
1573 * sanity check failure.
1574 */
1575 vol = hammer2_get_volume(data_off);
1576 if (vol == NULL)
1577 return NULL;
1578 poff = (data_off - vol->offset) & ~0x1FL;
1579 *bytesp = 1 << (data_off & 0x1F);
1580
1581 pbase = poff & ~HAMMER2_PBUFMASK64;
1582
1583 /*
1584 * Must not straddle two full-sized hammer2 blocks
1585 */
1586 if ((poff ^ (poff + *bytesp - 1)) & ~HAMMER2_PBUFMASK64) {
1587 *bytesp = 0;
1588 return NULL;
1589 }
1590
1591 /*
1592 * I/Os are powers of 2 in size and must be aligned to at least
1593 * the I/O size.
1594 */
1595 if (poff & ~0x1FL & (*bytesp - 1)) {
1596 *bytesp = 0;
1597 return NULL;
1598 }
1599
1600 /*
1601 * LRU match lookup
1602 */
1603 sdc_worst = NULL;
1604 for (i = 0; i < SDCCOUNT; ++i) {
1605 sdc = &SDCCache[i];
1606
1607 if (sdc->vol == vol && sdc->offset == pbase) {
1608 sdc->last = ++SDCLast;
1609 return (&sdc->buf[poff - pbase]);
1610 }
1611 if (sdc_worst == NULL || sdc_worst->last > sdc->last)
1612 sdc_worst = sdc;
1613 }
1614
1615 /*
1616 * Fallback to I/O if not found, using oldest entry
1617 *
1618 * On failure we leave (*bytesp) intact to indicate that an I/O
1619 * was attempted.
1620 */
1621 sdc = sdc_worst;
1622 sdc->vol = vol;
1623 sdc->offset = pbase;
1624 sdc->last = ++SDCLast;
1625
1626 if (pread(vol->fd, sdc->buf, HAMMER2_PBUFSIZE, pbase) !=
1627 HAMMER2_PBUFSIZE)
1628 {
1629 sdc->offset = (hammer2_off_t)-1;
1630 sdc->last = 0;
1631 return NULL;
1632 }
1633 return (&sdc->buf[poff - pbase]);
1634 }
1635