xref: /dragonfly/sbin/hammer2/cmd_recover.c (revision 556932ec)
1 /*
2  * Copyright (c) 2023 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 /*
36  * hammer2 recover <devpath> <path> <destdir>
37  *
38  * Recover files from corrupted media, recover deleted files from good
39  * media, generally ignoring the data structure topology outside of the
40  * structures that hang off of inodes and directory entries.  Files are
41  * validated during recovery and renamed to .corrupted when a version of
42  * a file cannot be completely recovered.
43  *
44  * This is a "try to get everything we can out of the filesystem"
45  * directive when you've done something terrible to the filesystem.
46  * The resulting <destdir> tree cannot directly replace the lost topology
47  * as many versions of the same file will be present, so filenames are
48  * suffixed.
49  *
50  * <path> may be a relative file, directory, directory path, or absolute
51  * (absolute is relative to the mount) file or directory path.
52  *
53  * For example, "hammer2 recover /dev/da0s1d /home/charlie /tmp/" will
54  * recover all possible versions of the /home/charlie sub-tree.  If
55  * you said "home/charlie" instead, then it would recover the same but
56  * also include any sub-directory paths that match home/charlie, not
57  * just root paths.  If you want a specific file, then e.g. ".cshrc"
58  * would recover every single .cshrc that can be found on the media.
59  *
60  * The command checks ALL PFSs and snapshots.  Redundant files with the same
61  * path are ignored.  You basically get everything that can possibly be
62  * recovered from the media.
63  */
64 #include "hammer2.h"
65 
66 #define INUM_HSIZE	(1024*1024)
67 #define INUM_HMASK	(INUM_HSIZE - 1)
68 
69 #include <openssl/sha.h>
70 
71 typedef struct dirent_entry {
72 	struct dirent_entry *next;
73 	char		*filename;
74 	size_t		len;
75 	hammer2_key_t	inum;
76 	hammer2_off_t	bref_off;
77 } dirent_entry_t;
78 
79 typedef struct inode_entry {
80 	struct inode_entry *next;
81 	struct inode_entry *first_parent;
82 	dirent_entry_t	*first_dirent;
83 	hammer2_off_t	data_off;
84 	hammer2_key_t	inum;
85 	hammer2_inode_data_t inode;
86 	long		encounters;
87 	char		*link_file_path;
88 	int		path_depth;	/* non-zero already visited at N */
89 } inode_entry_t;
90 
91 typedef struct topology_inode {
92 	struct topology_inode *next;
93 	struct inode_entry *inode;
94 } topology_inode_t;
95 
96 typedef struct topology_bref {
97 	struct topology_bref *next;
98 	hammer2_off_t	data_off;
99 } topology_bref_t;
100 
101 typedef struct topology_entry {
102 	struct topology_entry *next;
103 	char		*path;
104 	long		iterator;
105 	topology_inode_t *inodes;
106 	topology_bref_t  *brefs;
107 } topology_entry_t;
108 
109 static dirent_entry_t **DirHash;
110 static inode_entry_t **InodeHash;
111 static topology_entry_t **TopologyHash;
112 
113 /*static void resolve_topology(void);*/
114 static int check_filename(hammer2_blockref_t *bref,
115 			const char *filename, char *buf, size_t flen);
116 //static void enter_dirent(hammer2_blockref_t *bref, hammer2_off_t loff,
117 //			const char *filename, size_t flen);
118 static topology_entry_t *enter_topology(const char *path);
119 static int topology_check_duplicate_inode(topology_entry_t *topo,
120 			inode_entry_t *iscan);
121 static int topology_check_duplicate_indirect(topology_entry_t *topo,
122 			hammer2_blockref_t *bref);
123 static void enter_inode(hammer2_blockref_t *bref);
124 static void enter_inode_untested(hammer2_inode_data_t *ip, hammer2_off_t loff);
125 static inode_entry_t *find_first_inode(hammer2_key_t inum);
126 /*static dirent_entry_t *find_first_dirent(hammer2_key_t inum);*/
127 static void dump_tree(inode_entry_t *iscan, const char *dest,
128 			const char *remain, int depth, int path_depth,
129 			int isafile);
130 static int dump_inum_file(inode_entry_t *iscan, const char *path);
131 static int dump_inum_softlink(hammer2_inode_data_t *inode, const char *path);
132 static int dump_dir_data(const char *dest, const char *remain,
133 			hammer2_blockref_t *base, int count,
134 			int depth, int path_depth, int isafile);
135 static int dump_file_data(int wfd, hammer2_off_t fsize,
136 			hammer2_blockref_t *bref, int count);
137 static int validate_crc(hammer2_blockref_t *bref, void *data, size_t bytes);
138 static uint32_t hammer2_to_unix_xid(const uuid_t *uuid);
139 
140 static long InodeCount;
141 static long MediaBytes;
142 static int StrictMode = 1;
143 
144 #define INODES_PER_BLOCK	\
145 	(sizeof(union hammer2_media_data) / sizeof(hammer2_inode_data_t))
146 #define REPINODEDEPTH		256
147 
148 /*
149  * Recover the specified file.
150  *
151  * Basically do a raw scan of the drive image looking for directory entries
152  * and inodes.  Index all inodes found, including copies, and filter
153  * directory entries for the requested filename to locate inode numbers.
154  *
155  * All copies that are located are written to destdir with a suffix .00001,
156  * .00002, etc.
157  */
158 int
159 cmd_recover(const char *devpath, const char *pathname,
160 	    const char *destdir, int strict, int isafile)
161 {
162 	hammer2_media_data_t data;
163 	hammer2_volume_t *vol;
164 	hammer2_off_t loff;
165 	hammer2_off_t poff;
166 	size_t i;
167 
168 	StrictMode = strict;
169 	hammer2_init_volumes(devpath, 1);
170 	DirHash = calloc(INUM_HSIZE, sizeof(dirent_entry_t *));
171 	InodeHash = calloc(INUM_HSIZE, sizeof(inode_entry_t *));
172 	TopologyHash = calloc(INUM_HSIZE, sizeof(topology_entry_t *));
173 
174 	/*
175 	 * Media Pass
176 	 *
177 	 * Look for blockrefs that point to inodes.  The blockrefs could
178 	 * be bogus since we aren't validating them, but the combination
179 	 * of a CRC that matches the inode content is fairly robust in
180 	 * finding actual inodes.
181 	 *
182 	 * We also enter unvalidated inodes for inode #1 (PFS roots),
183 	 * because there might not be any blockrefs pointing to some of
184 	 * them.  We need these to be able to locate directory entries
185 	 * under the roots.
186 	 *
187 	 * At the moment we do not try to enter unvalidated directory
188 	 * entries, since this will result in a massive number of false
189 	 * hits
190 	 */
191 	printf("MEDIA PASS\n");
192 
193 	loff = 0;
194 	while ((vol = hammer2_get_volume(loff)) != NULL) {
195 		int fd;
196 		int xdisp;
197 		hammer2_off_t vol_size;
198 
199 		fd = vol->fd;
200 		poff = loff - vol->offset;
201 		vol_size = lseek(fd, 0L, SEEK_END);
202 		xdisp = 0;
203 
204 		while (poff < vol->size) {
205 			if (pread(fd, &data, sizeof(data), poff) !=
206 			    sizeof(data))
207 			{
208 				/* try to skip possible I/O error */
209 				poff += sizeof(data);
210 				continue;
211 			}
212 			for (i = 0; i < HAMMER2_IND_COUNT_MAX; ++i) {
213 				hammer2_blockref_t *bref;
214 				// char filename_buf[1024+1];
215 
216 				bref = &data.npdata[i];
217 
218 				/*
219 				 * Found a possible inode
220 				 */
221 				switch(bref->type) {
222 				case HAMMER2_BREF_TYPE_INODE:
223 					enter_inode(bref);
224 					break;
225 				case HAMMER2_BREF_TYPE_DIRENT:
226 					/*
227 					 * Go overboard and try to index
228 					 * anything that looks like a
229 					 * directory entry.  This might find
230 					 * entries whos inodes are no longer
231 					 * available, but will also generate
232 					 * a lot of false files.
233 					 */
234 #if 0
235 					if (filename &&
236 					    flen != bref->embed.dirent.namlen)
237 					{
238 						/* single-file shortcut */
239 						break;
240 					}
241 					if (check_filename(bref, filename,
242 						   filename_buf,
243 						   bref->embed.dirent.namlen))
244 					{
245 						enter_dirent(bref,
246 						    poff + vol->offset +
247 						    (i *
248 						    sizeof(hammer2_blockref_t)),
249 						    filename_buf,
250 						    bref->embed.dirent.namlen);
251 					}
252 #endif
253 					break;
254 				default:
255 					break;
256 				}
257 			}
258 
259 			/*
260 			 * Look for possible root inodes.  We generally can't
261 			 * find these by finding BREFs pointing to them because
262 			 * the BREFs often hang off the volume header.
263 			 *
264 			 * These "inodes" could be seriously corrupt, but if
265 			 * the bref tree is intact that is what we need to
266 			 * get top-level directory entries.
267 			 */
268 			for (i = 0; i < INODES_PER_BLOCK; ++i) {
269 				hammer2_inode_data_t *ip;
270 
271 				ip = (void *)(data.buf + i * sizeof(*ip));
272 				if (ip->meta.inum == 1 &&
273 				    ip->meta.iparent == 0 &&
274 				    ip->meta.type ==
275 					HAMMER2_OBJTYPE_DIRECTORY &&
276 				    ip->meta.op_flags & HAMMER2_OPFLAG_PFSROOT)
277 				{
278 					enter_inode_untested(ip,
279 						poff + vol->offset +
280 						(i * sizeof(*ip)));
281 				}
282 			}
283 			poff += sizeof(data);
284 
285 			MediaBytes += sizeof(data);
286 
287 			/*
288 			 * Update progress
289 			 */
290 			if (QuietOpt == 0 &&
291 			    (++xdisp == 32 || poff == vol->size - sizeof(data)))
292 			{
293 				xdisp = 0;
294 				printf("%ld inodes scanned, "
295 				       "media %6.2f/%-3.2fG\r",
296 					InodeCount,
297 					MediaBytes / 1e9,
298 					vol_size / 1e9);
299 				fflush(stdout);
300 			}
301 		}
302 		loff = vol->offset + vol->size;
303 	}
304 
305 	/*
306 	 * Restoration Pass
307 	 *
308 	 * Run through available directory inodes, which allows us to locate
309 	 * and validate (crc check) their directory entry blockrefs and
310 	 * construct absolute or relative paths through a recursion.
311 	 *
312 	 * When an absolute path is obtained the search is anchored on a
313 	 * root inode.  When a relative path is obtained the search is
314 	 * unanchored and will find all matching sub-paths.  For example,
315 	 * if you look for ".cshrc" it will find ALL .cshrc's.  If you
316 	 * look for "fubar/.cshsrc" it will find ALL .cshrc's residing
317 	 * in a directory called fubar, however many there are.  But if
318 	 * you look for "/fubar/srcs" it will only find the sub-tree
319 	 * "/fubar/srcs" relative to PFS roots.
320 	 *
321 	 * We may not have indexed the PFS roots themselves, because they
322 	 * often hang off of the volume header and might not have COW'd
323 	 * references to them, so we use the "iparent" field in the inode
324 	 * to detect top-level directories under those roots.
325 	 */
326 	printf("\nInodes Scanned: %ld\n", InodeCount);
327 	printf("RESTORATION PASS\n");
328 
329 	{
330 		int abspath = 0;
331 		long root_count = 0;
332 		long root_max = 0;
333 
334 		/*
335 		 * Check for absolute path, else relative
336 		 */
337 		if (pathname[0] == '/') {
338 			abspath = 1;
339 			while (*pathname == '/')
340 				++pathname;
341 		}
342 
343 		/*
344 		 * Count root inodes
345 		 */
346 		{
347 			inode_entry_t *iscan;
348 
349 			for (iscan = InodeHash[1];
350 			     iscan;
351 			     iscan = iscan->next)
352 			{
353 				if (iscan->inum == 1)
354 					++root_max;
355 			}
356 		}
357 
358 		/*
359 		 * Run through all directory inodes to locate validated
360 		 * directory entries.  If an absolute path was specified
361 		 * we start at root inodes.
362 		 */
363 		for (i = 0; i < INUM_HSIZE; ++i) {
364 			inode_entry_t *iscan;
365 
366 			for (iscan = InodeHash[i];
367 			     iscan;
368 			     iscan = iscan->next)
369 			{
370 				/*
371 				 * Absolute paths always start at root inodes,
372 				 * otherwise we can start at any directory
373 				 * inode.
374 				 */
375 				if (abspath && iscan->inode.meta.inum != 1)
376 					continue;
377 				if (iscan->inode.meta.type !=
378 				    HAMMER2_OBJTYPE_DIRECTORY)
379 				{
380 					continue;
381 				}
382 
383 				/*
384 				 * Progress down root inodes can be slow,
385 				 * so print progress for each root inode.
386 				 */
387 				if (i == 1 && iscan->inum == 1 &&
388 				    QuietOpt == 0)
389 				{
390 					printf("scan root %p 0x%016jx "
391 					       "(count %ld/%ld)\r",
392 						iscan,
393 						iscan->data_off,
394 						++root_count, root_max);
395 					fflush(stdout);
396 				}
397 
398 				/*
399 				 * Primary match/recover recursion
400 				 */
401 				dump_tree(iscan, destdir, pathname,
402 					  1, 1, isafile);
403 			}
404 			if (QuietOpt == 0 && (i & 31) == 31) {
405 				if (i == 31)
406 					printf("\n");
407 				printf("Progress %zd/%d\r", i, INUM_HSIZE);
408 				fflush(stdout);
409 			}
410 		}
411 		printf("\n");
412 	}
413 
414 	printf("CLEANUP\n");
415 
416 	/*
417 	 * Cleanup
418 	 */
419 	hammer2_cleanup_volumes();
420 
421 	for (i = 0; i < INUM_HSIZE; ++i) {
422 		dirent_entry_t *dscan;
423 		inode_entry_t *iscan;
424 		topology_entry_t *top_scan;
425 		topology_inode_t *top_ino;
426 		topology_bref_t *top_bref;
427 
428 		while ((dscan = DirHash[i]) != NULL) {
429 			DirHash[i] = dscan->next;
430 			free(dscan->filename);
431 			free(dscan);
432 		}
433 		while ((iscan = InodeHash[i]) != NULL) {
434 			InodeHash[i] = iscan->next;
435 			free(iscan);
436 		}
437 		while ((top_scan = TopologyHash[i]) != NULL) {
438 			TopologyHash[i] = top_scan->next;
439 			while ((top_ino = top_scan->inodes) != NULL) {
440 				top_scan->inodes = top_ino->next;
441 				free(top_ino);
442 			}
443 			while ((top_bref = top_scan->brefs) != NULL) {
444 				top_scan->brefs = top_bref->next;
445 				free(top_bref);
446 			}
447 			free(top_scan->path);
448 			free(top_scan);
449 		}
450 	}
451 	free(TopologyHash);
452 	free(DirHash);
453 	free(InodeHash);
454 
455 	return 0;
456 }
457 
458 /*
459  * Check for a matching filename, Directory entries can directly-embed
460  * filenames <= 64 bytes.  Otherwise the directory entry has a data
461  * reference to the location of the filename.
462  *
463  * If filename is NULL, check for a valid filename, and copy it into buf.
464  */
465 static int
466 check_filename(hammer2_blockref_t *bref, const char *filename, char *buf,
467 	       size_t flen)
468 {
469 	/* filename too long */
470 	if (flen > 1024)
471 		return 0;
472 
473 	if (flen <= 64) {
474 		/*
475 		 * Filename is embedded in bref
476 		 */
477 		if (buf)
478 			bcopy(bref->check.buf, buf, flen);
479 		buf[flen] = 0;
480 		if (filename == NULL)
481 			return 1;
482 		if (bcmp(filename, bref->check.buf, flen) == 0)
483 			return 1;
484 	} else {
485 		/*
486 		 * Filename requires media access
487 		 */
488 		hammer2_media_data_t data;
489 		hammer2_volume_t *vol;
490 		hammer2_off_t poff;
491 		hammer2_off_t psize;
492 		int vfd;
493 
494 		/*
495 		 * bref must represent a data reference to a 1KB block or
496 		 * smaller.
497 		 */
498 		if ((bref->data_off & 0x1F) == 0 ||
499 		    (bref->data_off & 0x1F) > 10)
500 		{
501 			return 0;
502 		}
503 
504 		/*
505 		 * Indirect block containing filename must be large enough
506 		 * to contain the filename.
507 		 */
508 		psize = 1 << (bref->data_off & 0x1F);
509 		if (flen > psize)
510 			return 0;
511 
512 		/*
513 		 * In strict mode we disallow bref's set to HAMMER2_CHECK_NONE
514 		 * or HAMMER2_CHECK_DISABLED.  Do this check before burning
515 		 * time on an I/O.
516 		 */
517 		if (StrictMode) {
518 			if (HAMMER2_DEC_CHECK(bref->methods) ==
519 				HAMMER2_CHECK_NONE ||
520 			    HAMMER2_DEC_CHECK(bref->methods) ==
521 				HAMMER2_CHECK_DISABLED)
522 			{
523 				return 0;
524 			}
525 		}
526 
527 		/*
528 		 * Read the data, check CRC and such
529 		 */
530 		vol = hammer2_get_volume(bref->data_off);
531 		if (vol == NULL)
532 			return 0;
533 
534 		vfd = vol->fd;
535 		poff = (bref->data_off - vol->offset) & ~0x1FL;
536 		if (pread(vfd, &data, psize, poff) != (ssize_t)psize)
537 			return 0;
538 
539 		if (validate_crc(bref, &data, psize) == 0)
540 			return 0;
541 
542 		if (buf)
543 			bcopy(data.buf, buf, flen);
544 		buf[flen] = 0;
545 		if (filename == NULL)
546 			return 1;
547 		if (bcmp(filename, data.buf, flen) == 0)
548 			return 1;
549 	}
550 	return 0;
551 }
552 
553 #if 0
554 static void
555 enter_dirent(hammer2_blockref_t *bref, hammer2_off_t loff,
556 	     const char *filename, size_t flen)
557 {
558 	hammer2_key_t inum = bref->embed.dirent.inum;
559 	dirent_entry_t *entry;
560 	uint32_t hv = (inum ^ (inum >> 16)) & INUM_HMASK;
561 
562 	for (entry = DirHash[hv]; entry; entry = entry->next) {
563 		if (entry->inum == inum &&
564 		    entry->len == flen &&
565 		    bcmp(entry->filename, filename, flen) == 0)
566 		{
567 			return;
568 		}
569 	}
570 	entry = malloc(sizeof(*entry));
571 	bzero(entry, sizeof(*entry));
572 	entry->inum = inum;
573 	entry->next = DirHash[hv];
574 	entry->filename = malloc(flen + 1);
575 	entry->len = flen;
576 	entry->bref_off = loff;
577 	bcopy(filename, entry->filename, flen);
578 	entry->filename[flen] = 0;
579 	DirHash[hv] = entry;
580 }
581 #endif
582 
583 /*
584  * Topology duplicate scan avoidance helpers.  We associate inodes and
585  * indirect block data offsets, allowing us to avoid re-scanning any
586  * duplicates that we see.  And there will be many due to how the COW
587  * process occurs.
588  *
589  * For example, when a large directory is modified the content update to
590  * the directory entries will cause the directory inode to be COWd, along
591  * with whatever is holding the bref(s) blocks that have undergone
592  * adjustment.  More likely than not, there will be MANY shared indirect
593  * blocks.
594  */
595 static topology_entry_t *
596 enter_topology(const char *path)
597 {
598 	topology_entry_t *topo;
599 	uint32_t hv = 0;
600 	size_t i;
601 
602 	for (i = 0; path[i]; ++i)
603 		hv = (hv << 5) ^ path[i] ^ (hv >> 24);
604 	hv = (hv ^ (hv >> 16)) & INUM_HMASK;
605 	for (topo = TopologyHash[hv]; topo; topo = topo->next) {
606 		if (strcmp(path, topo->path) == 0)
607 			return topo;
608 	}
609 	topo = malloc(sizeof(*topo));
610 	bzero(topo, sizeof(*topo));
611 
612 	topo->next = TopologyHash[hv];
613 	TopologyHash[hv] = topo;
614 	topo->path = strdup(path);
615 	topo->iterator = 1;
616 
617 	return topo;
618 }
619 
620 static int
621 topology_check_duplicate_inode(topology_entry_t *topo, inode_entry_t *inode)
622 {
623 	topology_inode_t *scan;
624 
625 	for (scan = topo->inodes; scan; scan = scan->next) {
626 		if (scan->inode == inode)
627 			return 1;
628 	}
629 	scan = malloc(sizeof(*scan));
630 	bzero(scan, sizeof(*scan));
631 	scan->inode = inode;
632 	scan->next = topo->inodes;
633 	topo->inodes = scan;
634 
635 	return 0;
636 }
637 
638 static int
639 topology_check_duplicate_indirect(topology_entry_t *topo,
640 				  hammer2_blockref_t *bref)
641 {
642 	topology_bref_t *scan;
643 
644 	for (scan = topo->brefs; scan; scan = scan->next) {
645 		if (scan->data_off == bref->data_off) {
646 			return 1;
647 		}
648 	}
649 	scan = malloc(sizeof(*scan));
650 	bzero(scan, sizeof(*scan));
651 	scan->data_off = bref->data_off;
652 	scan->next = topo->brefs;
653 	topo->brefs = scan;
654 
655 	return 0;
656 }
657 
658 /*
659  * Valid and record an inode found on media.  There can be many versions
660  * of the same inode number present on the media.
661  */
662 static void
663 enter_inode(hammer2_blockref_t *bref)
664 {
665 	uint32_t hv = (bref->key ^ (bref->key >> 16)) & INUM_HMASK;
666 	inode_entry_t *scan;
667 	hammer2_volume_t *vol;
668 	hammer2_off_t poff;
669 	hammer2_inode_data_t inode;
670 	int vfd;
671 
672 	/*
673 	 * Ignore duplicate inodes.
674 	 */
675 	for (scan = InodeHash[hv]; scan; scan = scan->next) {
676 		if (bref->key == scan->inum &&
677 		    bref->data_off == scan->data_off)
678 		{
679 			return;
680 		}
681 	}
682 
683 	/*
684 	 * Validate the potential blockref.  Note that this might not be a
685 	 * real blockref.  Don't trust anything, really.
686 	 */
687 	if ((1 << (bref->data_off & 0x1F)) != sizeof(inode))
688 		return;
689 	if ((bref->data_off & ~0x1FL & (sizeof(inode) - 1)) != 0)
690 		return;
691 	vol = hammer2_get_volume(bref->data_off);
692 	if (vol == NULL)
693 		return;
694 
695 	vfd = vol->fd;
696 	poff = (bref->data_off - vol->offset) & ~0x1FL;
697 	if (pread(vfd, &inode, sizeof(inode), poff) != sizeof(inode))
698 		return;
699 
700 	/*
701 	 * The blockref looks ok but the real test is whether the
702 	 * inode data it references passes the CRC check.  If it
703 	 * does, it is highly likely that we have a valid inode.
704 	 */
705 	if (validate_crc(bref, &inode, sizeof(inode)) == 0)
706 		return;
707 	if (inode.meta.inum != bref->key)
708 		return;
709 
710 	scan = malloc(sizeof(*scan));
711 	bzero(scan, sizeof(*scan));
712 
713 	scan->inum = bref->key;
714 	scan->data_off = bref->data_off;
715 	scan->inode = inode;
716 	scan->next = InodeHash[hv];
717 	InodeHash[hv] = scan;
718 
719 	++InodeCount;
720 }
721 
722 /*
723  * This is used to enter possible root inodes.  Root inodes typically hang
724  * off the volume root and thus there might not be a bref reference to the
725  * many old copies of root inodes sitting around on the media.  Without a
726  * bref we can't really validate that the content is ok.  But we need
727  * these inodes as part of our path searches.
728  */
729 static void
730 enter_inode_untested(hammer2_inode_data_t *ip, hammer2_off_t loff)
731 {
732 	uint32_t hv = (ip->meta.inum ^ (ip->meta.inum >> 16)) & INUM_HMASK;
733 	inode_entry_t *scan;
734 
735 	for (scan = InodeHash[hv]; scan; scan = scan->next) {
736 		if (ip->meta.inum == scan->inum &&
737 		    loff == scan->data_off)
738 		{
739 			return;
740 		}
741 	}
742 
743 	scan = malloc(sizeof(*scan));
744 	bzero(scan, sizeof(*scan));
745 
746 	scan->inum = ip->meta.inum;
747 	scan->data_off = loff;
748 	scan->inode = *ip;
749 	scan->next = InodeHash[hv];
750 	InodeHash[hv] = scan;
751 
752 	++InodeCount;
753 }
754 
755 static inode_entry_t *
756 find_first_inode(hammer2_key_t inum)
757 {
758 	inode_entry_t *entry;
759 	uint32_t hv = (inum ^ (inum >> 16)) & INUM_HMASK;
760 
761 	for (entry = InodeHash[hv]; entry; entry = entry->next) {
762 		if (entry->inum == inum)
763 			return entry;
764 	}
765 	return NULL;
766 }
767 
768 /*
769  * Dump the specified inode (file or directory)
770  *
771  * This function recurses via dump_dir_data().
772  */
773 static void
774 dump_tree(inode_entry_t *iscan, const char *dest, const char *remain,
775 	  int depth, int path_depth, int isafile)
776 {
777 	topology_entry_t *topo;
778 	struct stat st;
779 	char *path;
780 
781 	/*
782 	 * Set path depth.  This is typically cleared on the way back
783 	 * up and is primarily used to prevent recursion loops.
784 	 */
785 	iscan->path_depth = path_depth;
786 
787 	/*
788 	 * Try to limit potential infinite loops
789 	 */
790 	if (depth > REPINODEDEPTH && iscan->encounters)
791 		return;
792 
793 	/*
794 	 * Get rid of any dividing slashes
795 	 */
796 	while (*remain == '/')
797 		++remain;
798 
799 	/*
800 	 * Create/lookup destination path (without the iterator), to acquire
801 	 * an iterator for different versions of the same file.
802 	 *
803 	 * Due to the COW mechanism, a lot of repeated snapshot-like
804 	 * directories may be encountered so we use the topology tree
805 	 * to weed out duplicates that attempt to use the same pathname.
806 	 *
807 	 * We also don't iterate copies of directories since this would
808 	 * create a major mess due to the many versions that might be
809 	 * laying around.  Directories use unextended names.
810 	 */
811 	topo = enter_topology(dest);
812 	if (topology_check_duplicate_inode(topo, iscan))
813 		return;
814 
815 	switch(iscan->inode.meta.type) {
816 	case HAMMER2_OBJTYPE_DIRECTORY:
817 		/*
818 		 * If we have exhausted the path and isafile is TRUE,
819 		 * stop.
820 		 */
821 		if (isafile && *remain == 0)
822 			return;
823 
824 		/*
825 		 * Create / make usable the target directory.  Note that
826 		 * it might already exist.
827 		 *
828 		 * Do not do this for the destination base directory
829 		 * (depth 1).
830 		 */
831 		if (depth != 1) {
832 			if (mkdir(dest, 0755) == 0)
833 				++iscan->encounters;
834 			if (stat(dest, &st) == 0) {
835 				if (st.st_flags)
836 					chflags(dest, 0);
837 				if ((st.st_mode & 0700) != 0700)
838 					chmod(dest, 0755);
839 			}
840 		}
841 
842 		/*
843 		 * Dump directory contents (scan the directory)
844 		 */
845 		(void)dump_dir_data(dest, remain,
846 				    &iscan->inode.u.blockset.blockref[0],
847 				    HAMMER2_SET_COUNT, depth, path_depth + 1,
848 				    isafile);
849 
850 		/*
851 		 * Final adjustment to directory inode
852 		 */
853 		if (depth != 1) {
854 			struct timeval tvs[2];
855 
856 			tvs[0].tv_sec = iscan->inode.meta.atime / 1000000;
857 			tvs[0].tv_usec = iscan->inode.meta.atime % 1000000;
858 			tvs[1].tv_sec = iscan->inode.meta.mtime / 1000000;
859 			tvs[1].tv_usec = iscan->inode.meta.mtime % 1000000;
860 
861 			if (lutimes(dest, tvs) < 0)
862 				perror("futimes");
863 			lchown(dest,
864 			       hammer2_to_unix_xid(&iscan->inode.meta.uid),
865 			       hammer2_to_unix_xid(&iscan->inode.meta.gid));
866 
867 			lchmod(dest, iscan->inode.meta.mode);
868 			lchflags(dest, iscan->inode.meta.uflags);
869 		}
870 		break;
871 	case HAMMER2_OBJTYPE_REGFILE:
872 		/*
873 		 * If no more path to match, dump the file contents
874 		 */
875 		if (*remain == 0) {
876 			asprintf(&path, "%s.%05ld", dest, topo->iterator);
877 			++topo->iterator;
878 
879 			if (stat(path, &st) == 0) {
880 				if (st.st_flags)
881 					chflags(path, 0);
882 				if ((st.st_mode & 0600) != 0600)
883 					chmod(path, 0644);
884 			}
885 			++iscan->encounters;
886 			(void)dump_inum_file(iscan, path);
887 			free(path);
888 		}
889 		break;
890 	case HAMMER2_OBJTYPE_SOFTLINK:
891 		/*
892 		 * If no more path to match, dump the file contents
893 		 */
894 		if (*remain == 0) {
895 			asprintf(&path, "%s.%05ld", dest, topo->iterator);
896 			++topo->iterator;
897 
898 			if (stat(path, &st) == 0) {
899 				if (st.st_flags)
900 					chflags(path, 0);
901 				if ((st.st_mode & 0600) != 0600)
902 					chmod(path, 0644);
903 			}
904 			(void)dump_inum_softlink(&iscan->inode, path);
905 			free(path);
906 		}
907 		break;
908 	}
909 }
910 
911 /*
912  * [re]create a regular file and attempt to restore the originanl perms,
913  * modes, flags, times, uid, and gid if successful.
914  *
915  * If the data block recursion fails the file will be renamed
916  * .corrupted.
917  */
918 static int
919 dump_inum_file(inode_entry_t *iscan, const char *path1)
920 {
921 	hammer2_inode_data_t *inode = &iscan->inode;
922 	char *path2;
923 	int wfd;
924 	int res;
925 
926 	/*
927 	 * If this specific inode has already been generated, try to
928 	 * hardlink it instead of regenerating the same file again.
929 	 */
930 	if (iscan->link_file_path) {
931 		if (link(iscan->link_file_path, path1) == 0)
932 			return 1;
933 		chflags(iscan->link_file_path, 0);
934 		chmod(iscan->link_file_path, 0600);
935 		if (link(iscan->link_file_path, path1) == 0) {
936 			chmod(iscan->link_file_path, inode->meta.mode);
937 			chflags(iscan->link_file_path, inode->meta.uflags);
938 			return 1;
939 		}
940 	}
941 
942 	/*
943 	 * Cleanup potential flags and modes to allow us to write out a
944 	 * new file.
945 	 */
946 	chflags(path1, 0);
947 	chmod(path1, 0600);
948 	wfd = open(path1, O_RDWR|O_CREAT|O_TRUNC, 0600);
949 
950 	if (inode->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
951 		/*
952 		 * Direct data case
953 		 */
954 		if (inode->meta.size > 0 &&
955 		    inode->meta.size <= sizeof(inode->u.data))
956 		{
957 			write(wfd, inode->u.data, inode->meta.size);
958 		}
959 		res = 1;
960 	} else {
961 		/*
962 		 * file content, indirect blockrefs
963 		 */
964 		res = dump_file_data(wfd, inode->meta.size,
965 				     &inode->u.blockset.blockref[0],
966 				     HAMMER2_SET_COUNT);
967 	}
968 
969 	/*
970 	 * On success, set perms, mtime, flags, etc
971 	 * On failure, rename file to .corrupted
972 	 */
973 	if (res) {
974 		struct timeval tvs[2];
975 
976 		tvs[0].tv_sec = inode->meta.atime / 1000000;
977 		tvs[0].tv_usec = inode->meta.atime % 1000000;
978 		tvs[1].tv_sec = inode->meta.mtime / 1000000;
979 		tvs[1].tv_usec = inode->meta.mtime % 1000000;
980 
981 		ftruncate(wfd, inode->meta.size);
982 		if (futimes(wfd, tvs) < 0)
983 			perror("futimes");
984 		fchown(wfd,
985 		       hammer2_to_unix_xid(&inode->meta.uid),
986 		       hammer2_to_unix_xid(&inode->meta.gid));
987 
988 		fchmod(wfd, inode->meta.mode);
989 		fchflags(wfd, inode->meta.uflags);
990 		iscan->link_file_path = strdup(path1);
991 	} else {
992 		struct timeval tvs[2];
993 
994 		tvs[0].tv_sec = inode->meta.atime / 1000000;
995 		tvs[0].tv_usec = inode->meta.atime % 1000000;
996 		tvs[1].tv_sec = inode->meta.mtime / 1000000;
997 		tvs[1].tv_usec = inode->meta.mtime % 1000000;
998 
999 		ftruncate(wfd, inode->meta.size);
1000 		if (futimes(wfd, tvs) < 0)
1001 			perror("futimes");
1002 		fchown(wfd,
1003 		       hammer2_to_unix_xid(&inode->meta.uid),
1004 		       hammer2_to_unix_xid(&inode->meta.gid));
1005 
1006 		asprintf(&path2, "%s.corrupted", path1);
1007 		rename(path1, path2);
1008 		iscan->link_file_path = path2;
1009 	}
1010 
1011 	/*
1012 	 * Cleanup
1013 	 */
1014 	close(wfd);
1015 
1016 	return res;
1017 }
1018 
1019 /*
1020  * TODO XXX
1021  */
1022 static int
1023 dump_inum_softlink(hammer2_inode_data_t *inode __unused,
1024 		   const char *path __unused)
1025 {
1026 	return 0;
1027 }
1028 
1029 /*
1030  * Scan the directory for a match against the next component in
1031  * the (remain) path.
1032  *
1033  * This function is part of the dump_tree() recursion mechanism.
1034  */
1035 static int
1036 dump_dir_data(const char *dest, const char *remain,
1037 	      hammer2_blockref_t *base, int count,
1038 	      int depth, int path_depth, int isafile)
1039 {
1040 	hammer2_media_data_t data;
1041 	hammer2_volume_t *vol;
1042 	hammer2_off_t poff;
1043 	hammer2_off_t psize;
1044 	int res = 1;
1045 	int rtmp;
1046 	int n;
1047 	size_t flen;
1048 
1049 	/*
1050 	 * Calculate length of next path component to match against.
1051 	 * A length of zero matches against the entire remaining
1052 	 * directory sub-tree.
1053 	 */
1054 	for (flen = 0; remain[flen] && remain[flen] != '/'; ++flen)
1055 		;
1056 
1057 	/*
1058 	 * Scan the brefs associated with the directory
1059 	 */
1060 	for (n = 0; n < count; ++n) {
1061 		hammer2_blockref_t *bref;
1062 		char filename_buf[1024+1];
1063 		topology_entry_t *topo;
1064 
1065 		bref = &base[n];
1066 
1067 		if (bref->type == HAMMER2_BREF_TYPE_EMPTY)
1068 			continue;
1069 
1070 		vol = NULL;
1071 		poff = 0;
1072 		psize = 0;
1073 		if (bref->data_off & 0x1F) {
1074 			vol = hammer2_get_volume(bref->data_off);
1075 			if (vol == NULL) {
1076 				res = 0;
1077 				continue;
1078 			}
1079 			poff = (bref->data_off - vol->offset) & ~0x1FL;
1080 			psize = 1 << (bref->data_off & 0x1F);
1081 			if (psize > sizeof(data)) {
1082 				res = 0;
1083 				continue;
1084 			}
1085 		}
1086 
1087 		switch(bref->type) {
1088 		case HAMMER2_BREF_TYPE_DIRENT:
1089 			/*
1090 			 * Match the path element or match all directory
1091 			 * entries if we have exhausted (remain).
1092 			 *
1093 			 * Locate all matching inodes and continue the
1094 			 * traversal.
1095 			 *
1096 			 * Avoid traversal loops by recording path_depth
1097 			 * on the way down and clearing it on the way back
1098 			 * up.
1099 			 */
1100 			if ((flen == 0 &&
1101 			    check_filename(bref, NULL, filename_buf,
1102 					   bref->embed.dirent.namlen)) ||
1103 			    (flen &&
1104 			     flen == bref->embed.dirent.namlen &&
1105 			     check_filename(bref, remain, filename_buf, flen)))
1106 			{
1107 				inode_entry_t *iscan;
1108 				hammer2_key_t inum;
1109 				char *path;
1110 
1111 				inum = bref->embed.dirent.inum;
1112 
1113 				asprintf(&path, "%s/%s", dest, filename_buf);
1114 				iscan = find_first_inode(inum);
1115 				while (iscan) {
1116 					if (iscan->inum == inum &&
1117 					    iscan->path_depth == 0 &&
1118 					    iscan->inode.meta.type ==
1119 					    bref->embed.dirent.type)
1120 					{
1121 						dump_tree(iscan, path,
1122 							  remain + flen,
1123 							  depth + 1,
1124 							  path_depth,
1125 							  isafile);
1126 
1127 						/*
1128 						 * Clear loop check
1129 						 */
1130 						iscan->path_depth = 0;
1131 					}
1132 					iscan = iscan->next;
1133 				}
1134 				free(path);
1135 			}
1136 			break;
1137 		case HAMMER2_BREF_TYPE_INDIRECT:
1138 			if (psize == 0) {
1139 				res = 0;
1140 				break;
1141 			}
1142 
1143 			/*
1144 			 * Due to COW operations, even if the inode is
1145 			 * replicated, some of the indirect brefs might
1146 			 * still be shared and allow us to reject duplicate
1147 			 * scans.
1148 			 */
1149 			topo = enter_topology(dest);
1150 			if (topology_check_duplicate_indirect(topo, bref))
1151 				break;
1152 
1153 			if (pread(vol->fd, &data, psize, poff) !=
1154 			    (ssize_t)psize)
1155 			{
1156 				res = 0;
1157 				break;
1158 			}
1159 
1160 			if (validate_crc(bref, &data, psize) == 0) {
1161 				res = 0;
1162 				break;
1163 			}
1164 
1165 			rtmp = dump_dir_data(dest, remain,
1166 					  &data.npdata[0],
1167 					  psize / sizeof(hammer2_blockref_t),
1168 					  depth, path_depth, isafile);
1169 			if (res)
1170 				res = rtmp;
1171 			break;
1172 		}
1173 	}
1174 	return res;
1175 }
1176 
1177 /*
1178  * Dumps the data records for an inode to the target file (wfd), returns
1179  * TRUE on success, FALSE if corruption was detected.
1180  */
1181 static int
1182 dump_file_data(int wfd, hammer2_off_t fsize,
1183 	       hammer2_blockref_t *base, int count)
1184 {
1185 	hammer2_media_data_t data;
1186 	hammer2_blockref_t *bref;
1187 	hammer2_volume_t *vol;
1188 	hammer2_off_t poff;
1189 	hammer2_off_t psize;
1190 	hammer2_off_t nsize;
1191 	int res = 1;
1192 	int rtmp;
1193 	int n;
1194 
1195 	for (n = 0; n < count; ++n) {
1196 		char *dptr;
1197 		int res2;
1198 
1199 		bref = &base[n];
1200 
1201 		if (bref->type == HAMMER2_BREF_TYPE_EMPTY ||
1202 		    bref->data_off == 0)
1203 		{
1204 			continue;
1205 		}
1206 		vol = hammer2_get_volume(bref->data_off);
1207 		if (vol == NULL)
1208 			continue;
1209 		if (bref->type == HAMMER2_BREF_TYPE_EMPTY ||
1210 		    bref->data_off == 0)
1211 		{
1212 			continue;
1213 		}
1214 
1215 		poff = (bref->data_off - vol->offset) & ~0x1FL;
1216 		psize = 1 << (bref->data_off & 0x1F);
1217 		if (psize > sizeof(data)) {
1218 			res = 0;
1219 			continue;
1220 		}
1221 
1222 		if (pread(vol->fd, &data, psize, poff) != (ssize_t)psize) {
1223 			res = 0;
1224 			continue;
1225 		}
1226 
1227 		if (validate_crc(bref, &data, psize) == 0) {
1228 			res = 0;
1229 			continue;
1230 		}
1231 
1232 		switch(bref->type) {
1233 		case HAMMER2_BREF_TYPE_DATA:
1234 			dptr = (void *)&data;
1235 
1236 			nsize = 1L << bref->keybits;
1237 			if (nsize > sizeof(data)) {
1238 				res = 0;
1239 				break;
1240 			}
1241 
1242 			switch (HAMMER2_DEC_COMP(bref->methods)) {
1243 			case HAMMER2_COMP_LZ4:
1244 				dptr = hammer2_decompress_LZ4(dptr, psize,
1245 							      nsize, &res2);
1246 				if (res)
1247 					res = res2;
1248 				psize = nsize;
1249 				break;
1250 			case HAMMER2_COMP_ZLIB:
1251 				dptr = hammer2_decompress_ZLIB(dptr, psize,
1252 							       nsize, &res2);
1253 				if (res)
1254 					res = res2;
1255 				psize = nsize;
1256 				break;
1257 			case HAMMER2_COMP_NONE:
1258 			default:
1259 				/* leave in current form */
1260 				break;
1261 			}
1262 
1263 			if (bref->key + psize > fsize)
1264 				pwrite(wfd, dptr, fsize - bref->key,
1265 				       bref->key);
1266 			else
1267 				pwrite(wfd, dptr, psize, bref->key);
1268 			break;
1269 		case HAMMER2_BREF_TYPE_INDIRECT:
1270 			rtmp = dump_file_data(wfd, fsize,
1271 					  &data.npdata[0],
1272 					  psize / sizeof(hammer2_blockref_t));
1273 			if (res)
1274 				res = rtmp;
1275 			break;
1276 		}
1277 	}
1278 	return res;
1279 }
1280 
1281 /*
1282  * Validate the bref data target.  The recovery scan will often attempt to
1283  * validate invalid elements, so don't spew errors to stderr on failure.
1284  */
1285 static int
1286 validate_crc(hammer2_blockref_t *bref, void *data, size_t bytes)
1287 {
1288 	uint32_t cv;
1289 	uint64_t cv64;
1290 	SHA256_CTX hash_ctx;
1291 	int success = 0;
1292 	union {
1293 		uint8_t digest[SHA256_DIGEST_LENGTH];
1294 		uint64_t digest64[SHA256_DIGEST_LENGTH/8];
1295 	} u;
1296 
1297 	switch(HAMMER2_DEC_CHECK(bref->methods)) {
1298 	case HAMMER2_CHECK_NONE:
1299 		if (StrictMode == 0)
1300 			success = 1;
1301 		break;
1302 	case HAMMER2_CHECK_DISABLED:
1303 		if (StrictMode == 0)
1304 			success = 1;
1305 		break;
1306 	case HAMMER2_CHECK_ISCSI32:
1307 		cv = hammer2_icrc32(data, bytes);
1308 		if (bref->check.iscsi32.value == cv) {
1309 			success = 1;
1310 		}
1311 		break;
1312 	case HAMMER2_CHECK_XXHASH64:
1313 		cv64 = XXH64(data, bytes, XXH_HAMMER2_SEED);
1314 		if (bref->check.xxhash64.value == cv64) {
1315 			success = 1;
1316 		}
1317 		break;
1318 	case HAMMER2_CHECK_SHA192:
1319 		SHA256_Init(&hash_ctx);
1320 		SHA256_Update(&hash_ctx, data, bytes);
1321 		SHA256_Final(u.digest, &hash_ctx);
1322 		u.digest64[2] ^= u.digest64[3];
1323 		if (memcmp(u.digest, bref->check.sha192.data,
1324 			   sizeof(bref->check.sha192.data)) == 0)
1325 		{
1326 			success = 1;
1327 		}
1328 		break;
1329 	case HAMMER2_CHECK_FREEMAP:
1330 		cv = hammer2_icrc32(data, bytes);
1331 		if (bref->check.freemap.icrc32 == cv) {
1332 			success = 1;
1333 		}
1334 		break;
1335 	}
1336 	return success;
1337 }
1338 
1339 /*
1340  * Convert a hammer2 uuid to a uid or gid.
1341  */
1342 static uint32_t
1343 hammer2_to_unix_xid(const uuid_t *uuid)
1344 {
1345         return(*(const uint32_t *)&uuid->node[2]);
1346 }
1347