xref: /dragonfly/sbin/hammer/cmd_recover.c (revision b3c6ec2b)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer.h"
36 
/*
 * In-memory dictionary entry for one object seen during the raw scan.
 * Entries are keyed by (obj_id, pfs_id) and chained off RDHash[] by
 * get_dict().
 */
struct recover_dict {
	struct recover_dict *next;	/* hash chain link */
	struct recover_dict *parent;	/* directory used to build the path */
	int64_t	obj_id;			/* object id (part of the key) */
	uint8_t obj_type;		/* HAMMER_OBJTYPE_*, 0 until known */
	uint8_t flags;			/* DICTF_* */
	uint16_t pfs_id;		/* PFS id (part of the key) */
	int64_t	size;			/* size from inode record, -1 if unseen */
	char	*name;			/* name from a directory entry or NULL */
};
47 
/*
 * Bits for recover_dict.flags
 */
#define DICTF_MADEDIR	0x01	/* mkdir() already issued for this entry */
#define DICTF_MADEFILE	0x02	/* "mkfile" already reported for this entry */
#define DICTF_PARENT	0x04	/* parent attached for real */
#define DICTF_TRAVERSED	0x80	/* transient, set while a path walk visits */
52 
/*
 * Forward declarations for the file-local helpers.
 */
static void recover_top(char *ptr, hammer_off_t offset);
static void recover_elm(hammer_btree_leaf_elm_t leaf);
static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
static char *recover_path(struct recover_dict *dict);
static void sanitize_string(char *str);

/*
 * TargetDir is the directory recovered objects are written under; it is
 * set on entry to hammer_cmd_recover().
 *
 * CachedFd/CachedPath remember the data file written most recently so
 * that consecutive data records for the same path reuse the open
 * descriptor.  CachedPath is NULL (and CachedFd -1) when nothing is
 * cached.
 */
static const char *TargetDir;
static int CachedFd = -1;
static char *CachedPath;
62 
63 void
64 hammer_cmd_recover(const char *target_dir)
65 {
66 	struct buffer_info *data_buffer;
67 	struct volume_info *volume;
68 	hammer_off_t off;
69 	hammer_off_t off_end;
70 	char *ptr;
71 
72 	AssertOnFailure = 0;
73 	TargetDir = target_dir;
74 
75 	if (mkdir(TargetDir, 0777) == -1) {
76 		if (errno != EEXIST) {
77 			perror("mkdir");
78 			exit(1);
79 		}
80 	}
81 
82 	printf("Running raw scan of HAMMER image, recovering to %s\n",
83 		TargetDir);
84 
85 	data_buffer = NULL;
86 	TAILQ_FOREACH(volume, &VolList, entry) {
87 		check_volume(volume);
88 		printf("Scanning volume %d size %s\n",
89 			volume->vol_no, sizetostr(volume->size));
90 		off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
91 		off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk);
92 		while (off < off_end) {
93 			ptr = get_buffer_data(off, &data_buffer, 0);
94 			if (ptr)
95 				recover_top(ptr, off);
96 			off += HAMMER_BUFSIZE;
97 		}
98 	}
99 	rel_buffer(data_buffer);
100 
101 	if (CachedPath) {
102 		free(CachedPath);
103 		close(CachedFd);
104 		CachedPath = NULL;
105 		CachedFd = -1;
106 	}
107 
108 	AssertOnFailure = 1;
109 }
110 
111 /*
112  * Top level recovery processor.  Assume the data is a B-Tree node.
113  * If the CRC is good we attempt to process the node, building the
114  * object space and creating the dictionary as we go.
115  */
116 static void
117 recover_top(char *ptr, hammer_off_t offset)
118 {
119 	struct hammer_node_ondisk *node;
120 	hammer_btree_elm_t elm;
121 	int maxcount;
122 	int i;
123 	int isnode;
124 	char buf[HAMMER_BTREE_LEAF_ELMS + 1];
125 
126 	for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
127 		isnode = (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == node->crc);
128 		maxcount = hammer_node_max_elements(node->type);
129 
130 		if (DebugOpt) {
131 			for (i = 0; i < node->count && i < maxcount; ++i)
132 				buf[i] = hammer_elm_btype(&node->elms[i]);
133 			buf[i] = '\0';
134 			if (!isnode && DebugOpt > 1)
135 				printf("%016jx -\n", offset);
136 			if (isnode)
137 				printf("%016jx %c %d %s\n",
138 					offset, node->type, node->count, buf);
139 		}
140 		offset += sizeof(*node);
141 
142 		if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) {
143 			for (i = 0; i < node->count && i < maxcount; ++i) {
144 				elm = &node->elms[i];
145 				if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
146 					continue;
147 				recover_elm(&elm->leaf);
148 			}
149 		}
150 	}
151 }
152 
153 static void
154 recover_elm(hammer_btree_leaf_elm_t leaf)
155 {
156 	struct buffer_info *data_buffer = NULL;
157 	struct recover_dict *dict;
158 	struct recover_dict *dict2;
159 	hammer_data_ondisk_t ondisk;
160 	hammer_off_t data_offset;
161 	struct stat st;
162 	int chunk;
163 	int len;
164 	int zfill;
165 	int64_t file_offset;
166 	uint16_t pfs_id;
167 	size_t nlen;
168 	int fd;
169 	char *name;
170 	char *path1;
171 	char *path2;
172 
173 	/*
174 	 * Ignore deleted records
175 	 */
176 	if (leaf->delete_ts)
177 		return;
178 	if ((data_offset = leaf->data_offset) != 0)
179 		ondisk = get_buffer_data(data_offset, &data_buffer, 0);
180 	else
181 		ondisk = NULL;
182 	if (ondisk == NULL)
183 		goto done;
184 
185 	len = leaf->data_len;
186 	chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
187 	if (chunk > len)
188 		chunk = len;
189 
190 	if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
191 		goto done;
192 
193 	pfs_id = lo_to_pfs(leaf->base.localization);
194 
195 	dict = get_dict(leaf->base.obj_id, pfs_id);
196 
197 	switch(leaf->base.rec_type) {
198 	case HAMMER_RECTYPE_INODE:
199 		/*
200 		 * We found an inode which also tells us where the file
201 		 * or directory is in the directory hierarchy.
202 		 */
203 		if (VerboseOpt) {
204 			printf("file %016jx:%05d inode found\n",
205 				(uintmax_t)leaf->base.obj_id, pfs_id);
206 		}
207 		path1 = recover_path(dict);
208 
209 		/*
210 		 * Attach the inode to its parent.  This isn't strictly
211 		 * necessary because the information is also in the
212 		 * directory entries, but if we do not find the directory
213 		 * entry this ensures that the files will still be
214 		 * reasonably well organized in their proper directories.
215 		 */
216 		if ((dict->flags & DICTF_PARENT) == 0 &&
217 		    dict->obj_id != HAMMER_OBJID_ROOT &&
218 		    ondisk->inode.parent_obj_id != 0) {
219 			dict->flags |= DICTF_PARENT;
220 			dict->parent = get_dict(ondisk->inode.parent_obj_id,
221 						pfs_id);
222 			if (dict->parent &&
223 			    (dict->parent->flags & DICTF_MADEDIR) == 0) {
224 				dict->parent->flags |= DICTF_MADEDIR;
225 				path2 = recover_path(dict->parent);
226 				printf("mkdir %s\n", path2);
227 				mkdir(path2, 0777);
228 				free(path2);
229 				path2 = NULL;
230 			}
231 		}
232 		if (dict->obj_type == 0)
233 			dict->obj_type = ondisk->inode.obj_type;
234 		dict->size = ondisk->inode.size;
235 		path2 = recover_path(dict);
236 
237 		if (lstat(path1, &st) == 0) {
238 			if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
239 				truncate(path1, dict->size);
240 				/* chmod(path1, 0666); */
241 			}
242 			if (strcmp(path1, path2)) {
243 				printf("Rename %s -> %s\n", path1, path2);
244 				rename(path1, path2);
245 			}
246 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
247 			printf("mkinode (file) %s\n", path2);
248 			fd = open(path2, O_RDWR|O_CREAT, 0666);
249 			if (fd > 0)
250 				close(fd);
251 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
252 			printf("mkinode (dir) %s\n", path2);
253 			mkdir(path2, 0777);
254 			dict->flags |= DICTF_MADEDIR;
255 		}
256 		free(path1);
257 		free(path2);
258 		break;
259 	case HAMMER_RECTYPE_DATA:
260 		/*
261 		 * File record data
262 		 */
263 		if (leaf->base.obj_id == 0)
264 			break;
265 		if (VerboseOpt) {
266 			printf("file %016jx:%05d data %016jx,%d\n",
267 				(uintmax_t)leaf->base.obj_id,
268 				pfs_id,
269 				(uintmax_t)leaf->base.key - len,
270 				len);
271 		}
272 
273 		/*
274 		 * Update the dictionary entry
275 		 */
276 		if (dict->obj_type == 0)
277 			dict->obj_type = HAMMER_OBJTYPE_REGFILE;
278 
279 		/*
280 		 * If the parent directory has not been created we
281 		 * have to create it (typically a PFS%05d)
282 		 */
283 		if (dict->parent &&
284 		    (dict->parent->flags & DICTF_MADEDIR) == 0) {
285 			dict->parent->flags |= DICTF_MADEDIR;
286 			path2 = recover_path(dict->parent);
287 			printf("mkdir %s\n", path2);
288 			mkdir(path2, 0777);
289 			free(path2);
290 			path2 = NULL;
291 		}
292 
293 		/*
294 		 * Create the file if necessary, report file creations
295 		 */
296 		path1 = recover_path(dict);
297 		if (CachedPath && strcmp(CachedPath, path1) == 0) {
298 			fd = CachedFd;
299 		} else {
300 			fd = open(path1, O_CREAT|O_RDWR, 0666);
301 		}
302 		if (fd < 0) {
303 			printf("Unable to create %s: %s\n",
304 				path1, strerror(errno));
305 			free(path1);
306 			break;
307 		}
308 		if ((dict->flags & DICTF_MADEFILE) == 0) {
309 			dict->flags |= DICTF_MADEFILE;
310 			printf("mkfile %s\n", path1);
311 		}
312 
313 		/*
314 		 * And write the record.  A HAMMER data block is aligned
315 		 * and may contain trailing zeros after the file EOF.  The
316 		 * inode record is required to get the actual file size.
317 		 *
318 		 * However, when the inode record is not available
319 		 * we can do a sparse write and that will get it right
320 		 * most of the time even if the inode record is never
321 		 * found.
322 		 */
323 		file_offset = (int64_t)leaf->base.key - len;
324 		lseek(fd, (off_t)file_offset, SEEK_SET);
325 		while (len) {
326 			if (dict->size == -1) {
327 				for (zfill = chunk - 1; zfill >= 0; --zfill) {
328 					if (((char *)ondisk)[zfill])
329 						break;
330 				}
331 				++zfill;
332 			} else {
333 				zfill = chunk;
334 			}
335 
336 			if (zfill)
337 				write(fd, ondisk, zfill);
338 			if (zfill < chunk)
339 				lseek(fd, chunk - zfill, SEEK_CUR);
340 
341 			len -= chunk;
342 			data_offset += chunk;
343 			file_offset += chunk;
344 			ondisk = get_buffer_data(data_offset, &data_buffer, 0);
345 			if (ondisk == NULL)
346 				break;
347 			chunk = HAMMER_BUFSIZE -
348 				((int)data_offset & HAMMER_BUFMASK);
349 			if (chunk > len)
350 				chunk = len;
351 		}
352 		if (dict->size >= 0 && file_offset > dict->size) {
353 			ftruncate(fd, dict->size);
354 			/* fchmod(fd, 0666); */
355 		}
356 
357 		if (fd == CachedFd) {
358 			free(path1);
359 		} else if (CachedPath) {
360 			free(CachedPath);
361 			close(CachedFd);
362 			CachedPath = path1;
363 			CachedFd = fd;
364 		} else {
365 			CachedPath = path1;
366 			CachedFd = fd;
367 		}
368 		break;
369 	case HAMMER_RECTYPE_DIRENTRY:
370 		nlen = len - offsetof(struct hammer_direntry_data, name[0]);
371 		if ((int)nlen < 0)	/* illegal length */
372 			break;
373 		if (ondisk->entry.obj_id == 0 ||
374 		    ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
375 			break;
376 		name = malloc(nlen + 1);
377 		bcopy(ondisk->entry.name, name, nlen);
378 		name[nlen] = 0;
379 		sanitize_string(name);
380 
381 		/*
382 		 * We can't deal with hardlinks so if the object already
383 		 * has a name assigned to it we just keep using that name.
384 		 */
385 		dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
386 		path1 = recover_path(dict2);
387 
388 		if (dict2->name == NULL)
389 			dict2->name = name;
390 		else
391 			free(name);
392 
393 		/*
394 		 * Attach dict2 to its directory (dict), create the
395 		 * directory (dict) if necessary.  We must ensure
396 		 * that the directory entry exists in order to be
397 		 * able to properly rename() the file without creating
398 		 * a namespace conflict.
399 		 */
400 		if ((dict2->flags & DICTF_PARENT) == 0) {
401 			dict2->flags |= DICTF_PARENT;
402 			dict2->parent = dict;
403 			if ((dict->flags & DICTF_MADEDIR) == 0) {
404 				dict->flags |= DICTF_MADEDIR;
405 				path2 = recover_path(dict);
406 				printf("mkdir %s\n", path2);
407 				mkdir(path2, 0777);
408 				free(path2);
409 				path2 = NULL;
410 			}
411 		}
412 		path2 = recover_path(dict2);
413 		if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
414 			printf("Rename %s -> %s\n", path1, path2);
415 			rename(path1, path2);
416 		}
417 		free(path1);
418 		free(path2);
419 
420 		printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
421 			(uintmax_t)leaf->base.obj_id,
422 			pfs_id,
423 			(uintmax_t)ondisk->entry.obj_id,
424 			name);
425 		break;
426 	default:
427 		/*
428 		 * Ignore any other record types
429 		 */
430 		break;
431 	}
432 done:
433 	rel_buffer(data_buffer);
434 }
435 
/*
 * Dictionary hash table.  get_dict() selects the chain with
 * crc32(obj_id) & RD_HMASK, so RD_HSIZE must remain a power of 2.
 */
#define RD_HSIZE	32768
#define RD_HMASK	(RD_HSIZE - 1)

struct recover_dict *RDHash[RD_HSIZE];
440 
441 static
442 struct recover_dict *
443 get_dict(int64_t obj_id, uint16_t pfs_id)
444 {
445 	struct recover_dict *dict;
446 	int i;
447 
448 	if (obj_id == 0)
449 		return(NULL);
450 
451 	i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
452 	for (dict = RDHash[i]; dict; dict = dict->next) {
453 		if (dict->obj_id == obj_id &&
454 		    dict->pfs_id == pfs_id) {
455 			break;
456 		}
457 	}
458 	if (dict == NULL) {
459 		dict = malloc(sizeof(*dict));
460 		bzero(dict, sizeof(*dict));
461 		dict->obj_id = obj_id;
462 		dict->pfs_id = pfs_id;
463 		dict->next = RDHash[i];
464 		dict->size = -1;
465 		RDHash[i] = dict;
466 
467 		/*
468 		 * Always connect dangling dictionary entries to object 1
469 		 * (the root of the PFS).
470 		 *
471 		 * DICTF_PARENT will not be set until we know what the
472 		 * real parent directory object is.
473 		 */
474 		if (dict->obj_id != HAMMER_OBJID_ROOT)
475 			dict->parent = get_dict(1, pfs_id);
476 	}
477 	return(dict);
478 }
479 
/*
 * Working state for the two-pass path construction in recover_path():
 * the PI_FIGURE pass accumulates the required length, the PI_LOAD pass
 * fills the allocated buffer.
 */
struct path_info {
	enum { PI_FIGURE, PI_LOAD } state;	/* which pass is running */
	uint16_t pfs_id;	/* used to name the PFS root element */
	char *base;		/* start of the allocated path buffer */
	char *next;		/* write position within base (PI_LOAD) */
	int len;		/* bytes required, summed during PI_FIGURE */
};

static void recover_path_helper(struct recover_dict *, struct path_info *);
489 
490 static
491 char *
492 recover_path(struct recover_dict *dict)
493 {
494 	struct path_info info;
495 
496 	bzero(&info, sizeof(info));
497 	info.pfs_id = dict->pfs_id;
498 	info.state = PI_FIGURE;
499 	recover_path_helper(dict, &info);
500 	info.base = malloc(info.len);
501 	info.next = info.base;
502 	info.state = PI_LOAD;
503 	recover_path_helper(dict, &info);
504 
505 	return(info.base);
506 }
507 
508 static
509 void
510 recover_path_helper(struct recover_dict *dict, struct path_info *info)
511 {
512 	/*
513 	 * Calculate path element length
514 	 */
515 	dict->flags |= DICTF_TRAVERSED;
516 
517 	switch(info->state) {
518 	case PI_FIGURE:
519 		if (dict->obj_id == HAMMER_OBJID_ROOT)
520 			info->len += 8;
521 		else if (dict->name)
522 			info->len += strlen(dict->name);
523 		else
524 			info->len += 6 + 16;
525 		++info->len;
526 
527 		if (dict->parent &&
528 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
529 			recover_path_helper(dict->parent, info);
530 		} else {
531 			info->len += strlen(TargetDir) + 1;
532 		}
533 		break;
534 	case PI_LOAD:
535 		if (dict->parent &&
536 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
537 			recover_path_helper(dict->parent, info);
538 		} else {
539 			strcpy(info->next, TargetDir);
540 			info->next += strlen(info->next);
541 		}
542 
543 		*info->next++ = '/';
544 		if (dict->obj_id == HAMMER_OBJID_ROOT) {
545 			snprintf(info->next, 8+1, "PFS%05d", info->pfs_id);
546 		} else if (dict->name) {
547 			strcpy(info->next, dict->name);
548 		} else {
549 			snprintf(info->next, 6+16+1, "obj_0x%016jx",
550 				(uintmax_t)dict->obj_id);
551 		}
552 		info->next += strlen(info->next);
553 		break;
554 	}
555 	dict->flags &= ~DICTF_TRAVERSED;
556 }
557 
/*
 * Replace every non-printable character of the NUL-terminated string
 * str with 'x', in place.
 *
 * The character is converted to unsigned char before being handed to
 * isprint(); passing a negative plain char (any byte >= 0x80 where
 * char is signed) is undefined behavior.
 */
static
void
sanitize_string(char *str)
{
	for (; *str != '\0'; ++str) {
		if (!isprint((unsigned char)*str))
			*str = 'x';
	}
}
568