xref: /dragonfly/sbin/hammer/cmd_recover.c (revision 0d27ae55)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer.h"
36 
/*
 * One dictionary entry per (obj_id, pfs_id) pair seen during the raw
 * scan.  Entries are created on demand by get_dict() and live on a
 * hash chain for the remainder of the run.
 */
struct recover_dict {
	struct recover_dict	*next;		/* next entry on the same hash chain */
	struct recover_dict	*parent;	/* directory this object is placed under */
	int64_t			obj_id;		/* object id from the B-Tree record */
	uint8_t			obj_type;	/* 0 until an inode or data record sets it */
	uint8_t			flags;		/* DICTF_* bits below */
	uint16_t		pfs_id;		/* PFS derived from the record localization */
	int64_t			size;		/* -1 until an inode record supplies it */
	char			*name;		/* malloc'd entry name, NULL if none seen */
};

/*
 * recover_dict.flags
 */
#define DICTF_MADEDIR	0x01	/* mkdir() was already attempted for the entry */
#define DICTF_MADEFILE	0x02	/* "mkfile" was already reported for the entry */
#define DICTF_PARENT	0x04	/* parent attached for real */
#define DICTF_TRAVERSED	0x80	/* set while recover_path_helper() visits the entry */
52 
53 static void recover_top(char *ptr);
54 static void recover_elm(hammer_btree_leaf_elm_t leaf);
55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
56 static char *recover_path(struct recover_dict *dict);
57 static void sanitize_string(char *str);
58 
59 static const char *TargetDir;
60 static int CachedFd = -1;
61 static char *CachedPath;
62 
63 void
64 hammer_cmd_recover(const char *target_dir)
65 {
66 	struct buffer_info *data_buffer;
67 	struct volume_info *scan;
68 	struct volume_info *volume;
69 	hammer_off_t off;
70 	hammer_off_t off_end;
71 	char *ptr;
72 
73 	AssertOnFailure = 0;
74 	TargetDir = target_dir;
75 
76 	printf("Running raw scan of HAMMER image, recovering to %s\n",
77 		TargetDir);
78 	mkdir(TargetDir, 0777);
79 
80 	data_buffer = NULL;
81 	TAILQ_FOREACH(scan, &VolList, entry) {
82 		volume = get_volume(scan->vol_no);
83 
84 		off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
85 		off_end = off + (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg);
86 		while (off < off_end) {
87 			ptr = get_buffer_data(off, &data_buffer, 0);
88 			if (ptr) {
89 				recover_top(ptr);
90 				off += HAMMER_BUFSIZE;
91 			}
92 		}
93 	}
94 	rel_buffer(data_buffer);
95 
96 	if (CachedPath) {
97 		free(CachedPath);
98 		close(CachedFd);
99 		CachedPath = NULL;
100 		CachedFd = -1;
101 	}
102 
103 	AssertOnFailure = 1;
104 }
105 
106 /*
107  * Top level recovery processor.  Assume the data is a B-Tree node.
108  * If the CRC is good we attempt to process the node, building the
109  * object space and creating the dictionary as we go.
110  */
111 static void
112 recover_top(char *ptr)
113 {
114 	struct hammer_node_ondisk *node;
115 	hammer_btree_elm_t elm;
116 	int maxcount;
117 	int i;
118 
119 	for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
120 		if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) ==
121 		    node->crc &&
122 		    node->type == HAMMER_BTREE_TYPE_LEAF) {
123 			/*
124 			 * Scan elements
125 			 */
126 			maxcount = HAMMER_BTREE_LEAF_ELMS;
127 			for (i = 0; i < node->count && i < maxcount; ++i) {
128 				elm = &node->elms[i];
129 				if (elm->base.btype != 'R')
130 					continue;
131 				recover_elm(&elm->leaf);
132 			}
133 		}
134 	}
135 }
136 
137 static void
138 recover_elm(hammer_btree_leaf_elm_t leaf)
139 {
140 	struct buffer_info *data_buffer = NULL;
141 	struct recover_dict *dict;
142 	struct recover_dict *dict2;
143 	hammer_data_ondisk_t ondisk;
144 	hammer_off_t data_offset;
145 	struct stat st;
146 	int chunk;
147 	int len;
148 	int zfill;
149 	int64_t file_offset;
150 	uint16_t pfs_id;
151 	size_t nlen;
152 	int fd;
153 	char *name;
154 	char *path1;
155 	char *path2;
156 
157 	/*
158 	 * Ignore deleted records
159 	 */
160 	if (leaf->delete_ts)
161 		return;
162 	if ((data_offset = leaf->data_offset) != 0)
163 		ondisk = get_buffer_data(data_offset, &data_buffer, 0);
164 	else
165 		ondisk = NULL;
166 	if (ondisk == NULL)
167 		goto done;
168 
169 	len = leaf->data_len;
170 	chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
171 	if (chunk > len)
172 		chunk = len;
173 
174 	if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
175 		goto done;
176 
177 	pfs_id = lo_to_pfs(leaf->base.localization);
178 
179 	dict = get_dict(leaf->base.obj_id, pfs_id);
180 
181 	switch(leaf->base.rec_type) {
182 	case HAMMER_RECTYPE_INODE:
183 		/*
184 		 * We found an inode which also tells us where the file
185 		 * or directory is in the directory hierarchy.
186 		 */
187 		if (VerboseOpt) {
188 			printf("file %016jx:%05d inode found\n",
189 				(uintmax_t)leaf->base.obj_id, pfs_id);
190 		}
191 		path1 = recover_path(dict);
192 
193 		/*
194 		 * Attach the inode to its parent.  This isn't strictly
195 		 * necessary because the information is also in the
196 		 * directory entries, but if we do not find the directory
197 		 * entry this ensures that the files will still be
198 		 * reasonably well organized in their proper directories.
199 		 */
200 		if ((dict->flags & DICTF_PARENT) == 0 &&
201 		    dict->obj_id != HAMMER_OBJID_ROOT &&
202 		    ondisk->inode.parent_obj_id != 0) {
203 			dict->flags |= DICTF_PARENT;
204 			dict->parent = get_dict(ondisk->inode.parent_obj_id,
205 						pfs_id);
206 			if (dict->parent &&
207 			    (dict->parent->flags & DICTF_MADEDIR) == 0) {
208 				dict->parent->flags |= DICTF_MADEDIR;
209 				path2 = recover_path(dict->parent);
210 				printf("mkdir %s\n", path2);
211 				mkdir(path2, 0777);
212 				free(path2);
213 				path2 = NULL;
214 			}
215 		}
216 		if (dict->obj_type == 0)
217 			dict->obj_type = ondisk->inode.obj_type;
218 		dict->size = ondisk->inode.size;
219 		path2 = recover_path(dict);
220 
221 		if (lstat(path1, &st) == 0) {
222 			if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
223 				truncate(path1, dict->size);
224 				/* chmod(path1, 0666); */
225 			}
226 			if (strcmp(path1, path2)) {
227 				printf("Rename %s -> %s\n", path1, path2);
228 				rename(path1, path2);
229 			}
230 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
231 			printf("mkinode (file) %s\n", path2);
232 			fd = open(path2, O_RDWR|O_CREAT, 0666);
233 			if (fd > 0)
234 				close(fd);
235 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
236 			printf("mkinode (dir) %s\n", path2);
237 			mkdir(path2, 0777);
238 			dict->flags |= DICTF_MADEDIR;
239 		}
240 		free(path1);
241 		free(path2);
242 		break;
243 	case HAMMER_RECTYPE_DATA:
244 		/*
245 		 * File record data
246 		 */
247 		if (leaf->base.obj_id == 0)
248 			break;
249 		if (VerboseOpt) {
250 			printf("file %016jx:%05d data %016jx,%d\n",
251 				(uintmax_t)leaf->base.obj_id,
252 				pfs_id,
253 				(uintmax_t)leaf->base.key - len,
254 				len);
255 		}
256 
257 		/*
258 		 * Update the dictionary entry
259 		 */
260 		if (dict->obj_type == 0)
261 			dict->obj_type = HAMMER_OBJTYPE_REGFILE;
262 
263 		/*
264 		 * If the parent directory has not been created we
265 		 * have to create it (typically a PFS%05d)
266 		 */
267 		if (dict->parent &&
268 		    (dict->parent->flags & DICTF_MADEDIR) == 0) {
269 			dict->parent->flags |= DICTF_MADEDIR;
270 			path2 = recover_path(dict->parent);
271 			printf("mkdir %s\n", path2);
272 			mkdir(path2, 0777);
273 			free(path2);
274 			path2 = NULL;
275 		}
276 
277 		/*
278 		 * Create the file if necessary, report file creations
279 		 */
280 		path1 = recover_path(dict);
281 		if (CachedPath && strcmp(CachedPath, path1) == 0) {
282 			fd = CachedFd;
283 		} else {
284 			fd = open(path1, O_CREAT|O_RDWR, 0666);
285 		}
286 		if (fd < 0) {
287 			printf("Unable to create %s: %s\n",
288 				path1, strerror(errno));
289 			free(path1);
290 			break;
291 		}
292 		if ((dict->flags & DICTF_MADEFILE) == 0) {
293 			dict->flags |= DICTF_MADEFILE;
294 			printf("mkfile %s\n", path1);
295 		}
296 
297 		/*
298 		 * And write the record.  A HAMMER data block is aligned
299 		 * and may contain trailing zeros after the file EOF.  The
300 		 * inode record is required to get the actual file size.
301 		 *
302 		 * However, when the inode record is not available
303 		 * we can do a sparse write and that will get it right
304 		 * most of the time even if the inode record is never
305 		 * found.
306 		 */
307 		file_offset = (int64_t)leaf->base.key - len;
308 		lseek(fd, (off_t)file_offset, SEEK_SET);
309 		while (len) {
310 			if (dict->size == -1) {
311 				for (zfill = chunk - 1; zfill >= 0; --zfill) {
312 					if (((char *)ondisk)[zfill])
313 						break;
314 				}
315 				++zfill;
316 			} else {
317 				zfill = chunk;
318 			}
319 
320 			if (zfill)
321 				write(fd, ondisk, zfill);
322 			if (zfill < chunk)
323 				lseek(fd, chunk - zfill, SEEK_CUR);
324 
325 			len -= chunk;
326 			data_offset += chunk;
327 			file_offset += chunk;
328 			ondisk = get_buffer_data(data_offset, &data_buffer, 0);
329 			if (ondisk == NULL)
330 				break;
331 			chunk = HAMMER_BUFSIZE -
332 				((int)data_offset & HAMMER_BUFMASK);
333 			if (chunk > len)
334 				chunk = len;
335 		}
336 		if (dict->size >= 0 && file_offset > dict->size) {
337 			ftruncate(fd, dict->size);
338 			/* fchmod(fd, 0666); */
339 		}
340 
341 		if (fd == CachedFd) {
342 			free(path1);
343 		} else if (CachedPath) {
344 			free(CachedPath);
345 			close(CachedFd);
346 			CachedPath = path1;
347 			CachedFd = fd;
348 		} else {
349 			CachedPath = path1;
350 			CachedFd = fd;
351 		}
352 		break;
353 	case HAMMER_RECTYPE_DIRENTRY:
354 		nlen = len - offsetof(struct hammer_direntry_data, name[0]);
355 		if ((int)nlen < 0)	/* illegal length */
356 			break;
357 		if (ondisk->entry.obj_id == 0 ||
358 		    ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
359 			break;
360 		name = malloc(nlen + 1);
361 		bcopy(ondisk->entry.name, name, nlen);
362 		name[nlen] = 0;
363 		sanitize_string(name);
364 
365 		/*
366 		 * We can't deal with hardlinks so if the object already
367 		 * has a name assigned to it we just keep using that name.
368 		 */
369 		dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
370 		path1 = recover_path(dict2);
371 
372 		if (dict2->name == NULL)
373 			dict2->name = name;
374 		else
375 			free(name);
376 
377 		/*
378 		 * Attach dict2 to its directory (dict), create the
379 		 * directory (dict) if necessary.  We must ensure
380 		 * that the directory entry exists in order to be
381 		 * able to properly rename() the file without creating
382 		 * a namespace conflict.
383 		 */
384 		if ((dict2->flags & DICTF_PARENT) == 0) {
385 			dict2->flags |= DICTF_PARENT;
386 			dict2->parent = dict;
387 			if ((dict->flags & DICTF_MADEDIR) == 0) {
388 				dict->flags |= DICTF_MADEDIR;
389 				path2 = recover_path(dict);
390 				printf("mkdir %s\n", path2);
391 				mkdir(path2, 0777);
392 				free(path2);
393 				path2 = NULL;
394 			}
395 		}
396 		path2 = recover_path(dict2);
397 		if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
398 			printf("Rename %s -> %s\n", path1, path2);
399 			rename(path1, path2);
400 		}
401 		free(path1);
402 		free(path2);
403 
404 		printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
405 			(uintmax_t)leaf->base.obj_id,
406 			pfs_id,
407 			(uintmax_t)ondisk->entry.obj_id,
408 			name);
409 		break;
410 	default:
411 		/*
412 		 * Ignore any other record types
413 		 */
414 		break;
415 	}
416 done:
417 	rel_buffer(data_buffer);
418 }
419 
/*
 * Dictionary hash table.  RD_HSIZE is a power of two so a hash value
 * can be reduced to a bucket index by masking with RD_HMASK.  Each
 * bucket heads a chain linked through recover_dict.next.
 */
#define RD_HSIZE	32768
#define RD_HMASK	(RD_HSIZE - 1)

struct recover_dict *RDHash[RD_HSIZE];
424 
425 static
426 struct recover_dict *
427 get_dict(int64_t obj_id, uint16_t pfs_id)
428 {
429 	struct recover_dict *dict;
430 	int i;
431 
432 	if (obj_id == 0)
433 		return(NULL);
434 
435 	i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
436 	for (dict = RDHash[i]; dict; dict = dict->next) {
437 		if (dict->obj_id == obj_id &&
438 		    dict->pfs_id == pfs_id) {
439 			break;
440 		}
441 	}
442 	if (dict == NULL) {
443 		dict = malloc(sizeof(*dict));
444 		bzero(dict, sizeof(*dict));
445 		dict->obj_id = obj_id;
446 		dict->pfs_id = pfs_id;
447 		dict->next = RDHash[i];
448 		dict->size = -1;
449 		RDHash[i] = dict;
450 
451 		/*
452 		 * Always connect dangling dictionary entries to object 1
453 		 * (the root of the PFS).
454 		 *
455 		 * DICTF_PARENT will not be set until we know what the
456 		 * real parent directory object is.
457 		 */
458 		if (dict->obj_id != HAMMER_OBJID_ROOT)
459 			dict->parent = get_dict(1, pfs_id);
460 	}
461 	return(dict);
462 }
463 
/*
 * Working state for building a path in two passes over the same
 * parent chain: PI_FIGURE adds up the required length in len,
 * PI_LOAD writes the path text at next.
 */
struct path_info {
	enum { PI_FIGURE, PI_LOAD } state;	/* which pass is running */
	uint16_t	pfs_id;			/* used for the "PFS%05d" element */
	char		*base;			/* start of the path buffer */
	char		*next;			/* write position within the buffer */
	int		len;			/* byte count from the PI_FIGURE pass */
};

static void recover_path_helper(struct recover_dict *, struct path_info *);
473 
474 static
475 char *
476 recover_path(struct recover_dict *dict)
477 {
478 	struct path_info info;
479 
480 	bzero(&info, sizeof(info));
481 	info.pfs_id = dict->pfs_id;
482 	info.state = PI_FIGURE;
483 	recover_path_helper(dict, &info);
484 	info.base = malloc(info.len);
485 	info.next = info.base;
486 	info.state = PI_LOAD;
487 	recover_path_helper(dict, &info);
488 
489 	return(info.base);
490 }
491 
492 static
493 void
494 recover_path_helper(struct recover_dict *dict, struct path_info *info)
495 {
496 	/*
497 	 * Calculate path element length
498 	 */
499 	dict->flags |= DICTF_TRAVERSED;
500 
501 	switch(info->state) {
502 	case PI_FIGURE:
503 		if (dict->obj_id == HAMMER_OBJID_ROOT)
504 			info->len += 8;
505 		else if (dict->name)
506 			info->len += strlen(dict->name);
507 		else
508 			info->len += 6 + 16;
509 		++info->len;
510 
511 		if (dict->parent &&
512 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
513 			recover_path_helper(dict->parent, info);
514 		} else {
515 			info->len += strlen(TargetDir) + 1;
516 		}
517 		break;
518 	case PI_LOAD:
519 		if (dict->parent &&
520 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
521 			recover_path_helper(dict->parent, info);
522 		} else {
523 			strcpy(info->next, TargetDir);
524 			info->next += strlen(info->next);
525 		}
526 
527 		*info->next++ = '/';
528 		if (dict->obj_id == HAMMER_OBJID_ROOT) {
529 			snprintf(info->next, 8+1, "PFS%05d", info->pfs_id);
530 		} else if (dict->name) {
531 			strcpy(info->next, dict->name);
532 		} else {
533 			snprintf(info->next, 6+16+1, "obj_0x%016jx",
534 				(uintmax_t)dict->obj_id);
535 		}
536 		info->next += strlen(info->next);
537 		break;
538 	}
539 	dict->flags &= ~DICTF_TRAVERSED;
540 }
541 
/*
 * Replace every non-printable byte of the NUL-terminated string str
 * with 'x', in place.
 *
 * The byte is converted to unsigned char before being handed to
 * isprint().  Passing a plain char is undefined for bytes >= 0x80 on
 * platforms where char is signed, because the value becomes negative.
 */
static
void
sanitize_string(char *str)
{
	for (; *str != '\0'; ++str) {
		if (!isprint((unsigned char)*str))
			*str = 'x';
	}
}
552