xref: /dragonfly/sys/vfs/hammer/hammer_recover.c (revision 4a65f651)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.29 2008/07/26 05:36:21 dillon Exp $
35  */
36 
37 #include "hammer.h"
38 
39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail,
40 			hammer_off_t end_off);
41 static void hammer_recover_copy_undo(hammer_off_t undo_offset,
42 			char *src, char *dst, int bytes);
43 #if 0
44 static void hammer_recover_debug_dump(int w, char *buf, int bytes);
45 #endif
46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
47 			hammer_fifo_undo_t undo, int bytes);
48 
/*
 * Recover a filesystem on mount
 *
 * Scans the UNDO FIFO backwards from next_offset towards first_offset,
 * replaying each UNDO record to roll meta-data back to the last
 * committed state.  On success the FIFO is truncated to zero length
 * (first_offset = next_offset = last_offset) and, on a read-write
 * mount, the recovered buffers are flushed to disk.
 *
 * Returns 0 on success or EIO if the UNDO space is corrupt or
 * unreadable.
 *
 * NOTE: No information from the root volume has been cached in the
 * hammer_mount structure yet, so we need to access the root volume's
 * buffer directly.
 */
int
hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;	/* current scan position (exclusive end of record) */
	hammer_off_t bytes;		/* bytes of UNDO remaining to process */
	hammer_fifo_tail_t tail;
	hammer_fifo_undo_t undo;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	int error;

	/*
	 * Examine the UNDO FIFO.  If it is empty the filesystem is clean
	 * and no action need be taken.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];

	if (rootmap->first_offset == rootmap->next_offset)
		return(0);

	first_offset = rootmap->first_offset;
	last_offset  = rootmap->next_offset;

	/*
	 * Compute the number of valid FIFO bytes.  The FIFO is circular:
	 * if last_offset has wrapped below first_offset the valid range
	 * runs from first_offset to alloc_offset plus the wrapped portion
	 * at the start of the zone.
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			(last_offset & HAMMER_OFF_LONG_MASK);
	}
	kprintf("HAMMER(%s) Start Recovery %016llx - %016llx "
		"(%lld bytes of UNDO)%s\n",
		root_volume->ondisk->vol_name,
		(long long)first_offset,
		(long long)last_offset,
		(long long)bytes,
		(hmp->ronly ? " (RO)" : "(RW)"));
	/*
	 * A byte count larger than the entire undo zone indicates a
	 * corrupt volume header.
	 */
	if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) {
		kprintf("Undo size is absurd, unable to mount\n");
		return(EIO);
	}

	/*
	 * Scan the UNDOs backwards.
	 */
	scan_offset = last_offset;
	buffer = NULL;
	/* next_offset beyond the end of the undo zone is also corruption */
	if (scan_offset > rootmap->alloc_offset) {
		kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n",
			root_volume->ondisk->vol_name,
			(long long)scan_offset);
		error = EIO;
		goto done;
	}

	while ((int64_t)bytes > 0) {
		if (hammer_debug_general & 0x0080)
			kprintf("scan_offset %016llx\n",
				(long long)scan_offset);
		/*
		 * Circular FIFO wrap: when the scan reaches the very
		 * start of the undo zone, continue from the zone's end.
		 */
		if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
			scan_offset = rootmap->alloc_offset;
			continue;
		}
		/*
		 * A TAIL record cannot begin before the start of the
		 * undo zone.
		 */
		if (scan_offset - sizeof(*tail) <
		    HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) {
			kprintf("HAMMER(%s) UNDO record at %016llx FIFO "
				"underflow\n",
				root_volume->ondisk->vol_name,
				(long long)scan_offset);
			error = EIO;
			break;
		}
		/*
		 * Read the TAIL record ending at scan_offset.  The tail
		 * tells us the total size of the record so we can step
		 * backwards to its head.
		 */
		tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
				    &error, &buffer);
		if (error) {
			kprintf("HAMMER(%s) Unable to read UNDO TAIL "
				"at %016llx\n",
				root_volume->ondisk->vol_name,
				(long long)scan_offset - sizeof(*tail));
			break;
		}

		if (hammer_check_tail_signature(tail, scan_offset) != 0) {
			kprintf("HAMMER(%s) Illegal UNDO TAIL signature "
				"at %016llx\n",
				root_volume->ondisk->vol_name,
				(long long)scan_offset - sizeof(*tail));
			error = EIO;
			break;
		}
		/* step back from the tail to the head of the record */
		undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);

		error = hammer_recover_undo(hmp, root_volume, undo,
				HAMMER_BUFSIZE -
				(int)((char *)undo - (char *)buffer->ondisk));
		if (error) {
			kprintf("HAMMER(%s) UNDO record at %016llx failed\n",
				root_volume->ondisk->vol_name,
				(long long)scan_offset - tail->tail_size);
			break;
		}
		scan_offset -= tail->tail_size;
		bytes -= tail->tail_size;

		/*
		 * If too many dirty buffers have built up we have to flush'm
		 * out.  As long as we do not flush out the volume header
		 * a crash here should not cause any problems.
		 *
		 * buffer must be released so the flush can assert that
		 * all buffers are idle.
		 */
		if (hammer_flusher_meta_limit(hmp)) {
			if (buffer) {
				hammer_rel_buffer(buffer, 0);
				buffer = NULL;
			}
			if (hmp->ronly == 0) {
				hammer_recover_flush_buffers(hmp, root_volume,
							     0);
				kprintf("HAMMER(%s) Continuing recovery\n",
					root_volume->ondisk->vol_name);
			} else {
				/*
				 * On a read-only mount nothing may be
				 * written back, so running out of buffer
				 * cache is fatal to recovery.
				 */
				kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n",
					root_volume->ondisk->vol_name);
				error = EIO;
				break;
			}
		}
	}
done:
	if (buffer)
		hammer_rel_buffer(buffer, 0);

	/*
	 * After completely flushing all the recovered buffers the volume
	 * header will also be flushed.  Force the UNDO FIFO to 0-length.
	 */
	if (root_volume->io.recovered == 0) {
		hammer_ref_volume(root_volume);
		root_volume->io.recovered = 1;
	}

	/*
	 * Finish up flushing (or discarding) recovered buffers.  On
	 * success the FIFO is emptied by setting first_offset ==
	 * next_offset == last_offset; on failure all recovered buffers
	 * are discarded (final == -1).
	 */
	if (error == 0) {
		hammer_modify_volume(NULL, root_volume, NULL, 0);
		rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
		rootmap->first_offset = last_offset;
		rootmap->next_offset = last_offset;
		hammer_modify_volume_done(root_volume);
		if (hmp->ronly == 0)
			hammer_recover_flush_buffers(hmp, root_volume, 1);
	} else {
		hammer_recover_flush_buffers(hmp, root_volume, -1);
	}
	kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name);
	return (error);
}
217 
218 static int
219 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off)
220 {
221 	int max_bytes;
222 
223 	max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK);
224 	max_bytes += sizeof(*tail);
225 
226 	/*
227 	 * tail overlaps buffer boundary
228 	 */
229 	if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) {
230 		return(1);
231 	}
232 
233 	/*
234 	 * signature check, the tail signature is allowed to be the head
235 	 * signature only for 8-byte PADs.
236 	 */
237 	switch(tail->tail_signature) {
238 	case HAMMER_TAIL_SIGNATURE:
239 		break;
240 	case HAMMER_HEAD_SIGNATURE:
241 		if (tail->tail_type != HAMMER_HEAD_TYPE_PAD ||
242 		    tail->tail_size != sizeof(*tail)) {
243 			return(2);
244 		}
245 		break;
246 	}
247 
248 	/*
249 	 * The undo structure must not overlap a buffer boundary.
250 	 */
251 	if (tail->tail_size < sizeof(*tail) || tail->tail_size > max_bytes) {
252 		return(3);
253 	}
254 	return(0);
255 }
256 
/*
 * Validate and apply a single UNDO record.
 *
 * 'undo' points at the record head; 'bytes' is the number of bytes
 * available from the head to the end of the containing buffer and is
 * used only for bounds checking.  PAD records and non-UNDO record
 * types are validated and skipped (return 0).
 *
 * Returns 0 on success or EIO if the record fails validation or its
 * target volume/buffer cannot be accessed.
 */
static int
hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
		    hammer_fifo_undo_t undo, int bytes)
{
	hammer_fifo_tail_t tail;
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	hammer_off_t buf_offset;
	int zone;
	int error;
	int vol_no;
	int max_bytes;
	u_int32_t offset;
	u_int32_t crc;

	/*
	 * Basic sanity checks
	 */
	if (bytes < HAMMER_HEAD_ALIGN) {
		kprintf("HAMMER: Undo alignment error (%d)\n", bytes);
		return(EIO);
	}
	if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) {
		kprintf("HAMMER: Bad head signature %04x\n",
			undo->head.hdr_signature);
		return(EIO);
	}
	if (undo->head.hdr_size < HAMMER_HEAD_ALIGN ||
	    undo->head.hdr_size > bytes) {
		kprintf("HAMMER: Bad size %d\n", bytes);
		return(EIO);
	}

	/*
	 * Skip PAD records.  Note that PAD records also do not require
	 * a tail and may have a truncated structure.
	 */
	if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD)
		return(0);

	/*
	 * Check the CRC: crc32 over the first HAMMER_FIFO_HEAD_CRCOFF
	 * bytes of the head, xor'd with the crc32 of everything
	 * following the head structure.
	 */
	crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
	      crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head));
	if (undo->head.hdr_crc != crc) {
		kprintf("HAMMER: Undo record CRC failed %08x %08x\n",
			undo->head.hdr_crc, crc);
		return(EIO);
	}


	/*
	 * Check the tail -- it must agree with the head on both the
	 * record size and type.
	 */
	bytes = undo->head.hdr_size;
	tail = (void *)((char *)undo + bytes - sizeof(*tail));
	if (tail->tail_size != undo->head.hdr_size) {
		kprintf("HAMMER: Bad tail size %d\n", tail->tail_size);
		return(EIO);
	}
	if (tail->tail_type != undo->head.hdr_type) {
		kprintf("HAMMER: Bad tail type %d\n", tail->tail_type);
		return(EIO);
	}

	/*
	 * Only process UNDO records
	 */
	if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
		return(0);

	/*
	 * Validate the UNDO record.  The saved payload must fit between
	 * the undo header and the tail.
	 */
	max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail);
	if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) {
		kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n",
			undo->undo_data_bytes, max_bytes);
		return(EIO);
	}

	/*
	 * The undo offset may only be a zone-1 or zone-2 offset.
	 *
	 * Currently we only support a zone-1 offset representing the
	 * volume header.
	 */
	zone = HAMMER_ZONE_DECODE(undo->undo_offset);
	offset = undo->undo_offset & HAMMER_BUFMASK;

	/* the restored data must not cross a buffer boundary */
	if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) {
		kprintf("HAMMER: Corrupt UNDO record, bad offset\n");
		return (EIO);
	}

	switch(zone) {
	case HAMMER_ZONE_RAW_VOLUME_INDEX:
		/*
		 * Zone-1: restore the saved image directly into a
		 * volume header.
		 */
		vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL) {
			kprintf("HAMMER: UNDO record, "
				"cannot access volume %d\n", vol_no);
			break;
		}
		hammer_modify_volume(NULL, volume, NULL, 0);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)volume->ondisk + offset,
					 undo->undo_data_bytes);
		hammer_modify_volume_done(volume);

		/*
		 * Multiple modifications may be made to the same buffer.
		 * Also, the volume header cannot be written out until
		 * everything else has been flushed.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 *
		 * The first modification leaves the volume referenced
		 * (io.recovered = 1); subsequent ones drop the extra ref.
		 */
		if (volume->io.recovered == 0)
			volume->io.recovered = 1;
		else
			hammer_rel_volume(volume, 0);
		break;
	case HAMMER_ZONE_RAW_BUFFER_INDEX:
		/*
		 * Zone-2: restore the saved image into a raw disk buffer.
		 */
		buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
		buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
					   0, &error);
		if (buffer == NULL) {
			kprintf("HAMMER: UNDO record, "
				"cannot access buffer %016llx\n",
				(long long)undo->undo_offset);
			break;
		}
		hammer_modify_buffer(NULL, buffer, NULL, 0);
		hammer_recover_copy_undo(undo->undo_offset,
					 (char *)(undo + 1),
					 (char *)buffer->ondisk + offset,
					 undo->undo_data_bytes);
		hammer_modify_buffer_done(buffer);

		/*
		 * Multiple modifications may be made to the same buffer,
		 * improve performance by delaying the flush.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 *
		 * As with volumes, the first modification keeps the
		 * buffer referenced via io.recovered.
		 */
		if (buffer->io.recovered == 0)
			buffer->io.recovered = 1;
		else
			hammer_rel_buffer(buffer, 0);
		break;
	default:
		kprintf("HAMMER: Corrupt UNDO record\n");
		error = EIO;
	}
	return (error);
}
415 
416 static void
417 hammer_recover_copy_undo(hammer_off_t undo_offset,
418 			 char *src, char *dst, int bytes)
419 {
420 	if (hammer_debug_general & 0x0080) {
421 		kprintf("UNDO %016llx: %d\n",
422 			(long long)undo_offset, bytes);
423 	}
424 #if 0
425 	kprintf("UNDO %016llx:", (long long)undo_offset);
426 	hammer_recover_debug_dump(22, dst, bytes);
427 	kprintf("%22s", "to:");
428 	hammer_recover_debug_dump(22, src, bytes);
429 #endif
430 	bcopy(src, dst, bytes);
431 }
432 
#if 0

/*
 * Debug helper (compiled out): hex-dump 'bytes' bytes of buf, 16 bytes
 * per line, with continuation lines indented by 'w' columns.
 */
static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int n;

	for (n = 0; n < bytes; ++n) {
		/* start a new indented line every 16 bytes */
		if (n != 0 && (n & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[n]);
	}
	kprintf("\n");
}

#endif
449 
450 /*
451  * Flush recovered buffers from recovery operations.  The call to this
452  * routine may be delayed if a read-only mount was made and then later
453  * upgraded to read-write.
454  *
455  * The volume header is always written last.  The UNDO FIFO will be forced
456  * to zero-length by setting next_offset to first_offset.  This leaves the
457  * (now stale) UNDO information used to recover the disk available for
458  * forensic analysis.
459  *
460  * final is typically 0 or 1.  The volume header is only written if final
461  * is 1.  If final is -1 the recovered buffers are discarded instead of
462  * written and root_volume can also be passed as NULL in that case.
463  */
464 static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
465 static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
466 
void
hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
			     int final)
{
	/*
	 * Flush the buffers out asynchronously, wait for all the I/O to
	 * complete, then do it again to destroy the buffer cache buffer
	 * so it doesn't alias something later on.
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);
	hammer_io_wait_all(hmp, "hmrrcw");
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);

	/*
	 * Flush all volume headers except the root volume.  If final < 0
	 * we discard all volume headers including the root volume
	 * (the callback treats a NULL data argument as "discard").
	 */
	if (final >= 0) {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, root_volume);
	} else {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, NULL);
	}

	/*
	 * Finalize the root volume header.  Wait for all other I/O to
	 * drain first so the root volume header is the very last thing
	 * written (see the function comment above: the header write is
	 * what truncates the UNDO FIFO on disk).
	 */
	if (root_volume && root_volume->io.recovered && final > 0) {
		crit_enter();
		while (hmp->io_running_space > 0)
			tsleep(&hmp->io_running_space, 0, "hmrflx", 0);
		crit_exit();
		root_volume->io.recovered = 0;
		hammer_io_flush(&root_volume->io, 0);
		hammer_rel_volume(root_volume, 0);
	}
}
507 
508 /*
509  * Callback to flush volume headers.  If discarding data will be NULL and
510  * all volume headers (including the root volume) will be discarded.
511  * Otherwise data is the root_volume and we flush all volume headers
512  * EXCEPT the root_volume.
513  */
514 static
515 int
516 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
517 {
518 	hammer_volume_t root_volume = data;
519 
520 	if (volume->io.recovered && volume != root_volume) {
521 		volume->io.recovered = 0;
522 		if (root_volume != NULL)
523 			hammer_io_flush(&volume->io, 0);
524 		else
525 			hammer_io_clear_modify(&volume->io, 1);
526 		hammer_rel_volume(volume, 0);
527 	}
528 	return(0);
529 }
530 
531 static
532 int
533 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
534 {
535 	int final = *(int *)data;
536 
537 	if (buffer->io.recovered) {
538 		buffer->io.recovered = 0;
539 		buffer->io.reclaim = 1;
540 		if (final < 0)
541 			hammer_io_clear_modify(&buffer->io, 1);
542 		else
543 			hammer_io_flush(&buffer->io, 0);
544 		hammer_rel_buffer(buffer, 0);
545 	} else {
546 		KKASSERT(buffer->io.lock.refs == 0);
547 		++hammer_count_refedbufs;
548 		hammer_ref(&buffer->io.lock);
549 		buffer->io.reclaim = 1;
550 		hammer_rel_buffer(buffer, 1);
551 	}
552 	return(0);
553 }
554 
555