1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.29 2008/07/26 05:36:21 dillon Exp $ 35 */ 36 37 #include "hammer.h" 38 39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail, 40 hammer_off_t end_off); 41 static void hammer_recover_copy_undo(hammer_off_t undo_offset, 42 char *src, char *dst, int bytes); 43 #if 0 44 static void hammer_recover_debug_dump(int w, char *buf, int bytes); 45 #endif 46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume, 47 hammer_fifo_undo_t undo, int bytes); 48 49 /* 50 * Recover a filesystem on mount 51 * 52 * NOTE: No information from the root volume has been cached in the 53 * hammer_mount structure yet, so we need to access the root volume's 54 * buffer directly. 55 */ 56 int 57 hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume) 58 { 59 hammer_blockmap_t rootmap; 60 hammer_buffer_t buffer; 61 hammer_off_t scan_offset; 62 hammer_off_t bytes; 63 hammer_fifo_tail_t tail; 64 hammer_fifo_undo_t undo; 65 hammer_off_t first_offset; 66 hammer_off_t last_offset; 67 int error; 68 69 /* 70 * Examine the UNDO FIFO. If it is empty the filesystem is clean 71 * and no action need be taken. 72 */ 73 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 74 75 if (rootmap->first_offset == rootmap->next_offset) 76 return(0); 77 78 first_offset = rootmap->first_offset; 79 last_offset = rootmap->next_offset; 80 81 if (last_offset >= first_offset) { 82 bytes = last_offset - first_offset; 83 } else { 84 bytes = rootmap->alloc_offset - first_offset + 85 (last_offset & HAMMER_OFF_LONG_MASK); 86 } 87 kprintf("HAMMER(%s) Start Recovery %016llx - %016llx " 88 "(%lld bytes of UNDO)%s\n", 89 root_volume->ondisk->vol_name, 90 first_offset, last_offset, 91 bytes, 92 (hmp->ronly ? " (RO)" : "(RW)")); 93 if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) { 94 kprintf("Undo size is absurd, unable to mount\n"); 95 return(EIO); 96 } 97 98 /* 99 * Scan the UNDOs backwards. 100 */ 101 scan_offset = last_offset; 102 buffer = NULL; 103 if (scan_offset > rootmap->alloc_offset) { 104 kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n", 105 root_volume->ondisk->vol_name, 106 scan_offset); 107 error = EIO; 108 goto done; 109 } 110 111 while ((int64_t)bytes > 0) { 112 if (hammer_debug_general & 0x0080) 113 kprintf("scan_offset %016llx\n", scan_offset); 114 if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) { 115 scan_offset = rootmap->alloc_offset; 116 continue; 117 } 118 if (scan_offset - sizeof(*tail) < 119 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) { 120 kprintf("HAMMER(%s) UNDO record at %016llx FIFO " 121 "underflow\n", 122 root_volume->ondisk->vol_name, 123 scan_offset); 124 error = EIO; 125 break; 126 } 127 tail = hammer_bread(hmp, scan_offset - sizeof(*tail), 128 &error, &buffer); 129 if (error) { 130 kprintf("HAMMER(%s) Unable to read UNDO TAIL " 131 "at %016llx\n", 132 root_volume->ondisk->vol_name, 133 scan_offset - sizeof(*tail)); 134 break; 135 } 136 137 if (hammer_check_tail_signature(tail, scan_offset) != 0) { 138 kprintf("HAMMER(%s) Illegal UNDO TAIL signature " 139 "at %016llx\n", 140 root_volume->ondisk->vol_name, 141 scan_offset - sizeof(*tail)); 142 error = EIO; 143 break; 144 } 145 undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size); 146 147 error = hammer_recover_undo(hmp, root_volume, undo, 148 HAMMER_BUFSIZE - 149 (int)((char *)undo - (char *)buffer->ondisk)); 150 if (error) { 151 kprintf("HAMMER(%s) UNDO record at %016llx failed\n", 152 root_volume->ondisk->vol_name, 153 scan_offset - tail->tail_size); 154 break; 155 } 156 scan_offset -= tail->tail_size; 157 bytes -= tail->tail_size; 158 159 /* 160 * If too many dirty buffers have built up we have to flush'm 161 * out. As long as we do not flush out the volume header 162 * a crash here should not cause any problems. 163 * 164 * buffer must be released so the flush can assert that 165 * all buffers are idle. 166 */ 167 if (hammer_flusher_meta_limit(hmp)) { 168 if (buffer) { 169 hammer_rel_buffer(buffer, 0); 170 buffer = NULL; 171 } 172 if (hmp->ronly == 0) { 173 hammer_recover_flush_buffers(hmp, root_volume, 174 0); 175 kprintf("HAMMER(%s) Continuing recovery\n", 176 root_volume->ondisk->vol_name); 177 } else { 178 kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n", 179 root_volume->ondisk->vol_name); 180 error = EIO; 181 break; 182 } 183 } 184 } 185 done: 186 if (buffer) 187 hammer_rel_buffer(buffer, 0); 188 189 /* 190 * After completely flushing all the recovered buffers the volume 191 * header will also be flushed. Force the UNDO FIFO to 0-length. 192 */ 193 if (root_volume->io.recovered == 0) { 194 hammer_ref_volume(root_volume); 195 root_volume->io.recovered = 1; 196 } 197 198 /* 199 * Finish up flushing (or discarding) recovered buffers 200 */ 201 if (error == 0) { 202 hammer_modify_volume(NULL, root_volume, NULL, 0); 203 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 204 rootmap->first_offset = last_offset; 205 rootmap->next_offset = last_offset; 206 hammer_modify_volume_done(root_volume); 207 if (hmp->ronly == 0) 208 hammer_recover_flush_buffers(hmp, root_volume, 1); 209 } else { 210 hammer_recover_flush_buffers(hmp, root_volume, -1); 211 } 212 kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name); 213 return (error); 214 } 215 216 static int 217 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off) 218 { 219 int max_bytes; 220 221 max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK); 222 max_bytes += sizeof(*tail); 223 224 /* 225 * tail overlaps buffer boundary 226 */ 227 if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) { 228 return(1); 229 } 230 231 /* 232 * signature check, the tail signature is allowed to be the head 233 * signature only for 8-byte PADs. 234 */ 235 switch(tail->tail_signature) { 236 case HAMMER_TAIL_SIGNATURE: 237 break; 238 case HAMMER_HEAD_SIGNATURE: 239 if (tail->tail_type != HAMMER_HEAD_TYPE_PAD || 240 tail->tail_size != sizeof(*tail)) { 241 return(2); 242 } 243 break; 244 } 245 246 /* 247 * The undo structure must not overlap a buffer boundary. 248 */ 249 if (tail->tail_size < sizeof(*tail) || tail->tail_size > max_bytes) { 250 return(3); 251 } 252 return(0); 253 } 254 255 static int 256 hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume, 257 hammer_fifo_undo_t undo, int bytes) 258 { 259 hammer_fifo_tail_t tail; 260 hammer_volume_t volume; 261 hammer_buffer_t buffer; 262 hammer_off_t buf_offset; 263 int zone; 264 int error; 265 int vol_no; 266 int max_bytes; 267 u_int32_t offset; 268 u_int32_t crc; 269 270 /* 271 * Basic sanity checks 272 */ 273 if (bytes < HAMMER_HEAD_ALIGN) { 274 kprintf("HAMMER: Undo alignment error (%d)\n", bytes); 275 return(EIO); 276 } 277 if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) { 278 kprintf("HAMMER: Bad head signature %04x\n", 279 undo->head.hdr_signature); 280 return(EIO); 281 } 282 if (undo->head.hdr_size < HAMMER_HEAD_ALIGN || 283 undo->head.hdr_size > bytes) { 284 kprintf("HAMMER: Bad size %d\n", bytes); 285 return(EIO); 286 } 287 288 /* 289 * Skip PAD records. Note that PAD records also do not require 290 * a tail and may have a truncated structure. 291 */ 292 if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD) 293 return(0); 294 295 /* 296 * Check the CRC 297 */ 298 crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^ 299 crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head)); 300 if (undo->head.hdr_crc != crc) { 301 kprintf("HAMMER: Undo record CRC failed %08x %08x\n", 302 undo->head.hdr_crc, crc); 303 return(EIO); 304 } 305 306 307 /* 308 * Check the tail 309 */ 310 bytes = undo->head.hdr_size; 311 tail = (void *)((char *)undo + bytes - sizeof(*tail)); 312 if (tail->tail_size != undo->head.hdr_size) { 313 kprintf("HAMMER: Bad tail size %d\n", tail->tail_size); 314 return(EIO); 315 } 316 if (tail->tail_type != undo->head.hdr_type) { 317 kprintf("HAMMER: Bad tail type %d\n", tail->tail_type); 318 return(EIO); 319 } 320 321 /* 322 * Only process UNDO records 323 */ 324 if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO) 325 return(0); 326 327 /* 328 * Validate the UNDO record. 329 */ 330 max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail); 331 if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) { 332 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n", 333 undo->undo_data_bytes, max_bytes); 334 return(EIO); 335 } 336 337 /* 338 * The undo offset may only be a zone-1 or zone-2 offset. 339 * 340 * Currently we only support a zone-1 offset representing the 341 * volume header. 342 */ 343 zone = HAMMER_ZONE_DECODE(undo->undo_offset); 344 offset = undo->undo_offset & HAMMER_BUFMASK; 345 346 if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) { 347 kprintf("HAMMER: Corrupt UNDO record, bad offset\n"); 348 return (EIO); 349 } 350 351 switch(zone) { 352 case HAMMER_ZONE_RAW_VOLUME_INDEX: 353 vol_no = HAMMER_VOL_DECODE(undo->undo_offset); 354 volume = hammer_get_volume(hmp, vol_no, &error); 355 if (volume == NULL) { 356 kprintf("HAMMER: UNDO record, " 357 "cannot access volume %d\n", vol_no); 358 break; 359 } 360 hammer_modify_volume(NULL, volume, NULL, 0); 361 hammer_recover_copy_undo(undo->undo_offset, 362 (char *)(undo + 1), 363 (char *)volume->ondisk + offset, 364 undo->undo_data_bytes); 365 hammer_modify_volume_done(volume); 366 367 /* 368 * Multiple modifications may be made to the same buffer. 369 * Also, the volume header cannot be written out until 370 * everything else has been flushed. This also 371 * covers the read-only case by preventing the kernel from 372 * flushing the buffer. 373 */ 374 if (volume->io.recovered == 0) 375 volume->io.recovered = 1; 376 else 377 hammer_rel_volume(volume, 0); 378 break; 379 case HAMMER_ZONE_RAW_BUFFER_INDEX: 380 buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64; 381 buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE, 382 0, &error); 383 if (buffer == NULL) { 384 kprintf("HAMMER: UNDO record, " 385 "cannot access buffer %016llx\n", 386 undo->undo_offset); 387 break; 388 } 389 hammer_modify_buffer(NULL, buffer, NULL, 0); 390 hammer_recover_copy_undo(undo->undo_offset, 391 (char *)(undo + 1), 392 (char *)buffer->ondisk + offset, 393 undo->undo_data_bytes); 394 hammer_modify_buffer_done(buffer); 395 396 /* 397 * Multiple modifications may be made to the same buffer, 398 * improve performance by delaying the flush. This also 399 * covers the read-only case by preventing the kernel from 400 * flushing the buffer. 401 */ 402 if (buffer->io.recovered == 0) 403 buffer->io.recovered = 1; 404 else 405 hammer_rel_buffer(buffer, 0); 406 break; 407 default: 408 kprintf("HAMMER: Corrupt UNDO record\n"); 409 error = EIO; 410 } 411 return (error); 412 } 413 414 static void 415 hammer_recover_copy_undo(hammer_off_t undo_offset, 416 char *src, char *dst, int bytes) 417 { 418 if (hammer_debug_general & 0x0080) 419 kprintf("UNDO %016llx: %d\n", undo_offset, bytes); 420 #if 0 421 kprintf("UNDO %016llx:", undo_offset); 422 hammer_recover_debug_dump(22, dst, bytes); 423 kprintf("%22s", "to:"); 424 hammer_recover_debug_dump(22, src, bytes); 425 #endif 426 bcopy(src, dst, bytes); 427 } 428 429 #if 0 430 431 static void 432 hammer_recover_debug_dump(int w, char *buf, int bytes) 433 { 434 int i; 435 436 for (i = 0; i < bytes; ++i) { 437 if (i && (i & 15) == 0) 438 kprintf("\n%*.*s", w, w, ""); 439 kprintf(" %02x", (unsigned char)buf[i]); 440 } 441 kprintf("\n"); 442 } 443 444 #endif 445 446 /* 447 * Flush recovered buffers from recovery operations. The call to this 448 * routine may be delayed if a read-only mount was made and then later 449 * upgraded to read-write. 450 * 451 * The volume header is always written last. The UNDO FIFO will be forced 452 * to zero-length by setting next_offset to first_offset. This leaves the 453 * (now stale) UNDO information used to recover the disk available for 454 * forensic analysis. 455 * 456 * final is typically 0 or 1. The volume header is only written if final 457 * is 1. If final is -1 the recovered buffers are discarded instead of 458 * written and root_volume can also be passed as NULL in that case. 459 */ 460 static int hammer_recover_flush_volume_callback(hammer_volume_t, void *); 461 static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *); 462 463 void 464 hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume, 465 int final) 466 { 467 /* 468 * Flush the buffers out asynchronously, wait for all the I/O to 469 * complete, then do it again to destroy the buffer cache buffer 470 * so it doesn't alias something later on. 471 */ 472 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL, 473 hammer_recover_flush_buffer_callback, &final); 474 hammer_io_wait_all(hmp, "hmrrcw"); 475 RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL, 476 hammer_recover_flush_buffer_callback, &final); 477 478 /* 479 * Flush all volume headers except the root volume. If final < 0 480 * we discard all volume headers including the root volume. 481 */ 482 if (final >= 0) { 483 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL, 484 hammer_recover_flush_volume_callback, root_volume); 485 } else { 486 RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL, 487 hammer_recover_flush_volume_callback, NULL); 488 } 489 490 /* 491 * Finalize the root volume header. 492 */ 493 if (root_volume && root_volume->io.recovered && final > 0) { 494 crit_enter(); 495 while (hmp->io_running_space > 0) 496 tsleep(&hmp->io_running_space, 0, "hmrflx", 0); 497 crit_exit(); 498 root_volume->io.recovered = 0; 499 hammer_io_flush(&root_volume->io); 500 hammer_rel_volume(root_volume, 0); 501 } 502 } 503 504 /* 505 * Callback to flush volume headers. If discarding data will be NULL and 506 * all volume headers (including the root volume) will be discarded. 507 * Otherwise data is the root_volume and we flush all volume headers 508 * EXCEPT the root_volume. 509 */ 510 static 511 int 512 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data) 513 { 514 hammer_volume_t root_volume = data; 515 516 if (volume->io.recovered && volume != root_volume) { 517 volume->io.recovered = 0; 518 if (root_volume != NULL) 519 hammer_io_flush(&volume->io); 520 else 521 hammer_io_clear_modify(&volume->io, 1); 522 hammer_rel_volume(volume, 0); 523 } 524 return(0); 525 } 526 527 static 528 int 529 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data) 530 { 531 int final = *(int *)data; 532 533 if (buffer->io.recovered) { 534 buffer->io.recovered = 0; 535 buffer->io.reclaim = 1; 536 if (final < 0) 537 hammer_io_clear_modify(&buffer->io, 1); 538 else 539 hammer_io_flush(&buffer->io); 540 hammer_rel_buffer(buffer, 0); 541 } else { 542 KKASSERT(buffer->io.lock.refs == 0); 543 ++hammer_count_refedbufs; 544 hammer_ref(&buffer->io.lock); 545 buffer->io.reclaim = 1; 546 hammer_rel_buffer(buffer, 1); 547 } 548 return(0); 549 } 550 551