1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.29 2008/07/26 05:36:21 dillon Exp $ 35 */ 36 37 #include "hammer.h" 38 39 static int hammer_check_tail_signature(hammer_fifo_tail_t tail, 40 hammer_off_t end_off); 41 static void hammer_recover_copy_undo(hammer_off_t undo_offset, 42 char *src, char *dst, int bytes); 43 #if 0 44 static void hammer_recover_debug_dump(int w, char *buf, int bytes); 45 #endif 46 static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume, 47 hammer_fifo_undo_t undo, int bytes); 48 49 /* 50 * Recover a filesystem on mount 51 * 52 * NOTE: No information from the root volume has been cached in the 53 * hammer_mount structure yet, so we need to access the root volume's 54 * buffer directly. 55 */ 56 int 57 hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume) 58 { 59 hammer_blockmap_t rootmap; 60 hammer_buffer_t buffer; 61 hammer_off_t scan_offset; 62 hammer_off_t bytes; 63 hammer_fifo_tail_t tail; 64 hammer_fifo_undo_t undo; 65 hammer_off_t first_offset; 66 hammer_off_t last_offset; 67 int error; 68 69 /* 70 * Examine the UNDO FIFO. If it is empty the filesystem is clean 71 * and no action need be taken. 72 */ 73 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 74 75 if (rootmap->first_offset == rootmap->next_offset) 76 return(0); 77 78 first_offset = rootmap->first_offset; 79 last_offset = rootmap->next_offset; 80 81 if (last_offset >= first_offset) { 82 bytes = last_offset - first_offset; 83 } else { 84 bytes = rootmap->alloc_offset - first_offset + 85 (last_offset & HAMMER_OFF_LONG_MASK); 86 } 87 kprintf("HAMMER(%s) Start Recovery %016llx - %016llx " 88 "(%lld bytes of UNDO)%s\n", 89 root_volume->ondisk->vol_name, 90 (long long)first_offset, 91 (long long)last_offset, 92 (long long)bytes, 93 (hmp->ronly ? 
" (RO)" : "(RW)")); 94 if (bytes > (rootmap->alloc_offset & HAMMER_OFF_LONG_MASK)) { 95 kprintf("Undo size is absurd, unable to mount\n"); 96 return(EIO); 97 } 98 99 /* 100 * Scan the UNDOs backwards. 101 */ 102 scan_offset = last_offset; 103 buffer = NULL; 104 if (scan_offset > rootmap->alloc_offset) { 105 kprintf("HAMMER(%s) UNDO record at %016llx FIFO overflow\n", 106 root_volume->ondisk->vol_name, 107 (long long)scan_offset); 108 error = EIO; 109 goto done; 110 } 111 112 while ((int64_t)bytes > 0) { 113 if (hammer_debug_general & 0x0080) 114 kprintf("scan_offset %016llx\n", 115 (long long)scan_offset); 116 if (scan_offset == HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) { 117 scan_offset = rootmap->alloc_offset; 118 continue; 119 } 120 if (scan_offset - sizeof(*tail) < 121 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0)) { 122 kprintf("HAMMER(%s) UNDO record at %016llx FIFO " 123 "underflow\n", 124 root_volume->ondisk->vol_name, 125 (long long)scan_offset); 126 error = EIO; 127 break; 128 } 129 tail = hammer_bread(hmp, scan_offset - sizeof(*tail), 130 &error, &buffer); 131 if (error) { 132 kprintf("HAMMER(%s) Unable to read UNDO TAIL " 133 "at %016llx\n", 134 root_volume->ondisk->vol_name, 135 (long long)scan_offset - sizeof(*tail)); 136 break; 137 } 138 139 if (hammer_check_tail_signature(tail, scan_offset) != 0) { 140 kprintf("HAMMER(%s) Illegal UNDO TAIL signature " 141 "at %016llx\n", 142 root_volume->ondisk->vol_name, 143 (long long)scan_offset - sizeof(*tail)); 144 error = EIO; 145 break; 146 } 147 undo = (void *)((char *)tail + sizeof(*tail) - tail->tail_size); 148 149 error = hammer_recover_undo(hmp, root_volume, undo, 150 HAMMER_BUFSIZE - 151 (int)((char *)undo - (char *)buffer->ondisk)); 152 if (error) { 153 kprintf("HAMMER(%s) UNDO record at %016llx failed\n", 154 root_volume->ondisk->vol_name, 155 (long long)scan_offset - tail->tail_size); 156 break; 157 } 158 scan_offset -= tail->tail_size; 159 bytes -= tail->tail_size; 160 161 /* 162 * If too many 
dirty buffers have built up we have to flush'm 163 * out. As long as we do not flush out the volume header 164 * a crash here should not cause any problems. 165 * 166 * buffer must be released so the flush can assert that 167 * all buffers are idle. 168 */ 169 if (hammer_flusher_meta_limit(hmp)) { 170 if (buffer) { 171 hammer_rel_buffer(buffer, 0); 172 buffer = NULL; 173 } 174 if (hmp->ronly == 0) { 175 hammer_recover_flush_buffers(hmp, root_volume, 176 0); 177 kprintf("HAMMER(%s) Continuing recovery\n", 178 root_volume->ondisk->vol_name); 179 } else { 180 kprintf("HAMMER(%s) Recovery failure: Insufficient buffer cache to hold dirty buffers on read-only mount!\n", 181 root_volume->ondisk->vol_name); 182 error = EIO; 183 break; 184 } 185 } 186 } 187 done: 188 if (buffer) 189 hammer_rel_buffer(buffer, 0); 190 191 /* 192 * After completely flushing all the recovered buffers the volume 193 * header will also be flushed. Force the UNDO FIFO to 0-length. 194 */ 195 if (root_volume->io.recovered == 0) { 196 hammer_ref_volume(root_volume); 197 root_volume->io.recovered = 1; 198 } 199 200 /* 201 * Finish up flushing (or discarding) recovered buffers 202 */ 203 if (error == 0) { 204 hammer_modify_volume(NULL, root_volume, NULL, 0); 205 rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; 206 rootmap->first_offset = last_offset; 207 rootmap->next_offset = last_offset; 208 hammer_modify_volume_done(root_volume); 209 if (hmp->ronly == 0) 210 hammer_recover_flush_buffers(hmp, root_volume, 1); 211 } else { 212 hammer_recover_flush_buffers(hmp, root_volume, -1); 213 } 214 kprintf("HAMMER(%s) End Recovery\n", root_volume->ondisk->vol_name); 215 return (error); 216 } 217 218 static int 219 hammer_check_tail_signature(hammer_fifo_tail_t tail, hammer_off_t end_off) 220 { 221 int max_bytes; 222 223 max_bytes = ((end_off - sizeof(*tail)) & HAMMER_BUFMASK); 224 max_bytes += sizeof(*tail); 225 226 /* 227 * tail overlaps buffer boundary 228 */ 229 if (((end_off - 
sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64) { 230 return(1); 231 } 232 233 /* 234 * signature check, the tail signature is allowed to be the head 235 * signature only for 8-byte PADs. 236 */ 237 switch(tail->tail_signature) { 238 case HAMMER_TAIL_SIGNATURE: 239 break; 240 case HAMMER_HEAD_SIGNATURE: 241 if (tail->tail_type != HAMMER_HEAD_TYPE_PAD || 242 tail->tail_size != sizeof(*tail)) { 243 return(2); 244 } 245 break; 246 } 247 248 /* 249 * The undo structure must not overlap a buffer boundary. 250 */ 251 if (tail->tail_size < sizeof(*tail) || tail->tail_size > max_bytes) { 252 return(3); 253 } 254 return(0); 255 } 256 257 static int 258 hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume, 259 hammer_fifo_undo_t undo, int bytes) 260 { 261 hammer_fifo_tail_t tail; 262 hammer_volume_t volume; 263 hammer_buffer_t buffer; 264 hammer_off_t buf_offset; 265 int zone; 266 int error; 267 int vol_no; 268 int max_bytes; 269 u_int32_t offset; 270 u_int32_t crc; 271 272 /* 273 * Basic sanity checks 274 */ 275 if (bytes < HAMMER_HEAD_ALIGN) { 276 kprintf("HAMMER: Undo alignment error (%d)\n", bytes); 277 return(EIO); 278 } 279 if (undo->head.hdr_signature != HAMMER_HEAD_SIGNATURE) { 280 kprintf("HAMMER: Bad head signature %04x\n", 281 undo->head.hdr_signature); 282 return(EIO); 283 } 284 if (undo->head.hdr_size < HAMMER_HEAD_ALIGN || 285 undo->head.hdr_size > bytes) { 286 kprintf("HAMMER: Bad size %d\n", bytes); 287 return(EIO); 288 } 289 290 /* 291 * Skip PAD records. Note that PAD records also do not require 292 * a tail and may have a truncated structure. 
293 */ 294 if (undo->head.hdr_type == HAMMER_HEAD_TYPE_PAD) 295 return(0); 296 297 /* 298 * Check the CRC 299 */ 300 crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^ 301 crc32(&undo->head + 1, undo->head.hdr_size - sizeof(undo->head)); 302 if (undo->head.hdr_crc != crc) { 303 kprintf("HAMMER: Undo record CRC failed %08x %08x\n", 304 undo->head.hdr_crc, crc); 305 return(EIO); 306 } 307 308 309 /* 310 * Check the tail 311 */ 312 bytes = undo->head.hdr_size; 313 tail = (void *)((char *)undo + bytes - sizeof(*tail)); 314 if (tail->tail_size != undo->head.hdr_size) { 315 kprintf("HAMMER: Bad tail size %d\n", tail->tail_size); 316 return(EIO); 317 } 318 if (tail->tail_type != undo->head.hdr_type) { 319 kprintf("HAMMER: Bad tail type %d\n", tail->tail_type); 320 return(EIO); 321 } 322 323 /* 324 * Only process UNDO records 325 */ 326 if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO) 327 return(0); 328 329 /* 330 * Validate the UNDO record. 331 */ 332 max_bytes = undo->head.hdr_size - sizeof(*undo) - sizeof(*tail); 333 if (undo->undo_data_bytes < 0 || undo->undo_data_bytes > max_bytes) { 334 kprintf("HAMMER: Corrupt UNDO record, undo_data_bytes %d/%d\n", 335 undo->undo_data_bytes, max_bytes); 336 return(EIO); 337 } 338 339 /* 340 * The undo offset may only be a zone-1 or zone-2 offset. 341 * 342 * Currently we only support a zone-1 offset representing the 343 * volume header. 
344 */ 345 zone = HAMMER_ZONE_DECODE(undo->undo_offset); 346 offset = undo->undo_offset & HAMMER_BUFMASK; 347 348 if (offset + undo->undo_data_bytes > HAMMER_BUFSIZE) { 349 kprintf("HAMMER: Corrupt UNDO record, bad offset\n"); 350 return (EIO); 351 } 352 353 switch(zone) { 354 case HAMMER_ZONE_RAW_VOLUME_INDEX: 355 vol_no = HAMMER_VOL_DECODE(undo->undo_offset); 356 volume = hammer_get_volume(hmp, vol_no, &error); 357 if (volume == NULL) { 358 kprintf("HAMMER: UNDO record, " 359 "cannot access volume %d\n", vol_no); 360 break; 361 } 362 hammer_modify_volume(NULL, volume, NULL, 0); 363 hammer_recover_copy_undo(undo->undo_offset, 364 (char *)(undo + 1), 365 (char *)volume->ondisk + offset, 366 undo->undo_data_bytes); 367 hammer_modify_volume_done(volume); 368 369 /* 370 * Multiple modifications may be made to the same buffer. 371 * Also, the volume header cannot be written out until 372 * everything else has been flushed. This also 373 * covers the read-only case by preventing the kernel from 374 * flushing the buffer. 375 */ 376 if (volume->io.recovered == 0) 377 volume->io.recovered = 1; 378 else 379 hammer_rel_volume(volume, 0); 380 break; 381 case HAMMER_ZONE_RAW_BUFFER_INDEX: 382 buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64; 383 buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE, 384 0, &error); 385 if (buffer == NULL) { 386 kprintf("HAMMER: UNDO record, " 387 "cannot access buffer %016llx\n", 388 (long long)undo->undo_offset); 389 break; 390 } 391 hammer_modify_buffer(NULL, buffer, NULL, 0); 392 hammer_recover_copy_undo(undo->undo_offset, 393 (char *)(undo + 1), 394 (char *)buffer->ondisk + offset, 395 undo->undo_data_bytes); 396 hammer_modify_buffer_done(buffer); 397 398 /* 399 * Multiple modifications may be made to the same buffer, 400 * improve performance by delaying the flush. This also 401 * covers the read-only case by preventing the kernel from 402 * flushing the buffer. 
403 */ 404 if (buffer->io.recovered == 0) 405 buffer->io.recovered = 1; 406 else 407 hammer_rel_buffer(buffer, 0); 408 break; 409 default: 410 kprintf("HAMMER: Corrupt UNDO record\n"); 411 error = EIO; 412 } 413 return (error); 414 } 415 416 static void 417 hammer_recover_copy_undo(hammer_off_t undo_offset, 418 char *src, char *dst, int bytes) 419 { 420 if (hammer_debug_general & 0x0080) { 421 kprintf("UNDO %016llx: %d\n", 422 (long long)undo_offset, bytes); 423 } 424 #if 0 425 kprintf("UNDO %016llx:", (long long)undo_offset); 426 hammer_recover_debug_dump(22, dst, bytes); 427 kprintf("%22s", "to:"); 428 hammer_recover_debug_dump(22, src, bytes); 429 #endif 430 bcopy(src, dst, bytes); 431 } 432 433 #if 0 434 435 static void 436 hammer_recover_debug_dump(int w, char *buf, int bytes) 437 { 438 int i; 439 440 for (i = 0; i < bytes; ++i) { 441 if (i && (i & 15) == 0) 442 kprintf("\n%*.*s", w, w, ""); 443 kprintf(" %02x", (unsigned char)buf[i]); 444 } 445 kprintf("\n"); 446 } 447 448 #endif 449 450 /* 451 * Flush recovered buffers from recovery operations. The call to this 452 * routine may be delayed if a read-only mount was made and then later 453 * upgraded to read-write. 454 * 455 * The volume header is always written last. The UNDO FIFO will be forced 456 * to zero-length by setting next_offset to first_offset. This leaves the 457 * (now stale) UNDO information used to recover the disk available for 458 * forensic analysis. 459 * 460 * final is typically 0 or 1. The volume header is only written if final 461 * is 1. If final is -1 the recovered buffers are discarded instead of 462 * written and root_volume can also be passed as NULL in that case. 
 */
static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);

void
hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
			     int final)
{
	/*
	 * Flush the buffers out asynchronously, wait for all the I/O to
	 * complete, then do it again to destroy the buffer cache buffer
	 * so it doesn't alias something later on.
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);
	hammer_io_wait_all(hmp, "hmrrcw");
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);

	/*
	 * Flush all volume headers except the root volume.  If final < 0
	 * we discard all volume headers including the root volume.
	 */
	if (final >= 0) {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, root_volume);
	} else {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, NULL);
	}

	/*
	 * Finalize the root volume header.  Wait for all outstanding
	 * write I/O to drain first so the header is the very last thing
	 * to hit the media (the ordering the recovery design depends on).
	 */
	if (root_volume && root_volume->io.recovered && final > 0) {
		crit_enter();
		while (hmp->io_running_space > 0)
			tsleep(&hmp->io_running_space, 0, "hmrflx", 0);
		crit_exit();
		root_volume->io.recovered = 0;
		hammer_io_flush(&root_volume->io, 0);
		/* drop the ref taken when io.recovered was set */
		hammer_rel_volume(root_volume, 0);
	}
}

/*
 * Callback to flush volume headers.  If discarding data will be NULL and
 * all volume headers (including the root volume) will be discarded.
 * Otherwise data is the root_volume and we flush all volume headers
 * EXCEPT the root_volume.
513 */ 514 static 515 int 516 hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data) 517 { 518 hammer_volume_t root_volume = data; 519 520 if (volume->io.recovered && volume != root_volume) { 521 volume->io.recovered = 0; 522 if (root_volume != NULL) 523 hammer_io_flush(&volume->io, 0); 524 else 525 hammer_io_clear_modify(&volume->io, 1); 526 hammer_rel_volume(volume, 0); 527 } 528 return(0); 529 } 530 531 static 532 int 533 hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data) 534 { 535 int final = *(int *)data; 536 537 if (buffer->io.recovered) { 538 buffer->io.recovered = 0; 539 buffer->io.reclaim = 1; 540 if (final < 0) 541 hammer_io_clear_modify(&buffer->io, 1); 542 else 543 hammer_io_flush(&buffer->io, 0); 544 hammer_rel_buffer(buffer, 0); 545 } else { 546 KKASSERT(buffer->io.lock.refs == 0); 547 ++hammer_count_refedbufs; 548 hammer_ref(&buffer->io.lock); 549 buffer->io.reclaim = 1; 550 hammer_rel_buffer(buffer, 1); 551 } 552 return(0); 553 } 554 555