/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER undo - undo buffer/FIFO management.
 */

#include "hammer.h"

static int hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2);

RB_GENERATE2(hammer_und_rb_tree, hammer_undo, rb_node,
	     hammer_und_rb_compare, hammer_off_t, offset);

/*
 * Convert a zone-3 undo offset into a zone-2 buffer offset.
 */
hammer_off_t
hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap __debugvar;
	hammer_off_t result_offset;
	int i;

	KKASSERT((zone3_off & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_UNDO);
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(0);
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(undomap->alloc_offset) ==
		 HAMMER_ZONE_UNDO_INDEX);
	KKASSERT(zone3_off < undomap->alloc_offset);

	/*
	 * Add the big-block offset of the zone-3 address to the zone-2
	 * base stored in undo array slot i, which yields the zone-2
	 * address.
	 */
	i = (zone3_off & HAMMER_OFF_SHORT_MASK) / HAMMER_BIGBLOCK_SIZE;
	result_offset = root_volume->ondisk->vol0_undo_array[i] +
			(zone3_off & HAMMER_BIGBLOCK_MASK64);

	hammer_rel_volume(root_volume, 0);
	return(result_offset);
}
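
/*
 * Worked example (illustrative numbers only, assuming the standard 8MB
 * big-block): a zone-3 offset whose short offset is 0x01234567 selects
 * undo array slot i = 0x01234567 / 0x800000 = 2, and the remaining
 * in-big-block offset is 0x01234567 & 0x7fffff = 0x234567.  If
 * vol0_undo_array[2] holds a hypothetical zone-2 base of
 * 0x2000000040000000, the lookup returns 0x2000000040234567.
 */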

/*
 * Generate UNDO record(s) for the block of data at the specified zone-1
 * or zone-2 offset.
 *
 * The recovery code will execute UNDOs in reverse order, allowing overlaps.
 * All the UNDOs are executed together so if we already laid one down we
 * do not have to lay another one down for the same range.
 *
 * For HAMMER version 4+ UNDO a 512 byte boundary is enforced and a PAD
 * will be laid down for any unused space.  UNDO FIFO media structures
 * will implement the hdr_seq field (it used to be reserved01), and
 * both flush and recovery mechanics will be very different.
 *
 * WARNING!  See also hammer_generate_redo() in hammer_redo.c
 */
int
hammer_generate_undo(hammer_transaction_t trans,
		     hammer_off_t zone_off, void *base, int len)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_undo_t undo;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	int error;
	int bytes;
	int n;

	hmp = trans->hmp;

	/*
	 * A SYNC record may be required before we can lay down a general
	 * UNDO.  This ensures that the nominal recovery span contains
	 * at least one SYNC record telling the recovery code how far
	 * out-of-span it must go to run the REDOs.
	 */
	if ((hmp->flags & HAMMER_MOUNT_REDO_SYNC) == 0 &&
	    hmp->version >= HAMMER_VOL_VERSION_FOUR) {
		hammer_generate_redo_sync(trans);
	}

	/*
	 * Enter the offset into our undo history.  If there is an existing
	 * undo we do not have to generate a new one.
	 */
	if (hammer_enter_undo_history(hmp, zone_off, len) == EALREADY)
		return(0);

	root_volume = trans->rootvol;
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/* no undo recursion */
	hammer_modify_volume_noundo(NULL, root_volume);
	hammer_lock_ex(&hmp->undo_lock);

	/* undo had better not roll over (loose test) */
	if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
		hpanic("insufficient undo FIFO space!");

	/*
	 * Loop until the undo for the entire range has been laid down.
	 */
	while (len) {
		/*
		 * Fetch the layout offset in the UNDO FIFO, wrap it as
		 * necessary.
		 */
		if (undomap->next_offset == undomap->alloc_offset) {
			undomap->next_offset =
				HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
		}
		next_offset = undomap->next_offset;

		/*
		 * This is a tail-chasing FIFO, when we hit the start of a new
		 * buffer we don't have to read it in.
		 */
		if ((next_offset & HAMMER_BUFMASK) == 0) {
			undo = hammer_bnew(hmp, next_offset, &error, &buffer);
			hammer_format_undo(undo, hmp->undo_seqno ^ 0x40000000);
		} else {
			undo = hammer_bread(hmp, next_offset, &error, &buffer);
		}
		if (error)
			break;

		/* no undo recursion */
		hammer_modify_buffer_noundo(NULL, buffer);

		/*
		 * Calculate how big a media structure fits up to the next
		 * alignment point and how large a data payload we can
		 * accommodate.
		 *
		 * If n calculates to 0 or negative there is no room for
		 * anything but a PAD.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)next_offset & HAMMER_UNDO_MASK);
		n = bytes -
		    (int)sizeof(struct hammer_fifo_undo) -
		    (int)sizeof(struct hammer_fifo_tail);

		/*
		 * If available space is insufficient for any payload
		 * we have to lay down a PAD.
		 *
		 * The minimum PAD is 8 bytes and the head and tail will
		 * overlap each other in that case.  PADs do not have
		 * sequence numbers or CRCs.
		 *
		 * A PAD may not start on a boundary.  That is, every
		 * 512-byte block in the UNDO/REDO FIFO must begin with
		 * a record containing a sequence number.
		 */
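		/*
		 * Worked example (illustrative numbers, assuming the usual
		 * v4 media sizes: 512-byte HAMMER_UNDO_ALIGN, a 32-byte
		 * hammer_fifo_undo and an 8-byte hammer_fifo_tail): at
		 * next_offset % 512 == 480 we get bytes = 32 and
		 * n = 32 - 32 - 8 = -8, so only a PAD fits.  At
		 * next_offset % 512 == 0 we get bytes = 512 and room for
		 * up to n = 472 payload bytes.
		 */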
		if (n <= 0) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
			undomap->next_offset += bytes;
			hammer_modify_buffer_done(buffer);
			hammer_stats_undo += bytes;
			continue;
		}

		/*
		 * Calculate the actual payload and recalculate the size
		 * of the media structure as necessary.
		 */
		if (n > len) {
			n = len;
			bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
				 ~HAMMER_HEAD_ALIGN_MASK) +
				(int)sizeof(struct hammer_fifo_undo) +
				(int)sizeof(struct hammer_fifo_tail);
		}
		if (hammer_debug_general & 0x0080) {
			hdkprintf("undo %016llx %d %d\n",
				  (long long)next_offset, bytes, n);
		}

		undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
		undo->head.hdr_type = HAMMER_HEAD_TYPE_UNDO;
		undo->head.hdr_size = bytes;
		undo->head.hdr_seq = hmp->undo_seqno++;
		undo->head.hdr_crc = 0;
		undo->undo_offset = zone_off;
		undo->undo_data_bytes = n;
		bcopy(base, undo + 1, n);

		tail = (void *)((char *)undo + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_UNDO;
		tail->tail_size = bytes;

		KKASSERT(bytes >= sizeof(undo->head));
		undo->head.hdr_crc = crc32(undo, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(&undo->head + 1, bytes - sizeof(undo->head));
		undomap->next_offset += bytes;
		hammer_stats_undo += bytes;

		/*
		 * Before we finish off the buffer we have to deal with any
		 * junk between the end of the media structure we just laid
		 * down and the UNDO alignment boundary.  We do this by laying
		 * down a dummy PAD.  Even though we will probably overwrite
		 * it almost immediately we have to do this so recovery runs
		 * can iterate the UNDO space without having to depend on
		 * the indices in the volume header.
		 *
		 * This dummy PAD will be overwritten on the next undo so
		 * we do not adjust undomap->next_offset.
		 */
		bytes = HAMMER_UNDO_ALIGN -
			((int)undomap->next_offset & HAMMER_UNDO_MASK);
		if (bytes != HAMMER_UNDO_ALIGN) {
			KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
			undo = (void *)(tail + 1);
			tail = (void *)((char *)undo + bytes - sizeof(*tail));
			if ((void *)undo != (void *)tail) {
				tail->tail_signature = HAMMER_TAIL_SIGNATURE;
				tail->tail_type = HAMMER_HEAD_TYPE_PAD;
				tail->tail_size = bytes;
			}
			undo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
			undo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
			undo->head.hdr_size = bytes;
			/* NO CRC OR SEQ NO */
		}
		hammer_modify_buffer_done(buffer);

		/*
		 * Adjust for loop
		 */
		len -= n;
		base = (char *)base + n;
		zone_off += n;
	}
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(error);
}
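
/*
 * Illustrative call pattern only, not part of this file's logic: a
 * hypothetical caller saves the pre-image of a media range before
 * overwriting it so a crash recovery run can roll the change back.
 * The names old_data, new_data and media_ptr are placeholders.
 */
#if 0
	error = hammer_generate_undo(trans, zone_off, old_data, len);
	if (error == 0) {
		/* pre-image is in the FIFO; safe to modify the media now */
		bcopy(new_data, media_ptr, len);
	}
#endif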

/*
 * Preformat a new UNDO block.  We could read the old one in but we get
 * better performance if we just pre-format a new one.
 *
 * The recovery code always works forwards so the caller just makes sure the
 * seqno is not contiguous with prior UNDOs or ancient UNDOs now being
 * overwritten.
 *
 * The preformatted UNDO headers use the smallest possible sector size
 * (512) to ensure that any missed media writes are caught.
 *
 * NOTE: Also used by the REDO code.
 */
void
hammer_format_undo(void *base, u_int32_t seqno)
{
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	int i;
	int bytes = HAMMER_UNDO_ALIGN;

	bzero(base, HAMMER_BUFSIZE);

	for (i = 0; i < HAMMER_BUFSIZE; i += bytes) {
		head = (void *)((char *)base + i);
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;
		head->hdr_crc = 0;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(head + 1, bytes - sizeof(*head));
	}
}
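
/*
 * Illustration with hypothetical numbers: hammer_generate_undo() seeds
 * a freshly formatted buffer with (hmp->undo_seqno ^ 0x40000000), so if
 * the live sequence is at 0x00001234 the preformatted DUMMY headers
 * start at 0x40001234.  A forward recovery scan therefore sees an
 * obvious sequence discontinuity and stops instead of replaying stale
 * preformatted headers.
 */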

/*
 * HAMMER version 4+ conversion support.
 *
 * Convert a HAMMER version < 4 UNDO FIFO area to a 4+ UNDO FIFO area.
 * The 4+ UNDO FIFO area is backwards compatible.  The conversion is
 * needed to initialize the sequence space and place headers on the
 * new 512-byte undo boundary.
 */
int
hammer_upgrade_undo_4(hammer_transaction_t trans)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t undomap;
	hammer_buffer_t buffer = NULL;
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	hammer_off_t next_offset;
	u_int32_t seqno;
	int error;
	int bytes;

	hmp = trans->hmp;

	root_volume = trans->rootvol;

	/* no undo recursion */
	hammer_lock_ex(&hmp->undo_lock);
	hammer_modify_volume_noundo(NULL, root_volume);

	/*
	 * Adjust the in-core undomap and the on-disk undomap.
	 */
	next_offset = HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
	undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	undomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	undomap->next_offset = next_offset;
	undomap->first_offset = next_offset;

	/*
	 * Loop over the entire UNDO space creating DUMMY entries.  Sequence
	 * numbers are assigned.
	 */
	seqno = 0;
	bytes = HAMMER_UNDO_ALIGN;

	while (next_offset != undomap->alloc_offset) {
		head = hammer_bnew(hmp, next_offset, &error, &buffer);
		if (error)
			break;
		hammer_modify_buffer_noundo(NULL, buffer);

		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno;
		head->hdr_crc = 0;

		tail = (void *)((char *)head + bytes - sizeof(*tail));
		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
			crc32(head + 1, bytes - sizeof(*head));
		hammer_modify_buffer_done(buffer);

		hammer_stats_undo += bytes;
		next_offset += HAMMER_UNDO_ALIGN;
		++seqno;
	}

	/*
	 * The sequence number will be the next sequence number to lay down.
	 */
	hmp->undo_seqno = seqno;
	hmkprintf(hmp, "version upgrade seqno start %08x\n", seqno);

	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->undo_lock);

	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return (error);
}

/*
 * UNDO HISTORY API
 *
 * It is not necessary to lay out an undo record for the same address space
 * multiple times.  Maintain a cache of recent undos.
 */

/*
 * Enter an undo into the history.  Return EALREADY if the request completely
 * covers a previous request.
 */
int
hammer_enter_undo_history(hammer_mount_t hmp, hammer_off_t offset, int bytes)
{
	hammer_undo_t node;
	hammer_undo_t onode __debugvar;

	node = RB_LOOKUP(hammer_und_rb_tree, &hmp->rb_undo_root, offset);
	if (node) {
		TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
		TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
		if (bytes <= node->bytes)
			return(EALREADY);
		node->bytes = bytes;
		return(0);
	}
	if (hmp->undo_alloc != HAMMER_MAX_UNDOS) {
		node = &hmp->undos[hmp->undo_alloc++];
	} else {
		node = TAILQ_FIRST(&hmp->undo_lru_list);
		TAILQ_REMOVE(&hmp->undo_lru_list, node, lru_entry);
		RB_REMOVE(hammer_und_rb_tree, &hmp->rb_undo_root, node);
	}
	node->offset = offset;
	node->bytes = bytes;
	TAILQ_INSERT_TAIL(&hmp->undo_lru_list, node, lru_entry);
	onode = RB_INSERT(hammer_und_rb_tree, &hmp->rb_undo_root, node);
	KKASSERT(onode == NULL);
	return(0);
}

void
hammer_clear_undo_history(hammer_mount_t hmp)
{
	RB_INIT(&hmp->rb_undo_root);
	TAILQ_INIT(&hmp->undo_lru_list);
	hmp->undo_alloc = 0;
}

/*
 * Return how much of the undo FIFO has been used.
 *
 * The calculation includes undo FIFO space still reserved from a previous
 * flush (because it will still be run on recovery if a crash occurs and
 * we can't overwrite it yet).
 */
int64_t
hammer_undo_used(hammer_transaction_t trans)
{
	hammer_blockmap_t cundomap;
	hammer_blockmap_t dundomap;
	int64_t max_bytes __debugvar;
	int64_t bytes;

	cundomap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	dundomap = &trans->rootvol->ondisk->
			vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];

	if (dundomap->first_offset <= cundomap->next_offset) {
		bytes = cundomap->next_offset - dundomap->first_offset;
	} else {
		bytes = cundomap->alloc_offset - dundomap->first_offset +
			(cundomap->next_offset & HAMMER_OFF_LONG_MASK);
	}
	max_bytes = cundomap->alloc_offset & HAMMER_OFF_SHORT_MASK;
	KKASSERT(bytes <= max_bytes);
	return(bytes);
}
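
/*
 * Worked example (illustrative in-zone byte offsets): in an 8MB UNDO
 * zone, if the on-disk first_offset sits at 0x700000 and the in-core
 * next_offset has wrapped around to 0x100000, the used space is
 * 0x800000 - 0x700000 + 0x100000 = 0x200000, i.e. the unreclaimed tail
 * of the FIFO plus the wrapped head.
 */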

/*
 * Return how much of the undo FIFO is available for new records.
 */
int64_t
hammer_undo_space(hammer_transaction_t trans)
{
	hammer_blockmap_t rootmap;
	int64_t max_bytes;

	rootmap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK;
	return(max_bytes - hammer_undo_used(trans));
}

int64_t
hammer_undo_max(hammer_mount_t hmp)
{
	hammer_blockmap_t rootmap;
	int64_t max_bytes;

	rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	max_bytes = rootmap->alloc_offset & HAMMER_OFF_SHORT_MASK;

	return(max_bytes);
}

/*
 * Returns 1 if the undo buffer should be reclaimed on release.  The
 * only undo buffer we do NOT want to reclaim is the one at the current
 * append offset.
 */
int
hammer_undo_reclaim(hammer_io_t io)
{
	hammer_blockmap_t undomap;
	hammer_off_t next_offset;

	undomap = &io->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
	next_offset = undomap->next_offset & ~HAMMER_BUFMASK64;
	if (HAMMER_ITOB(io)->zoneX_offset == next_offset)
		return(0);
	return(1);
}

static int
hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2)
{
	if (node1->offset < node2->offset)
		return(-1);
	if (node1->offset > node2->offset)
		return(1);
	return(0);
}