/* Copyright (c) Mark Harmstone 2017
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */

#include "btrfs_drv.h"

#define SCRUB_UNIT 0x100000 // 1 MB

struct _scrub_context;

// Per-stripe state for one scrub read: the IRP in flight, the range read,
// the data buffer, and any checksum errors detected in it.
typedef struct {
    struct _scrub_context* context;
    PIRP Irp;
    uint64_t start;
    uint32_t length;
    IO_STATUS_BLOCK iosb;
    uint8_t* buf;
    bool csum_error;
    uint32_t* bad_csums;        // per-sector (or per-node) actual checksums, filled in on error
} scrub_context_stripe;

// Shared state for a multi-stripe scrub read; Event is signalled by the
// completion routine when stripes_left reaches zero.
typedef struct _scrub_context {
    KEVENT Event;
    scrub_context_stripe* stripes;
    LONG stripes_left;
} scrub_context;

// One path component collected while walking a file's parents upwards.
// orig_subvol is true for components inside the subvolume the inode lives in.
typedef struct {
    ANSI_STRING name;           // points into a tree item; not separately allocated
    bool orig_subvol;
    LIST_ENTRY list_entry;
} path_part;

// Reconstructs the path of the file at (subvol, inode) by walking INODE_REF /
// INODE_EXTREF items upwards, crossing subvolume boundaries via ROOT_BACKREF
// items, then logs the data checksum error and appends a scrub_error record
// to Vcb->scrub.errors.
static void log_file_checksum_error(device_extension* Vcb, uint64_t addr, uint64_t devid, uint64_t subvol, uint64_t inode, uint64_t offset) {
    LIST_ENTRY *le, parts;
    root* r = NULL;
    KEY searchkey;
    traverse_ptr tp;
    uint64_t dir;
    bool orig_subvol = true, not_in_tree = false;
    ANSI_STRING fn;
    scrub_error* err;
    NTSTATUS Status;
    ULONG utf16len;

    // find the root struct for the subvolume the inode belongs to
    le = Vcb->roots.Flink;
    while (le != &Vcb->roots) {
        root* r2 = CONTAINING_RECORD(le, root, list_entry);

        if (r2->id == subvol) {
            r = r2;
            break;
        }

        le = le->Flink;
    }

    if (!r) {
        ERR("could not find subvol %I64x\n", subvol);
        return;
    }

    InitializeListHead(&parts);

    dir = inode;

    // walk upwards from the inode, collecting name components (leaf-first)
    while (true) {
        if (dir == r->root_item.objid) {
            // reached the top of this subvolume
            if (r == Vcb->root_fileref->fcb->subvol)
                break;

            // cross into the parent subvolume via the ROOT_BACKREF item
            searchkey.obj_id = r->id;
            searchkey.obj_type = TYPE_ROOT_BACKREF;
            searchkey.offset = 0xffffffffffffffff;

            Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, NULL);
            if (!NT_SUCCESS(Status)) {
                ERR("find_item returned %08x\n", Status);
                goto end;
            }

            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
                ROOT_REF* rr = (ROOT_REF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(ROOT_REF)) {
                    ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
                    goto end;
                }

                if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
                    ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                // name points into the tree item data, not a copy
                pp->name.Buffer = rr->name;
                pp->name.Length = pp->name.MaximumLength = rr->n;
                pp->orig_subvol = false;

                InsertTailList(&parts, &pp->list_entry);

                r = NULL;

                le = Vcb->roots.Flink;
                while (le != &Vcb->roots) {
                    root* r2 = CONTAINING_RECORD(le, root, list_entry);

                    if (r2->id == tp.item->key.offset) {
                        r = r2;
                        break;
                    }

                    le = le->Flink;
                }

                if (!r) {
                    ERR("could not find subvol %I64x\n", tp.item->key.offset);
                    goto end;
                }

                dir = rr->dir;
                orig_subvol = false;
            } else {
                // subvol not linked into the directory tree
                not_in_tree = true;
                break;
            }
        } else {
            // searching for INODE_EXTREF with max offset lands on the last
            // INODE_REF/INODE_EXTREF item for this inode
            searchkey.obj_id = dir;
            searchkey.obj_type = TYPE_INODE_EXTREF;
            searchkey.offset = 0xffffffffffffffff;

            Status = find_item(Vcb, r, &tp, &searchkey, false, NULL);
            if (!NT_SUCCESS(Status)) {
                ERR("find_item returned %08x\n", Status);
                goto end;
            }

            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
                INODE_REF* ir = (INODE_REF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(INODE_REF)) {
                    ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
                    goto end;
                }

                if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
                    ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                pp->name.Buffer = ir->name;
                pp->name.Length = pp->name.MaximumLength = ir->n;
                pp->orig_subvol = orig_subvol;

                InsertTailList(&parts, &pp->list_entry);

                // key.offset is the parent directory; stop if self-referencing
                if (dir == tp.item->key.offset)
                    break;

                dir = tp.item->key.offset;
            } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
                INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(INODE_EXTREF)) {
                    ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, sizeof(INODE_EXTREF));
                    goto end;
                }

                if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
                    ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                pp->name.Buffer = ier->name;
                pp->name.Length = pp->name.MaximumLength = ier->n;
                pp->orig_subvol = orig_subvol;

                InsertTailList(&parts, &pp->list_entry);

                if (dir == ier->dir)
                    break;

                dir = ier->dir;
            } else {
                ERR("could not find INODE_REF for inode %I64x in subvol %I64x\n", dir, r->id);
                goto end;
            }
        }
    }

    fn.MaximumLength = 0;

    if (not_in_tree) {
        // drop trailing components not belonging to the original subvolume
        le = parts.Blink;
        while (le != &parts) {
            path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);
            LIST_ENTRY* le2 = le->Blink;

            if (pp->orig_subvol)
                break;

            RemoveTailList(&parts);
            ExFreePool(pp);

            le = le2;
        }
    }

    le = parts.Flink;
    while (le != &parts) {
        path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);

        fn.MaximumLength += pp->name.Length + 1;    // +1 for the backslash

        le = le->Flink;
    }

    fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
    if (!fn.Buffer) {
        ERR("out of memory\n");
        goto end;
    }

    fn.Length = 0;

    // parts were collected leaf-first, so walk backwards to build the path
    le = parts.Blink;
    while (le != &parts) {
        path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);

        fn.Buffer[fn.Length] = '\\';
        fn.Length++;

        RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
        fn.Length += pp->name.Length;

        le = le->Blink;
    }

    if (not_in_tree)
        ERR("subvol %I64x, %.*s, offset %I64x\n", subvol, fn.Length, fn.Buffer, offset);
    else
        ERR("%.*s, offset %I64x\n", fn.Length, fn.Buffer, offset);

    // first call sizes the UTF-16 conversion, second call fills it in
    Status = utf8_to_utf16(NULL, 0, &utf16len, fn.Buffer, fn.Length);
    if (!NT_SUCCESS(Status)) {
        ERR("utf8_to_utf16 1 returned %08x\n", Status);
        ExFreePool(fn.Buffer);
        goto end;
    }

    err = ExAllocatePoolWithTag(PagedPool, offsetof(scrub_error, data.filename[0]) + utf16len, ALLOC_TAG);
    if (!err) {
        ERR("out of memory\n");
        ExFreePool(fn.Buffer);
        goto end;
    }

    err->address = addr;
    err->device = devid;
    err->recovered = false;
    err->is_metadata = false;
    err->parity = false;

    err->data.subvol = not_in_tree ? subvol : 0;
    err->data.offset = offset;
    err->data.filename_length = (uint16_t)utf16len;

    Status = utf8_to_utf16(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
    if (!NT_SUCCESS(Status)) {
        ERR("utf8_to_utf16 2 returned %08x\n", Status);
        ExFreePool(fn.Buffer);
        ExFreePool(err);
        goto end;
    }

    ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);

    Vcb->scrub.num_errors++;
    InsertTailList(&Vcb->scrub.errors, &err->list_entry);

    ExReleaseResourceLite(&Vcb->scrub.stats_lock);

    ExFreePool(fn.Buffer);

end:
    // free any remaining path components
    while (!IsListEmpty(&parts)) {
        path_part* pp = CONTAINING_RECORD(RemoveHeadList(&parts), path_part, list_entry);

        ExFreePool(pp);
    }
}

// For a SHARED_DATA_REF: reads the leaf at treeaddr and logs a file checksum
// error for every EXTENT_DATA item in it referring to the bad extent.
static void log_file_checksum_error_shared(device_extension* Vcb, uint64_t treeaddr, uint64_t addr, uint64_t devid, uint64_t extent) {
    tree_header* tree;
    NTSTATUS Status;
    leaf_node* ln;
    ULONG i;

    tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
    if (!tree) {
        ERR("out of memory\n");
        return;
    }

    Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority);
    if (!NT_SUCCESS(Status)) {
        ERR("read_data returned %08x\n", Status);
        goto end;
    }

    // shared data refs always point at leaves
    if (tree->level != 0) {
        ERR("tree level was %x, expected 0\n", tree->level);
        goto end;
    }

    ln = (leaf_node*)&tree[1];

    for (i = 0; i < tree->num_items; i++) {
        if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
            EXTENT_DATA* ed = (EXTENT_DATA*)((uint8_t*)tree + sizeof(tree_header) +
ln[i].offset); 358 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 359 360 if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr) 361 log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent); 362 } 363 } 364 365 end: 366 ExFreePool(tree); 367 } 368 369 static void log_tree_checksum_error(device_extension* Vcb, uint64_t addr, uint64_t devid, uint64_t root, uint8_t level, KEY* firstitem) { 370 scrub_error* err; 371 372 err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); 373 if (!err) { 374 ERR("out of memory\n"); 375 return; 376 } 377 378 err->address = addr; 379 err->device = devid; 380 err->recovered = false; 381 err->is_metadata = true; 382 err->parity = false; 383 384 err->metadata.root = root; 385 err->metadata.level = level; 386 387 if (firstitem) { 388 ERR("root %I64x, level %u, first item (%I64x,%x,%I64x)\n", root, level, firstitem->obj_id, 389 firstitem->obj_type, firstitem->offset); 390 391 err->metadata.firstitem = *firstitem; 392 } else { 393 ERR("root %I64x, level %u\n", root, level); 394 395 RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY)); 396 } 397 398 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 399 400 Vcb->scrub.num_errors++; 401 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 402 403 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 404 } 405 406 static void log_tree_checksum_error_shared(device_extension* Vcb, uint64_t offset, uint64_t address, uint64_t devid) { 407 tree_header* tree; 408 NTSTATUS Status; 409 internal_node* in; 410 ULONG i; 411 412 tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); 413 if (!tree) { 414 ERR("out of memory\n"); 415 return; 416 } 417 418 Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority); 419 if (!NT_SUCCESS(Status)) { 420 ERR("read_data returned %08x\n", Status); 421 goto end; 422 } 423 424 
if (tree->level == 0) { 425 ERR("tree level was 0\n"); 426 goto end; 427 } 428 429 in = (internal_node*)&tree[1]; 430 431 for (i = 0; i < tree->num_items; i++) { 432 if (in[i].address == address) { 433 log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key); 434 break; 435 } 436 } 437 438 end: 439 ExFreePool(tree); 440 } 441 442 static void log_unrecoverable_error(device_extension* Vcb, uint64_t address, uint64_t devid) { 443 KEY searchkey; 444 traverse_ptr tp; 445 NTSTATUS Status; 446 EXTENT_ITEM* ei; 447 EXTENT_ITEM2* ei2 = NULL; 448 uint8_t* ptr; 449 ULONG len; 450 uint64_t rc; 451 452 // FIXME - still log even if rest of this function fails 453 454 searchkey.obj_id = address; 455 searchkey.obj_type = TYPE_METADATA_ITEM; 456 searchkey.offset = 0xffffffffffffffff; 457 458 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 459 if (!NT_SUCCESS(Status)) { 460 ERR("find_item returned %08x\n", Status); 461 return; 462 } 463 464 if ((tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM) || 465 tp.item->key.obj_id >= address + Vcb->superblock.sector_size || 466 (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) || 467 (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address) 468 ) 469 return; 470 471 if (tp.item->size < sizeof(EXTENT_ITEM)) { 472 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 473 return; 474 } 475 476 ei = (EXTENT_ITEM*)tp.item->data; 477 ptr = (uint8_t*)&ei[1]; 478 len = tp.item->size - sizeof(EXTENT_ITEM); 479 480 if (tp.item->key.obj_id == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { 481 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { 482 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, 
tp.item->key.obj_type, tp.item->key.offset,
                tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
            return;
        }

        ei2 = (EXTENT_ITEM2*)ptr;

        ptr += sizeof(EXTENT_ITEM2);
        len -= sizeof(EXTENT_ITEM2);
    }

    rc = 0;

    // walk the backrefs stored inline in the extent item, logging an error
    // for each referencing tree block or file extent
    while (len > 0) {
        uint8_t type = *ptr;

        ptr++;
        len--;

        if (type == TYPE_TREE_BLOCK_REF) {
            TREE_BLOCK_REF* tbr;

            if (len < sizeof(TREE_BLOCK_REF)) {
                ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF), len);
                break;
            }

            tbr = (TREE_BLOCK_REF*)ptr;

            // for METADATA_ITEMs (no ei2) the level is stored in key.offset
            log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);

            rc++;

            ptr += sizeof(TREE_BLOCK_REF);
            len -= sizeof(TREE_BLOCK_REF);
        } else if (type == TYPE_EXTENT_DATA_REF) {
            EXTENT_DATA_REF* edr;

            if (len < sizeof(EXTENT_DATA_REF)) {
                ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF), len);
                break;
            }

            edr = (EXTENT_DATA_REF*)ptr;

            log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);

            rc += edr->count;

            ptr += sizeof(EXTENT_DATA_REF);
            len -= sizeof(EXTENT_DATA_REF);
        } else if (type == TYPE_SHARED_BLOCK_REF) {
            SHARED_BLOCK_REF* sbr;

            if (len < sizeof(SHARED_BLOCK_REF)) {
                ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF), len);
                break;
            }

            sbr = (SHARED_BLOCK_REF*)ptr;

            log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid);

            rc++;

            ptr += sizeof(SHARED_BLOCK_REF);
            len -= sizeof(SHARED_BLOCK_REF);
        } else if (type == TYPE_SHARED_DATA_REF) {
            SHARED_DATA_REF* sdr;

            if (len < sizeof(SHARED_DATA_REF)) {
                ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF), len);
                break;
            }

            sdr = (SHARED_DATA_REF*)ptr;

            log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, tp.item->key.obj_id);

            rc += sdr->count;

            ptr += sizeof(SHARED_DATA_REF);
            len -= sizeof(SHARED_DATA_REF);
        } else {
            ERR("unknown extent type %x\n", type);
            break;
        }
    }

    // if the inline refs did not account for the whole refcount, the
    // remaining backrefs are stored as separate keyed items after this one
    if (rc < ei->refcount) {
        do {
            traverse_ptr next_tp;

            if (find_next_item(Vcb, &tp, &next_tp, false, NULL))
                tp = next_tp;
            else
                break;

            if (tp.item->key.obj_id == address) {
                if (tp.item->key.obj_type == TYPE_TREE_BLOCK_REF)
                    log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
                else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
                    EXTENT_DATA_REF* edr;

                    if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
                        ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                            tp.item->size, sizeof(EXTENT_DATA_REF));
                        break;
                    }

                    edr = (EXTENT_DATA_REF*)tp.item->data;

                    log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
                } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF)
                    log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid);
                else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF)
                    log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id);
            } else
                break;
        } while (true);
    }
}

// Central scrub error logger: recoverable errors get a scrub_error record
// directly; unrecoverable ones are resolved to files / tree blocks via
// log_unrecoverable_error.
static void log_error(device_extension* Vcb, uint64_t addr, uint64_t devid, bool metadata, bool recoverable, bool parity) {
    if (recoverable) {
        scrub_error* err;

        if (parity) {
            ERR("recovering from parity error at %I64x on device %I64x\n", addr, devid);
        } else {
            if (metadata)
                ERR("recovering from metadata checksum error at %I64x on device %I64x\n", addr, devid);
            else
                ERR("recovering from data checksum error at %I64x on device %I64x\n", addr, devid);
        }

        err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG);
        if (!err) {
            ERR("out of memory\n");
            return;
        }

        err->address = addr;
        err->device = devid;
        err->recovered = true;
        err->is_metadata = metadata;
        err->parity = parity;

        // no further detail for recovered errors; zero the union member
        if (metadata)
            RtlZeroMemory(&err->metadata, sizeof(err->metadata));
        else
            RtlZeroMemory(&err->data, sizeof(err->data));

        ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);

        Vcb->scrub.num_errors++;
        InsertTailList(&Vcb->scrub.errors, &err->list_entry);

        ExReleaseResourceLite(&Vcb->scrub.stats_lock);
    } else {
        if (metadata)
            ERR("unrecoverable metadata checksum error at %I64x\n", addr);
        else
            ERR("unrecoverable data checksum error at %I64x\n", addr);

        log_unrecoverable_error(Vcb, addr, devid);
    }
}

// I/O completion routine for scrub reads: records the IRP status in the
// stripe and signals the context event once the last stripe has completed.
_Function_class_(IO_COMPLETION_ROUTINE)
static NTSTATUS __stdcall scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
    scrub_context_stripe* stripe = conptr;
    scrub_context* context = (scrub_context*)stripe->context;
    ULONG left = InterlockedDecrement(&context->stripes_left);

    UNUSED(DeviceObject);

    stripe->iosb = Irp->IoStatus;

    if (left == 0)
        KeSetEvent(&context->Event, 0, false);

    return STATUS_MORE_PROCESSING_REQUIRED;
}

// Verifies a scrub read of a duplicated (DUP/RAID1-style) chunk: checks every
// present stripe against csum (data) or the tree headers' own checksums
// (metadata, csum == NULL), then tries to repair bad stripes from a good copy.
static NTSTATUS scrub_extent_dup(device_extension* Vcb, chunk* c, uint64_t offset, uint32_t* csum, scrub_context* context) {
    NTSTATUS Status;
    bool csum_error = false;
    ULONG i;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
    uint16_t present_devices = 0;

    if (csum) {
        ULONG good_stripe = 0xffffffff;

        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (c->devices[i]->devobj) {
                present_devices++;

                // if first stripe is okay, we only need to
check that the others are identical to it 682 if (good_stripe != 0xffffffff) { 683 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 684 context->stripes[good_stripe].length) != context->stripes[i].length) { 685 context->stripes[i].csum_error = true; 686 csum_error = true; 687 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 688 } 689 } else { 690 Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum); 691 if (Status == STATUS_CRC_ERROR) { 692 context->stripes[i].csum_error = true; 693 csum_error = true; 694 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 695 } else if (!NT_SUCCESS(Status)) { 696 ERR("check_csum returned %08x\n", Status); 697 return Status; 698 } else 699 good_stripe = i; 700 } 701 } 702 } 703 } else { 704 ULONG good_stripe = 0xffffffff; 705 706 for (i = 0; i < c->chunk_item->num_stripes; i++) { 707 ULONG j; 708 709 if (c->devices[i]->devobj) { 710 // if first stripe is okay, we only need to check that the others are identical to it 711 if (good_stripe != 0xffffffff) { 712 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 713 context->stripes[good_stripe].length) != context->stripes[i].length) { 714 context->stripes[i].csum_error = true; 715 csum_error = true; 716 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 717 } 718 } else { 719 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 720 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 721 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 722 723 if (crc32 != *((uint32_t*)th->csum) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) { 724 context->stripes[i].csum_error = true; 725 csum_error = true; 726 log_device_error(Vcb, c->devices[i], 
BTRFS_DEV_STAT_CORRUPTION_ERRORS); 727 } 728 } 729 730 if (!context->stripes[i].csum_error) 731 good_stripe = i; 732 } 733 } 734 } 735 } 736 737 if (!csum_error) 738 return STATUS_SUCCESS; 739 740 // handle checksum error 741 742 for (i = 0; i < c->chunk_item->num_stripes; i++) { 743 if (context->stripes[i].csum_error) { 744 if (csum) { 745 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(uint32_t) / Vcb->superblock.sector_size, ALLOC_TAG); 746 if (!context->stripes[i].bad_csums) { 747 ERR("out of memory\n"); 748 return STATUS_INSUFFICIENT_RESOURCES; 749 } 750 751 Status = calc_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums); 752 if (!NT_SUCCESS(Status)) { 753 ERR("calc_csum returned %08x\n", Status); 754 return Status; 755 } 756 } else { 757 ULONG j; 758 759 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(uint32_t) / Vcb->superblock.node_size, ALLOC_TAG); 760 if (!context->stripes[i].bad_csums) { 761 ERR("out of memory\n"); 762 return STATUS_INSUFFICIENT_RESOURCES; 763 } 764 765 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 766 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 767 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 768 769 context->stripes[i].bad_csums[j] = crc32; 770 } 771 } 772 } 773 } 774 775 if (present_devices > 1) { 776 ULONG good_stripe = 0xffffffff; 777 778 for (i = 0; i < c->chunk_item->num_stripes; i++) { 779 if (c->devices[i]->devobj && !context->stripes[i].csum_error) { 780 good_stripe = i; 781 break; 782 } 783 } 784 785 if (good_stripe != 0xffffffff) { 786 // log 787 788 for (i = 0; i < c->chunk_item->num_stripes; i++) { 789 if (context->stripes[i].csum_error) { 790 ULONG j; 791 792 if (csum) { 793 for (j = 0; j < 
context->stripes[i].length / Vcb->superblock.sector_size; j++) { 794 if (context->stripes[i].bad_csums[j] != csum[j]) { 795 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 796 797 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false); 798 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 799 } 800 } 801 } else { 802 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 803 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 804 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 805 806 if (context->stripes[i].bad_csums[j] != *((uint32_t*)th->csum) || th->address != addr) { 807 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false); 808 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 809 } 810 } 811 } 812 } 813 } 814 815 // write good data over bad 816 817 for (i = 0; i < c->chunk_item->num_stripes; i++) { 818 if (context->stripes[i].csum_error && !c->devices[i]->readonly) { 819 Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset, 820 context->stripes[good_stripe].buf, context->stripes[i].length); 821 822 if (!NT_SUCCESS(Status)) { 823 ERR("write_data_phys returned %08x\n", Status); 824 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); 825 return Status; 826 } 827 } 828 } 829 830 return STATUS_SUCCESS; 831 } 832 833 // if csum errors on all stripes, check sector by sector 834 835 for (i = 0; i < c->chunk_item->num_stripes; i++) { 836 ULONG j; 837 838 if (c->devices[i]->devobj) { 839 if (csum) { 840 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { 841 if (context->stripes[i].bad_csums[j] != csum[j]) { 842 ULONG k; 843 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 844 bool recovered = false; 845 846 for (k = 0; k < c->chunk_item->num_stripes; k++) { 847 if (i 
!= k && c->devices[k]->devobj && context->stripes[k].bad_csums[j] == csum[j]) { 848 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false); 849 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 850 851 RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size), 852 context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 853 854 recovered = true; 855 break; 856 } 857 } 858 859 if (!recovered) { 860 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false); 861 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 862 } 863 } 864 } 865 } else { 866 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 867 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 868 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 869 870 if (context->stripes[i].bad_csums[j] != *((uint32_t*)th->csum) || th->address != addr) { 871 ULONG k; 872 bool recovered = false; 873 874 for (k = 0; k < c->chunk_item->num_stripes; k++) { 875 if (i != k && c->devices[k]->devobj) { 876 tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size]; 877 878 if (context->stripes[k].bad_csums[j] == *((uint32_t*)th2->csum) && th2->address == addr) { 879 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false); 880 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 881 882 RtlCopyMemory(th, th2, Vcb->superblock.node_size); 883 884 recovered = true; 885 break; 886 } 887 } 888 } 889 890 if (!recovered) { 891 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false); 892 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 893 } 894 } 895 } 896 } 897 } 898 } 899 900 // write good data over bad 901 902 for (i = 0; i < c->chunk_item->num_stripes; i++) { 903 if (c->devices[i]->devobj && !c->devices[i]->readonly) { 904 
Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset, 905 context->stripes[i].buf, context->stripes[i].length); 906 if (!NT_SUCCESS(Status)) { 907 ERR("write_data_phys returned %08x\n", Status); 908 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 909 return Status; 910 } 911 } 912 } 913 914 return STATUS_SUCCESS; 915 } 916 917 for (i = 0; i < c->chunk_item->num_stripes; i++) { 918 if (c->devices[i]->devobj) { 919 ULONG j; 920 921 if (csum) { 922 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { 923 if (context->stripes[i].bad_csums[j] != csum[j]) { 924 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 925 926 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false); 927 } 928 } 929 } else { 930 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 931 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 932 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 933 934 if (context->stripes[i].bad_csums[j] != *((uint32_t*)th->csum) || th->address != addr) 935 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false); 936 } 937 } 938 } 939 } 940 941 return STATUS_SUCCESS; 942 } 943 944 static NTSTATUS scrub_extent_raid0(device_extension* Vcb, chunk* c, uint64_t offset, uint32_t length, uint16_t startoffstripe, uint32_t* csum, scrub_context* context) { 945 ULONG j; 946 uint16_t stripe; 947 uint32_t pos, *stripeoff; 948 949 pos = 0; 950 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes, ALLOC_TAG); 951 if (!stripeoff) { 952 ERR("out of memory\n"); 953 return STATUS_INSUFFICIENT_RESOURCES; 954 } 955 956 RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes); 957 958 stripe = startoffstripe; 959 while (pos < length) { 960 uint32_t readlen; 961 962 if (pos == 0) 963 readlen = 
(uint32_t)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length));
        else
            readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);

        if (csum) {
            // data: verify each sector against the expected csum
            for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
                uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + stripeoff[stripe], Vcb->superblock.sector_size);

                if (crc32 != csum[pos / Vcb->superblock.sector_size]) {
                    uint64_t addr = offset + pos;

                    log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
                    log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                }

                pos += Vcb->superblock.sector_size;
                stripeoff[stripe] += Vcb->superblock.sector_size;
            }
        } else {
            // metadata: verify each tree block's header checksum and address
            for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
                tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]);
                uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
                uint64_t addr = offset + pos;

                if (crc32 != *((uint32_t*)th->csum) || th->address != addr) {
                    log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
                    log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                }

                pos += Vcb->superblock.node_size;
                stripeoff[stripe] += Vcb->superblock.node_size;
            }
        }

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
    }

    ExFreePool(stripeoff);

    return STATUS_SUCCESS;
}

// Verifies a scrub read of a RAID10 chunk: each logical stripe has sub_stripes
// mirrored copies, which are first checked (against csum, the tree headers,
// or each other), then repaired from a good mirror where possible.
// NOTE(review): this function continues beyond the end of this chunk.
static NTSTATUS scrub_extent_raid10(device_extension* Vcb, chunk* c, uint64_t offset, uint32_t length, uint16_t startoffstripe, uint32_t* csum, scrub_context* context) {
    ULONG j;
    uint16_t stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1);
    uint32_t pos, *stripeoff;
    bool csum_error = false;
    NTSTATUS Status;

    pos = 0;
    stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
    if (!stripeoff) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes);

    stripe = startoffstripe;
    while (pos < length) {
        uint32_t readlen;

        if (pos == 0)
            readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
                                    c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
        else
            readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);

        if (csum) {
            ULONG good_stripe = 0xffffffff;
            uint16_t k;

            for (k = 0; k < sub_stripes; k++) {
                if (c->devices[(stripe * sub_stripes) + k]->devobj) {
                    // if first stripe is okay, we only need to check that the others are identical to it
                    if (good_stripe != 0xffffffff) {
                        if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
                                             context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
                                             readlen) != readlen) {
                            context->stripes[(stripe * sub_stripes) + k].csum_error = true;
                            csum_error = true;
                            log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
                            uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, Vcb->superblock.sector_size);

                            if (crc32 != csum[(pos + j) / Vcb->superblock.sector_size]) {
                                csum_error = true;
                                context->stripes[(stripe * sub_stripes) + k].csum_error = true;
                                log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                                break;
                            }
                        }

                        if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
                            good_stripe = k;
                    }
                }
            }

            pos += readlen;
            stripeoff[stripe] += readlen;
        } else {
            ULONG good_stripe = 0xffffffff;
            uint16_t k;

            for (k = 0; k < sub_stripes; k++) {
                if (c->devices[(stripe * sub_stripes) + k]->devobj) {
                    // if first stripe is okay, we only need to check that the others are identical to it
                    if (good_stripe != 0xffffffff) {
                        if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
                                             context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
                                             readlen) != readlen) {
                            context->stripes[(stripe * sub_stripes) + k].csum_error = true;
                            csum_error = true;
                            log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
                            tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
                            uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
                            uint64_t addr = offset + pos + j;

                            if (crc32 != *((uint32_t*)th->csum) || th->address != addr) {
                                csum_error = true;
                                context->stripes[(stripe * sub_stripes) + k].csum_error = true;
                                log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                                break;
                            }
                        }

                        if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
                            good_stripe = k;
                    }
                }
            }

            pos += readlen;
            stripeoff[stripe] += readlen;
        }

        stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
    }

    if (!csum_error) {
        Status = STATUS_SUCCESS;
        goto end;
    }

    // repair: for each mirror group with a bad copy and a good copy, find the
    // differing sectors/nodes and log them as recovered
    for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
        ULONG goodstripe = 0xffffffff;
        uint16_t k;
        bool hasbadstripe = false;

        if (context->stripes[j].length == 0)
            continue;

        for (k = 0; k < sub_stripes; k++) {
            if (c->devices[j + k]->devobj) {
                if (!context->stripes[j + k].csum_error)
                    goodstripe = k;
                else
                    hasbadstripe = true;
            }
        }

        if (hasbadstripe) {
            if (goodstripe != 0xffffffff) {
                for (k = 0; k < sub_stripes; k++) {
                    if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
                        uint32_t so = 0;
                        bool recovered = false;

                        pos = 0;

                        // replay the stripe rotation to map buffer offsets
                        // back to logical addresses
                        stripe = startoffstripe;
                        while (pos < length) {
                            uint32_t readlen;

                            if (pos == 0)
                                readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
                                                        c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
                            else
                                readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);

                            if (stripe == j / sub_stripes) {
                                if (csum) {
                                    ULONG l;

                                    for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
                                        if (RtlCompareMemory(context->stripes[j + k].buf + so,
                                                             context->stripes[j + goodstripe].buf + so,
                                                             Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
                                            uint64_t addr = offset + pos;

                                            log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false);

                                            recovered = true;
                                        }

                                        pos += Vcb->superblock.sector_size;
                                        so += Vcb->superblock.sector_size;
                                    }
                                } else {
                                    ULONG l;

                                    for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
                                        if (RtlCompareMemory(context->stripes[j + k].buf + so,
                                                             context->stripes[j + goodstripe].buf + so,
                                                             Vcb->superblock.node_size) != Vcb->superblock.node_size) {
                                            uint64_t addr = offset + pos;

                                            log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false);

                                            recovered = true;
                                        }

                                        pos += Vcb->superblock.node_size;
                                        so += Vcb->superblock.node_size;
                                    }
                                }
                            } else
pos += readlen; 1187 1188 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1189 } 1190 1191 if (recovered) { 1192 // write good data over bad 1193 1194 if (!c->devices[j + k]->readonly) { 1195 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1196 1197 Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset, 1198 context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length); 1199 1200 if (!NT_SUCCESS(Status)) { 1201 ERR("write_data_phys returned %08x\n", Status); 1202 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); 1203 goto end; 1204 } 1205 } 1206 } 1207 } 1208 } 1209 } else { 1210 uint32_t so = 0; 1211 bool recovered = false; 1212 1213 if (csum) { 1214 for (k = 0; k < sub_stripes; k++) { 1215 if (c->devices[j + k]->devobj) { 1216 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(uint32_t) / Vcb->superblock.sector_size, ALLOC_TAG); 1217 if (!context->stripes[j + k].bad_csums) { 1218 ERR("out of memory\n"); 1219 Status = STATUS_INSUFFICIENT_RESOURCES; 1220 goto end; 1221 } 1222 1223 Status = calc_csum(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums); 1224 if (!NT_SUCCESS(Status)) { 1225 ERR("calc_csum returned %08x\n", Status); 1226 goto end; 1227 } 1228 } 1229 } 1230 } else { 1231 for (k = 0; k < sub_stripes; k++) { 1232 if (c->devices[j + k]->devobj) { 1233 ULONG l; 1234 1235 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(uint32_t) / Vcb->superblock.node_size, ALLOC_TAG); 1236 if (!context->stripes[j + k].bad_csums) { 1237 ERR("out of memory\n"); 1238 Status = STATUS_INSUFFICIENT_RESOURCES; 1239 goto end; 1240 } 1241 1242 for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) { 1243 tree_header* th = 
(tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size]; 1244 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1245 1246 context->stripes[j + k].bad_csums[l] = crc32; 1247 } 1248 } 1249 } 1250 } 1251 1252 pos = 0; 1253 1254 stripe = startoffstripe; 1255 while (pos < length) { 1256 uint32_t readlen; 1257 1258 if (pos == 0) 1259 readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length, 1260 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1261 else 1262 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 1263 1264 if (stripe == j / sub_stripes) { 1265 ULONG l; 1266 1267 if (csum) { 1268 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { 1269 uint32_t crc32 = csum[pos / Vcb->superblock.sector_size]; 1270 bool has_error = false; 1271 1272 goodstripe = 0xffffffff; 1273 for (k = 0; k < sub_stripes; k++) { 1274 if (c->devices[j + k]->devobj) { 1275 if (context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) 1276 has_error = true; 1277 else 1278 goodstripe = k; 1279 } 1280 } 1281 1282 if (has_error) { 1283 if (goodstripe != 0xffffffff) { 1284 for (k = 0; k < sub_stripes; k++) { 1285 if (c->devices[j + k]->devobj && context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) { 1286 uint64_t addr = offset + pos; 1287 1288 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false); 1289 1290 recovered = true; 1291 1292 RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so, 1293 Vcb->superblock.sector_size); 1294 } 1295 } 1296 } else { 1297 uint64_t addr = offset + pos; 1298 1299 for (k = 0; k < sub_stripes; k++) { 1300 if (c->devices[j + j]->devobj) { 1301 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, false, false); 1302 log_device_error(Vcb, c->devices[j + k], 
BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1303 } 1304 } 1305 } 1306 } 1307 1308 pos += Vcb->superblock.sector_size; 1309 so += Vcb->superblock.sector_size; 1310 } 1311 } else { 1312 for (l = 0; l < readlen; l += Vcb->superblock.node_size) { 1313 for (k = 0; k < sub_stripes; k++) { 1314 if (c->devices[j + k]->devobj) { 1315 tree_header* th = (tree_header*)&context->stripes[j + k].buf[so]; 1316 uint64_t addr = offset + pos; 1317 1318 if (context->stripes[j + k].bad_csums[so / Vcb->superblock.node_size] != *((uint32_t*)th->csum) || th->address != addr) { 1319 ULONG m; 1320 1321 recovered = false; 1322 1323 for (m = 0; m < sub_stripes; m++) { 1324 if (m != k) { 1325 tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so]; 1326 1327 if (context->stripes[j + m].bad_csums[so / Vcb->superblock.node_size] == *((uint32_t*)th2->csum) && th2->address == addr) { 1328 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false); 1329 1330 RtlCopyMemory(th, th2, Vcb->superblock.node_size); 1331 1332 recovered = true; 1333 break; 1334 } else 1335 log_device_error(Vcb, c->devices[j + m], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1336 } 1337 } 1338 1339 if (!recovered) 1340 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, false, false); 1341 } 1342 } 1343 } 1344 1345 pos += Vcb->superblock.node_size; 1346 so += Vcb->superblock.node_size; 1347 } 1348 } 1349 } else 1350 pos += readlen; 1351 1352 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1353 } 1354 1355 if (recovered) { 1356 // write good data over bad 1357 1358 for (k = 0; k < sub_stripes; k++) { 1359 if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) { 1360 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1361 1362 Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset, 1363 context->stripes[j + k].buf, context->stripes[j + k].length); 1364 1365 if (!NT_SUCCESS(Status)) { 1366 
ERR("write_data_phys returned %08x\n", Status); 1367 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); 1368 goto end; 1369 } 1370 } 1371 } 1372 } 1373 } 1374 } 1375 } 1376 1377 Status = STATUS_SUCCESS; 1378 1379 end: 1380 ExFreePool(stripeoff); 1381 1382 return Status; 1383 } 1384 1385 static NTSTATUS scrub_extent(device_extension* Vcb, chunk* c, ULONG type, uint64_t offset, uint32_t size, uint32_t* csum) { 1386 ULONG i; 1387 scrub_context context; 1388 CHUNK_ITEM_STRIPE* cis; 1389 NTSTATUS Status; 1390 uint16_t startoffstripe, num_missing, allowed_missing; 1391 1392 TRACE("(%p, %p, %I64x, %I64x, %p)\n", Vcb, c, offset, size, csum); 1393 1394 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); 1395 if (!context.stripes) { 1396 ERR("out of memory\n"); 1397 Status = STATUS_INSUFFICIENT_RESOURCES; 1398 goto end; 1399 } 1400 1401 RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes); 1402 1403 context.stripes_left = 0; 1404 1405 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1406 1407 if (type == BLOCK_FLAG_RAID0) { 1408 uint64_t startoff, endoff; 1409 uint16_t endoffstripe; 1410 1411 get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); 1412 get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); 1413 1414 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1415 if (startoffstripe > i) 1416 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 1417 else if (startoffstripe == i) 1418 context.stripes[i].start = startoff; 1419 else 1420 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); 1421 1422 if (endoffstripe > i) 1423 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + 
c->chunk_item->stripe_length - context.stripes[i].start);
            else if (endoffstripe == i)
                context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
            else
                context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
        }

        allowed_missing = 0;
    } else if (type == BLOCK_FLAG_RAID10) {
        uint64_t startoff, endoff;
        uint16_t endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);

        // RAID10 addresses are laid out over stripe *groups*, so divide by sub_stripes
        get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
        get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);

        if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
            ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        startoffstripe *= sub_stripes;
        endoffstripe *= sub_stripes;

        for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
            if (startoffstripe > i)
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
            else if (startoffstripe == i)
                context.stripes[i].start = startoff;
            else
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);

            if (endoffstripe > i)
                context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
            else if (endoffstripe == i)
                context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
            else
                context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);

            // mirrors within a group read the same range
            for (j = 1; j < sub_stripes; j++) {
                context.stripes[i+j].start = context.stripes[i].start;
                context.stripes[i+j].length = context.stripes[i].length;
            }
        }

        startoffstripe /= sub_stripes;
        allowed_missing = 1;
    } else
        allowed_missing = c->chunk_item->num_stripes - 1;

    num_missing = 0;

    // build and submit one read IRP per present stripe
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        PIO_STACK_LOCATION IrpSp;

        context.stripes[i].context = (struct _scrub_context*)&context;

        if (type == BLOCK_FLAG_DUPLICATE) {
            context.stripes[i].start = offset - c->offset;
            context.stripes[i].length = size;
        } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
            ERR("unexpected chunk type %x\n", type);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        if (!c->devices[i]->devobj) {
            num_missing++;

            if (num_missing > allowed_missing) {
                ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
                Status = STATUS_INTERNAL_ERROR;
                goto end;
            }
        } else if (context.stripes[i].length > 0) {
            context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);

            if (!context.stripes[i].buf) {
                ERR("out of memory\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }

            context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false);

            if (!context.stripes[i].Irp) {
                ERR("IoAllocateIrp failed\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }

            IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
            IrpSp->MajorFunction = IRP_MJ_READ;
            IrpSp->FileObject = c->devices[i]->fileobj;

            // set up the transfer buffer the way the target device expects it
            if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
                context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
                if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
                    ERR("out of memory\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                // IRP_DEALLOCATE_BUFFER: the I/O manager frees SystemBuffer on completion
                context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;

                context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
            } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
                context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, false, false, NULL);
                if (!context.stripes[i].Irp->MdlAddress) {
                    ERR("IoAllocateMdl failed\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                Status = STATUS_SUCCESS;

                // MmProbeAndLockPages raises on failure, hence the SEH wrapper
                _SEH2_TRY {
                    MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
                } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
                    Status = _SEH2_GetExceptionCode();
                } _SEH2_END;

                if (!NT_SUCCESS(Status)) {
                    ERR("MmProbeAndLockPages threw exception %08x\n", Status);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                    context.stripes[i].Irp->MdlAddress = NULL;
                    goto end;
                }
            } else
                context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;

            IrpSp->Parameters.Read.Length = context.stripes[i].length;
            IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;

            context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;

            IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], true, true, true);

            context.stripes_left++;

            Vcb->scrub.data_scrubbed += context.stripes[i].length;
        }
    }

    if (context.stripes_left == 0) {
        ERR("error - not reading any stripes\n");
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    KeInitializeEvent(&context.Event, NotificationEvent, false);

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (c->devices[i]->devobj && context.stripes[i].length > 0)
            IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
    }

    // completion routine signals Event once every submitted IRP has finished
    KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);

    // return an error if any of the stripes returned an error
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
            Status = context.stripes[i].iosb.Status;
            log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            goto end;
        }
    }

    // hand off to the profile-specific verifier
    if (type == BLOCK_FLAG_DUPLICATE) {
        Status = scrub_extent_dup(Vcb, c, offset, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_dup returned %08x\n", Status);
            goto end;
        }
    } else if (type == BLOCK_FLAG_RAID0) {
        Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_raid0 returned %08x\n", Status);
            goto end;
        }
    } else if (type == BLOCK_FLAG_RAID10) {
        Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_raid10 returned %08x\n", Status);
            goto end;
        }
    }

end:
    if (context.stripes) {
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (context.stripes[i].Irp) {
                // MDLs were only attached on the DO_DIRECT_IO path
                if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
                    MmUnlockPages(context.stripes[i].Irp->MdlAddress);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                }
                IoFreeIrp(context.stripes[i].Irp);
            }

            if (context.stripes[i].buf)
                ExFreePool(context.stripes[i].buf);

            if (context.stripes[i].bad_csums)
                ExFreePool(context.stripes[i].bad_csums);
        }

        ExFreePool(context.stripes);
    }

    return Status;
}

/* Scrub the allocated portions of a data extent: bmp marks sectors to skip
 * (set bits), so each clear run is verified in SCRUB_UNIT-sized pieces. */
static NTSTATUS scrub_data_extent(device_extension* Vcb, chunk* c, uint64_t offset, ULONG type, uint32_t* csum, RTL_BITMAP* bmp, ULONG bmplen) {
NTSTATUS Status;
    ULONG runlength, index;

    // iterate over runs of clear bits (sectors still needing verification)
    runlength = RtlFindFirstRunClear(bmp, &index);

    while (runlength != 0) {
        if (index >= bmplen)
            break;

        // clamp the final run to the bitmap's logical length
        if (index + runlength >= bmplen) {
            runlength = bmplen - index;

            if (runlength == 0)
                break;
        }

        do {
            ULONG rl;

            // scrub in SCRUB_UNIT-sized pieces so one extent read stays bounded
            if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT)
                rl = SCRUB_UNIT / Vcb->superblock.sector_size;
            else
                rl = runlength;

            Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index]);
            if (!NT_SUCCESS(Status)) {
                // NB: message names scrub_data_extent_dup, but the callee is scrub_extent
                ERR("scrub_data_extent_dup returned %08x\n", Status);
                return Status;
            }

            runlength -= rl;
            index += rl;
        } while (runlength > 0);

        runlength = RtlFindNextForwardRunClear(bmp, index, &index);
    }

    return STATUS_SUCCESS;
}

// per-stripe state for a RAID5/6 scrub pass
typedef struct {
    uint8_t* buf;            // full stripe data read from this device
    PIRP Irp;
    void* context;           // back-pointer to the owning scrub_context_raid56
    IO_STATUS_BLOCK iosb;
    uint64_t offset;
    bool rewrite, missing;   // rewrite: repaired data must be written back
    RTL_BITMAP error;        // per-sector checksum-error flags
    ULONG* errorarr;         // backing storage for the error bitmap
} scrub_context_raid56_stripe;

// shared state for one RAID5/6 scrub run
typedef struct {
    scrub_context_raid56_stripe* stripes;
    LONG stripes_left;       // outstanding read IRPs; last completion sets Event
    KEVENT Event;
    RTL_BITMAP alloc;        // which sectors are allocated
    RTL_BITMAP has_csum;     // which sectors have data checksums
    RTL_BITMAP is_tree;      // which sectors hold metadata (tree blocks)
    uint32_t* csum;          // expected data checksums, indexed by sector
    uint8_t* parity_scratch;   // workspace for XOR (p) parity checks
    uint8_t* parity_scratch2;  // workspace for Galois (q) parity checks
} scrub_context_raid56;

/* Read-completion routine for RAID5/6 scrub reads: records the IRP status
 * and signals the waiter once the last outstanding read has finished. */
_Function_class_(IO_COMPLETION_ROUTINE)
static NTSTATUS __stdcall scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
    scrub_context_raid56_stripe* stripe = conptr;
    scrub_context_raid56* context = (scrub_context_raid56*)stripe->context;
    LONG left = InterlockedDecrement(&context->stripes_left);

    UNUSED(DeviceObject);

    stripe->iosb = Irp->IoStatus;

    if (left == 0)
        KeSetEvent(&context->Event, 0, false);

    return
STATUS_MORE_PROCESSING_REQUIRED;
}

/* Verify one stripe-length slice (index num within the read window starting
 * at stripe_start) of a RAID5 chunk: checksum every allocated sector,
 * XOR-check the parity stripe, and where possible reconstruct single-stripe
 * errors from parity. Repaired stripes are flagged with .rewrite; the
 * caller is responsible for writing them back. */
static void scrub_raid5_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, uint64_t stripe_start, uint64_t bit_start,
                               uint64_t num, uint16_t missing_devices) {
    ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
    uint16_t stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
    uint64_t stripeoff;

    // parity rotates each row; data stripes start just after the parity stripe
    stripe = (parity + 1) % c->chunk_item->num_stripes;
    off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
    stripeoff = num * sectors_per_stripe;

    // seed scratch with the parity stripe; XORing in every data stripe below
    // should then leave all-zeroes if parity is consistent
    if (missing_devices == 0)
        RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

    while (stripe != parity) {
        RtlClearAllBits(&context->stripes[stripe].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                if (RtlCheckBit(&context->is_tree, off)) {
                    // metadata: check tree-block header checksum and address
                    tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
                    uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
                    uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                    if (crc32 != *((uint32_t*)th->csum) || th->address != addr) {
                        // a tree block spans several sectors; mark them all bad
                        RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        // with a device missing there is nothing to rebuild from, so log now
                        if (missing_devices > 0)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
                    }

                    off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;

                    continue;
                } else if (RtlCheckBit(&context->has_csum, off)) {
                    // data: check sector against the checksum array
                    uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                    if (crc32 != context->csum[off]) {
                        RtlSetBit(&context->stripes[stripe].error, i);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices > 0) {
                            uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
                        }
                    }
                }
            }

            off++;
            stripeoff++;
        }

        if (missing_devices == 0)
            do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
        stripeoff = num * sectors_per_stripe;
    }

    // check parity

    if (missing_devices == 0) {
        RtlClearAllBits(&context->stripes[parity].error);

        // any non-zero byte in scratch means parity disagrees for that sector
        for (i = 0; i < sectors_per_stripe; i++) {
            ULONG o, j;

            o = i * Vcb->superblock.sector_size;
            for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
                if (context->parity_scratch[o] != 0) {
                    RtlSetBit(&context->stripes[parity].error, i);
                    break;
                }
                o++;
            }
        }
    }

    // log and fix errors

    if (missing_devices > 0)
        return;

    for (i = 0; i < sectors_per_stripe; i++) {
        ULONG num_errors = 0, bad_off;
        uint64_t bad_stripe;
        bool alloc = false;

        // count how many data stripes have an error in this sector column
        stripe = (parity + 1) % c->chunk_item->num_stripes;
        off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;

        while (stripe != parity) {
            if (RtlCheckBit(&context->alloc, off)) {
                alloc = true;

                if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                    bad_stripe = stripe;
                    bad_off = off;
                    num_errors++;
                }
            }

            off += sectors_per_stripe;
            stripe = (stripe + 1) % c->chunk_item->num_stripes;
        }

        if (!alloc)
            continue;

        if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
            continue;

        if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
            uint64_t addr;

            // data is good, parity is wrong: recompute parity in place
            do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                   &context->parity_scratch[i * Vcb->superblock.sector_size],
                   Vcb->superblock.sector_size);

            bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);

            context->stripes[parity].rewrite = true;

            log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, false, true, true);
            log_device_error(Vcb, c->devices[parity], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (num_errors == 1) {
            // exactly one bad data stripe: rebuild it from parity + the others
            uint32_t crc32;
            uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);

            if (RtlCheckBit(&context->is_tree, bad_off)) {
                tree_header* th;

                // scratch currently holds parity XOR all stripes; XORing the bad
                // stripe back out yields the reconstructed data
                do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                       &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       Vcb->superblock.node_size);

                th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                // only accept the reconstruction if it checksums correctly
                if (crc32 == *((uint32_t*)th->csum) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe].rewrite = true;

                    // remaining sectors of this tree block are now clean
                    RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, false, false);
            } else {
                do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                       &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       Vcb->superblock.sector_size);

                crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (crc32 == context->csum[bad_off]) {
                    RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe].rewrite = true;

                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, false, false);
            }
        } else {
            // more than one bad stripe: RAID5 cannot reconstruct, log each
            stripe = (parity + 1) % c->chunk_item->num_stripes;
            off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;

            while (stripe != parity) {
                if (RtlCheckBit(&context->alloc, off)) {
                    if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                        uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
                    }
                }

                off += sectors_per_stripe;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }
        }
    }
}

/* RAID6 analogue of scrub_raid5_stripe: checks both the XOR (p) and
 * Galois-field (q) parity stripes, and tolerates up to two missing or
 * corrupt stripes per sector column. (Continues past this chunk.) */
static void scrub_raid6_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, uint64_t stripe_start, uint64_t bit_start,
                               uint64_t num, uint16_t missing_devices) {
    ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
    uint16_t stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
    uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
    uint64_t stripeoff;

    // data stripes start two past parity1 (p and q are adjacent)
    stripe = (parity1 + 2) % c->chunk_item->num_stripes;
    off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
    stripeoff = num * sectors_per_stripe;

    if (c->devices[parity1]->devobj)
        RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

    if (c->devices[parity2]->devobj)
        RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);

    while (stripe != parity1) {
        RtlClearAllBits(&context->stripes[stripe].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                if (RtlCheckBit(&context->is_tree, off)) {
                    tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
                    uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
                    uint32_t
crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1933 1934 if (crc32 != *((uint32_t*)th->csum) || th->address != addr) { 1935 RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size); 1936 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1937 1938 if (missing_devices == 2) 1939 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false); 1940 } 1941 1942 off += Vcb->superblock.node_size / Vcb->superblock.sector_size; 1943 stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size; 1944 i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1; 1945 1946 continue; 1947 } else if (RtlCheckBit(&context->has_csum, off)) { 1948 uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1949 1950 if (crc32 != context->csum[off]) { 1951 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); 1952 1953 RtlSetBit(&context->stripes[stripe].error, i); 1954 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1955 1956 if (missing_devices == 2) 1957 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false); 1958 } 1959 } 1960 } 1961 1962 off++; 1963 stripeoff++; 1964 } 1965 1966 if (c->devices[parity1]->devobj) 1967 do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length); 1968 1969 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1970 stripeoff = num * sectors_per_stripe; 1971 } 1972 1973 RtlClearAllBits(&context->stripes[parity1].error); 1974 1975 if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) { 1976 // check parity 1 1977 1978 for (i = 0; i < sectors_per_stripe; i++) { 
1979 ULONG o, j; 1980 1981 o = i * Vcb->superblock.sector_size; 1982 for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE 1983 if (context->parity_scratch[o] != 0) { 1984 RtlSetBit(&context->stripes[parity1].error, i); 1985 break; 1986 } 1987 o++; 1988 } 1989 } 1990 } 1991 1992 RtlClearAllBits(&context->stripes[parity2].error); 1993 1994 if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) { 1995 // check parity 2 1996 1997 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 1998 1999 while (stripe != parity2) { 2000 galois_double(context->parity_scratch2, (uint32_t)c->chunk_item->stripe_length); 2001 do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length); 2002 2003 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2004 } 2005 2006 for (i = 0; i < sectors_per_stripe; i++) { 2007 if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2008 &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size) 2009 RtlSetBit(&context->stripes[parity2].error, i); 2010 } 2011 } 2012 2013 if (missing_devices == 2) 2014 return; 2015 2016 // log and fix errors 2017 2018 for (i = 0; i < sectors_per_stripe; i++) { 2019 ULONG num_errors = 0; 2020 uint64_t bad_stripe1, bad_stripe2; 2021 ULONG bad_off1, bad_off2; 2022 bool alloc = false; 2023 2024 stripe = (parity1 + 2) % c->chunk_item->num_stripes; 2025 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2026 2027 while (stripe != parity1) { 2028 if (RtlCheckBit(&context->alloc, off)) { 2029 alloc = true; 2030 2031 if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) { 2032 if (num_errors == 0) { 2033 bad_stripe1 = stripe; 2034 bad_off1 = off; 
2035 } else if (num_errors == 1) { 2036 bad_stripe2 = stripe; 2037 bad_off2 = off; 2038 } 2039 num_errors++; 2040 } 2041 } 2042 2043 off += sectors_per_stripe; 2044 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2045 } 2046 2047 if (!alloc) 2048 continue; 2049 2050 if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine 2051 continue; 2052 2053 if (num_errors == 0) { // parity error 2054 uint64_t addr; 2055 2056 if (RtlCheckBit(&context->stripes[parity1].error, i)) { 2057 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2058 &context->parity_scratch[i * Vcb->superblock.sector_size], 2059 Vcb->superblock.sector_size); 2060 2061 bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2062 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size); 2063 2064 context->stripes[parity1].rewrite = true; 2065 2066 log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true); 2067 log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2068 } 2069 2070 if (RtlCheckBit(&context->stripes[parity2].error, i)) { 2071 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2072 &context->parity_scratch2[i * Vcb->superblock.sector_size], 2073 Vcb->superblock.sector_size); 2074 2075 bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2076 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size); 2077 2078 context->stripes[parity2].rewrite = true; 2079 2080 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true); 2081 
log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2082 } 2083 } else if (num_errors == 1) { 2084 uint32_t crc32a, crc32b, len; 2085 uint16_t stripe_num, bad_stripe_num; 2086 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size); 2087 uint8_t* scratch; 2088 2089 len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size; 2090 2091 scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); 2092 if (!scratch) { 2093 ERR("out of memory\n"); 2094 return; 2095 } 2096 2097 RtlZeroMemory(scratch, len); 2098 2099 do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], 2100 &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2101 2102 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2103 2104 if (c->devices[parity2]->devobj) { 2105 stripe_num = c->chunk_item->num_stripes - 3; 2106 while (stripe != parity2) { 2107 galois_double(scratch, len); 2108 2109 if (stripe != bad_stripe1) 2110 do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2111 else 2112 bad_stripe_num = stripe_num; 2113 2114 stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); 2115 stripe_num--; 2116 } 2117 2118 do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2119 2120 if (bad_stripe_num != 0) 2121 galois_divpower(scratch, (uint8_t)bad_stripe_num, len); 2122 } 2123 2124 if (RtlCheckBit(&context->is_tree, bad_off1)) { 2125 tree_header *th1 = NULL, *th2 = NULL; 2126 2127 if (c->devices[parity1]->devobj) { 2128 th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size]; 2129 crc32a = ~calc_crc32c(0xffffffff, (uint8_t*)&th1->fs_uuid, Vcb->superblock.node_size - sizeof(th1->csum)); 2130 } 2131 2132 if (c->devices[parity2]->devobj) { 2133 th2 = (tree_header*)scratch; 2134 crc32b = ~calc_crc32c(0xffffffff, (uint8_t*)&th2->fs_uuid, Vcb->superblock.node_size - sizeof(th2->csum)); 2135 } 2136 2137 if ((c->devices[parity1]->devobj && crc32a == *((uint32_t*)th1->csum) && th1->address == addr) || 2138 (c->devices[parity2]->devobj && crc32b == *((uint32_t*)th2->csum) && th2->address == addr)) { 2139 if (!c->devices[parity1]->devobj || crc32a != *((uint32_t*)th1->csum) || th1->address != addr) { 2140 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2141 scratch, Vcb->superblock.node_size); 2142 2143 if (c->devices[parity1]->devobj) { 2144 // fix parity 1 2145 2146 stripe = (parity1 + 2) % c->chunk_item->num_stripes; 2147 2148 RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2149 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2150 Vcb->superblock.node_size); 2151 2152 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2153 2154 while (stripe != parity1) { 2155 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2156 
&context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2157 Vcb->superblock.node_size); 2158 2159 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2160 } 2161 2162 context->stripes[parity1].rewrite = true; 2163 2164 log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true); 2165 log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2166 } 2167 } else { 2168 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2169 &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); 2170 2171 if (!c->devices[parity2]->devobj || crc32b != *((uint32_t*)th2->csum) || th2->address != addr) { 2172 // fix parity 2 2173 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2174 2175 if (c->devices[parity2]->devobj) { 2176 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2177 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2178 Vcb->superblock.node_size); 2179 2180 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2181 2182 while (stripe != parity2) { 2183 galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size); 2184 2185 do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2186 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2187 Vcb->superblock.node_size); 2188 2189 stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); 2190 } 2191 2192 context->stripes[parity2].rewrite = true; 2193 2194 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true); 2195 log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2196 } 2197 } 2198 } 2199 2200 context->stripes[bad_stripe1].rewrite = true; 2201 2202 RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); 2203 2204 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false); 2205 } else 2206 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false); 2207 } else { 2208 if (c->devices[parity1]->devobj) 2209 crc32a = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); 2210 2211 if (c->devices[parity2]->devobj) 2212 crc32b = ~calc_crc32c(0xffffffff, scratch, Vcb->superblock.sector_size); 2213 2214 if ((c->devices[parity1]->devobj && crc32a == context->csum[bad_off1]) || (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1])) { 2215 if (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1]) { 2216 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2217 scratch, Vcb->superblock.sector_size); 2218 2219 if (c->devices[parity1]->devobj && crc32a != context->csum[bad_off1]) { 2220 // fix parity 1 2221 2222 stripe = (parity1 + 2) % c->chunk_item->num_stripes; 2223 2224 RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2225 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2226 Vcb->superblock.sector_size); 2227 2228 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2229 2230 while (stripe != parity1) { 2231 do_xor(&context->stripes[parity1].buf[(num * 
c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2232 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2233 Vcb->superblock.sector_size); 2234 2235 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2236 } 2237 2238 context->stripes[parity1].rewrite = true; 2239 2240 log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true); 2241 log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2242 } 2243 } else { 2244 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2245 &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); 2246 2247 if (c->devices[parity2]->devobj && crc32b != context->csum[bad_off1]) { 2248 // fix parity 2 2249 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2250 2251 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2252 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2253 Vcb->superblock.sector_size); 2254 2255 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2256 2257 while (stripe != parity2) { 2258 galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size); 2259 2260 do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2261 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2262 Vcb->superblock.sector_size); 2263 2264 stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); 2265 } 2266 2267 context->stripes[parity2].rewrite = true; 2268 2269 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true); 2270 log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2271 } 2272 } 2273 2274 context->stripes[bad_stripe1].rewrite = true; 2275 2276 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false); 2277 } else 2278 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false); 2279 } 2280 2281 ExFreePool(scratch); 2282 } else if (num_errors == 2 && missing_devices == 0) { 2283 uint16_t x, y, k; 2284 uint64_t addr; 2285 uint32_t len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size; 2286 uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; 2287 uint32_t j; 2288 2289 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2290 2291 // put qxy in parity_scratch 2292 // put pxy in parity_scratch2 2293 2294 k = c->chunk_item->num_stripes - 3; 2295 if (stripe == bad_stripe1 || stripe == bad_stripe2) { 2296 RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len); 2297 RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len); 2298 2299 if (stripe == bad_stripe1) 2300 x = k; 2301 else 2302 y = k; 2303 } else { 2304 RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], 2305 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2306 RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], 2307 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2308 } 2309 2310 stripe = stripe == 0 ? 
(c->chunk_item->num_stripes - 1) : (stripe - 1); 2311 2312 k--; 2313 do { 2314 galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len); 2315 2316 if (stripe != bad_stripe1 && stripe != bad_stripe2) { 2317 do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], 2318 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2319 do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], 2320 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2321 } else if (stripe == bad_stripe1) 2322 x = k; 2323 else if (stripe == bad_stripe2) 2324 y = k; 2325 2326 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2327 k--; 2328 } while (stripe != parity2); 2329 2330 gyx = gpow2(y > x ? (y-x) : (255-x+y)); 2331 gx = gpow2(255-x); 2332 2333 denom = gdiv(1, gyx ^ 1); 2334 a = gmul(gyx, denom); 2335 b = gmul(gx, denom); 2336 2337 p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)]; 2338 q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)]; 2339 pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size]; 2340 qxy = &context->parity_scratch[i * Vcb->superblock.sector_size]; 2341 2342 for (j = 0; j < len; j++) { 2343 *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); 2344 2345 p++; 2346 q++; 2347 pxy++; 2348 qxy++; 2349 } 2350 2351 do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len); 2352 do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len); 2353 2354 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size); 2355 2356 
if (RtlCheckBit(&context->is_tree, bad_off1)) { 2357 tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size]; 2358 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 2359 2360 if (crc32 == *((uint32_t*)th->csum) && th->address == addr) { 2361 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2362 &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); 2363 2364 context->stripes[bad_stripe1].rewrite = true; 2365 2366 RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); 2367 2368 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false); 2369 } else 2370 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false); 2371 } else { 2372 uint32_t crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); 2373 2374 if (crc32 == context->csum[bad_off1]) { 2375 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2376 &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); 2377 2378 context->stripes[bad_stripe1].rewrite = true; 2379 2380 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false); 2381 } else 2382 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false); 2383 } 2384 2385 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size); 2386 2387 if (RtlCheckBit(&context->is_tree, bad_off2)) { 2388 tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size]; 2389 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, 
Vcb->superblock.node_size - sizeof(th->csum)); 2390 2391 if (crc32 == *((uint32_t*)th->csum) && th->address == addr) { 2392 RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2393 &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); 2394 2395 context->stripes[bad_stripe2].rewrite = true; 2396 2397 RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); 2398 2399 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, true, false); 2400 } else 2401 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, false, false); 2402 } else { 2403 uint32_t crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); 2404 2405 if (crc32 == context->csum[bad_off2]) { 2406 RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 2407 &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); 2408 2409 context->stripes[bad_stripe2].rewrite = true; 2410 2411 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, true, false); 2412 } else 2413 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, false, false); 2414 } 2415 } else { 2416 stripe = (parity2 + 1) % c->chunk_item->num_stripes; 2417 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2418 2419 while (stripe != parity1) { 2420 if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { 2421 if (RtlCheckBit(&context->stripes[stripe].error, i)) { 2422 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); 2423 2424 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, 
RtlCheckBit(&context->is_tree, off), false, false); 2425 } 2426 } 2427 2428 off += sectors_per_stripe; 2429 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2430 } 2431 } 2432 } 2433 } 2434 2435 static NTSTATUS scrub_chunk_raid56_stripe_run(device_extension* Vcb, chunk* c, uint64_t stripe_start, uint64_t stripe_end) { 2436 NTSTATUS Status; 2437 KEY searchkey; 2438 traverse_ptr tp; 2439 bool b; 2440 uint64_t run_start, run_end, full_stripe_len, stripe; 2441 uint32_t max_read, num_sectors; 2442 ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1; 2443 scrub_context_raid56 context; 2444 uint16_t i; 2445 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 2446 2447 TRACE("(%p, %p, %I64x, %I64x)\n", Vcb, c, stripe_start, stripe_end); 2448 2449 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; 2450 run_start = c->offset + (stripe_start * full_stripe_len); 2451 run_end = c->offset + ((stripe_end + 1) * full_stripe_len); 2452 2453 searchkey.obj_id = run_start; 2454 searchkey.obj_type = TYPE_METADATA_ITEM; 2455 searchkey.offset = 0xffffffffffffffff; 2456 2457 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 2458 if (!NT_SUCCESS(Status)) { 2459 ERR("find_item returned %08x\n", Status); 2460 return Status; 2461 } 2462 2463 num_sectors = (uint32_t)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size); 2464 arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG)); 2465 2466 allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2467 if (!allocarr) { 2468 ERR("out of memory\n"); 2469 return STATUS_INSUFFICIENT_RESOURCES; 2470 } 2471 2472 treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2473 if (!treearr) { 2474 ERR("out of memory\n"); 2475 ExFreePool(allocarr); 2476 return STATUS_INSUFFICIENT_RESOURCES; 2477 } 2478 2479 RtlInitializeBitMap(&context.alloc, allocarr, 
num_sectors); 2480 RtlClearAllBits(&context.alloc); 2481 2482 RtlInitializeBitMap(&context.is_tree, treearr, num_sectors); 2483 RtlClearAllBits(&context.is_tree); 2484 2485 context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); 2486 if (!context.parity_scratch) { 2487 ERR("out of memory\n"); 2488 ExFreePool(allocarr); 2489 ExFreePool(treearr); 2490 return STATUS_INSUFFICIENT_RESOURCES; 2491 } 2492 2493 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2494 csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2495 if (!csumarr) { 2496 ERR("out of memory\n"); 2497 ExFreePool(allocarr); 2498 ExFreePool(treearr); 2499 ExFreePool(context.parity_scratch); 2500 return STATUS_INSUFFICIENT_RESOURCES; 2501 } 2502 2503 RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors); 2504 RtlClearAllBits(&context.has_csum); 2505 2506 context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * sizeof(uint32_t), ALLOC_TAG); 2507 if (!context.csum) { 2508 ERR("out of memory\n"); 2509 ExFreePool(allocarr); 2510 ExFreePool(treearr); 2511 ExFreePool(context.parity_scratch); 2512 ExFreePool(csumarr); 2513 return STATUS_INSUFFICIENT_RESOURCES; 2514 } 2515 } 2516 2517 if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2518 context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); 2519 if (!context.parity_scratch2) { 2520 ERR("out of memory\n"); 2521 ExFreePool(allocarr); 2522 ExFreePool(treearr); 2523 ExFreePool(context.parity_scratch); 2524 2525 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2526 ExFreePool(csumarr); 2527 ExFreePool(context.csum); 2528 } 2529 2530 return STATUS_INSUFFICIENT_RESOURCES; 2531 } 2532 } 2533 2534 do { 2535 traverse_ptr next_tp; 2536 2537 if (tp.item->key.obj_id >= run_end) 2538 break; 2539 2540 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM) { 2541 uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; 2542 2543 if (tp.item->key.obj_id + size > run_start) { 2544 uint64_t extent_start = max(run_start, tp.item->key.obj_id); 2545 uint64_t extent_end = min(tp.item->key.obj_id + size, run_end); 2546 bool extent_is_tree = false; 2547 2548 RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size)); 2549 2550 if (tp.item->key.obj_type == TYPE_METADATA_ITEM) 2551 extent_is_tree = true; 2552 else { 2553 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 2554 2555 if (tp.item->size < sizeof(EXTENT_ITEM)) { 2556 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 2557 Status = STATUS_INTERNAL_ERROR; 2558 goto end; 2559 } 2560 2561 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 2562 extent_is_tree = true; 2563 } 2564 2565 if (extent_is_tree) 2566 RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size)); 2567 else if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2568 traverse_ptr tp2; 2569 bool b2; 2570 2571 searchkey.obj_id = EXTENT_CSUM_ID; 2572 searchkey.obj_type = TYPE_EXTENT_CSUM; 2573 searchkey.offset = extent_start; 2574 2575 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL); 2576 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 2577 ERR("find_item returned %08x\n", Status); 2578 goto end; 2579 } 2580 2581 do { 2582 traverse_ptr next_tp2; 2583 2584 if (tp2.item->key.offset >= extent_end) 2585 break; 2586 2587 if (tp2.item->key.offset >= extent_start) { 2588 uint64_t csum_start = max(extent_start, tp2.item->key.offset); 2589 uint64_t csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(uint32_t))); 2590 2591 RtlSetBits(&context.has_csum, 
(ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size)); 2592 2593 RtlCopyMemory(&context.csum[(csum_start - run_start) / Vcb->superblock.sector_size], 2594 tp2.item->data + ((csum_start - tp2.item->key.offset) * sizeof(uint32_t) / Vcb->superblock.sector_size), 2595 (ULONG)((csum_end - csum_start) * sizeof(uint32_t) / Vcb->superblock.sector_size)); 2596 } 2597 2598 b2 = find_next_item(Vcb, &tp2, &next_tp2, false, NULL); 2599 2600 if (b2) 2601 tp2 = next_tp2; 2602 } while (b2); 2603 } 2604 } 2605 } 2606 2607 b = find_next_item(Vcb, &tp, &next_tp, false, NULL); 2608 2609 if (b) 2610 tp = next_tp; 2611 } while (b); 2612 2613 context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); 2614 if (!context.stripes) { 2615 ERR("out of memory\n"); 2616 Status = STATUS_INSUFFICIENT_RESOURCES; 2617 goto end; 2618 } 2619 2620 max_read = (uint32_t)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time 2621 2622 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2623 context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG); 2624 if (!context.stripes[i].buf) { 2625 uint64_t j; 2626 2627 ERR("out of memory\n"); 2628 2629 for (j = 0; j < i; j++) { 2630 ExFreePool(context.stripes[j].buf); 2631 } 2632 ExFreePool(context.stripes); 2633 2634 Status = STATUS_INSUFFICIENT_RESOURCES; 2635 goto end; 2636 } 2637 2638 context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG); 2639 if (!context.stripes[i].errorarr) { 2640 uint64_t j; 2641 2642 ERR("out of memory\n"); 2643 2644 ExFreePool(context.stripes[i].buf); 2645 2646 for (j = 0; j < i; j++) { 2647 ExFreePool(context.stripes[j].buf); 2648 } 2649 
ExFreePool(context.stripes); 2650 2651 Status = STATUS_INSUFFICIENT_RESOURCES; 2652 goto end; 2653 } 2654 2655 RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size)); 2656 2657 context.stripes[i].context = &context; 2658 context.stripes[i].rewrite = false; 2659 } 2660 2661 stripe = stripe_start; 2662 2663 Status = STATUS_SUCCESS; 2664 2665 chunk_lock_range(Vcb, c, run_start, run_end - run_start); 2666 2667 do { 2668 ULONG read_stripes; 2669 uint16_t missing_devices = 0; 2670 bool need_wait = false; 2671 2672 if (max_read < stripe_end + 1 - stripe) 2673 read_stripes = max_read; 2674 else 2675 read_stripes = (ULONG)(stripe_end + 1 - stripe); 2676 2677 context.stripes_left = c->chunk_item->num_stripes; 2678 2679 // read megabyte by megabyte 2680 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2681 if (c->devices[i]->devobj) { 2682 PIO_STACK_LOCATION IrpSp; 2683 2684 context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false); 2685 2686 if (!context.stripes[i].Irp) { 2687 ERR("IoAllocateIrp failed\n"); 2688 Status = STATUS_INSUFFICIENT_RESOURCES; 2689 goto end3; 2690 } 2691 2692 context.stripes[i].Irp->MdlAddress = NULL; 2693 2694 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2695 IrpSp->MajorFunction = IRP_MJ_READ; 2696 IrpSp->FileObject = c->devices[i]->fileobj; 2697 2698 if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2699 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG); 2700 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2701 ERR("out of memory\n"); 2702 Status = STATUS_INSUFFICIENT_RESOURCES; 2703 goto end3; 2704 } 2705 2706 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2707 2708 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 2709 } else if 
(c->devices[i]->devobj->Flags & DO_DIRECT_IO) { 2710 context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), false, false, NULL); 2711 if (!context.stripes[i].Irp->MdlAddress) { 2712 ERR("IoAllocateMdl failed\n"); 2713 Status = STATUS_INSUFFICIENT_RESOURCES; 2714 goto end3; 2715 } 2716 2717 Status = STATUS_SUCCESS; 2718 2719 _SEH2_TRY { 2720 MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); 2721 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2722 Status = _SEH2_GetExceptionCode(); 2723 } _SEH2_END; 2724 2725 if (!NT_SUCCESS(Status)) { 2726 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 2727 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 2728 goto end3; 2729 } 2730 } else 2731 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 2732 2733 context.stripes[i].offset = stripe * c->chunk_item->stripe_length; 2734 2735 IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length); 2736 IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset; 2737 2738 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2739 context.stripes[i].missing = false; 2740 2741 IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], true, true, true); 2742 2743 Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length; 2744 need_wait = true; 2745 } else { 2746 context.stripes[i].Irp = NULL; 2747 context.stripes[i].missing = true; 2748 missing_devices++; 2749 InterlockedDecrement(&context.stripes_left); 2750 } 2751 } 2752 2753 if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) { 2754 ERR("too many missing devices (%u, maximum 1)\n", missing_devices); 2755 Status = STATUS_UNEXPECTED_IO_ERROR; 2756 goto end3; 2757 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) { 2758 ERR("too many missing devices (%u, maximum 2)\n", 
missing_devices); 2759 Status = STATUS_UNEXPECTED_IO_ERROR; 2760 goto end3; 2761 } 2762 2763 if (need_wait) { 2764 KeInitializeEvent(&context.Event, NotificationEvent, false); 2765 2766 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2767 if (c->devices[i]->devobj) 2768 IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp); 2769 } 2770 2771 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 2772 } 2773 2774 // return an error if any of the stripes returned an error 2775 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2776 if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) { 2777 Status = context.stripes[i].iosb.Status; 2778 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS); 2779 goto end3; 2780 } 2781 } 2782 2783 if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2784 for (i = 0; i < read_stripes; i++) { 2785 scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices); 2786 } 2787 } else { 2788 for (i = 0; i < read_stripes; i++) { 2789 scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices); 2790 } 2791 } 2792 stripe += read_stripes; 2793 2794 end3: 2795 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2796 if (context.stripes[i].Irp) { 2797 if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) { 2798 MmUnlockPages(context.stripes[i].Irp->MdlAddress); 2799 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 2800 } 2801 IoFreeIrp(context.stripes[i].Irp); 2802 context.stripes[i].Irp = NULL; 2803 2804 if (context.stripes[i].rewrite) { 2805 Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + context.stripes[i].offset, 2806 context.stripes[i].buf, (uint32_t)(read_stripes * c->chunk_item->stripe_length)); 2807 2808 if (!NT_SUCCESS(Status)) { 2809 ERR("write_data_phys returned %08x\n", Status); 2810 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); 2811 goto end2; 2812 } 2813 
} 2814 } 2815 } 2816 2817 if (!NT_SUCCESS(Status)) 2818 break; 2819 } while (stripe < stripe_end); 2820 2821 end2: 2822 chunk_unlock_range(Vcb, c, run_start, run_end - run_start); 2823 2824 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2825 ExFreePool(context.stripes[i].buf); 2826 ExFreePool(context.stripes[i].errorarr); 2827 } 2828 ExFreePool(context.stripes); 2829 2830 end: 2831 ExFreePool(treearr); 2832 ExFreePool(allocarr); 2833 ExFreePool(context.parity_scratch); 2834 2835 if (c->chunk_item->type & BLOCK_FLAG_RAID6) 2836 ExFreePool(context.parity_scratch2); 2837 2838 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2839 ExFreePool(csumarr); 2840 ExFreePool(context.csum); 2841 } 2842 2843 return Status; 2844 } 2845 2846 static NTSTATUS scrub_chunk_raid56(device_extension* Vcb, chunk* c, uint64_t* offset, bool* changed) { 2847 NTSTATUS Status; 2848 KEY searchkey; 2849 traverse_ptr tp; 2850 bool b; 2851 uint64_t full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0; 2852 ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1; 2853 2854 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; 2855 stripe = (*offset - c->offset) / full_stripe_len; 2856 2857 *offset = c->offset + (stripe * full_stripe_len); 2858 2859 searchkey.obj_id = *offset; 2860 searchkey.obj_type = TYPE_METADATA_ITEM; 2861 searchkey.offset = 0xffffffffffffffff; 2862 2863 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 2864 if (!NT_SUCCESS(Status)) { 2865 ERR("find_item returned %08x\n", Status); 2866 return Status; 2867 } 2868 2869 *changed = false; 2870 2871 do { 2872 traverse_ptr next_tp; 2873 2874 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 2875 break; 2876 2877 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { 2878 uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; 2879 2880 TRACE("%I64x\n", tp.item->key.obj_id); 2881 2882 if (size < Vcb->superblock.sector_size) { 2883 ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size); 2884 return STATUS_INTERNAL_ERROR; 2885 } 2886 2887 stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len; 2888 2889 if (*changed) { 2890 if (stripe > stripe_end + 1) { 2891 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end); 2892 if (!NT_SUCCESS(Status)) { 2893 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status); 2894 return Status; 2895 } 2896 2897 stripe_start = stripe; 2898 } 2899 } else 2900 stripe_start = stripe; 2901 2902 stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len; 2903 2904 *changed = true; 2905 2906 total_data += size; 2907 num_extents++; 2908 2909 // only do so much at a time 2910 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB 2911 break; 2912 } 2913 2914 b = find_next_item(Vcb, &tp, &next_tp, false, NULL); 2915 2916 if (b) 2917 tp = next_tp; 2918 } while (b); 2919 2920 if (*changed) { 2921 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end); 2922 if (!NT_SUCCESS(Status)) { 2923 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status); 2924 return Status; 2925 } 2926 2927 *offset = c->offset + ((stripe_end + 1) * full_stripe_len); 2928 } 2929 2930 return STATUS_SUCCESS; 2931 } 2932 2933 static NTSTATUS scrub_chunk(device_extension* Vcb, chunk* c, uint64_t* offset, bool* changed) { 2934 NTSTATUS Status; 2935 KEY searchkey; 2936 traverse_ptr tp; 2937 bool b = false, tree_run = false; 2938 ULONG type, num_extents = 0; 2939 uint64_t total_data = 0, tree_run_start, tree_run_end; 2940 2941 TRACE("chunk %I64x\n", c->offset); 2942 2943 ExAcquireResourceSharedLite(&Vcb->tree_lock, true); 2944 2945 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) 2946 type = BLOCK_FLAG_DUPLICATE; 2947 else if 
(c->chunk_item->type & BLOCK_FLAG_RAID0) 2948 type = BLOCK_FLAG_RAID0; 2949 else if (c->chunk_item->type & BLOCK_FLAG_RAID1) 2950 type = BLOCK_FLAG_DUPLICATE; 2951 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 2952 type = BLOCK_FLAG_RAID10; 2953 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { 2954 Status = scrub_chunk_raid56(Vcb, c, offset, changed); 2955 goto end; 2956 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2957 Status = scrub_chunk_raid56(Vcb, c, offset, changed); 2958 goto end; 2959 } else // SINGLE 2960 type = BLOCK_FLAG_DUPLICATE; 2961 2962 searchkey.obj_id = *offset; 2963 searchkey.obj_type = TYPE_METADATA_ITEM; 2964 searchkey.offset = 0xffffffffffffffff; 2965 2966 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 2967 if (!NT_SUCCESS(Status)) { 2968 ERR("error - find_item returned %08x\n", Status); 2969 goto end; 2970 } 2971 2972 do { 2973 traverse_ptr next_tp; 2974 2975 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 2976 break; 2977 2978 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { 2979 uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; 2980 bool is_tree; 2981 uint32_t* csum = NULL; 2982 RTL_BITMAP bmp; 2983 ULONG* bmparr = NULL, bmplen; 2984 2985 TRACE("%I64x\n", tp.item->key.obj_id); 2986 2987 is_tree = false; 2988 2989 if (tp.item->key.obj_type == TYPE_METADATA_ITEM) 2990 is_tree = true; 2991 else { 2992 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 2993 2994 if (tp.item->size < sizeof(EXTENT_ITEM)) { 2995 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 2996 Status = STATUS_INTERNAL_ERROR; 2997 goto end; 2998 } 2999 3000 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 3001 is_tree = true; 3002 } 3003 3004 if (size < Vcb->superblock.sector_size) { 3005 ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size); 3006 Status = STATUS_INTERNAL_ERROR; 3007 goto end; 3008 } 3009 3010 // load csum 3011 if (!is_tree) { 3012 traverse_ptr tp2; 3013 3014 csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(sizeof(uint32_t) * size / Vcb->superblock.sector_size), ALLOC_TAG); 3015 if (!csum) { 3016 ERR("out of memory\n"); 3017 Status = STATUS_INSUFFICIENT_RESOURCES; 3018 goto end; 3019 } 3020 3021 bmplen = (ULONG)(size / Vcb->superblock.sector_size); 3022 3023 bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align((bmplen >> 3) + 1, sizeof(ULONG))), ALLOC_TAG); 3024 if (!bmparr) { 3025 ERR("out of memory\n"); 3026 ExFreePool(csum); 3027 Status = STATUS_INSUFFICIENT_RESOURCES; 3028 goto end; 3029 } 3030 3031 RtlInitializeBitMap(&bmp, bmparr, bmplen); 3032 RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum 3033 3034 searchkey.obj_id = EXTENT_CSUM_ID; 3035 searchkey.obj_type = TYPE_EXTENT_CSUM; 3036 searchkey.offset = tp.item->key.obj_id; 3037 3038 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL); 3039 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 3040 ERR("find_item 
returned %08x\n", Status); 3041 ExFreePool(csum); 3042 ExFreePool(bmparr); 3043 goto end; 3044 } 3045 3046 if (Status != STATUS_NOT_FOUND) { 3047 do { 3048 traverse_ptr next_tp2; 3049 3050 if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) { 3051 if (tp2.item->key.offset >= tp.item->key.obj_id + size) 3052 break; 3053 else if (tp2.item->size >= sizeof(uint32_t) && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(uint32_t)) >= tp.item->key.obj_id) { 3054 uint64_t cs = max(tp.item->key.obj_id, tp2.item->key.offset); 3055 uint64_t ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(uint32_t))); 3056 3057 RtlCopyMemory(csum + ((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), 3058 tp2.item->data + ((cs - tp2.item->key.offset) * sizeof(uint32_t) / Vcb->superblock.sector_size), 3059 (ULONG)((ce - cs) * sizeof(uint32_t) / Vcb->superblock.sector_size)); 3060 3061 RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size)); 3062 3063 if (ce == tp.item->key.obj_id + size) 3064 break; 3065 } 3066 } 3067 3068 if (find_next_item(Vcb, &tp2, &next_tp2, false, NULL)) 3069 tp2 = next_tp2; 3070 else 3071 break; 3072 } while (true); 3073 } 3074 } 3075 3076 if (tree_run) { 3077 if (!is_tree || tp.item->key.obj_id > tree_run_end) { 3078 Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL); 3079 if (!NT_SUCCESS(Status)) { 3080 ERR("scrub_extent returned %08x\n", Status); 3081 goto end; 3082 } 3083 3084 if (!is_tree) 3085 tree_run = false; 3086 else { 3087 tree_run_start = tp.item->key.obj_id; 3088 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; 3089 } 3090 } else 3091 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; 3092 } else if (is_tree) { 3093 tree_run = true; 3094 tree_run_start = tp.item->key.obj_id; 3095 tree_run_end = 
tp.item->key.obj_id + Vcb->superblock.node_size; 3096 } 3097 3098 if (!is_tree) { 3099 Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp, bmplen); 3100 if (!NT_SUCCESS(Status)) { 3101 ERR("scrub_data_extent returned %08x\n", Status); 3102 ExFreePool(csum); 3103 ExFreePool(bmparr); 3104 goto end; 3105 } 3106 3107 ExFreePool(csum); 3108 ExFreePool(bmparr); 3109 } 3110 3111 *offset = tp.item->key.obj_id + size; 3112 *changed = true; 3113 3114 total_data += size; 3115 num_extents++; 3116 3117 // only do so much at a time 3118 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB 3119 break; 3120 } 3121 3122 b = find_next_item(Vcb, &tp, &next_tp, false, NULL); 3123 3124 if (b) 3125 tp = next_tp; 3126 } while (b); 3127 3128 if (tree_run) { 3129 Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL); 3130 if (!NT_SUCCESS(Status)) { 3131 ERR("scrub_extent returned %08x\n", Status); 3132 goto end; 3133 } 3134 } 3135 3136 Status = STATUS_SUCCESS; 3137 3138 end: 3139 ExReleaseResourceLite(&Vcb->tree_lock); 3140 3141 return Status; 3142 } 3143 3144 _Function_class_(KSTART_ROUTINE) 3145 static void __stdcall scrub_thread(void* context) { 3146 device_extension* Vcb = context; 3147 LIST_ENTRY chunks, *le; 3148 NTSTATUS Status; 3149 LARGE_INTEGER time; 3150 3151 KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, false); 3152 3153 InitializeListHead(&chunks); 3154 3155 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true); 3156 3157 if (Vcb->need_write && !Vcb->readonly) 3158 Status = do_write(Vcb, NULL); 3159 else 3160 Status = STATUS_SUCCESS; 3161 3162 free_trees(Vcb); 3163 3164 if (!NT_SUCCESS(Status)) { 3165 ExReleaseResourceLite(&Vcb->tree_lock); 3166 ERR("do_write returned %08x\n", Status); 3167 Vcb->scrub.error = Status; 3168 goto end; 3169 } 3170 3171 ExConvertExclusiveToSharedLite(&Vcb->tree_lock); 3172 3173 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 3174 3175 
    // Reset the per-run statistics (still under stats_lock, acquired above).
    KeQuerySystemTime(&Vcb->scrub.start_time);
    Vcb->scrub.finish_time.QuadPart = 0;
    Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart;
    Vcb->scrub.duration.QuadPart = 0;
    Vcb->scrub.total_chunks = 0;
    Vcb->scrub.chunks_left = 0;
    Vcb->scrub.data_scrubbed = 0;
    Vcb->scrub.num_errors = 0;

    // Free the error records left over from any previous scrub.
    while (!IsListEmpty(&Vcb->scrub.errors)) {
        scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry);
        ExFreePool(err);
    }

    ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);

    // Queue every writable chunk for scrubbing, reusing each chunk's
    // list_entry_balance link (scrub and balance cannot run concurrently).
    le = Vcb->chunks.Flink;
    while (le != &Vcb->chunks) {
        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);

        acquire_chunk_lock(c, Vcb);

        if (!c->readonly) {
            InsertTailList(&chunks, &c->list_entry_balance);
            Vcb->scrub.total_chunks++;
            Vcb->scrub.chunks_left++;
        }

        release_chunk_lock(c, Vcb);

        le = le->Flink;
    }

    ExReleaseResourceLite(&Vcb->chunk_lock);

    ExReleaseResource(&Vcb->scrub.stats_lock);

    ExReleaseResourceLite(&Vcb->tree_lock);

    // Main loop: scrub each queued chunk in batches, blocking on scrub.event
    // between batches so pause_scrub can suspend us.
    while (!IsListEmpty(&chunks)) {
        chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
        uint64_t offset = c->offset;
        bool changed;

        c->reloc = true;

        KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);

        if (!Vcb->scrub.stopping) {
            do {
                changed = false;

                Status = scrub_chunk(Vcb, c, &offset, &changed);
                if (!NT_SUCCESS(Status)) {
                    ERR("scrub_chunk returned %08x\n", Status);
                    Vcb->scrub.stopping = true;
                    Vcb->scrub.error = Status;
                    break;
                }

                // Done with this chunk, or asked to stop mid-chunk.
                if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
                    break;

                KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
            } while (changed);
        }

        ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);

        if (!Vcb->scrub.stopping)
            Vcb->scrub.chunks_left--;

        // Record the finish time once the last queued chunk is processed.
        if (IsListEmpty(&chunks))
            KeQuerySystemTime(&Vcb->scrub.finish_time);

        ExReleaseResource(&Vcb->scrub.stats_lock);

        c->reloc = false;
        c->list_entry_balance.Flink = NULL;
    }

    // Account the time since the last resume into the total duration.
    KeQuerySystemTime(&time);
    Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;

end:
    ZwClose(Vcb->scrub.thread);
    Vcb->scrub.thread = NULL;

    KeSetEvent(&Vcb->scrub.finished, 0, false);
}

// IOCTL entry point: start a scrub on this volume. Fails if the volume is
// locked or read-only, or if a balance or another scrub is already running.
// Requires SE_MANAGE_VOLUME_PRIVILEGE.
NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
    NTSTATUS Status;
    OBJECT_ATTRIBUTES oa;

    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (Vcb->locked) {
        WARN("cannot start scrub while locked\n");
        return STATUS_DEVICE_NOT_READY;
    }

    if (Vcb->balance.thread) {
        WARN("cannot start scrub while balance running\n");
        return STATUS_DEVICE_NOT_READY;
    }

    if (Vcb->scrub.thread) {
        WARN("scrub already running\n");
        return STATUS_DEVICE_NOT_READY;
    }

    if (Vcb->readonly)
        return STATUS_MEDIA_WRITE_PROTECTED;

    Vcb->scrub.stopping = false;
    Vcb->scrub.paused = false;
    Vcb->scrub.error = STATUS_SUCCESS;
    // Signalled state = not paused, so the thread starts running immediately.
    KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused);

    InitializeObjectAttributes(&oa, NULL, OBJ_KERNEL_HANDLE, NULL, NULL);

    Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, &oa, NULL, NULL, scrub_thread, Vcb);
    if (!NT_SUCCESS(Status)) {
        ERR("PsCreateSystemThread returned %08x\n", Status);
        return Status;
    }

    return STATUS_SUCCESS;
}

// IOCTL entry point: report scrub status, statistics, and the list of errors
// found so far, serialized into the caller-supplied buffer.
NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length) {
    btrfs_query_scrub* bqs = (btrfs_query_scrub*)data;
    ULONG len;
    NTSTATUS
Status;
    LIST_ENTRY* le;
    btrfs_scrub_error* bse = NULL;

    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (length < offsetof(btrfs_query_scrub, errors))
        return STATUS_BUFFER_TOO_SMALL;

    ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, true);

    if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
        bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING;
    else
        bqs->status = BTRFS_SCRUB_STOPPED;

    bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart;
    bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart;
    bqs->chunks_left = Vcb->scrub.chunks_left;
    bqs->total_chunks = Vcb->scrub.total_chunks;
    bqs->data_scrubbed = Vcb->scrub.data_scrubbed;

    bqs->duration = Vcb->scrub.duration.QuadPart;

    // duration only accumulates up to the last pause; if the scrub is
    // currently running, add the time elapsed since the last resume.
    if (bqs->status == BTRFS_SCRUB_RUNNING) {
        LARGE_INTEGER time;

        KeQuerySystemTime(&time);
        bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
    }

    bqs->error = Vcb->scrub.error;

    bqs->num_errors = Vcb->scrub.num_errors;

    // Space remaining in the caller's buffer for error records.
    len = length - offsetof(btrfs_query_scrub, errors);

    // Serialize the error list as variable-length records chained via
    // next_entry offsets; metadata and data errors have different sizes.
    le = Vcb->scrub.errors.Flink;
    while (le != &Vcb->scrub.errors) {
        scrub_error* err = CONTAINING_RECORD(le, scrub_error, list_entry);
        ULONG errlen;

        if (err->is_metadata)
            errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
        else
            errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;

        if (len < errlen) {
            Status = STATUS_BUFFER_OVERFLOW;
            goto end;
        }

        if (!bse)
            bse = &bqs->errors;
        else {
            // Link the previous record to this one before advancing.
            ULONG lastlen;

            if (bse->is_metadata)
                lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
            else
                lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;

            bse->next_entry = lastlen;
            bse = (btrfs_scrub_error*)(((uint8_t*)bse) + lastlen);
        }

        bse->next_entry = 0;
        bse->address = err->address;
        bse->device = err->device;
        bse->recovered = err->recovered;
        bse->is_metadata = err->is_metadata;
        bse->parity = err->parity;

        if (err->is_metadata) {
            bse->metadata.root = err->metadata.root;
            bse->metadata.level = err->metadata.level;
            bse->metadata.firstitem = err->metadata.firstitem;
        } else {
            bse->data.subvol = err->data.subvol;
            bse->data.offset = err->data.offset;
            bse->data.filename_length = err->data.filename_length;
            RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
        }

        len -= errlen;
        le = le->Flink;
    }

    Status = STATUS_SUCCESS;

end:
    ExReleaseResourceLite(&Vcb->scrub.stats_lock);

    return Status;
}

// IOCTL entry point: pause a running scrub. Clears the event the scrub
// thread waits on and folds the elapsed time into the total duration.
NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
    LARGE_INTEGER time;

    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (!Vcb->scrub.thread)
        return STATUS_DEVICE_NOT_READY;

    if (Vcb->scrub.paused)
        return STATUS_DEVICE_NOT_READY;

    Vcb->scrub.paused = true;
    // Non-signalled event blocks the scrub thread at its next wait.
    KeClearEvent(&Vcb->scrub.event);

    KeQuerySystemTime(&time);
    Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;

    return STATUS_SUCCESS;
}

// IOCTL entry point: resume a paused scrub by re-signalling the event and
// restarting the duration clock.
NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (!Vcb->scrub.thread)
        return STATUS_DEVICE_NOT_READY;

    if (!Vcb->scrub.paused)
        return STATUS_DEVICE_NOT_READY;

    Vcb->scrub.paused =
false; 3439 KeSetEvent(&Vcb->scrub.event, 0, false); 3440 3441 KeQuerySystemTime(&Vcb->scrub.resume_time); 3442 3443 return STATUS_SUCCESS; 3444 } 3445 3446 NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3447 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3448 return STATUS_PRIVILEGE_NOT_HELD; 3449 3450 if (!Vcb->scrub.thread) 3451 return STATUS_DEVICE_NOT_READY; 3452 3453 Vcb->scrub.paused = false; 3454 Vcb->scrub.stopping = true; 3455 KeSetEvent(&Vcb->scrub.event, 0, false); 3456 3457 return STATUS_SUCCESS; 3458 } 3459