/* Copyright (c) Mark Harmstone 2017
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */

#include "btrfs_drv.h"

#define SCRUB_UNIT 0x100000 // 1 MB

struct _scrub_context;

// Per-stripe state for one scrub read: the IRP issued to the device, the
// range read, the data buffer, and (after verification) whether a checksum
// error was found plus the actual checksums computed from the bad data.
typedef struct {
    struct _scrub_context* context;  // back-pointer to owning scrub_context
    PIRP Irp;
    UINT64 start;
    UINT32 length;
    IO_STATUS_BLOCK iosb;
    UINT8* buf;
    BOOL csum_error;
    UINT32* bad_csums;  // one CRC32C per sector (data) or per node (metadata)
} scrub_context_stripe;

// Groups the stripes of one scrub I/O; Event is signalled by the completion
// routine when stripes_left reaches zero.
typedef struct _scrub_context {
    KEVENT Event;
    scrub_context_stripe* stripes;
    LONG stripes_left;  // decremented via InterlockedDecrement in the completion routine
} scrub_context;

// One component of a reconstructed file path; name points into tree item
// data, so it is only valid while the traverse_ptr it came from is.
typedef struct {
    ANSI_STRING name;
    BOOL orig_subvol;  // TRUE if this component belongs to the subvol the error was reported against
    LIST_ENTRY list_entry;
} path_part;

// Logs a data-checksum error against a specific file: walks INODE_REF /
// INODE_EXTREF items (and ROOT_BACKREF items across subvolume boundaries)
// upwards from the inode to reconstruct its path, prints it, and appends a
// scrub_error record to Vcb->scrub.errors.
// addr/devid identify the bad sector; subvol/inode identify the file;
// offset is the file offset of the bad data.
static void log_file_checksum_error(device_extension* Vcb, UINT64 addr, UINT64 devid, UINT64 subvol, UINT64 inode, UINT64 offset) {
    LIST_ENTRY *le, parts;
    root* r = NULL;
    KEY searchkey;
    traverse_ptr tp;
    UINT64 dir;
    BOOL orig_subvol = TRUE, not_in_tree = FALSE;
    ANSI_STRING fn;
    scrub_error* err;
    NTSTATUS Status;
    ULONG utf16len;

    // find the root structure for the subvolume the error was reported in
    le = Vcb->roots.Flink;
    while (le != &Vcb->roots) {
        root* r2 = CONTAINING_RECORD(le, root, list_entry);

        if (r2->id == subvol) {
            r = r2;
            break;
        }

        le = le->Flink;
    }

    if (!r) {
        ERR("could not find subvol %llx\n", subvol);
        return;
    }

    InitializeListHead(&parts);

    dir = inode;

    // walk upwards from the inode, collecting one path_part per component,
    // until we reach the root of the mounted subvolume (or discover the
    // subvol isn't reachable from it, in which case not_in_tree is set)
    while (TRUE) {
        if (dir == r->root_item.objid) {
            // at the top of subvol r; if it's the mounted subvol we're done
            if (r == Vcb->root_fileref->fcb->subvol)
                break;

            // otherwise hop to the parent subvolume via the ROOT_BACKREF item
            searchkey.obj_id = r->id;
            searchkey.obj_type = TYPE_ROOT_BACKREF;
            searchkey.offset = 0xffffffffffffffff;

            Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
            if (!NT_SUCCESS(Status)) {
                ERR("find_item returned %08x\n", Status);
                goto end;
            }

            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
                ROOT_REF* rr = (ROOT_REF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(ROOT_REF)) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
                    goto end;
                }

                if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                // NOTE(review): name.Buffer aliases the tree item's data rather
                // than copying it — assumes tp remains valid until the path is
                // assembled below; confirm against find_item's lifetime rules.
                pp->name.Buffer = rr->name;
                pp->name.Length = pp->name.MaximumLength = rr->n;
                pp->orig_subvol = FALSE;

                InsertTailList(&parts, &pp->list_entry);

                r = NULL;

                le = Vcb->roots.Flink;
                while (le != &Vcb->roots) {
                    root* r2 = CONTAINING_RECORD(le, root, list_entry);

                    if (r2->id == tp.item->key.offset) {
                        r = r2;
                        break;
                    }

                    le = le->Flink;
                }

                if (!r) {
                    ERR("could not find subvol %llx\n", tp.item->key.offset);
                    goto end;
                }

                dir = rr->dir;
                orig_subvol = FALSE;
            } else {
                not_in_tree = TRUE;
                break;
            }
        } else {
            // look up the parent directory of dir within subvol r; searching
            // for EXTREF with maximal offset lands us on either the INODE_REF
            // or the INODE_EXTREF for this inode
            searchkey.obj_id = dir;
            searchkey.obj_type = TYPE_INODE_EXTREF;
            searchkey.offset = 0xffffffffffffffff;

            Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
            if (!NT_SUCCESS(Status)) {
                ERR("find_item returned %08x\n", Status);
                goto end;
            }

            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
                INODE_REF* ir = (INODE_REF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(INODE_REF)) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
                    goto end;
                }

                if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                pp->name.Buffer = ir->name;
                pp->name.Length = pp->name.MaximumLength = ir->n;
                pp->orig_subvol = orig_subvol;

                InsertTailList(&parts, &pp->list_entry);

                // for INODE_REF, key.offset is the parent directory's inode;
                // dir == offset means we've hit the subvolume root
                if (dir == tp.item->key.offset)
                    break;

                dir = tp.item->key.offset;
            } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
                INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(INODE_EXTREF)) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, sizeof(INODE_EXTREF));
                    goto end;
                }

                if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                pp->name.Buffer = ier->name;
                pp->name.Length = pp->name.MaximumLength = ier->n;
                pp->orig_subvol = orig_subvol;

                InsertTailList(&parts, &pp->list_entry);

                // for INODE_EXTREF the parent inode is stored in the payload
                if (dir == ier->dir)
                    break;

                dir = ier->dir;
            } else {
                ERR("could not find INODE_REF for inode %llx in subvol %llx\n", dir, r->id);
                goto end;
            }
        }
    }

    fn.MaximumLength = 0;

    // if the subvol isn't reachable from the mounted root, drop the
    // components that belong to ancestor subvolumes — they have no
    // meaningful path here
    if (not_in_tree) {
        le = parts.Blink;
        while (le != &parts) {
            path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);
            LIST_ENTRY* le2 = le->Blink;

            if (pp->orig_subvol)
                break;

            RemoveTailList(&parts);
            ExFreePool(pp);

            le = le2;
        }
    }

    // total length: each component contributes its name plus one backslash
    // NOTE(review): fn.MaximumLength is USHORT — presumably deep paths could
    // overflow this; verify against maximum btrfs path depth.
    le = parts.Flink;
    while (le != &parts) {
        path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);

        fn.MaximumLength += pp->name.Length + 1;

        le = le->Flink;
    }

    fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
    if (!fn.Buffer) {
        ERR("out of memory\n");
        goto end;
    }

    fn.Length = 0;

    // parts was collected leaf-to-root, so walk it backwards to emit the
    // path root-to-leaf
    le = parts.Blink;
    while (le != &parts) {
        path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);

        fn.Buffer[fn.Length] = '\\';
        fn.Length++;

        RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
        fn.Length += pp->name.Length;

        le = le->Blink;
    }

    if (not_in_tree)
        ERR("subvol %llx, %.*s, offset %llx\n", subvol, fn.Length, fn.Buffer, offset);
    else
        ERR("%.*s, offset %llx\n", fn.Length, fn.Buffer, offset);

    // first call sizes the UTF-16 conversion, second (below) performs it
    Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, fn.Buffer, fn.Length);
    if (!NT_SUCCESS(Status)) {
        ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
        ExFreePool(fn.Buffer);
        goto end;
    }

    err = ExAllocatePoolWithTag(PagedPool, offsetof(scrub_error, data.filename[0]) + utf16len, ALLOC_TAG);
    if (!err) {
        ERR("out of memory\n");
        ExFreePool(fn.Buffer);
        goto end;
    }

    err->address = addr;
    err->device = devid;
    err->recovered = FALSE;
    err->is_metadata = FALSE;
    err->parity = FALSE;

    err->data.subvol = not_in_tree ? subvol : 0;
    err->data.offset = offset;
    err->data.filename_length = (UINT16)utf16len;

    Status = RtlUTF8ToUnicodeN(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
    if (!NT_SUCCESS(Status)) {
        ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
        ExFreePool(fn.Buffer);
        ExFreePool(err);
        goto end;
    }

    ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);

    Vcb->scrub.num_errors++;
    InsertTailList(&Vcb->scrub.errors, &err->list_entry);

    ExReleaseResourceLite(&Vcb->scrub.stats_lock);

    ExFreePool(fn.Buffer);

end:
    // free whatever path components remain, on both success and error paths
    while (!IsListEmpty(&parts)) {
        path_part* pp = CONTAINING_RECORD(RemoveHeadList(&parts), path_part, list_entry);

        ExFreePool(pp);
    }
}

// Resolves a SHARED_DATA_REF: reads the referencing leaf at treeaddr, then
// logs a file checksum error for every EXTENT_DATA item in it that points
// at the extent containing addr.
static void log_file_checksum_error_shared(device_extension* Vcb, UINT64 treeaddr, UINT64 addr, UINT64 devid, UINT64 extent) {
    tree_header* tree;
    NTSTATUS Status;
    leaf_node* ln;
    ULONG i;

    tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
    if (!tree) {
        ERR("out of memory\n");
        return;
    }

    Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
    if (!NT_SUCCESS(Status)) {
        ERR("read_data returned %08x\n", Status);
        goto end;
    }

    // shared data refs always point at leaves
    if (tree->level != 0) {
        ERR("tree level was %x, expected 0\n", tree->level);
        goto end;
    }

    ln = (leaf_node*)&tree[1];

    for (i = 0; i < tree->num_items; i++) {
        if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
            EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)tree + sizeof(tree_header) + ln[i].offset);
            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

            // only real (non-inline, non-sparse) extents pointing at our extent count
            if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr)
                log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent);
        }
    }

end:
    ExFreePool(tree);
}

// Records a metadata (tree block) checksum error against root/level,
// optionally with the first key of the damaged node, and appends it to
// Vcb->scrub.errors under the stats lock.
static void log_tree_checksum_error(device_extension* Vcb, UINT64 addr, UINT64 devid, UINT64 root, UINT8 level, KEY* firstitem) {
    scrub_error* err;

    err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG);
    if (!err) {
        ERR("out of memory\n");
        return;
    }

    err->address = addr;
    err->device = devid;
    err->recovered = FALSE;
    err->is_metadata = TRUE;
    err->parity = FALSE;

    err->metadata.root = root;
    err->metadata.level = level;

    if (firstitem) {
        ERR("root %llx, level %u, first item (%llx,%x,%llx)\n", root, level, firstitem->obj_id,
            firstitem->obj_type, firstitem->offset);

        err->metadata.firstitem = *firstitem;
    } else {
        ERR("root %llx, level %u\n", root, level);

        RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY));
    }

    ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);

    Vcb->scrub.num_errors++;
    InsertTailList(&Vcb->scrub.errors, &err->list_entry);

    ExReleaseResourceLite(&Vcb->scrub.stats_lock);
}

// Resolves a SHARED_BLOCK_REF: reads the referencing internal node at
// offset and, if it contains a pointer to the damaged block at address,
// logs a tree checksum error with that child's key.
static void log_tree_checksum_error_shared(device_extension* Vcb, UINT64 offset, UINT64 address, UINT64 devid) {
    tree_header* tree;
    NTSTATUS Status;
    internal_node* in;
    ULONG i;

    tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
    if (!tree) {
        ERR("out of memory\n");
        return;
    }

    Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
    if (!NT_SUCCESS(Status)) {
        ERR("read_data returned %08x\n", Status);
        goto end;
    }

    // shared block refs come from internal nodes, never leaves
    if (tree->level == 0) {
        ERR("tree level was 0\n");
        goto end;
    }

in = (internal_node*)&tree[1]; 430 431 for (i = 0; i < tree->num_items; i++) { 432 if (in[i].address == address) { 433 log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key); 434 break; 435 } 436 } 437 438 end: 439 ExFreePool(tree); 440 } 441 442 static void log_unrecoverable_error(device_extension* Vcb, UINT64 address, UINT64 devid) { 443 KEY searchkey; 444 traverse_ptr tp; 445 NTSTATUS Status; 446 EXTENT_ITEM* ei; 447 EXTENT_ITEM2* ei2 = NULL; 448 UINT8* ptr; 449 ULONG len; 450 UINT64 rc; 451 452 // FIXME - still log even if rest of this function fails 453 454 searchkey.obj_id = address; 455 searchkey.obj_type = TYPE_METADATA_ITEM; 456 searchkey.offset = 0xffffffffffffffff; 457 458 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); 459 if (!NT_SUCCESS(Status)) { 460 ERR("find_item returned %08x\n", Status); 461 return; 462 } 463 464 if ((tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM) || 465 tp.item->key.obj_id >= address + Vcb->superblock.sector_size || 466 (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) || 467 (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address) 468 ) 469 return; 470 471 if (tp.item->size < sizeof(EXTENT_ITEM)) { 472 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 473 return; 474 } 475 476 ei = (EXTENT_ITEM*)tp.item->data; 477 ptr = (UINT8*)&ei[1]; 478 len = tp.item->size - sizeof(EXTENT_ITEM); 479 480 if (tp.item->key.obj_id == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { 481 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { 482 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 483 tp.item->size, sizeof(EXTENT_ITEM) + 
sizeof(EXTENT_ITEM2)); 484 return; 485 } 486 487 ei2 = (EXTENT_ITEM2*)ptr; 488 489 ptr += sizeof(EXTENT_ITEM2); 490 len -= sizeof(EXTENT_ITEM2); 491 } 492 493 rc = 0; 494 495 while (len > 0) { 496 UINT8 type = *ptr; 497 498 ptr++; 499 len--; 500 501 if (type == TYPE_TREE_BLOCK_REF) { 502 TREE_BLOCK_REF* tbr; 503 504 if (len < sizeof(TREE_BLOCK_REF)) { 505 ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF), len); 506 break; 507 } 508 509 tbr = (TREE_BLOCK_REF*)ptr; 510 511 log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL); 512 513 rc++; 514 515 ptr += sizeof(TREE_BLOCK_REF); 516 len -= sizeof(TREE_BLOCK_REF); 517 } else if (type == TYPE_EXTENT_DATA_REF) { 518 EXTENT_DATA_REF* edr; 519 520 if (len < sizeof(EXTENT_DATA_REF)) { 521 ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF), len); 522 break; 523 } 524 525 edr = (EXTENT_DATA_REF*)ptr; 526 527 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); 528 529 rc += edr->count; 530 531 ptr += sizeof(EXTENT_DATA_REF); 532 len -= sizeof(EXTENT_DATA_REF); 533 } else if (type == TYPE_SHARED_BLOCK_REF) { 534 SHARED_BLOCK_REF* sbr; 535 536 if (len < sizeof(SHARED_BLOCK_REF)) { 537 ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF), len); 538 break; 539 } 540 541 sbr = (SHARED_BLOCK_REF*)ptr; 542 543 log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid); 544 545 rc++; 546 547 ptr += sizeof(SHARED_BLOCK_REF); 548 len -= sizeof(SHARED_BLOCK_REF); 549 } else if (type == TYPE_SHARED_DATA_REF) { 550 SHARED_DATA_REF* sdr; 551 552 if (len < sizeof(SHARED_DATA_REF)) { 553 ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF), len); 554 break; 555 } 556 557 sdr = (SHARED_DATA_REF*)ptr; 558 559 log_file_checksum_error_shared(Vcb, 
sdr->offset, address, devid, tp.item->key.obj_id); 560 561 rc += sdr->count; 562 563 ptr += sizeof(SHARED_DATA_REF); 564 len -= sizeof(SHARED_DATA_REF); 565 } else { 566 ERR("unknown extent type %x\n", type); 567 break; 568 } 569 } 570 571 if (rc < ei->refcount) { 572 do { 573 traverse_ptr next_tp; 574 575 if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL)) 576 tp = next_tp; 577 else 578 break; 579 580 if (tp.item->key.obj_id == address) { 581 if (tp.item->key.obj_type == TYPE_TREE_BLOCK_REF) 582 log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL); 583 else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) { 584 EXTENT_DATA_REF* edr; 585 586 if (tp.item->size < sizeof(EXTENT_DATA_REF)) { 587 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 588 tp.item->size, sizeof(EXTENT_DATA_REF)); 589 break; 590 } 591 592 edr = (EXTENT_DATA_REF*)tp.item->data; 593 594 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); 595 } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF) 596 log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid); 597 else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF) 598 log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id); 599 } else 600 break; 601 } while (TRUE); 602 } 603 } 604 605 static void log_error(device_extension* Vcb, UINT64 addr, UINT64 devid, BOOL metadata, BOOL recoverable, BOOL parity) { 606 if (recoverable) { 607 scrub_error* err; 608 609 if (parity) { 610 ERR("recovering from parity error at %llx on device %llx\n", addr, devid); 611 } else { 612 if (metadata) 613 ERR("recovering from metadata checksum error at %llx on device %llx\n", addr, devid); 614 else 615 ERR("recovering from data checksum error at %llx on device %llx\n", addr, devid); 616 } 617 618 
err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); 619 if (!err) { 620 ERR("out of memory\n"); 621 return; 622 } 623 624 err->address = addr; 625 err->device = devid; 626 err->recovered = TRUE; 627 err->is_metadata = metadata; 628 err->parity = parity; 629 630 if (metadata) 631 RtlZeroMemory(&err->metadata, sizeof(err->metadata)); 632 else 633 RtlZeroMemory(&err->data, sizeof(err->data)); 634 635 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); 636 637 Vcb->scrub.num_errors++; 638 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 639 640 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 641 } else { 642 if (metadata) 643 ERR("unrecoverable metadata checksum error at %llx\n", addr); 644 else 645 ERR("unrecoverable data checksum error at %llx\n", addr); 646 647 log_unrecoverable_error(Vcb, addr, devid); 648 } 649 } 650 651 _Function_class_(IO_COMPLETION_ROUTINE) 652 #ifdef __REACTOS__ 653 static NTSTATUS NTAPI scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 654 #else 655 static NTSTATUS scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 656 #endif 657 scrub_context_stripe* stripe = conptr; 658 scrub_context* context = (scrub_context*)stripe->context; 659 ULONG left = InterlockedDecrement(&context->stripes_left); 660 661 UNUSED(DeviceObject); 662 663 stripe->iosb = Irp->IoStatus; 664 665 if (left == 0) 666 KeSetEvent(&context->Event, 0, FALSE); 667 668 return STATUS_MORE_PROCESSING_REQUIRED; 669 } 670 671 static NTSTATUS scrub_extent_dup(device_extension* Vcb, chunk* c, UINT64 offset, UINT32* csum, scrub_context* context) { 672 NTSTATUS Status; 673 BOOL csum_error = FALSE; 674 ULONG i; 675 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 676 UINT16 present_devices = 0; 677 678 if (csum) { 679 ULONG good_stripe = 0xffffffff; 680 681 for (i = 0; i < c->chunk_item->num_stripes; i++) { 682 if (c->devices[i]->devobj) { 683 present_devices++; 684 685 // if first stripe 
is okay, we only need to check that the others are identical to it 686 if (good_stripe != 0xffffffff) { 687 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 688 context->stripes[good_stripe].length) != context->stripes[i].length) { 689 context->stripes[i].csum_error = TRUE; 690 csum_error = TRUE; 691 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 692 } 693 } else { 694 Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum); 695 if (Status == STATUS_CRC_ERROR) { 696 context->stripes[i].csum_error = TRUE; 697 csum_error = TRUE; 698 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 699 } else if (!NT_SUCCESS(Status)) { 700 ERR("check_csum returned %08x\n", Status); 701 return Status; 702 } else 703 good_stripe = i; 704 } 705 } 706 } 707 } else { 708 ULONG good_stripe = 0xffffffff; 709 710 for (i = 0; i < c->chunk_item->num_stripes; i++) { 711 ULONG j; 712 713 if (c->devices[i]->devobj) { 714 // if first stripe is okay, we only need to check that the others are identical to it 715 if (good_stripe != 0xffffffff) { 716 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 717 context->stripes[good_stripe].length) != context->stripes[i].length) { 718 context->stripes[i].csum_error = TRUE; 719 csum_error = TRUE; 720 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 721 } 722 } else { 723 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 724 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 725 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 726 727 if (crc32 != *((UINT32*)th->csum) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) { 728 context->stripes[i].csum_error = TRUE; 729 csum_error = TRUE; 730 log_device_error(Vcb, c->devices[i], 
BTRFS_DEV_STAT_CORRUPTION_ERRORS); 731 } 732 } 733 734 if (!context->stripes[i].csum_error) 735 good_stripe = i; 736 } 737 } 738 } 739 } 740 741 if (!csum_error) 742 return STATUS_SUCCESS; 743 744 // handle checksum error 745 746 for (i = 0; i < c->chunk_item->num_stripes; i++) { 747 if (context->stripes[i].csum_error) { 748 if (csum) { 749 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG); 750 if (!context->stripes[i].bad_csums) { 751 ERR("out of memory\n"); 752 return STATUS_INSUFFICIENT_RESOURCES; 753 } 754 755 Status = calc_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums); 756 if (!NT_SUCCESS(Status)) { 757 ERR("calc_csum returned %08x\n", Status); 758 return Status; 759 } 760 } else { 761 ULONG j; 762 763 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG); 764 if (!context->stripes[i].bad_csums) { 765 ERR("out of memory\n"); 766 return STATUS_INSUFFICIENT_RESOURCES; 767 } 768 769 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 770 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 771 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 772 773 context->stripes[i].bad_csums[j] = crc32; 774 } 775 } 776 } 777 } 778 779 if (present_devices > 1) { 780 ULONG good_stripe = 0xffffffff; 781 782 for (i = 0; i < c->chunk_item->num_stripes; i++) { 783 if (c->devices[i]->devobj && !context->stripes[i].csum_error) { 784 good_stripe = i; 785 break; 786 } 787 } 788 789 if (good_stripe != 0xffffffff) { 790 // log 791 792 for (i = 0; i < c->chunk_item->num_stripes; i++) { 793 if (context->stripes[i].csum_error) { 794 ULONG j; 795 796 if (csum) { 797 for (j = 0; j < 
context->stripes[i].length / Vcb->superblock.sector_size; j++) { 798 if (context->stripes[i].bad_csums[j] != csum[j]) { 799 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 800 801 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE); 802 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 803 } 804 } 805 } else { 806 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 807 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 808 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 809 810 if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) { 811 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE); 812 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 813 } 814 } 815 } 816 } 817 } 818 819 // write good data over bad 820 821 for (i = 0; i < c->chunk_item->num_stripes; i++) { 822 if (context->stripes[i].csum_error && !c->devices[i]->readonly) { 823 Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset, 824 context->stripes[good_stripe].buf, context->stripes[i].length); 825 826 if (!NT_SUCCESS(Status)) { 827 ERR("write_data_phys returned %08x\n", Status); 828 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); 829 return Status; 830 } 831 } 832 } 833 834 return STATUS_SUCCESS; 835 } 836 837 // if csum errors on all stripes, check sector by sector 838 839 for (i = 0; i < c->chunk_item->num_stripes; i++) { 840 ULONG j; 841 842 if (c->devices[i]->devobj) { 843 if (csum) { 844 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { 845 if (context->stripes[i].bad_csums[j] != csum[j]) { 846 ULONG k; 847 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 848 BOOL recovered = FALSE; 849 850 for (k = 0; k < c->chunk_item->num_stripes; k++) { 851 if (i != k && c->devices[k]->devobj 
&& context->stripes[k].bad_csums[j] == csum[j]) { 852 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE); 853 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 854 855 RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size), 856 context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 857 858 recovered = TRUE; 859 break; 860 } 861 } 862 863 if (!recovered) { 864 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE); 865 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 866 } 867 } 868 } 869 } else { 870 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 871 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 872 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 873 874 if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) { 875 ULONG k; 876 BOOL recovered = FALSE; 877 878 for (k = 0; k < c->chunk_item->num_stripes; k++) { 879 if (i != k && c->devices[k]->devobj) { 880 tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size]; 881 882 if (context->stripes[k].bad_csums[j] == *((UINT32*)th2->csum) && th2->address == addr) { 883 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE); 884 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 885 886 RtlCopyMemory(th, th2, Vcb->superblock.node_size); 887 888 recovered = TRUE; 889 break; 890 } 891 } 892 } 893 894 if (!recovered) { 895 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE); 896 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 897 } 898 } 899 } 900 } 901 } 902 } 903 904 // write good data over bad 905 906 for (i = 0; i < c->chunk_item->num_stripes; i++) { 907 if (c->devices[i]->devobj && !c->devices[i]->readonly) { 908 Status = 
write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset, 909 context->stripes[i].buf, context->stripes[i].length); 910 if (!NT_SUCCESS(Status)) { 911 ERR("write_data_phys returned %08x\n", Status); 912 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 913 return Status; 914 } 915 } 916 } 917 918 return STATUS_SUCCESS; 919 } 920 921 for (i = 0; i < c->chunk_item->num_stripes; i++) { 922 if (c->devices[i]->devobj) { 923 ULONG j; 924 925 if (csum) { 926 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { 927 if (context->stripes[i].bad_csums[j] != csum[j]) { 928 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 929 930 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE); 931 } 932 } 933 } else { 934 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 935 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 936 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 937 938 if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) 939 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE); 940 } 941 } 942 } 943 } 944 945 return STATUS_SUCCESS; 946 } 947 948 static NTSTATUS scrub_extent_raid0(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, UINT16 startoffstripe, UINT32* csum, scrub_context* context) { 949 ULONG j; 950 UINT16 stripe; 951 UINT32 pos, *stripeoff; 952 953 pos = 0; 954 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes, ALLOC_TAG); 955 if (!stripeoff) { 956 ERR("out of memory\n"); 957 return STATUS_INSUFFICIENT_RESOURCES; 958 } 959 960 RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes); 961 962 stripe = startoffstripe; 963 while (pos < length) { 964 UINT32 readlen; 965 966 if (pos == 0) 967 readlen = (UINT32)min(context->stripes[stripe].length, 
c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length)); 968 else 969 readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length); 970 971 if (csum) { 972 for (j = 0; j < readlen; j += Vcb->superblock.sector_size) { 973 UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + stripeoff[stripe], Vcb->superblock.sector_size); 974 975 if (crc32 != csum[pos / Vcb->superblock.sector_size]) { 976 UINT64 addr = offset + pos; 977 978 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE); 979 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 980 } 981 982 pos += Vcb->superblock.sector_size; 983 stripeoff[stripe] += Vcb->superblock.sector_size; 984 } 985 } else { 986 for (j = 0; j < readlen; j += Vcb->superblock.node_size) { 987 tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]); 988 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 989 UINT64 addr = offset + pos; 990 991 if (crc32 != *((UINT32*)th->csum) || th->address != addr) { 992 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE); 993 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 994 } 995 996 pos += Vcb->superblock.node_size; 997 stripeoff[stripe] += Vcb->superblock.node_size; 998 } 999 } 1000 1001 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1002 } 1003 1004 ExFreePool(stripeoff); 1005 1006 return STATUS_SUCCESS; 1007 } 1008 1009 static NTSTATUS scrub_extent_raid10(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, UINT16 startoffstripe, UINT32* csum, scrub_context* context) { 1010 ULONG j; 1011 UINT16 stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1); 1012 UINT32 pos, *stripeoff; 1013 BOOL csum_error = FALSE; 1014 NTSTATUS Status; 1015 1016 pos = 0; 1017 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * 
c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
    if (!stripeoff) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes);

    // First pass: walk the logical extent stripe by stripe, verifying each mirror set.
    // For data (csum != NULL) we check sector checksums; for metadata we check tree headers.
    stripe = startoffstripe;
    while (pos < length) {
        UINT32 readlen;

        if (pos == 0)
            readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
                                  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
        else
            readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);

        if (csum) {
            ULONG good_stripe = 0xffffffff;
            UINT16 k;

            for (k = 0; k < sub_stripes; k++) {
                if (c->devices[(stripe * sub_stripes) + k]->devobj) {
                    // if first stripe is okay, we only need to check that the others are identical to it
                    if (good_stripe != 0xffffffff) {
                        if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
                                             context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
                                             readlen) != readlen) {
                            context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
                            csum_error = TRUE;
                            log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        // verify each sector of this mirror against the stored checksums
                        for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
                            UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, Vcb->superblock.sector_size);

                            if (crc32 != csum[(pos + j) / Vcb->superblock.sector_size]) {
                                csum_error = TRUE;
                                context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
                                log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                                break;
                            }
                        }

                        if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
                            good_stripe = k;
                    }
                }
            }

            pos += readlen;
            stripeoff[stripe] += readlen;
        } else {
            ULONG good_stripe = 0xffffffff;
            UINT16 k;

            for (k = 0; k < sub_stripes; k++) {
                if (c->devices[(stripe * sub_stripes) + k]->devobj) {
                    // if first stripe is okay, we only need to check that the others are identical to it
                    if (good_stripe != 0xffffffff) {
                        if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
                                             context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
                                             readlen) != readlen) {
                            context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
                            csum_error = TRUE;
                            log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        // metadata: verify each tree block's header checksum and recorded address
                        for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
                            tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
                            UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
                            UINT64 addr = offset + pos + j;

                            if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
                                csum_error = TRUE;
                                context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
                                log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                                break;
                            }
                        }

                        if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
                            good_stripe = k;
                    }
                }
            }

            pos += readlen;
            stripeoff[stripe] += readlen;
        }

        stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
    }

    if (!csum_error) {
        Status = STATUS_SUCCESS;
        goto end;
    }

    // Second pass: for each mirror set containing at least one bad stripe, try to recover.
    for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
        ULONG goodstripe = 0xffffffff;
        UINT16 k;
        BOOL hasbadstripe = FALSE;

        if (context->stripes[j].length == 0)
            continue;

        for (k = 0; k < sub_stripes; k++) {
            if (c->devices[j + k]->devobj) {
                if (!context->stripes[j + k].csum_error)
                    goodstripe = k;
                else
                    hasbadstripe = TRUE;
            }
        }

        if (hasbadstripe) {
            if (goodstripe != 0xffffffff) {
                // at least one mirror is clean: log each divergent sector/node of the bad mirrors
                for (k = 0; k < sub_stripes; k++) {
                    if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
                        UINT32 so = 0;
                        BOOL recovered = FALSE;

                        pos = 0;

                        stripe = startoffstripe;
                        while (pos < length) {
                            UINT32 readlen;

                            if (pos == 0)
                                readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
                                                      c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
                            else
                                readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);

                            if (stripe == j / sub_stripes) {
                                if (csum) {
                                    ULONG l;

                                    for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
                                        if (RtlCompareMemory(context->stripes[j + k].buf + so,
                                                             context->stripes[j + goodstripe].buf + so,
                                                             Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
                                            UINT64 addr = offset + pos;

                                            log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE);

                                            recovered = TRUE;
                                        }

                                        pos += Vcb->superblock.sector_size;
                                        so += Vcb->superblock.sector_size;
                                    }
                                } else {
                                    ULONG l;

                                    for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
                                        if (RtlCompareMemory(context->stripes[j + k].buf + so,
                                                             context->stripes[j + goodstripe].buf + so,
                                                             Vcb->superblock.node_size) != Vcb->superblock.node_size) {
                                            UINT64 addr = offset + pos;

                                            log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);

                                            recovered = TRUE;
                                        }

                                        pos += Vcb->superblock.node_size;
                                        so += Vcb->superblock.node_size;
                                    }
                                }
                            } else
                                pos += readlen;

                            stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
                        }

                        if (recovered) {
                            // write good data over bad

                            if (!c->devices[j + k]->readonly) {
                                CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

                                Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
                                                         context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length);

                                if (!NT_SUCCESS(Status)) {
                                    ERR("write_data_phys returned %08x\n", Status);
                                    log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS);
                                    goto end;
                                }
                            }
                        }
                    }
                }
            } else {
                // no clean mirror: compute actual checksums for every mirror so we can
                // arbitrate sector by sector below
                UINT32 so = 0;
                BOOL recovered = FALSE;

                if (csum) {
                    for (k = 0; k < sub_stripes; k++) {
                        if (c->devices[j + k]->devobj) {
                            context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
                            if (!context->stripes[j + k].bad_csums) {
                                ERR("out of memory\n");
                                Status = STATUS_INSUFFICIENT_RESOURCES;
                                goto end;
                            }

                            Status = calc_csum(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums);
                            if (!NT_SUCCESS(Status)) {
                                ERR("calc_csum returned %08x\n", Status);
                                goto end;
                            }
                        }
                    }
                } else {
                    for (k = 0; k < sub_stripes; k++) {
                        if (c->devices[j + k]->devobj) {
                            ULONG l;

                            context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG);
                            if (!context->stripes[j + k].bad_csums) {
                                ERR("out of memory\n");
                                Status = STATUS_INSUFFICIENT_RESOURCES;
                                goto end;
                            }

                            for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) {
                                tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size];
                                UINT32 crc32 =
~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1249 1250 context->stripes[j + k].bad_csums[l] = crc32; 1251 } 1252 } 1253 } 1254 } 1255 1256 pos = 0; 1257 1258 stripe = startoffstripe; 1259 while (pos < length) { 1260 UINT32 readlen; 1261 1262 if (pos == 0) 1263 readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length, 1264 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1265 else 1266 readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length); 1267 1268 if (stripe == j / sub_stripes) { 1269 ULONG l; 1270 1271 if (csum) { 1272 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { 1273 UINT32 crc32 = csum[pos / Vcb->superblock.sector_size]; 1274 BOOL has_error = FALSE; 1275 1276 goodstripe = 0xffffffff; 1277 for (k = 0; k < sub_stripes; k++) { 1278 if (c->devices[j + k]->devobj) { 1279 if (context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) 1280 has_error = TRUE; 1281 else 1282 goodstripe = k; 1283 } 1284 } 1285 1286 if (has_error) { 1287 if (goodstripe != 0xffffffff) { 1288 for (k = 0; k < sub_stripes; k++) { 1289 if (c->devices[j + k]->devobj && context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) { 1290 UINT64 addr = offset + pos; 1291 1292 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE); 1293 1294 recovered = TRUE; 1295 1296 RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so, 1297 Vcb->superblock.sector_size); 1298 } 1299 } 1300 } else { 1301 UINT64 addr = offset + pos; 1302 1303 for (k = 0; k < sub_stripes; k++) { 1304 if (c->devices[j + j]->devobj) { 1305 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, FALSE, FALSE); 1306 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1307 } 1308 } 1309 } 1310 } 1311 1312 pos += Vcb->superblock.sector_size; 1313 so += 
Vcb->superblock.sector_size;
                            }
                        } else {
                            // metadata: arbitrate per tree block by header csum and address
                            for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
                                for (k = 0; k < sub_stripes; k++) {
                                    if (c->devices[j + k]->devobj) {
                                        tree_header* th = (tree_header*)&context->stripes[j + k].buf[so];
                                        UINT64 addr = offset + pos;

                                        if (context->stripes[j + k].bad_csums[so / Vcb->superblock.node_size] != *((UINT32*)th->csum) || th->address != addr) {
                                            ULONG m;

                                            recovered = FALSE;

                                            // look for any other mirror whose copy of this node is valid
                                            for (m = 0; m < sub_stripes; m++) {
                                                if (m != k) {
                                                    tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so];

                                                    if (context->stripes[j + m].bad_csums[so / Vcb->superblock.node_size] == *((UINT32*)th2->csum) && th2->address == addr) {
                                                        log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);

                                                        RtlCopyMemory(th, th2, Vcb->superblock.node_size);

                                                        recovered = TRUE;
                                                        break;
                                                    } else
                                                        log_device_error(Vcb, c->devices[j + m], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                                                }
                                            }

                                            if (!recovered)
                                                log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, FALSE, FALSE);
                                        }
                                    }
                                }

                                pos += Vcb->superblock.node_size;
                                so += Vcb->superblock.node_size;
                            }
                        }
                    } else
                        pos += readlen;

                    stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
                }

                if (recovered) {
                    // write good data over bad

                    for (k = 0; k < sub_stripes; k++) {
                        if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) {
                            CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

                            Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
                                                     context->stripes[j + k].buf, context->stripes[j + k].length);

                            if (!NT_SUCCESS(Status)) {
                                ERR("write_data_phys returned %08x\n", Status);
                                log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS);
                                goto end;
                            }
                        }
                    }
                }
            }
        }
    }

    Status = STATUS_SUCCESS;

end:
    ExFreePool(stripeoff);

    return Status;
}

// Reads one piece of an extent (up to SCRUB_UNIT) from all stripes of the chunk
// and verifies it according to the chunk's replication type. csum is the array of
// expected data checksums, or NULL for metadata.
static NTSTATUS scrub_extent(device_extension* Vcb, chunk* c, ULONG type, UINT64 offset, UINT32 size, UINT32* csum) {
    ULONG i;
    scrub_context context;
    CHUNK_ITEM_STRIPE* cis;
    NTSTATUS Status;
    UINT16 startoffstripe, num_missing, allowed_missing;

    TRACE("(%p, %p, %llx, %llx, %p)\n", Vcb, c, offset, size, csum);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes);

    context.stripes_left = 0;

    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

    if (type == BLOCK_FLAG_RAID0) {
        UINT64 startoff, endoff;
        UINT16 endoffstripe;

        // work out the per-device start offset and length of this logical range
        get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
        get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);

        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (startoffstripe > i)
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
            else if (startoffstripe == i)
                context.stripes[i].start = startoff;
            else
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);

            if (endoffstripe > i)
                context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
            else if (endoffstripe == i)
                context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
            else
context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
        }

        allowed_missing = 0;
    } else if (type == BLOCK_FLAG_RAID10) {
        UINT64 startoff, endoff;
        UINT16 endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);

        // RAID10 is RAID0 across num_stripes / sub_stripes groups of mirrors
        get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
        get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);

        if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
            ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        startoffstripe *= sub_stripes;
        endoffstripe *= sub_stripes;

        for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
            if (startoffstripe > i)
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
            else if (startoffstripe == i)
                context.stripes[i].start = startoff;
            else
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);

            if (endoffstripe > i)
                context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
            else if (endoffstripe == i)
                context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
            else
                context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);

            // every mirror in the group reads the same range
            for (j = 1; j < sub_stripes; j++) {
                context.stripes[i+j].start = context.stripes[i].start;
                context.stripes[i+j].length = context.stripes[i].length;
            }
        }

        startoffstripe /= sub_stripes;
        allowed_missing = 1;
    } else
        allowed_missing = c->chunk_item->num_stripes - 1;

    num_missing = 0;

    // Build and queue one read IRP per present device with data in range.
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        PIO_STACK_LOCATION IrpSp;

        context.stripes[i].context = (struct _scrub_context*)&context;

        if (type == BLOCK_FLAG_DUPLICATE) {
            context.stripes[i].start = offset - c->offset;
            context.stripes[i].length = size;
        } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
            ERR("unexpected chunk type %x\n", type);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        if (!c->devices[i]->devobj) {
            num_missing++;

            if (num_missing > allowed_missing) {
                ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
                Status = STATUS_INTERNAL_ERROR;
                goto end;
            }
        } else if (context.stripes[i].length > 0) {
            context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);

            if (!context.stripes[i].buf) {
                ERR("out of memory\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }

            context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);

            if (!context.stripes[i].Irp) {
                ERR("IoAllocateIrp failed\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }

            IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
            IrpSp->MajorFunction = IRP_MJ_READ;

            // set up the transfer buffer according to the target device's I/O model
            if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
                context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
                if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
                    ERR("out of memory\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;

                context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
            } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
                context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, FALSE, FALSE, NULL);
                if (!context.stripes[i].Irp->MdlAddress) {
                    ERR("IoAllocateMdl failed\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                Status = STATUS_SUCCESS;

                _SEH2_TRY {
                    MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
                } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
                    Status = _SEH2_GetExceptionCode();
                } _SEH2_END;

                if (!NT_SUCCESS(Status)) {
                    ERR("MmProbeAndLockPages threw exception %08x\n", Status);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                    context.stripes[i].Irp->MdlAddress = NULL;
                    goto end;
                }
            } else
                context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;

            IrpSp->Parameters.Read.Length = context.stripes[i].length;
            IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;

            context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;

            IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], TRUE, TRUE, TRUE);

            context.stripes_left++;

            Vcb->scrub.data_scrubbed += context.stripes[i].length;
        }
    }

    if (context.stripes_left == 0) {
        ERR("error - not reading any stripes\n");
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    KeInitializeEvent(&context.Event, NotificationEvent, FALSE);

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (c->devices[i]->devobj && context.stripes[i].length > 0)
            IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
    }

    // the completion routine signals Event once every stripe has finished
    KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);

    // return an error if any of the stripes returned an error
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
            Status = context.stripes[i].iosb.Status;
            log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            goto end;
        }
    }

    // dispatch to the verifier matching the chunk's replication type
    if (type == BLOCK_FLAG_DUPLICATE) {
        Status = scrub_extent_dup(Vcb, c, offset, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_dup returned %08x\n", Status);
            goto end;
        }
    } else if (type == BLOCK_FLAG_RAID0) {
        Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_raid0 returned %08x\n", Status);
            goto end;
        }
    } else if (type == BLOCK_FLAG_RAID10) {
        Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_raid10 returned %08x\n", Status);
            goto end;
        }
    }

end:
    // free IRPs, MDLs and buffers for every stripe, on both success and failure paths
    if (context.stripes) {
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (context.stripes[i].Irp) {
                if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
                    MmUnlockPages(context.stripes[i].Irp->MdlAddress);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                }
                IoFreeIrp(context.stripes[i].Irp);
            }

            if (context.stripes[i].buf)
                ExFreePool(context.stripes[i].buf);

            if (context.stripes[i].bad_csums)
                ExFreePool(context.stripes[i].bad_csums);
        }

        ExFreePool(context.stripes);
    }

    return Status;
}

// Scrubs the allocated parts of a data extent, SCRUB_UNIT bytes at a time.
// bmp marks sectors to skip (set bits); clear runs are scrubbed.
static NTSTATUS scrub_data_extent(device_extension* Vcb, chunk* c, UINT64 offset, ULONG type, UINT32* csum, RTL_BITMAP* bmp) {
    NTSTATUS Status;
    ULONG runlength, index;

    runlength = RtlFindFirstRunClear(bmp, &index);

    while (runlength != 0) {
        do {
            ULONG rl;

            // cap each read at SCRUB_UNIT bytes
            if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT)
                rl =
SCRUB_UNIT / Vcb->superblock.sector_size; 1650 else 1651 rl = runlength; 1652 1653 Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index]); 1654 if (!NT_SUCCESS(Status)) { 1655 ERR("scrub_data_extent_dup returned %08x\n", Status); 1656 return Status; 1657 } 1658 1659 runlength -= rl; 1660 index += rl; 1661 } while (runlength > 0); 1662 1663 runlength = RtlFindNextForwardRunClear(bmp, index, &index); 1664 } 1665 1666 return STATUS_SUCCESS; 1667 } 1668 1669 typedef struct { 1670 UINT8* buf; 1671 PIRP Irp; 1672 void* context; 1673 IO_STATUS_BLOCK iosb; 1674 UINT64 offset; 1675 BOOL rewrite, missing; 1676 RTL_BITMAP error; 1677 ULONG* errorarr; 1678 } scrub_context_raid56_stripe; 1679 1680 typedef struct { 1681 scrub_context_raid56_stripe* stripes; 1682 LONG stripes_left; 1683 KEVENT Event; 1684 RTL_BITMAP alloc; 1685 RTL_BITMAP has_csum; 1686 RTL_BITMAP is_tree; 1687 UINT32* csum; 1688 UINT8* parity_scratch; 1689 UINT8* parity_scratch2; 1690 } scrub_context_raid56; 1691 1692 _Function_class_(IO_COMPLETION_ROUTINE) 1693 #ifdef __REACTOS__ 1694 static NTSTATUS NTAPI scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 1695 #else 1696 static NTSTATUS scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 1697 #endif 1698 scrub_context_raid56_stripe* stripe = conptr; 1699 scrub_context_raid56* context = (scrub_context_raid56*)stripe->context; 1700 LONG left = InterlockedDecrement(&context->stripes_left); 1701 1702 UNUSED(DeviceObject); 1703 1704 stripe->iosb = Irp->IoStatus; 1705 1706 if (left == 0) 1707 KeSetEvent(&context->Event, 0, FALSE); 1708 1709 return STATUS_MORE_PROCESSING_REQUIRED; 1710 } 1711 1712 static void scrub_raid5_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start, 1713 UINT64 num, UINT16 missing_devices) { 1714 ULONG sectors_per_stripe = 
(ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
    UINT16 stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
    UINT64 stripeoff;

    stripe = (parity + 1) % c->chunk_item->num_stripes;
    off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
    stripeoff = num * sectors_per_stripe;

    // seed the parity scratch buffer with the parity stripe's data; data stripes
    // are XORed in below, so a fully-consistent row leaves it all-zero
    if (missing_devices == 0)
        RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

    // First pass: checksum every allocated sector/node of each data stripe.
    while (stripe != parity) {
        RtlClearAllBits(&context->stripes[stripe].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                if (RtlCheckBit(&context->is_tree, off)) {
                    tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
                    UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                    if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
                        RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices > 0)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
                    }

                    // a tree block spans several sectors; skip past all of them
                    off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;

                    continue;
                } else if (RtlCheckBit(&context->has_csum, off)) {
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                    if (crc32 != context->csum[off]) {
                        RtlSetBit(&context->stripes[stripe].error, i);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices > 0) {
                            UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
                        }
                    }
                }
            }

            off++;
            stripeoff++;
        }

        if (missing_devices == 0)
            do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
        stripeoff = num * sectors_per_stripe;
    }

    // check parity

    if (missing_devices == 0) {
        RtlClearAllBits(&context->stripes[parity].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            ULONG o, j;

            // any non-zero byte in the scratch buffer means data and parity disagree
            o = i * Vcb->superblock.sector_size;
            for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
                if (context->parity_scratch[o] != 0) {
                    RtlSetBit(&context->stripes[parity].error, i);
                    break;
                }
                o++;
            }
        }
    }

    // log and fix errors

    if (missing_devices > 0)
        return;

    for (i = 0; i < sectors_per_stripe; i++) {
        ULONG num_errors = 0, bad_off;
        UINT64 bad_stripe;
        BOOL alloc = FALSE;

        // count how many data stripes have an error in this sector position
        stripe = (parity + 1) % c->chunk_item->num_stripes;
        off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;

        while (stripe != parity) {
            if (RtlCheckBit(&context->alloc, off)) {
                alloc = TRUE;

                if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                    bad_stripe = stripe;
                    bad_off = off;
                    num_errors++;
                }
            }

            off += sectors_per_stripe;
            stripe = (stripe + 1) % c->chunk_item->num_stripes;
        }

        if (!alloc)
            continue;

        if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
            continue;

        if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
            UINT64 addr;

            // data checks out, so rebuild the parity sector from the data
            do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                   &context->parity_scratch[i * Vcb->superblock.sector_size],
                   Vcb->superblock.sector_size);

            bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);

            context->stripes[parity].rewrite = TRUE;

            log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, FALSE, TRUE, TRUE);
            log_device_error(Vcb, c->devices[parity], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (num_errors == 1) {
            // exactly one bad data stripe: reconstruct it from parity + the others
            UINT32 crc32;
            UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);

            if (RtlCheckBit(&context->is_tree, bad_off)) {
                tree_header* th;

                do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                       &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       Vcb->superblock.node_size);

                th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                // only accept the reconstruction if it now verifies
                if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe].rewrite = TRUE;

                    RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                       &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       Vcb->superblock.sector_size);

                crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (crc32 == context->csum[bad_off]) {
                    RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
            }
        } else {
            // more than one bad data stripe: RAID5 cannot reconstruct — log each one
            stripe = (parity + 1) % c->chunk_item->num_stripes;
            off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;

            while (stripe != parity) {
                if (RtlCheckBit(&context->alloc, off)) {
                    if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                        UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
                    }
            }

            off += sectors_per_stripe;
            stripe = (stripe + 1) % c->chunk_item->num_stripes;
        }
    }
}

// Scrub one stripe-length's worth of a RAID6 full stripe: verify checksums/tree
// headers on each data stripe, recompute and check both parity stripes (P and Q),
// and reconstruct or log whatever fails. num is the index of the full stripe
// within the current read batch (see scrub_chunk_raid56_stripe_run's call);
// bit_start is the batch's first full-stripe number and stripe_start the run's,
// relating buffer offsets to the alloc/is_tree/has_csum bitmaps.
// NOTE(review): with missing_devices == 2 errors can only be logged, not fixed.
static void scrub_raid6_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start,
                               UINT64 num, UINT16 missing_devices) {
    ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
    UINT16 stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
    UINT16 parity2 = (parity1 + 1) % c->chunk_item->num_stripes; // Q immediately follows P
    UINT64 stripeoff;

    stripe = (parity1 + 2) % c->chunk_item->num_stripes; // first data stripe of this full stripe
    off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
    stripeoff = num * sectors_per_stripe;

    // parity_scratch starts as the P stripe read from disk; every data stripe is
    // XORed into it below, so a correct P leaves it all-zero. parity_scratch2 is
    // built into the expected Q via Galois doubling further down.
    if (c->devices[parity1]->devobj)
        RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

    if (c->devices[parity2]->devobj)
        RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);

    // pass 1: per-sector verification of each data stripe
    while (stripe != parity1) {
        RtlClearAllBits(&context->stripes[stripe].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                if (RtlCheckBit(&context->is_tree, off)) {
                    // metadata: validate the tree block's embedded csum and address
                    tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
                    UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                    if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
                        RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices == 2)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
                    }

                    // a tree block spans several sectors; advance past all of them
                    off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;

                    continue;
                } else if (RtlCheckBit(&context->has_csum, off)) {
                    // data: compare against the checksum-tree value cached in context->csum
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                    if (crc32 != context->csum[off]) {
                        UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        RtlSetBit(&context->stripes[stripe].error, i);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices == 2)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
                    }
                }
            }

            off++;
            stripeoff++;
        }

        // fold this data stripe into the expected-P accumulator
        if (c->devices[parity1]->devobj)
            do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
        stripeoff = num * sectors_per_stripe; // buffer offset restarts per stripe
    }

    RtlClearAllBits(&context->stripes[parity1].error);

    if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
        // check parity 1: parity_scratch should now be zero wherever P is correct

        for (i = 0; i < sectors_per_stripe; i++) {
            ULONG o, j;

            o = i * Vcb->superblock.sector_size;
            for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
                if (context->parity_scratch[o] != 0) {
                    RtlSetBit(&context->stripes[parity1].error, i);
                    break;
                }
                o++;
            }
        }
    }

    RtlClearAllBits(&context->stripes[parity2].error);

    if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
        // check parity 2: recompute Q in parity_scratch2 (walking data stripes in
        // descending order with Galois doubling) and compare with the Q read from disk

        stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

        while (stripe != parity2) {
            galois_double(context->parity_scratch2, (UINT32)c->chunk_item->stripe_length);
            do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);

            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
        }

        for (i = 0; i < sectors_per_stripe; i++) {
            if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                 &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size)
                RtlSetBit(&context->stripes[parity2].error, i);
        }
    }

    if (missing_devices == 2)
        return;

    // log and fix errors

    for (i = 0; i < sectors_per_stripe; i++) {
        ULONG num_errors = 0;
        UINT64 bad_stripe1, bad_stripe2;
        ULONG bad_off1, bad_off2;
        BOOL alloc = FALSE;

        stripe = (parity1 + 2) % c->chunk_item->num_stripes;
        off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;

        // count how many data stripes have this sector bad or unreadable
        while (stripe != parity1) {
            if (RtlCheckBit(&context->alloc, off)) {
                alloc = TRUE;

                if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
                    if (num_errors == 0) {
                        bad_stripe1 = stripe;
                        bad_off1 = off;
                    } else if (num_errors == 1) {
                        bad_stripe2 = stripe;
                        bad_off2 = off;
                    }
                    num_errors++;
                }
            }

            off += sectors_per_stripe;
            stripe = (stripe + 1) % c->chunk_item->num_stripes;
        }

        if (!alloc)
            continue;

        if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
            continue;

        if (num_errors == 0) { // parity error
            UINT64 addr;

            if (RtlCheckBit(&context->stripes[parity1].error, i)) {
                // data is good, so rebuild P by XORing the non-zero residue back in
                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       &context->parity_scratch[i * Vcb->superblock.sector_size],
                       Vcb->superblock.sector_size);

                bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
                addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

                context->stripes[parity1].rewrite = TRUE;

                log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
                log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
            }

            if (RtlCheckBit(&context->stripes[parity2].error, i)) {
                // replace Q with the freshly recomputed value
                RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                              &context->parity_scratch2[i * Vcb->superblock.sector_size],
                              Vcb->superblock.sector_size);

                bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
                addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

                context->stripes[parity2].rewrite = TRUE;

                log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
                log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
            }
        } else if (num_errors == 1) {
            // one bad sector: reconstruct it two ways (via P and via Q) and keep
            // whichever candidate passes its checksum
            UINT32 crc32a, crc32b, len;
            UINT16 stripe_num, bad_stripe_num;
            UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
            UINT8* scratch;

            len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size;

            scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return;
            }

            RtlZeroMemory(scratch, len);

            // P-based candidate: parity_scratch is the XOR of everything, so
            // XORing the bad stripe's (corrupt) data back out leaves the repair
            do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                   &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

            if (c->devices[parity2]->devobj) {
                // Q-based candidate, assembled in scratch
                stripe_num = c->chunk_item->num_stripes - 3;
                while (stripe != parity2) {
                    galois_double(scratch, len);

                    if (stripe != bad_stripe1)
                        do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                    else
                        bad_stripe_num = stripe_num;

                    stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                    stripe_num--;
                }

                do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

                if (bad_stripe_num != 0)
                    galois_divpower(scratch, (UINT8)bad_stripe_num, len);
            }

            if (RtlCheckBit(&context->is_tree, bad_off1)) {
                tree_header *th1 = NULL, *th2 = NULL;

                if (c->devices[parity1]->devobj) {
                    th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                    crc32a = ~calc_crc32c(0xffffffff, (UINT8*)&th1->fs_uuid, Vcb->superblock.node_size - sizeof(th1->csum));
                }

                if (c->devices[parity2]->devobj) {
                    th2 = (tree_header*)scratch;
                    crc32b = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, Vcb->superblock.node_size - sizeof(th2->csum));
                }

                if ((c->devices[parity1]->devobj && crc32a == *((UINT32*)th1->csum) && th1->address == addr) ||
                    (c->devices[parity2]->devobj && crc32b == *((UINT32*)th2->csum) && th2->address == addr)) {
                    if (!c->devices[parity1]->devobj || crc32a != *((UINT32*)th1->csum) || th1->address != addr) {
                        // only the Q-derived copy verified; P itself must be stale
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      scratch, Vcb->superblock.node_size);

                        if (c->devices[parity1]->devobj) {
                            // fix parity 1

                            stripe = (parity1 + 2) % c->chunk_item->num_stripes;

                            RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.node_size);

                            stripe = (stripe + 1) % c->chunk_item->num_stripes;

                            while (stripe != parity1) {
                                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.node_size);

                                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                            }

                            context->stripes[parity1].rewrite = TRUE;

                            log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
                            log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        // P-derived copy verified; use it, and refresh Q if it disagreed
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                        if (!c->devices[parity2]->devobj || crc32b != *((UINT32*)th2->csum) || th2->address != addr) {
                            // fix parity 2
                            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

                            if (c->devices[parity2]->devobj) {
                                RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                              Vcb->superblock.node_size);

                                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

                                while (stripe != parity2) {
                                    galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size);

                                    do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                           Vcb->superblock.node_size);

                                    stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                                }

                                context->stripes[parity2].rewrite = TRUE;

                                log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
                                log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                            }
                        }
                    }

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    // clear the remaining error bits belonging to this repaired tree block
                    RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                // data sector: pick whichever reconstruction matches the stored csum
                if (c->devices[parity1]->devobj)
                    crc32a = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (c->devices[parity2]->devobj)
                    crc32b = ~calc_crc32c(0xffffffff, scratch, Vcb->superblock.sector_size);

                if ((c->devices[parity1]->devobj && crc32a == context->csum[bad_off1]) || (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1])) {
                    if (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1]) {
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      scratch, Vcb->superblock.sector_size);

                        if (c->devices[parity1]->devobj && crc32a != context->csum[bad_off1]) {
                            // fix parity 1

                            stripe = (parity1 + 2) % c->chunk_item->num_stripes;

                            RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.sector_size);

                            stripe = (stripe + 1) % c->chunk_item->num_stripes;

                            while (stripe != parity1) {
                                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.sector_size);

                                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                            }

                            context->stripes[parity1].rewrite = TRUE;

                            log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
                            log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                        if (c->devices[parity2]->devobj && crc32b != context->csum[bad_off1]) {
                            // fix parity 2
                            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

                            RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.sector_size);

                            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

                            while (stripe != parity2) {
                                galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);

                                do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.sector_size);

                                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                            }

                            context->stripes[parity2].rewrite = TRUE;

                            log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
                            log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
            }

            ExFreePool(scratch);
        } else if (num_errors == 2 && missing_devices == 0) {
            // two bad sectors: solve for both simultaneously using P and Q
            // (standard RAID6 two-erasure recovery over GF(2^8))
            UINT16 x, y, k;
            UINT64 addr;
            UINT32 len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
            UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
            UINT32 j;

            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

            // put qxy in parity_scratch
            // put pxy in parity_scratch2

            k = c->chunk_item->num_stripes - 3;
            if (stripe == bad_stripe1 || stripe == bad_stripe2) {
                RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
                RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len);

                if (stripe == bad_stripe1)
                    x = k;
                else
                    y = k;
            } else {
                RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size],
                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size],
                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
            }

            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

            k--;
            do {
                galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len);

                if (stripe != bad_stripe1 && stripe != bad_stripe2) {
                    do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                    do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size],
                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                } else if (stripe == bad_stripe1)
                    x = k; // record the bad stripes' Galois exponents
                else if (stripe == bad_stripe2)
                    y = k;

                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                k--;
            } while (stripe != parity2);

            gyx = gpow2(y > x ? (y-x) : (255-x+y));
            gx = gpow2(255-x);

            denom = gdiv(1, gyx ^ 1);
            a = gmul(gyx, denom);
            b = gmul(gx, denom);

            p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
            q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
            pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size];
            qxy = &context->parity_scratch[i * Vcb->superblock.sector_size];

            // recover first bad sector byte-by-byte into qxy (parity_scratch)
            for (j = 0; j < len; j++) {
                *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

                p++;
                q++;
                pxy++;
                qxy++;
            }

            // second bad sector follows from P: D2 = P ^ pxy ^ D1
            do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len);
            do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

            if (RtlCheckBit(&context->is_tree, bad_off1)) {
                tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (crc32 == context->csum[bad_off1]) {
                    RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
            }

            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size);

            if (RtlCheckBit(&context->is_tree, bad_off2)) {
                tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size];
                UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe2].rewrite = TRUE;

                    RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (crc32 == context->csum[bad_off2]) {
                    RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe2].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, FALSE, FALSE);
            }
        } else {
            // more errors than parity can correct - just log what we found
            stripe = (parity2 + 1) % c->chunk_item->num_stripes;
            off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;

            while (stripe != parity1) {
                if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                    if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                        UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id,
                                  RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
                    }
                }

                off += sectors_per_stripe;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }
        }
    }
}

// Scrub a run of RAID5/6 full stripes [stripe_start, stripe_end] of chunk c:
// build allocation/tree/csum bitmaps for the run from the extent and checksum
// trees, then read up to 1 MB per device at a time, verify/repair each full
// stripe via scrub_raid5_stripe/scrub_raid6_stripe, and write back any stripe
// buffers flagged for rewrite.
static NTSTATUS scrub_chunk_raid56_stripe_run(device_extension* Vcb, chunk* c, UINT64 stripe_start, UINT64 stripe_end) {
    NTSTATUS Status;
    KEY searchkey;
    traverse_ptr tp;
    BOOL b;
    UINT64 run_start, run_end, full_stripe_len, stripe;
    UINT32 max_read, num_sectors;
    ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
    scrub_context_raid56 context;
    UINT16 i;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; // per-device stripe info follows the CHUNK_ITEM

    TRACE("(%p, %p, %llx, %llx)\n", Vcb, c, stripe_start, stripe_end);

    full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
    run_start = c->offset + (stripe_start * full_stripe_len);
    run_end = c->offset + ((stripe_end + 1) * full_stripe_len); // exclusive

    searchkey.obj_id = run_start;
    searchkey.obj_type = TYPE_METADATA_ITEM;
    searchkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
    if (!NT_SUCCESS(Status)) {
        ERR("find_item returned %08x\n", Status);
        return Status;
    }

    num_sectors = (UINT32)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size);
    arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG)); // bitmap backing size, ULONG-aligned

    allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
    if (!allocarr) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
    if (!treearr) {
        ERR("out of memory\n");
        ExFreePool(allocarr);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlInitializeBitMap(&context.alloc, allocarr, num_sectors);
    RtlClearAllBits(&context.alloc);

    RtlInitializeBitMap(&context.is_tree, treearr, num_sectors);
    RtlClearAllBits(&context.is_tree);

    context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
    if (!context.parity_scratch) {
        ERR("out of memory\n");
        ExFreePool(allocarr);
        ExFreePool(treearr);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // data chunks additionally need the checksum bitmap and cached csums
    if (c->chunk_item->type & BLOCK_FLAG_DATA) {
        csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
        if (!csumarr) {
            ERR("out of memory\n");
            ExFreePool(allocarr);
            ExFreePool(treearr);
            ExFreePool(context.parity_scratch);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors);
        RtlClearAllBits(&context.has_csum);

        context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * sizeof(UINT32), ALLOC_TAG);
        if (!context.csum) {
            ERR("out of memory\n");
            ExFreePool(allocarr);
            ExFreePool(treearr);
            ExFreePool(context.parity_scratch);
            ExFreePool(csumarr);
            return STATUS_INSUFFICIENT_RESOURCES;
        }
    }

    // RAID6 needs a second scratch buffer for the Q parity
    if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
        context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
        if (!context.parity_scratch2) {
            ERR("out of memory\n");
            ExFreePool(allocarr);
            ExFreePool(treearr);
            ExFreePool(context.parity_scratch);

            if (c->chunk_item->type & BLOCK_FLAG_DATA) {
                ExFreePool(csumarr);
                ExFreePool(context.csum);
            }

            return STATUS_INSUFFICIENT_RESOURCES;
        }
    }

    // walk the extent tree over [run_start, run_end) to populate the bitmaps
    do {
        traverse_ptr next_tp;

        if (tp.item->key.obj_id >= run_end)
            break;

        if (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM) {
            // METADATA_ITEM keys encode level in offset, so size is the node size
            UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;

            if (tp.item->key.obj_id + size > run_start) {
                UINT64 extent_start = max(run_start, tp.item->key.obj_id);
                UINT64 extent_end = min(tp.item->key.obj_id + size, run_end);
                BOOL extent_is_tree = FALSE;

                RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));

                if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
                    extent_is_tree = TRUE;
                else {
                    EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;

                    if (tp.item->size < sizeof(EXTENT_ITEM)) {
                        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }

                    if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
                        extent_is_tree = TRUE;
                }

                if (extent_is_tree)
                    RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
                else if (c->chunk_item->type & BLOCK_FLAG_DATA) {
                    // data extent: pull its checksums from the checksum tree
                    traverse_ptr tp2;
                    BOOL b2;

                    searchkey.obj_id = EXTENT_CSUM_ID;
                    searchkey.obj_type = TYPE_EXTENT_CSUM;
                    searchkey.offset = extent_start;

                    Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL);
                    if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
                        ERR("find_item returned %08x\n", Status);
                        goto end;
                    }

                    do {
                        traverse_ptr next_tp2;

                        if (tp2.item->key.offset >= extent_end)
                            break;

                        if (tp2.item->key.offset >= extent_start) {
                            UINT64 csum_start = max(extent_start, tp2.item->key.offset);
                            UINT64 csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));

                            RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size));

                            RtlCopyMemory(&context.csum[(csum_start - run_start) / Vcb->superblock.sector_size],
                                          tp2.item->data + ((csum_start - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
                                          (ULONG)((csum_end - csum_start) * sizeof(UINT32) / Vcb->superblock.sector_size));
                        }

                        b2 = find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL);

                        if (b2)
                            tp2 = next_tp2;
                    } while (b2);
                }
            }
        }

        b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);

        if (b)
            tp = next_tp;
    } while (b);

    context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    max_read = (UINT32)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG);
        if (!context.stripes[i].buf) {
            UINT64 j;

            ERR("out of memory\n");

            for (j = 0; j < i; j++) {
                ExFreePool(context.stripes[j].buf);
            }
            ExFreePool(context.stripes);

            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto end;
        }

        context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG);
        if (!context.stripes[i].errorarr) {
            UINT64 j;

            ERR("out of memory\n");

            ExFreePool(context.stripes[i].buf);

            // NOTE(review): this unwinds the buf allocations but not the errorarr
            // allocations of earlier iterations - TODO confirm whether that leaks
            for (j = 0; j < i; j++) {
                ExFreePool(context.stripes[j].buf);
            }
            ExFreePool(context.stripes);

            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto end;
        }

        RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size));

        context.stripes[i].context = &context;
        context.stripes[i].rewrite = FALSE;
    }

    stripe = stripe_start;

    Status = STATUS_SUCCESS;

    chunk_lock_range(Vcb, c, run_start, run_end - run_start);

    do {
        ULONG read_stripes;
        UINT16 missing_devices = 0;
        BOOL need_wait = FALSE;

        if (max_read < stripe_end + 1 - stripe)
            read_stripes = max_read;
        else
            read_stripes = (ULONG)(stripe_end + 1 - stripe);

        context.stripes_left = c->chunk_item->num_stripes;

        // read megabyte by megabyte
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (c->devices[i]->devobj) {
                PIO_STACK_LOCATION IrpSp;

                context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);

                if (!context.stripes[i].Irp) {
                    ERR("IoAllocateIrp failed\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end3;
                }

                context.stripes[i].Irp->MdlAddress = NULL;

                IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
                IrpSp->MajorFunction = IRP_MJ_READ;

                // set up the IRP's buffer according to the device's I/O model
                if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
                    context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
                    if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end3;
                    }

                    context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;

                    context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
                } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
                    context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), FALSE, FALSE, NULL);
                    if (!context.stripes[i].Irp->MdlAddress) {
                        ERR("IoAllocateMdl failed\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end3;
                    }

                    Status = STATUS_SUCCESS;

                    _SEH2_TRY {
                        MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
                    } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
                        Status = _SEH2_GetExceptionCode();
                    } _SEH2_END;

                    if (!NT_SUCCESS(Status)) {
                        ERR("MmProbeAndLockPages threw exception %08x\n", Status);
                        IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                        goto end3;
                    }
                } else
                    context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;

                context.stripes[i].offset = stripe * c->chunk_item->stripe_length;

                IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
                IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;

                context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
                context.stripes[i].missing = FALSE;

                IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], TRUE, TRUE, TRUE);

                Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
                need_wait = TRUE;
            } else {
                // device is missing; the completion count must account for it
                context.stripes[i].Irp = NULL;
                context.stripes[i].missing = TRUE;
                missing_devices++;
                InterlockedDecrement(&context.stripes_left);
            }
        }

        if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
            ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
            Status = STATUS_UNEXPECTED_IO_ERROR;
            goto end3;
        } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
            ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
            Status = STATUS_UNEXPECTED_IO_ERROR;
            goto end3;
        }

        if (need_wait) {
            // the completion routine signals Event once stripes_left hits zero -
            // TODO confirm against scrub_read_completion_raid56
            KeInitializeEvent(&context.Event, NotificationEvent, FALSE);

            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                if (c->devices[i]->devobj)
                    IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
            }

            KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
        }

        // return an error if any of the stripes returned an error
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
                Status = context.stripes[i].iosb.Status;
                log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
                goto end3;
            }
        }

        if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
            for (i = 0; i < read_stripes; i++) {
                scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
            }
        } else {
            for (i = 0; i < read_stripes; i++) {
                scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
            }
        }
        stripe += read_stripes;

end3:
        // per-batch cleanup: free IRPs/MDLs and flush any repaired stripes to disk
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (context.stripes[i].Irp) {
                if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
                    MmUnlockPages(context.stripes[i].Irp->MdlAddress);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                }
                IoFreeIrp(context.stripes[i].Irp);
                context.stripes[i].Irp = NULL;

                if (context.stripes[i].rewrite) {
                    Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + context.stripes[i].offset,
                                             context.stripes[i].buf, (UINT32)(read_stripes * c->chunk_item->stripe_length));

                    if (!NT_SUCCESS(Status)) {
                        ERR("write_data_phys returned %08x\n", Status);
                        log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS);
                        goto end2;
                    }
                }
            }
        }

        if (!NT_SUCCESS(Status))
break; 2815 } while (stripe < stripe_end); 2816 2817 end2: 2818 chunk_unlock_range(Vcb, c, run_start, run_end - run_start); 2819 2820 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2821 ExFreePool(context.stripes[i].buf); 2822 ExFreePool(context.stripes[i].errorarr); 2823 } 2824 ExFreePool(context.stripes); 2825 2826 end: 2827 ExFreePool(treearr); 2828 ExFreePool(allocarr); 2829 ExFreePool(context.parity_scratch); 2830 2831 if (c->chunk_item->type & BLOCK_FLAG_RAID6) 2832 ExFreePool(context.parity_scratch2); 2833 2834 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2835 ExFreePool(csumarr); 2836 ExFreePool(context.csum); 2837 } 2838 2839 return Status; 2840 } 2841 2842 static NTSTATUS scrub_chunk_raid56(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) { 2843 NTSTATUS Status; 2844 KEY searchkey; 2845 traverse_ptr tp; 2846 BOOL b; 2847 UINT64 full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0; 2848 ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1; 2849 2850 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; 2851 stripe = (*offset - c->offset) / full_stripe_len; 2852 2853 *offset = c->offset + (stripe * full_stripe_len); 2854 2855 searchkey.obj_id = *offset; 2856 searchkey.obj_type = TYPE_METADATA_ITEM; 2857 searchkey.offset = 0xffffffffffffffff; 2858 2859 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); 2860 if (!NT_SUCCESS(Status)) { 2861 ERR("find_item returned %08x\n", Status); 2862 return Status; 2863 } 2864 2865 *changed = FALSE; 2866 2867 do { 2868 traverse_ptr next_tp; 2869 2870 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 2871 break; 2872 2873 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { 2874 UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; 2875 2876 TRACE("%llx\n", tp.item->key.obj_id); 2877 2878 if (size < Vcb->superblock.sector_size) { 2879 ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size); 2880 return STATUS_INTERNAL_ERROR; 2881 } 2882 2883 stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len; 2884 2885 if (*changed) { 2886 if (stripe > stripe_end + 1) { 2887 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end); 2888 if (!NT_SUCCESS(Status)) { 2889 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status); 2890 return Status; 2891 } 2892 2893 stripe_start = stripe; 2894 } 2895 } else 2896 stripe_start = stripe; 2897 2898 stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len; 2899 2900 *changed = TRUE; 2901 2902 total_data += size; 2903 num_extents++; 2904 2905 // only do so much at a time 2906 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB 2907 break; 2908 } 2909 2910 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); 2911 2912 if (b) 2913 tp = next_tp; 2914 } while (b); 2915 2916 if (*changed) { 2917 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end); 2918 if (!NT_SUCCESS(Status)) { 2919 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status); 2920 return Status; 2921 } 2922 2923 *offset = c->offset + ((stripe_end + 1) * full_stripe_len); 2924 } 2925 2926 return STATUS_SUCCESS; 2927 } 2928 2929 static NTSTATUS scrub_chunk(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) { 2930 NTSTATUS Status; 2931 KEY searchkey; 2932 traverse_ptr tp; 2933 BOOL b = FALSE, tree_run = FALSE; 2934 ULONG type, num_extents = 0; 2935 UINT64 total_data = 0, tree_run_start, tree_run_end; 2936 2937 TRACE("chunk %llx\n", c->offset); 2938 2939 ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE); 2940 2941 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) 2942 type = BLOCK_FLAG_DUPLICATE; 2943 else if 
(c->chunk_item->type & BLOCK_FLAG_RAID0) 2944 type = BLOCK_FLAG_RAID0; 2945 else if (c->chunk_item->type & BLOCK_FLAG_RAID1) 2946 type = BLOCK_FLAG_DUPLICATE; 2947 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 2948 type = BLOCK_FLAG_RAID10; 2949 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { 2950 Status = scrub_chunk_raid56(Vcb, c, offset, changed); 2951 goto end; 2952 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2953 Status = scrub_chunk_raid56(Vcb, c, offset, changed); 2954 goto end; 2955 } else // SINGLE 2956 type = BLOCK_FLAG_DUPLICATE; 2957 2958 searchkey.obj_id = *offset; 2959 searchkey.obj_type = TYPE_METADATA_ITEM; 2960 searchkey.offset = 0xffffffffffffffff; 2961 2962 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL); 2963 if (!NT_SUCCESS(Status)) { 2964 ERR("error - find_item returned %08x\n", Status); 2965 goto end; 2966 } 2967 2968 do { 2969 traverse_ptr next_tp; 2970 2971 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 2972 break; 2973 2974 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { 2975 UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; 2976 BOOL is_tree; 2977 UINT32* csum = NULL; 2978 RTL_BITMAP bmp; 2979 ULONG* bmparr = NULL; 2980 2981 TRACE("%llx\n", tp.item->key.obj_id); 2982 2983 is_tree = FALSE; 2984 2985 if (tp.item->key.obj_type == TYPE_METADATA_ITEM) 2986 is_tree = TRUE; 2987 else { 2988 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 2989 2990 if (tp.item->size < sizeof(EXTENT_ITEM)) { 2991 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 2992 Status = STATUS_INTERNAL_ERROR; 2993 goto end; 2994 } 2995 2996 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 2997 is_tree = TRUE; 2998 } 2999 3000 if (size < Vcb->superblock.sector_size) { 3001 ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size); 3002 Status = STATUS_INTERNAL_ERROR; 3003 goto end; 3004 } 3005 3006 // load csum 3007 if (!is_tree) { 3008 traverse_ptr tp2; 3009 3010 csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(sizeof(UINT32) * size / Vcb->superblock.sector_size), ALLOC_TAG); 3011 if (!csum) { 3012 ERR("out of memory\n"); 3013 Status = STATUS_INSUFFICIENT_RESOURCES; 3014 goto end; 3015 } 3016 3017 bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align(((size / Vcb->superblock.sector_size) >> 3) + 1, sizeof(ULONG))), ALLOC_TAG); 3018 if (!bmparr) { 3019 ERR("out of memory\n"); 3020 ExFreePool(csum); 3021 Status = STATUS_INSUFFICIENT_RESOURCES; 3022 goto end; 3023 } 3024 3025 RtlInitializeBitMap(&bmp, bmparr, (ULONG)(size / Vcb->superblock.sector_size)); 3026 RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum 3027 3028 searchkey.obj_id = EXTENT_CSUM_ID; 3029 searchkey.obj_type = TYPE_EXTENT_CSUM; 3030 searchkey.offset = tp.item->key.obj_id; 3031 3032 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL); 3033 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 3034 ERR("find_item returned 
%08x\n", Status); 3035 ExFreePool(csum); 3036 ExFreePool(bmparr); 3037 goto end; 3038 } 3039 3040 if (Status != STATUS_NOT_FOUND) { 3041 do { 3042 traverse_ptr next_tp2; 3043 3044 if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) { 3045 if (tp2.item->key.offset >= tp.item->key.obj_id + size) 3046 break; 3047 else if (tp2.item->size >= sizeof(UINT32) && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= tp.item->key.obj_id) { 3048 UINT64 cs = max(tp.item->key.obj_id, tp2.item->key.offset); 3049 UINT64 ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32))); 3050 3051 RtlCopyMemory(csum + ((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), 3052 tp2.item->data + ((cs - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size), 3053 (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size)); 3054 3055 RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size)); 3056 3057 if (ce == tp.item->key.obj_id + size) 3058 break; 3059 } 3060 } 3061 3062 if (find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL)) 3063 tp2 = next_tp2; 3064 else 3065 break; 3066 } while (TRUE); 3067 } 3068 } 3069 3070 if (tree_run) { 3071 if (!is_tree || tp.item->key.obj_id > tree_run_end) { 3072 Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL); 3073 if (!NT_SUCCESS(Status)) { 3074 ERR("scrub_extent returned %08x\n", Status); 3075 goto end; 3076 } 3077 3078 if (!is_tree) 3079 tree_run = FALSE; 3080 else { 3081 tree_run_start = tp.item->key.obj_id; 3082 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; 3083 } 3084 } else 3085 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; 3086 } else if (is_tree) { 3087 tree_run = TRUE; 3088 tree_run_start = tp.item->key.obj_id; 3089 tree_run_end = tp.item->key.obj_id + 
Vcb->superblock.node_size; 3090 } 3091 3092 if (!is_tree) { 3093 Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp); 3094 if (!NT_SUCCESS(Status)) { 3095 ERR("scrub_data_extent returned %08x\n", Status); 3096 ExFreePool(csum); 3097 ExFreePool(bmparr); 3098 goto end; 3099 } 3100 3101 ExFreePool(csum); 3102 ExFreePool(bmparr); 3103 } 3104 3105 *offset = tp.item->key.obj_id + size; 3106 *changed = TRUE; 3107 3108 total_data += size; 3109 num_extents++; 3110 3111 // only do so much at a time 3112 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB 3113 break; 3114 } 3115 3116 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL); 3117 3118 if (b) 3119 tp = next_tp; 3120 } while (b); 3121 3122 if (tree_run) { 3123 Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL); 3124 if (!NT_SUCCESS(Status)) { 3125 ERR("scrub_extent returned %08x\n", Status); 3126 goto end; 3127 } 3128 } 3129 3130 Status = STATUS_SUCCESS; 3131 3132 end: 3133 ExReleaseResourceLite(&Vcb->tree_lock); 3134 3135 return Status; 3136 } 3137 3138 _Function_class_(KSTART_ROUTINE) 3139 #ifdef __REACTOS__ 3140 static void NTAPI scrub_thread(void* context) { 3141 #else 3142 static void scrub_thread(void* context) { 3143 #endif 3144 device_extension* Vcb = context; 3145 LIST_ENTRY chunks, *le; 3146 NTSTATUS Status; 3147 LARGE_INTEGER time; 3148 3149 KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, FALSE); 3150 3151 InitializeListHead(&chunks); 3152 3153 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE); 3154 3155 if (Vcb->need_write && !Vcb->readonly) 3156 Status = do_write(Vcb, NULL); 3157 else 3158 Status = STATUS_SUCCESS; 3159 3160 free_trees(Vcb); 3161 3162 if (!NT_SUCCESS(Status)) { 3163 ExReleaseResourceLite(&Vcb->tree_lock); 3164 ERR("do_write returned %08x\n", Status); 3165 Vcb->scrub.error = Status; 3166 goto end; 3167 } 3168 3169 ExConvertExclusiveToSharedLite(&Vcb->tree_lock); 3170 3171 
ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); 3172 3173 KeQuerySystemTime(&Vcb->scrub.start_time); 3174 Vcb->scrub.finish_time.QuadPart = 0; 3175 Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart; 3176 Vcb->scrub.duration.QuadPart = 0; 3177 Vcb->scrub.total_chunks = 0; 3178 Vcb->scrub.chunks_left = 0; 3179 Vcb->scrub.data_scrubbed = 0; 3180 Vcb->scrub.num_errors = 0; 3181 3182 while (!IsListEmpty(&Vcb->scrub.errors)) { 3183 scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry); 3184 ExFreePool(err); 3185 } 3186 3187 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE); 3188 3189 le = Vcb->chunks.Flink; 3190 while (le != &Vcb->chunks) { 3191 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 3192 3193 acquire_chunk_lock(c, Vcb); 3194 3195 if (!c->readonly) { 3196 InsertTailList(&chunks, &c->list_entry_balance); 3197 Vcb->scrub.total_chunks++; 3198 Vcb->scrub.chunks_left++; 3199 } 3200 3201 release_chunk_lock(c, Vcb); 3202 3203 le = le->Flink; 3204 } 3205 3206 ExReleaseResourceLite(&Vcb->chunk_lock); 3207 3208 ExReleaseResource(&Vcb->scrub.stats_lock); 3209 3210 ExReleaseResourceLite(&Vcb->tree_lock); 3211 3212 while (!IsListEmpty(&chunks)) { 3213 chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance); 3214 UINT64 offset = c->offset; 3215 BOOL changed; 3216 3217 c->reloc = TRUE; 3218 3219 KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL); 3220 3221 if (!Vcb->scrub.stopping) { 3222 do { 3223 changed = FALSE; 3224 3225 Status = scrub_chunk(Vcb, c, &offset, &changed); 3226 if (!NT_SUCCESS(Status)) { 3227 ERR("scrub_chunk returned %08x\n", Status); 3228 Vcb->scrub.stopping = TRUE; 3229 Vcb->scrub.error = Status; 3230 break; 3231 } 3232 3233 if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping) 3234 break; 3235 3236 KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL); 3237 } while (changed); 3238 } 3239 
3240 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE); 3241 3242 if (!Vcb->scrub.stopping) 3243 Vcb->scrub.chunks_left--; 3244 3245 if (IsListEmpty(&chunks)) 3246 KeQuerySystemTime(&Vcb->scrub.finish_time); 3247 3248 ExReleaseResource(&Vcb->scrub.stats_lock); 3249 3250 c->reloc = FALSE; 3251 c->list_entry_balance.Flink = NULL; 3252 } 3253 3254 KeQuerySystemTime(&time); 3255 Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart; 3256 3257 end: 3258 ZwClose(Vcb->scrub.thread); 3259 Vcb->scrub.thread = NULL; 3260 3261 KeSetEvent(&Vcb->scrub.finished, 0, FALSE); 3262 } 3263 3264 NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3265 NTSTATUS Status; 3266 3267 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3268 return STATUS_PRIVILEGE_NOT_HELD; 3269 3270 if (Vcb->locked) { 3271 WARN("cannot start scrub while locked\n"); 3272 return STATUS_DEVICE_NOT_READY; 3273 } 3274 3275 if (Vcb->balance.thread) { 3276 WARN("cannot start scrub while balance running\n"); 3277 return STATUS_DEVICE_NOT_READY; 3278 } 3279 3280 if (Vcb->scrub.thread) { 3281 WARN("scrub already running\n"); 3282 return STATUS_DEVICE_NOT_READY; 3283 } 3284 3285 if (Vcb->readonly) 3286 return STATUS_MEDIA_WRITE_PROTECTED; 3287 3288 Vcb->scrub.stopping = FALSE; 3289 Vcb->scrub.paused = FALSE; 3290 Vcb->scrub.error = STATUS_SUCCESS; 3291 KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused); 3292 3293 Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, NULL, NULL, NULL, scrub_thread, Vcb); 3294 if (!NT_SUCCESS(Status)) { 3295 ERR("PsCreateSystemThread returned %08x\n", Status); 3296 return Status; 3297 } 3298 3299 return STATUS_SUCCESS; 3300 } 3301 3302 NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length) { 3303 btrfs_query_scrub* bqs = (btrfs_query_scrub*)data; 3304 ULONG len; 3305 NTSTATUS Status; 3306 LIST_ENTRY* le; 3307 
btrfs_scrub_error* bse = NULL; 3308 3309 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3310 return STATUS_PRIVILEGE_NOT_HELD; 3311 3312 if (length < offsetof(btrfs_query_scrub, errors)) 3313 return STATUS_BUFFER_TOO_SMALL; 3314 3315 ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, TRUE); 3316 3317 if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0) 3318 bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING; 3319 else 3320 bqs->status = BTRFS_SCRUB_STOPPED; 3321 3322 bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart; 3323 bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart; 3324 bqs->chunks_left = Vcb->scrub.chunks_left; 3325 bqs->total_chunks = Vcb->scrub.total_chunks; 3326 bqs->data_scrubbed = Vcb->scrub.data_scrubbed; 3327 3328 bqs->duration = Vcb->scrub.duration.QuadPart; 3329 3330 if (bqs->status == BTRFS_SCRUB_RUNNING) { 3331 LARGE_INTEGER time; 3332 3333 KeQuerySystemTime(&time); 3334 bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart; 3335 } 3336 3337 bqs->error = Vcb->scrub.error; 3338 3339 bqs->num_errors = Vcb->scrub.num_errors; 3340 3341 len = length - offsetof(btrfs_query_scrub, errors); 3342 3343 le = Vcb->scrub.errors.Flink; 3344 while (le != &Vcb->scrub.errors) { 3345 scrub_error* err = CONTAINING_RECORD(le, scrub_error, list_entry); 3346 ULONG errlen; 3347 3348 if (err->is_metadata) 3349 errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY); 3350 else 3351 errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length; 3352 3353 if (len < errlen) { 3354 Status = STATUS_BUFFER_OVERFLOW; 3355 goto end; 3356 } 3357 3358 if (!bse) 3359 bse = &bqs->errors; 3360 else { 3361 ULONG lastlen; 3362 3363 if (bse->is_metadata) 3364 lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY); 3365 else 3366 lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length; 3367 3368 bse->next_entry = 
lastlen; 3369 bse = (btrfs_scrub_error*)(((UINT8*)bse) + lastlen); 3370 } 3371 3372 bse->next_entry = 0; 3373 bse->address = err->address; 3374 bse->device = err->device; 3375 bse->recovered = err->recovered; 3376 bse->is_metadata = err->is_metadata; 3377 bse->parity = err->parity; 3378 3379 if (err->is_metadata) { 3380 bse->metadata.root = err->metadata.root; 3381 bse->metadata.level = err->metadata.level; 3382 bse->metadata.firstitem = err->metadata.firstitem; 3383 } else { 3384 bse->data.subvol = err->data.subvol; 3385 bse->data.offset = err->data.offset; 3386 bse->data.filename_length = err->data.filename_length; 3387 RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length); 3388 } 3389 3390 len -= errlen; 3391 le = le->Flink; 3392 } 3393 3394 Status = STATUS_SUCCESS; 3395 3396 end: 3397 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 3398 3399 return Status; 3400 } 3401 3402 NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3403 LARGE_INTEGER time; 3404 3405 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3406 return STATUS_PRIVILEGE_NOT_HELD; 3407 3408 if (!Vcb->scrub.thread) 3409 return STATUS_DEVICE_NOT_READY; 3410 3411 if (Vcb->scrub.paused) 3412 return STATUS_DEVICE_NOT_READY; 3413 3414 Vcb->scrub.paused = TRUE; 3415 KeClearEvent(&Vcb->scrub.event); 3416 3417 KeQuerySystemTime(&time); 3418 Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart; 3419 3420 return STATUS_SUCCESS; 3421 } 3422 3423 NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3424 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3425 return STATUS_PRIVILEGE_NOT_HELD; 3426 3427 if (!Vcb->scrub.thread) 3428 return STATUS_DEVICE_NOT_READY; 3429 3430 if (!Vcb->scrub.paused) 3431 return STATUS_DEVICE_NOT_READY; 3432 3433 Vcb->scrub.paused = FALSE; 3434 KeSetEvent(&Vcb->scrub.event, 0, FALSE); 3435 
3436 KeQuerySystemTime(&Vcb->scrub.resume_time); 3437 3438 return STATUS_SUCCESS; 3439 } 3440 3441 NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3442 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3443 return STATUS_PRIVILEGE_NOT_HELD; 3444 3445 if (!Vcb->scrub.thread) 3446 return STATUS_DEVICE_NOT_READY; 3447 3448 Vcb->scrub.paused = FALSE; 3449 Vcb->scrub.stopping = TRUE; 3450 KeSetEvent(&Vcb->scrub.event, 0, FALSE); 3451 3452 return STATUS_SUCCESS; 3453 } 3454