1 /* Copyright (c) Mark Harmstone 2017 2 * 3 * This file is part of WinBtrfs. 4 * 5 * WinBtrfs is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU Lesser General Public Licence as published by 7 * the Free Software Foundation, either version 3 of the Licence, or 8 * (at your option) any later version. 9 * 10 * WinBtrfs is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public Licence for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public Licence 16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "btrfs_drv.h" 19 20 #define SCRUB_UNIT 0x100000 // 1 MB 21 22 struct _scrub_context; 23 24 typedef struct { 25 struct _scrub_context* context; 26 PIRP Irp; 27 uint64_t start; 28 uint32_t length; 29 IO_STATUS_BLOCK iosb; 30 uint8_t* buf; 31 bool csum_error; 32 void* bad_csums; 33 } scrub_context_stripe; 34 35 typedef struct _scrub_context { 36 KEVENT Event; 37 scrub_context_stripe* stripes; 38 LONG stripes_left; 39 } scrub_context; 40 41 typedef struct { 42 ANSI_STRING name; 43 bool orig_subvol; 44 LIST_ENTRY list_entry; 45 } path_part; 46 47 static void log_file_checksum_error(device_extension* Vcb, uint64_t addr, uint64_t devid, uint64_t subvol, uint64_t inode, uint64_t offset) { 48 LIST_ENTRY *le, parts; 49 root* r = NULL; 50 KEY searchkey; 51 traverse_ptr tp; 52 uint64_t dir; 53 bool orig_subvol = true, not_in_tree = false; 54 ANSI_STRING fn; 55 scrub_error* err; 56 NTSTATUS Status; 57 ULONG utf16len; 58 59 le = Vcb->roots.Flink; 60 while (le != &Vcb->roots) { 61 root* r2 = CONTAINING_RECORD(le, root, list_entry); 62 63 if (r2->id == subvol) { 64 r = r2; 65 break; 66 } 67 68 le = le->Flink; 69 } 70 71 if (!r) { 72 ERR("could not find subvol %I64x\n", subvol); 73 return; 74 } 75 76 InitializeListHead(&parts); 77 78 dir = inode; 79 80 while (true) { 81 if (dir == r->root_item.objid) { 82 if (r == Vcb->root_fileref->fcb->subvol) 83 break; 84 85 searchkey.obj_id = r->id; 86 searchkey.obj_type = TYPE_ROOT_BACKREF; 87 searchkey.offset = 0xffffffffffffffff; 88 89 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, NULL); 90 if (!NT_SUCCESS(Status)) { 91 ERR("find_item returned %08lx\n", Status); 92 goto end; 93 } 94 95 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 96 ROOT_REF* rr = (ROOT_REF*)tp.item->data; 97 path_part* pp; 98 99 if (tp.item->size < sizeof(ROOT_REF)) { 100 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); 101 goto end; 102 } 103 104 if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) { 105 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 106 tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n); 107 goto end; 108 } 109 110 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); 111 if (!pp) { 112 ERR("out of memory\n"); 113 goto end; 114 } 115 116 pp->name.Buffer = rr->name; 117 pp->name.Length = pp->name.MaximumLength = rr->n; 118 pp->orig_subvol = false; 119 120 InsertTailList(&parts, &pp->list_entry); 121 122 r = NULL; 123 124 le = Vcb->roots.Flink; 125 while (le != &Vcb->roots) { 126 root* r2 = CONTAINING_RECORD(le, root, list_entry); 127 128 if (r2->id == tp.item->key.offset) { 129 r = r2; 130 break; 131 } 132 133 le = le->Flink; 134 } 135 136 if (!r) { 137 ERR("could not find subvol %I64x\n", tp.item->key.offset); 138 goto end; 139 } 140 141 dir = rr->dir; 142 orig_subvol = false; 143 } else { 144 not_in_tree = true; 145 break; 146 } 147 } else { 148 searchkey.obj_id = dir; 149 searchkey.obj_type = TYPE_INODE_EXTREF; 150 searchkey.offset = 0xffffffffffffffff; 151 152 Status = find_item(Vcb, r, &tp, &searchkey, false, NULL); 153 if (!NT_SUCCESS(Status)) { 154 ERR("find_item returned %08lx\n", Status); 155 goto end; 156 } 157 158 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) { 159 INODE_REF* ir = (INODE_REF*)tp.item->data; 160 path_part* pp; 161 162 if (tp.item->size < sizeof(INODE_REF)) { 163 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF)); 164 goto end; 165 } 166 167 if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) { 168 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 169 tp.item->size, offsetof(INODE_REF, name[0]) + ir->n); 170 goto end; 171 } 172 173 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); 174 if (!pp) { 175 ERR("out of memory\n"); 176 goto end; 177 } 178 179 pp->name.Buffer = ir->name; 180 pp->name.Length = pp->name.MaximumLength = ir->n; 181 pp->orig_subvol = orig_subvol; 182 183 InsertTailList(&parts, &pp->list_entry); 184 185 if (dir == tp.item->key.offset) 186 break; 187 188 dir = tp.item->key.offset; 189 } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) { 190 INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data; 191 path_part* pp; 192 193 if (tp.item->size < sizeof(INODE_EXTREF)) { 194 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 195 tp.item->size, sizeof(INODE_EXTREF)); 196 goto end; 197 } 198 199 if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) { 200 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 201 tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n); 202 goto end; 203 } 204 205 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); 206 if (!pp) { 207 ERR("out of memory\n"); 208 goto end; 209 } 210 211 pp->name.Buffer = ier->name; 212 pp->name.Length = pp->name.MaximumLength = ier->n; 213 pp->orig_subvol = orig_subvol; 214 215 InsertTailList(&parts, &pp->list_entry); 216 217 if (dir == ier->dir) 218 break; 219 220 dir = ier->dir; 221 } else { 222 ERR("could not find INODE_REF for inode %I64x in subvol %I64x\n", dir, r->id); 223 goto end; 224 } 225 } 226 } 227 228 fn.MaximumLength = 0; 229 230 if (not_in_tree) { 231 le = parts.Blink; 232 while (le != &parts) { 233 path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); 234 LIST_ENTRY* le2 = le->Blink; 235 236 if (pp->orig_subvol) 237 break; 238 239 RemoveTailList(&parts); 240 ExFreePool(pp); 241 242 le = le2; 243 } 244 } 245 246 le = parts.Flink; 247 while (le != &parts) { 248 path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); 249 250 fn.MaximumLength += pp->name.Length + 1; 251 252 le = le->Flink; 253 } 254 255 fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG); 256 if (!fn.Buffer) { 257 ERR("out of memory\n"); 258 goto end; 259 } 260 261 fn.Length = 0; 262 263 le = parts.Blink; 264 while (le != &parts) { 265 path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); 266 267 fn.Buffer[fn.Length] = '\\'; 268 fn.Length++; 269 270 RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length); 271 fn.Length += pp->name.Length; 272 273 le = le->Blink; 274 } 275 276 if (not_in_tree) 277 ERR("subvol %I64x, %.*s, offset %I64x\n", subvol, fn.Length, fn.Buffer, offset); 278 else 279 ERR("%.*s, offset %I64x\n", fn.Length, fn.Buffer, offset); 280 281 Status = utf8_to_utf16(NULL, 0, &utf16len, fn.Buffer, fn.Length); 282 if (!NT_SUCCESS(Status)) { 283 ERR("utf8_to_utf16 1 returned %08lx\n", Status); 284 ExFreePool(fn.Buffer); 285 goto end; 286 } 287 288 err = ExAllocatePoolWithTag(PagedPool, offsetof(scrub_error, data.filename[0]) + utf16len, ALLOC_TAG); 289 if (!err) { 290 ERR("out of memory\n"); 291 ExFreePool(fn.Buffer); 292 goto end; 293 } 294 295 err->address = addr; 296 err->device = devid; 297 err->recovered = false; 298 err->is_metadata = false; 299 err->parity = false; 300 301 err->data.subvol = not_in_tree ? subvol : 0; 302 err->data.offset = offset; 303 err->data.filename_length = (uint16_t)utf16len; 304 305 Status = utf8_to_utf16(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length); 306 if (!NT_SUCCESS(Status)) { 307 ERR("utf8_to_utf16 2 returned %08lx\n", Status); 308 ExFreePool(fn.Buffer); 309 ExFreePool(err); 310 goto end; 311 } 312 313 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 314 315 Vcb->scrub.num_errors++; 316 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 317 318 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 319 320 ExFreePool(fn.Buffer); 321 322 end: 323 while (!IsListEmpty(&parts)) { 324 path_part* pp = CONTAINING_RECORD(RemoveHeadList(&parts), path_part, list_entry); 325 326 ExFreePool(pp); 327 } 328 } 329 330 static void log_file_checksum_error_shared(device_extension* Vcb, uint64_t treeaddr, uint64_t addr, uint64_t devid, uint64_t extent) { 331 tree_header* tree; 332 NTSTATUS Status; 333 leaf_node* ln; 334 ULONG i; 335 336 tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); 337 if (!tree) { 338 ERR("out of memory\n"); 339 return; 340 } 341 342 Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority); 343 if (!NT_SUCCESS(Status)) { 344 ERR("read_data returned %08lx\n", Status); 345 goto end; 346 } 347 348 if (tree->level != 0) { 349 ERR("tree level was %x, expected 0\n", tree->level); 350 goto end; 351 } 352 353 ln = (leaf_node*)&tree[1]; 354 355 for (i = 0; i < tree->num_items; i++) { 356 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 357 EXTENT_DATA* ed = (EXTENT_DATA*)((uint8_t*)tree + sizeof(tree_header) + ln[i].offset); 358 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 359 360 if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr) 361 log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent); 362 } 363 } 364 365 end: 366 ExFreePool(tree); 367 } 368 369 static void log_tree_checksum_error(device_extension* Vcb, uint64_t addr, uint64_t devid, uint64_t root, uint8_t level, KEY* firstitem) { 370 scrub_error* err; 371 372 err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); 373 if (!err) { 374 ERR("out of memory\n"); 375 return; 376 } 377 378 err->address = addr; 379 err->device = devid; 380 err->recovered = false; 381 err->is_metadata = true; 382 err->parity = false; 383 384 err->metadata.root = root; 385 err->metadata.level = level; 386 387 if (firstitem) { 388 ERR("root %I64x, level %u, first item (%I64x,%x,%I64x)\n", root, level, firstitem->obj_id, 389 firstitem->obj_type, firstitem->offset); 390 391 err->metadata.firstitem = *firstitem; 392 } else { 393 ERR("root %I64x, level %u\n", root, level); 394 395 RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY)); 396 } 397 398 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 399 400 Vcb->scrub.num_errors++; 401 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 402 403 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 404 } 405 406 static void log_tree_checksum_error_shared(device_extension* Vcb, uint64_t offset, uint64_t address, uint64_t devid) { 407 tree_header* tree; 408 NTSTATUS Status; 409 internal_node* in; 410 ULONG i; 411 412 tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); 413 if (!tree) { 414 ERR("out of memory\n"); 415 return; 416 } 417 418 Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority); 419 if (!NT_SUCCESS(Status)) { 420 ERR("read_data returned %08lx\n", Status); 421 goto end; 422 } 423 424 if (tree->level == 0) { 425 ERR("tree level was 0\n"); 426 goto end; 427 } 428 429 in = (internal_node*)&tree[1]; 430 431 for (i = 0; i < tree->num_items; i++) { 432 if (in[i].address == address) { 433 log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key); 434 break; 435 } 436 } 437 438 end: 439 ExFreePool(tree); 440 } 441 442 static void log_unrecoverable_error(device_extension* Vcb, uint64_t address, uint64_t devid) { 443 KEY searchkey; 444 traverse_ptr tp; 445 NTSTATUS Status; 446 EXTENT_ITEM* ei; 447 EXTENT_ITEM2* ei2 = NULL; 448 uint8_t* ptr; 449 ULONG len; 450 uint64_t rc; 451 452 // FIXME - still log even if rest of this function fails 453 454 searchkey.obj_id = address; 455 searchkey.obj_type = TYPE_METADATA_ITEM; 456 searchkey.offset = 0xffffffffffffffff; 457 458 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 459 if (!NT_SUCCESS(Status)) { 460 ERR("find_item returned %08lx\n", Status); 461 return; 462 } 463 464 if ((tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM) || 465 tp.item->key.obj_id >= address + Vcb->superblock.sector_size || 466 (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) || 467 (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address) 468 ) 469 return; 470 471 if (tp.item->size < sizeof(EXTENT_ITEM)) { 472 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 473 return; 474 } 475 476 ei = (EXTENT_ITEM*)tp.item->data; 477 ptr = (uint8_t*)&ei[1]; 478 len = tp.item->size - sizeof(EXTENT_ITEM); 479 480 if (tp.item->key.obj_id == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { 481 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { 482 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 483 tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); 484 return; 485 } 486 487 ei2 = (EXTENT_ITEM2*)ptr; 488 489 ptr += sizeof(EXTENT_ITEM2); 490 len -= sizeof(EXTENT_ITEM2); 491 } 492 493 rc = 0; 494 495 while (len > 0) { 496 uint8_t type = *ptr; 497 498 ptr++; 499 len--; 500 501 if (type == TYPE_TREE_BLOCK_REF) { 502 TREE_BLOCK_REF* tbr; 503 504 if (len < sizeof(TREE_BLOCK_REF)) { 505 ERR("TREE_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(TREE_BLOCK_REF), len); 506 break; 507 } 508 509 tbr = (TREE_BLOCK_REF*)ptr; 510 511 log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL); 512 513 rc++; 514 515 ptr += sizeof(TREE_BLOCK_REF); 516 len -= sizeof(TREE_BLOCK_REF); 517 } else if (type == TYPE_EXTENT_DATA_REF) { 518 EXTENT_DATA_REF* edr; 519 520 if (len < sizeof(EXTENT_DATA_REF)) { 521 ERR("EXTENT_DATA_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(EXTENT_DATA_REF), len); 522 break; 523 } 524 525 edr = (EXTENT_DATA_REF*)ptr; 526 527 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); 528 529 rc += edr->count; 530 531 ptr += sizeof(EXTENT_DATA_REF); 532 len -= sizeof(EXTENT_DATA_REF); 533 } else if (type == TYPE_SHARED_BLOCK_REF) { 534 SHARED_BLOCK_REF* sbr; 535 536 if (len < sizeof(SHARED_BLOCK_REF)) { 537 ERR("SHARED_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(SHARED_BLOCK_REF), len); 538 break; 539 } 540 541 sbr = (SHARED_BLOCK_REF*)ptr; 542 543 log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid); 544 545 rc++; 546 547 ptr += sizeof(SHARED_BLOCK_REF); 548 len -= sizeof(SHARED_BLOCK_REF); 549 } else if (type == TYPE_SHARED_DATA_REF) { 550 SHARED_DATA_REF* sdr; 551 552 if (len < sizeof(SHARED_DATA_REF)) { 553 ERR("SHARED_DATA_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(SHARED_DATA_REF), len); 554 break; 555 } 556 557 sdr = (SHARED_DATA_REF*)ptr; 558 559 log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, tp.item->key.obj_id); 560 561 rc += sdr->count; 562 563 ptr += sizeof(SHARED_DATA_REF); 564 len -= sizeof(SHARED_DATA_REF); 565 } else { 566 ERR("unknown extent type %x\n", type); 567 break; 568 } 569 } 570 571 if (rc < ei->refcount) { 572 do { 573 traverse_ptr next_tp; 574 575 if (find_next_item(Vcb, &tp, &next_tp, false, NULL)) 576 tp = next_tp; 577 else 578 break; 579 580 if (tp.item->key.obj_id == address) { 581 if (tp.item->key.obj_type == TYPE_TREE_BLOCK_REF) 582 log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL); 583 else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) { 584 EXTENT_DATA_REF* edr; 585 586 if (tp.item->size < sizeof(EXTENT_DATA_REF)) { 587 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 588 tp.item->size, sizeof(EXTENT_DATA_REF)); 589 break; 590 } 591 592 edr = (EXTENT_DATA_REF*)tp.item->data; 593 594 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); 595 } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF) 596 log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid); 597 else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF) 598 log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id); 599 } else 600 break; 601 } while (true); 602 } 603 } 604 605 static void log_error(device_extension* Vcb, uint64_t addr, uint64_t devid, bool metadata, bool recoverable, bool parity) { 606 if (recoverable) { 607 scrub_error* err; 608 609 if (parity) { 610 ERR("recovering from parity error at %I64x on device %I64x\n", addr, devid); 611 } else { 612 if (metadata) 613 ERR("recovering from metadata checksum error at %I64x on device %I64x\n", addr, devid); 614 else 615 ERR("recovering from data checksum error at %I64x on device %I64x\n", addr, devid); 616 } 617 618 err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); 619 if (!err) { 620 ERR("out of memory\n"); 621 return; 622 } 623 624 err->address = addr; 625 err->device = devid; 626 err->recovered = true; 627 err->is_metadata = metadata; 628 err->parity = parity; 629 630 if (metadata) 631 RtlZeroMemory(&err->metadata, sizeof(err->metadata)); 632 else 633 RtlZeroMemory(&err->data, sizeof(err->data)); 634 635 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 636 637 Vcb->scrub.num_errors++; 638 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 639 640 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 641 } else { 642 if (metadata) 643 ERR("unrecoverable metadata checksum error at %I64x\n", addr); 644 else 645 ERR("unrecoverable data checksum error at %I64x\n", addr); 646 647 log_unrecoverable_error(Vcb, addr, devid); 648 } 649 } 650 651 _Function_class_(IO_COMPLETION_ROUTINE) 652 static NTSTATUS __stdcall scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 653 scrub_context_stripe* stripe = conptr; 654 scrub_context* context = (scrub_context*)stripe->context; 655 ULONG left = InterlockedDecrement(&context->stripes_left); 656 657 UNUSED(DeviceObject); 658 659 stripe->iosb = Irp->IoStatus; 660 661 if (left == 0) 662 KeSetEvent(&context->Event, 0, false); 663 664 return STATUS_MORE_PROCESSING_REQUIRED; 665 } 666 667 static NTSTATUS scrub_extent_dup(device_extension* Vcb, chunk* c, uint64_t offset, void* csum, scrub_context* context) { 668 NTSTATUS Status; 669 bool csum_error = false; 670 ULONG i; 671 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 672 uint16_t present_devices = 0; 673 674 if (csum) { 675 ULONG good_stripe = 0xffffffff; 676 677 for (i = 0; i < c->chunk_item->num_stripes; i++) { 678 if (c->devices[i]->devobj) { 679 present_devices++; 680 681 // if first stripe is okay, we only need to check that the others are identical to it 682 if (good_stripe != 0xffffffff) { 683 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 684 context->stripes[good_stripe].length) != context->stripes[i].length) { 685 context->stripes[i].csum_error = true; 686 csum_error = true; 687 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 688 } 689 } else { 690 Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length >> Vcb->sector_shift, csum); 691 if (Status == STATUS_CRC_ERROR) { 692 context->stripes[i].csum_error = true; 693 csum_error = true; 694 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 695 } else if (!NT_SUCCESS(Status)) { 696 ERR("check_csum returned %08lx\n", Status); 697 return Status; 698 } else 699 good_stripe = i; 700 } 701 } 702 } 703 } else { 704 ULONG good_stripe = 0xffffffff; 705 706 for (i = 0; i < c->chunk_item->num_stripes; i++) { 707 ULONG j; 708 709 if (c->devices[i]->devobj) { 710 // if first stripe is okay, we only need to check that the others are identical to it 711 if (good_stripe != 0xffffffff) { 712 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 713 context->stripes[good_stripe].length) != context->stripes[i].length) { 714 context->stripes[i].csum_error = true; 715 csum_error = true; 716 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 717 } 718 } else { 719 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 720 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 721 722 if (!check_tree_checksum(Vcb, th) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) { 723 context->stripes[i].csum_error = true; 724 csum_error = true; 725 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 726 } 727 } 728 729 if (!context->stripes[i].csum_error) 730 good_stripe = i; 731 } 732 } 733 } 734 } 735 736 if (!csum_error) 737 return STATUS_SUCCESS; 738 739 // handle checksum error 740 741 for (i = 0; i < c->chunk_item->num_stripes; i++) { 742 if (context->stripes[i].csum_error) { 743 if (csum) { 744 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, (context->stripes[i].length * Vcb->csum_size) >> Vcb->sector_shift, ALLOC_TAG); 745 if (!context->stripes[i].bad_csums) { 746 ERR("out of memory\n"); 747 return STATUS_INSUFFICIENT_RESOURCES; 748 } 749 750 do_calc_job(Vcb, context->stripes[i].buf, context->stripes[i].length >> Vcb->sector_shift, context->stripes[i].bad_csums); 751 } else { 752 ULONG j; 753 754 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, (context->stripes[i].length * Vcb->csum_size) >> Vcb->sector_shift, ALLOC_TAG); 755 if (!context->stripes[i].bad_csums) { 756 ERR("out of memory\n"); 757 return STATUS_INSUFFICIENT_RESOURCES; 758 } 759 760 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 761 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 762 763 get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[i].bad_csums + (Vcb->csum_size * j)); 764 } 765 } 766 } 767 } 768 769 if (present_devices > 1) { 770 ULONG good_stripe = 0xffffffff; 771 772 for (i = 0; i < c->chunk_item->num_stripes; i++) { 773 if (c->devices[i]->devobj && !context->stripes[i].csum_error) { 774 good_stripe = i; 775 break; 776 } 777 } 778 779 if (good_stripe != 0xffffffff) { 780 // log 781 782 for (i = 0; i < c->chunk_item->num_stripes; i++) { 783 if (context->stripes[i].csum_error) { 784 ULONG j; 785 786 if (csum) { 787 for (j = 0; j < context->stripes[i].length >> Vcb->sector_shift; j++) { 788 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j + Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 789 uint64_t addr = offset + ((uint64_t)j << Vcb->sector_shift); 790 791 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false); 792 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 793 } 794 } 795 } else { 796 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 797 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 798 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 799 800 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) { 801 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false); 802 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 803 } 804 } 805 } 806 } 807 } 808 809 // write good data over bad 810 811 for (i = 0; i < c->chunk_item->num_stripes; i++) { 812 if (context->stripes[i].csum_error && !c->devices[i]->readonly) { 813 Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset, 814 context->stripes[good_stripe].buf, context->stripes[i].length); 815 816 if (!NT_SUCCESS(Status)) { 817 ERR("write_data_phys returned %08lx\n", Status); 818 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); 819 return Status; 820 } 821 } 822 } 823 824 return STATUS_SUCCESS; 825 } 826 827 // if csum errors on all stripes, check sector by sector 828 829 for (i = 0; i < c->chunk_item->num_stripes; i++) { 830 if (c->devices[i]->devobj) { 831 if (csum) { 832 for (ULONG j = 0; j < context->stripes[i].length >> Vcb->sector_shift; j++) { 833 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 834 ULONG k; 835 uint64_t addr = offset + ((uint64_t)j << Vcb->sector_shift); 836 bool recovered = false; 837 838 for (k = 0; k < c->chunk_item->num_stripes; k++) { 839 if (i != k && c->devices[k]->devobj && 840 RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size), 841 (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) { 842 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false); 843 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 844 845 RtlCopyMemory(context->stripes[i].buf + (j << Vcb->sector_shift), 846 context->stripes[k].buf + (j << Vcb->sector_shift), Vcb->superblock.sector_size); 847 848 recovered = true; 849 break; 850 } 851 } 852 853 if (!recovered) { 854 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false); 855 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 856 } 857 } 858 } 859 } else { 860 for (ULONG j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 861 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 862 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 863 864 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) { 865 ULONG k; 866 bool recovered = false; 867 868 for (k = 0; k < c->chunk_item->num_stripes; k++) { 869 if (i != k && c->devices[k]->devobj) { 870 tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size]; 871 872 if (RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) { 873 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false); 874 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 875 876 RtlCopyMemory(th, th2, Vcb->superblock.node_size); 877 878 recovered = true; 879 break; 880 } 881 } 882 } 883 884 if (!recovered) { 885 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false); 886 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 887 } 888 } 889 } 890 } 891 } 892 } 893 894 // write good data over bad 895 896 for (i = 0; i < c->chunk_item->num_stripes; i++) { 897 if (c->devices[i]->devobj && !c->devices[i]->readonly) { 898 Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset, 899 context->stripes[i].buf, context->stripes[i].length); 900 if (!NT_SUCCESS(Status)) { 901 ERR("write_data_phys returned %08lx\n", Status); 902 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 903 return Status; 904 } 905 } 906 } 907 908 return STATUS_SUCCESS; 909 } 910 911 for (i = 0; i < c->chunk_item->num_stripes; i++) { 912 if (c->devices[i]->devobj) { 913 ULONG j; 914 915 if (csum) { 916 for (j = 0; j < context->stripes[i].length >> Vcb->sector_shift; j++) { 917 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j + Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 918 uint64_t addr = offset + ((uint64_t)j << Vcb->sector_shift); 919 920 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false); 921 } 922 } 923 } else { 924 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 925 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 926 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 927 928 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) 929 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false); 930 } 931 } 932 } 933 } 934 935 return STATUS_SUCCESS; 936 } 937 938 static NTSTATUS scrub_extent_raid0(device_extension* Vcb, chunk* c, uint64_t offset, uint32_t length, uint16_t startoffstripe, void* csum, scrub_context* context) { 939 ULONG j; 940 uint16_t stripe; 941 uint32_t pos, *stripeoff; 942 943 pos = 0; 944 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes, ALLOC_TAG); 945 if (!stripeoff) { 946 ERR("out of memory\n"); 947 return STATUS_INSUFFICIENT_RESOURCES; 948 } 949 950 RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes); 951 952 stripe = startoffstripe; 953 while (pos < length) { 954 uint32_t readlen; 955 956 if (pos == 0) 957 readlen = (uint32_t)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length)); 958 else 959 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 960 961 if (csum) { 962 for (j = 0; j < readlen; j += Vcb->superblock.sector_size) { 963 if (!check_sector_csum(Vcb, context->stripes[stripe].buf + stripeoff[stripe], (uint8_t*)csum + ((pos * Vcb->csum_size) >> Vcb->sector_shift))) { 964 uint64_t addr = offset + pos; 965 966 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false); 967 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 968 } 969 970 pos += Vcb->superblock.sector_size; 971 stripeoff[stripe] += Vcb->superblock.sector_size; 972 } 973 } else { 974 for (j = 0; j < readlen; j += Vcb->superblock.node_size) { 975 tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]); 976 uint64_t addr = offset + pos; 977 978 if (!check_tree_checksum(Vcb, th) || th->address != addr) { 979 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false); 980 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 981 } 982 983 pos += Vcb->superblock.node_size; 984 stripeoff[stripe] += Vcb->superblock.node_size; 985 } 986 } 987 988 stripe = (stripe + 1) % c->chunk_item->num_stripes; 989 } 990 991 ExFreePool(stripeoff); 992 993 return STATUS_SUCCESS; 994 } 995 996 static NTSTATUS scrub_extent_raid10(device_extension* Vcb, chunk* c, uint64_t offset, uint32_t length, uint16_t startoffstripe, void* csum, scrub_context* context) { 997 ULONG j; 998 uint16_t stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1); 999 uint32_t pos, *stripeoff; 1000 bool csum_error = false; 1001 NTSTATUS Status; 1002 1003 pos = 0; 1004 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG); 1005 if (!stripeoff) { 1006 ERR("out of memory\n"); 1007 return STATUS_INSUFFICIENT_RESOURCES; 1008 } 1009 1010 RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes); 1011 1012 stripe = startoffstripe; 1013 while (pos < length) { 1014 uint32_t readlen; 1015 1016 if (pos == 0) 1017 readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length, 1018 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1019 else 1020 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 1021 1022 if (csum) { 1023 ULONG good_stripe = 0xffffffff; 1024 uint16_t k; 1025 1026 for (k = 0; k < sub_stripes; k++) { 1027 if (c->devices[(stripe * sub_stripes) + k]->devobj) { 1028 // if first stripe is okay, we only need to check that the others are identical to it 1029 if (good_stripe != 0xffffffff) { 1030 if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe], 1031 context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe], 1032 readlen) != readlen) { 1033 context->stripes[(stripe * sub_stripes) + k].csum_error = true; 1034 csum_error = true; 1035 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1036 } 1037 } else { 1038 for (j = 0; j < readlen; j += Vcb->superblock.sector_size) { 1039 if (!check_sector_csum(Vcb, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, 1040 (uint8_t*)csum + (((pos + j) * Vcb->csum_size) >> Vcb->sector_shift))) { 1041 csum_error = true; 1042 context->stripes[(stripe * sub_stripes) + k].csum_error = true; 1043 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1044 break; 1045 } 1046 } 1047 1048 if (!context->stripes[(stripe * sub_stripes) + k].csum_error) 1049 good_stripe = k; 1050 } 1051 } 1052 } 1053 1054 pos += readlen; 1055 stripeoff[stripe] += readlen; 1056 } else { 1057 ULONG good_stripe = 0xffffffff; 1058 uint16_t k; 1059 1060 for (k = 0; k < sub_stripes; k++) { 1061 if (c->devices[(stripe * sub_stripes) + k]->devobj) { 1062 // if first stripe is okay, we only need to check that the others are identical to it 1063 if (good_stripe != 0xffffffff) { 1064 if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe], 1065 context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe], 1066 readlen) != readlen) { 1067 context->stripes[(stripe * sub_stripes) + k].csum_error = true; 1068 csum_error = true; 1069 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1070 } 1071 } else { 1072 for (j = 0; j < readlen; j += Vcb->superblock.node_size) { 1073 tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j); 1074 uint64_t addr = offset + pos + j; 1075 1076 if (!check_tree_checksum(Vcb, th) || th->address != addr) { 1077 csum_error = true; 1078 context->stripes[(stripe * sub_stripes) + k].csum_error = true; 1079 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1080 break; 1081 } 1082 } 1083 1084 if (!context->stripes[(stripe * sub_stripes) + k].csum_error) 1085 good_stripe = k; 1086 } 1087 } 1088 } 1089 1090 pos += readlen; 1091 stripeoff[stripe] += readlen; 1092 } 1093 1094 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1095 } 1096 1097 if (!csum_error) { 1098 Status = STATUS_SUCCESS; 1099 goto end; 1100 } 1101 1102 for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) { 1103 ULONG goodstripe = 0xffffffff; 1104 uint16_t k; 1105 bool hasbadstripe = false; 1106 1107 if (context->stripes[j].length == 0) 1108 continue; 1109 1110 for (k = 0; k < sub_stripes; k++) { 1111 if (c->devices[j + k]->devobj) { 1112 if (!context->stripes[j + k].csum_error) 1113 goodstripe = k; 1114 else 1115 hasbadstripe = true; 1116 } 1117 } 1118 1119 if (hasbadstripe) { 1120 if (goodstripe != 0xffffffff) { 1121 for (k = 0; k < sub_stripes; k++) { 1122 if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) { 1123 uint32_t so = 0; 1124 bool recovered = false; 1125 1126 pos = 0; 1127 1128 stripe = startoffstripe; 1129 while (pos < length) { 1130 uint32_t readlen; 1131 1132 if (pos == 0) 1133 readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length, 1134 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1135 else 1136 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 1137 1138 if (stripe == j / sub_stripes) { 1139 if (csum) { 1140 ULONG l; 1141 1142 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { 1143 if (RtlCompareMemory(context->stripes[j + k].buf + so, 1144 context->stripes[j + goodstripe].buf + so, 1145 Vcb->superblock.sector_size) != Vcb->superblock.sector_size) { 1146 uint64_t addr = offset + pos; 1147 1148 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false); 1149 1150 recovered = true; 1151 } 1152 1153 pos += Vcb->superblock.sector_size; 1154 so += Vcb->superblock.sector_size; 1155 } 1156 } else { 1157 ULONG l; 1158 1159 for (l = 0; l < readlen; l += Vcb->superblock.node_size) { 1160 if (RtlCompareMemory(context->stripes[j + k].buf + so, 1161 context->stripes[j + goodstripe].buf + so, 1162 Vcb->superblock.node_size) != Vcb->superblock.node_size) { 1163 uint64_t addr = offset + pos; 1164 1165 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false); 1166 1167 recovered = true; 1168 } 1169 1170 pos += Vcb->superblock.node_size; 1171 so += Vcb->superblock.node_size; 1172 } 1173 } 1174 } else 1175 pos += readlen; 1176 1177 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1178 } 1179 1180 if (recovered) { 1181 // write good data over bad 1182 1183 if (!c->devices[j + k]->readonly) { 1184 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1185 1186 Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset, 1187 context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length); 1188 1189 if (!NT_SUCCESS(Status)) { 1190 ERR("write_data_phys returned %08lx\n", Status); 1191 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); 1192 goto end; 1193 } 1194 } 1195 } 1196 } 1197 } 1198 } else { 1199 uint32_t so = 0; 1200 bool recovered = false; 1201 1202 if (csum) { 1203 for (k = 0; k < sub_stripes; k++) { 1204 if (c->devices[j + k]->devobj) { 1205 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, (context->stripes[j + k].length * Vcb->csum_size) >> Vcb->sector_shift, 1206 ALLOC_TAG); 1207 if (!context->stripes[j + k].bad_csums) { 1208 ERR("out of memory\n"); 1209 Status = STATUS_INSUFFICIENT_RESOURCES; 1210 goto end; 1211 } 1212 1213 do_calc_job(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length >> Vcb->sector_shift, context->stripes[j + k].bad_csums); 1214 } 1215 } 1216 } else { 1217 for (k = 0; k < sub_stripes; k++) { 1218 if (c->devices[j + k]->devobj) { 1219 ULONG l; 1220 1221 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * Vcb->csum_size / Vcb->superblock.node_size, 1222 ALLOC_TAG); 1223 if (!context->stripes[j + k].bad_csums) { 1224 ERR("out of memory\n"); 1225 Status = STATUS_INSUFFICIENT_RESOURCES; 1226 goto end; 1227 } 1228 1229 for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) { 1230 tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size]; 1231 1232 get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[j + k].bad_csums + (Vcb->csum_size * l)); 1233 } 1234 } 1235 } 1236 } 1237 1238 pos = 0; 1239 1240 stripe = startoffstripe; 1241 while (pos < length) { 1242 uint32_t readlen; 1243 1244 if (pos == 0) 1245 readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length, 1246 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1247 else 1248 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 1249 1250 if (stripe == j / sub_stripes) { 1251 ULONG l; 1252 1253 if (csum) { 1254 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { 1255 bool has_error = false; 1256 1257 goodstripe = 0xffffffff; 1258 for (k = 0; k < sub_stripes; k++) { 1259 if (c->devices[j + k]->devobj) { 1260 if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + ((so * Vcb->csum_size) >> Vcb->sector_shift), 1261 (uint8_t*)csum + ((pos * Vcb->csum_size) >> Vcb->sector_shift), 1262 Vcb->csum_size) != Vcb->csum_size) { 1263 has_error = true; 1264 } else 1265 goodstripe = k; 1266 } 1267 } 1268 1269 if (has_error) { 1270 if (goodstripe != 0xffffffff) { 1271 for (k = 0; k < sub_stripes; k++) { 1272 if (c->devices[j + k]->devobj && 1273 RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + ((so * Vcb->csum_size) >> Vcb->sector_shift), 1274 (uint8_t*)csum + ((pos * Vcb->csum_size) >> Vcb->sector_shift), 1275 Vcb->csum_size) != Vcb->csum_size) { 1276 uint64_t addr = offset + pos; 1277 1278 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false); 1279 1280 recovered = true; 1281 1282 RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so, 1283 Vcb->superblock.sector_size); 1284 } 1285 } 1286 } else { 1287 uint64_t addr = offset + pos; 1288 1289 for (k = 0; k < sub_stripes; k++) { 1290 if (c->devices[j + j]->devobj) { 1291 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, false, false); 1292 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1293 } 1294 } 1295 } 1296 } 1297 1298 pos += Vcb->superblock.sector_size; 1299 so += Vcb->superblock.sector_size; 1300 } 1301 } else { 1302 for (l = 0; l < readlen; l += Vcb->superblock.node_size) { 1303 for (k = 0; k < sub_stripes; k++) { 1304 if (c->devices[j + k]->devobj) { 1305 tree_header* th = (tree_header*)&context->stripes[j + k].buf[so]; 1306 uint64_t addr = offset + pos; 1307 1308 if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) { 1309 ULONG m; 1310 1311 recovered = false; 1312 1313 for (m = 0; m < sub_stripes; m++) { 1314 if (m != k) { 1315 tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so]; 1316 1317 if (RtlCompareMemory((uint8_t*)context->stripes[j + m].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) { 1318 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false); 1319 1320 RtlCopyMemory(th, th2, Vcb->superblock.node_size); 1321 1322 recovered = true; 1323 break; 1324 } else 1325 log_device_error(Vcb, c->devices[j + m], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1326 } 1327 } 1328 1329 if (!recovered) 1330 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, false, false); 1331 } 1332 } 1333 } 1334 1335 pos += Vcb->superblock.node_size; 1336 so += Vcb->superblock.node_size; 1337 } 1338 } 1339 } else 1340 pos += readlen; 1341 1342 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1343 } 1344 1345 if (recovered) { 1346 // write good data over bad 1347 1348 for (k = 0; k < sub_stripes; k++) { 1349 if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) { 1350 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1351 1352 Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset, 1353 context->stripes[j + k].buf, context->stripes[j + k].length); 1354 1355 if (!NT_SUCCESS(Status)) { 1356 ERR("write_data_phys returned %08lx\n", Status); 1357 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); 1358 goto end; 1359 } 1360 } 1361 } 1362 } 1363 } 1364 } 1365 } 1366 1367 Status = STATUS_SUCCESS; 1368 1369 end: 1370 ExFreePool(stripeoff); 1371 1372 return Status; 1373 } 1374 1375 static NTSTATUS scrub_extent(device_extension* Vcb, chunk* c, ULONG type, uint64_t offset, uint32_t size, void* csum) { 1376 ULONG i; 1377 scrub_context context; 1378 CHUNK_ITEM_STRIPE* cis; 1379 NTSTATUS Status; 1380 uint16_t startoffstripe = 0, num_missing, allowed_missing; 1381 1382 TRACE("(%p, %p, %lx, %I64x, %x, %p)\n", Vcb, c, type, offset, size, csum); 1383 1384 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); 1385 if (!context.stripes) { 1386 ERR("out of memory\n"); 1387 Status = STATUS_INSUFFICIENT_RESOURCES; 1388 goto end; 1389 } 1390 1391 RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes); 1392 1393 context.stripes_left = 0; 1394 1395 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1396 1397 if (type == BLOCK_FLAG_RAID0) { 1398 uint64_t startoff, endoff; 1399 uint16_t endoffstripe; 1400 1401 get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); 1402 get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); 1403 1404 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1405 if (startoffstripe > i) 1406 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 1407 else if (startoffstripe == i) 1408 context.stripes[i].start = startoff; 1409 else 1410 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); 1411 1412 if (endoffstripe > i) 1413 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start); 1414 else if (endoffstripe == i) 1415 context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start); 1416 else 1417 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start); 1418 } 1419 1420 allowed_missing = 0; 1421 } else if (type == BLOCK_FLAG_RAID10) { 1422 uint64_t startoff, endoff; 1423 uint16_t endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1); 1424 1425 get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe); 1426 get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe); 1427 1428 if ((c->chunk_item->num_stripes % sub_stripes) != 0) { 1429 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes); 1430 Status = STATUS_INTERNAL_ERROR; 1431 goto end; 1432 } 1433 1434 startoffstripe *= sub_stripes; 1435 endoffstripe *= sub_stripes; 1436 1437 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) { 1438 if (startoffstripe > i) 1439 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 1440 else if (startoffstripe == i) 1441 context.stripes[i].start = startoff; 1442 else 1443 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); 1444 1445 if (endoffstripe > i) 1446 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start); 1447 else if (endoffstripe == i) 1448 context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start); 1449 else 1450 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start); 1451 1452 for (j = 1; j < sub_stripes; j++) { 1453 context.stripes[i+j].start = context.stripes[i].start; 1454 context.stripes[i+j].length = context.stripes[i].length; 1455 } 1456 } 1457 1458 startoffstripe /= sub_stripes; 1459 allowed_missing = 1; 1460 } else 1461 allowed_missing = c->chunk_item->num_stripes - 1; 1462 1463 num_missing = 0; 1464 1465 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1466 PIO_STACK_LOCATION IrpSp; 1467 1468 context.stripes[i].context = (struct _scrub_context*)&context; 1469 1470 if (type == BLOCK_FLAG_DUPLICATE) { 1471 context.stripes[i].start = offset - c->offset; 1472 context.stripes[i].length = size; 1473 } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) { 1474 ERR("unexpected chunk type %lx\n", type); 1475 Status = STATUS_INTERNAL_ERROR; 1476 goto end; 1477 } 1478 1479 if (!c->devices[i]->devobj) { 1480 num_missing++; 1481 1482 if (num_missing > allowed_missing) { 1483 ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing); 1484 Status = STATUS_INTERNAL_ERROR; 1485 goto end; 1486 } 1487 } else if (context.stripes[i].length > 0) { 1488 context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG); 1489 1490 if (!context.stripes[i].buf) { 1491 ERR("out of memory\n"); 1492 Status = STATUS_INSUFFICIENT_RESOURCES; 1493 goto end; 1494 } 1495 1496 context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false); 1497 1498 if (!context.stripes[i].Irp) { 1499 ERR("IoAllocateIrp failed\n"); 1500 Status = STATUS_INSUFFICIENT_RESOURCES; 1501 goto end; 1502 } 1503 1504 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 1505 IrpSp->MajorFunction = IRP_MJ_READ; 1506 IrpSp->FileObject = c->devices[i]->fileobj; 1507 1508 if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) { 1509 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG); 1510 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 1511 ERR("out of memory\n"); 1512 Status = STATUS_INSUFFICIENT_RESOURCES; 1513 goto end; 1514 } 1515 1516 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 1517 1518 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 1519 } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) { 1520 context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, false, false, NULL); 1521 if (!context.stripes[i].Irp->MdlAddress) { 1522 ERR("IoAllocateMdl failed\n"); 1523 Status = STATUS_INSUFFICIENT_RESOURCES; 1524 goto end; 1525 } 1526 1527 Status = STATUS_SUCCESS; 1528 1529 _SEH2_TRY { 1530 MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); 1531 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1532 Status = _SEH2_GetExceptionCode(); 1533 } _SEH2_END; 1534 1535 if (!NT_SUCCESS(Status)) { 1536 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 1537 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 1538 context.stripes[i].Irp->MdlAddress = NULL; 1539 goto end; 1540 } 1541 } else 1542 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 1543 1544 IrpSp->Parameters.Read.Length = context.stripes[i].length; 1545 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset; 1546 1547 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 1548 1549 IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], true, true, true); 1550 1551 context.stripes_left++; 1552 1553 Vcb->scrub.data_scrubbed += context.stripes[i].length; 1554 } 1555 } 1556 1557 if (context.stripes_left == 0) { 1558 ERR("error - not reading any stripes\n"); 1559 Status = STATUS_INTERNAL_ERROR; 1560 goto end; 1561 } 1562 1563 KeInitializeEvent(&context.Event, NotificationEvent, false); 1564 1565 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1566 if (c->devices[i]->devobj && context.stripes[i].length > 0) 1567 IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp); 1568 } 1569 1570 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 1571 1572 // return an error if any of the stripes returned an error 1573 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1574 if (!NT_SUCCESS(context.stripes[i].iosb.Status)) { 1575 Status = context.stripes[i].iosb.Status; 1576 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS); 1577 goto end; 1578 } 1579 } 1580 1581 if (type == BLOCK_FLAG_DUPLICATE) { 1582 Status = scrub_extent_dup(Vcb, c, offset, csum, &context); 1583 if (!NT_SUCCESS(Status)) { 1584 ERR("scrub_extent_dup returned %08lx\n", Status); 1585 goto end; 1586 } 1587 } else if (type == BLOCK_FLAG_RAID0) { 1588 Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context); 1589 if (!NT_SUCCESS(Status)) { 1590 ERR("scrub_extent_raid0 returned %08lx\n", Status); 1591 goto end; 1592 } 1593 } else if (type == BLOCK_FLAG_RAID10) { 1594 Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context); 1595 if (!NT_SUCCESS(Status)) { 1596 ERR("scrub_extent_raid10 returned %08lx\n", Status); 1597 goto end; 1598 } 1599 } 1600 1601 end: 1602 if (context.stripes) { 1603 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1604 if (context.stripes[i].Irp) { 1605 if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) { 1606 MmUnlockPages(context.stripes[i].Irp->MdlAddress); 1607 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 1608 } 1609 IoFreeIrp(context.stripes[i].Irp); 1610 } 1611 1612 if (context.stripes[i].buf) 1613 ExFreePool(context.stripes[i].buf); 1614 1615 if (context.stripes[i].bad_csums) 1616 ExFreePool(context.stripes[i].bad_csums); 1617 } 1618 1619 ExFreePool(context.stripes); 1620 } 1621 1622 return Status; 1623 } 1624 1625 static NTSTATUS scrub_data_extent(device_extension* Vcb, chunk* c, uint64_t offset, ULONG type, void* csum, RTL_BITMAP* bmp, ULONG bmplen) { 1626 NTSTATUS Status; 1627 ULONG runlength, index; 1628 1629 runlength = RtlFindFirstRunClear(bmp, &index); 1630 1631 while (runlength != 0) { 1632 if (index >= bmplen) 1633 break; 1634 1635 if (index + runlength >= bmplen) { 1636 runlength = bmplen - index; 1637 1638 if (runlength == 0) 1639 break; 1640 } 1641 1642 do { 1643 ULONG rl; 1644 1645 if (runlength << Vcb->sector_shift > SCRUB_UNIT) 1646 rl = SCRUB_UNIT >> Vcb->sector_shift; 1647 else 1648 rl = runlength; 1649 1650 Status = scrub_extent(Vcb, c, type, offset + ((uint64_t)index << Vcb->sector_shift), 1651 rl << Vcb->sector_shift, (uint8_t*)csum + (index * Vcb->csum_size)); 1652 if (!NT_SUCCESS(Status)) { 1653 ERR("scrub_data_extent_dup returned %08lx\n", Status); 1654 return Status; 1655 } 1656 1657 runlength -= rl; 1658 index += rl; 1659 } while (runlength > 0); 1660 1661 runlength = RtlFindNextForwardRunClear(bmp, index, &index); 1662 } 1663 1664 return STATUS_SUCCESS; 1665 } 1666 1667 typedef struct { 1668 uint8_t* buf; 1669 PIRP Irp; 1670 void* context; 1671 IO_STATUS_BLOCK iosb; 1672 uint64_t offset; 1673 bool rewrite, missing; 1674 RTL_BITMAP error; 1675 ULONG* errorarr; 1676 } scrub_context_raid56_stripe; 1677 1678 typedef struct { 1679 scrub_context_raid56_stripe* stripes; 1680 LONG stripes_left; 1681 KEVENT Event; 1682 RTL_BITMAP alloc; 1683 RTL_BITMAP has_csum; 1684 RTL_BITMAP is_tree; 1685 void* csum; 1686 uint8_t* parity_scratch; 1687 uint8_t* parity_scratch2; 1688 } scrub_context_raid56; 1689 1690 _Function_class_(IO_COMPLETION_ROUTINE) 1691 static NTSTATUS __stdcall scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 1692 scrub_context_raid56_stripe* stripe = conptr; 1693 scrub_context_raid56* context = (scrub_context_raid56*)stripe->context; 1694 LONG left = InterlockedDecrement(&context->stripes_left); 1695 1696 UNUSED(DeviceObject); 1697 1698 stripe->iosb = Irp->IoStatus; 1699 1700 if (left == 0) 1701 KeSetEvent(&context->Event, 0, false); 1702 1703 return STATUS_MORE_PROCESSING_REQUIRED; 1704 } 1705 1706 static void scrub_raid5_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, uint64_t stripe_start, uint64_t bit_start, 1707 uint64_t num, uint16_t missing_devices) { 1708 ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift), off; 1709 uint16_t stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; 1710 uint64_t stripeoff; 1711 1712 stripe = (parity + 1) % c->chunk_item->num_stripes; 1713 off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1); 1714 stripeoff = num * sectors_per_stripe; 1715 1716 if (missing_devices == 0) 1717 RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); 1718 1719 while (stripe != parity) { 1720 RtlClearAllBits(&context->stripes[stripe].error); 1721 1722 for (ULONG i = 0; i < sectors_per_stripe; i++) { 1723 if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { 1724 if (RtlCheckBit(&context->is_tree, off)) { 1725 tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff << Vcb->sector_shift]; 1726 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift); 1727 1728 if (!check_tree_checksum(Vcb, th) || th->address != addr) { 1729 RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size >> Vcb->sector_shift); 1730 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1731 1732 if (missing_devices > 0) 1733 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false); 1734 } 1735 1736 off += Vcb->superblock.node_size >> Vcb->sector_shift; 1737 stripeoff += Vcb->superblock.node_size >> Vcb->sector_shift; 1738 i += (Vcb->superblock.node_size >> Vcb->sector_shift) - 1; 1739 1740 continue; 1741 } else if (RtlCheckBit(&context->has_csum, off)) { 1742 if (!check_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff << Vcb->sector_shift), (uint8_t*)context->csum + (Vcb->csum_size * off))) { 1743 RtlSetBit(&context->stripes[stripe].error, i); 1744 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1745 1746 if (missing_devices > 0) { 1747 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift); 1748 1749 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false); 1750 } 1751 } 1752 } 1753 } 1754 1755 off++; 1756 stripeoff++; 1757 } 1758 1759 if (missing_devices == 0) 1760 do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); 1761 1762 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1763 stripeoff = num * sectors_per_stripe; 1764 } 1765 1766 // check parity 1767 1768 if (missing_devices == 0) { 1769 RtlClearAllBits(&context->stripes[parity].error); 1770 1771 for (ULONG i = 0; i < sectors_per_stripe; i++) { 1772 ULONG o, j; 1773 1774 o = i << Vcb->sector_shift; 1775 for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE 1776 if (context->parity_scratch[o] != 0) { 1777 RtlSetBit(&context->stripes[parity].error, i); 1778 break; 1779 } 1780 o++; 1781 } 1782 } 1783 } 1784 1785 // log and fix errors 1786 1787 if (missing_devices > 0) 1788 return; 1789 1790 for (ULONG i = 0; i < sectors_per_stripe; i++) { 1791 ULONG num_errors = 0, bad_off = 0; 1792 uint64_t bad_stripe = 0; 1793 bool alloc = false; 1794 1795 stripe = (parity + 1) % c->chunk_item->num_stripes; 1796 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; 1797 1798 while (stripe != parity) { 1799 if (RtlCheckBit(&context->alloc, off)) { 1800 alloc = true; 1801 1802 if (RtlCheckBit(&context->stripes[stripe].error, i)) { 1803 bad_stripe = stripe; 1804 bad_off = off; 1805 num_errors++; 1806 } 1807 } 1808 1809 off += sectors_per_stripe; 1810 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1811 } 1812 1813 if (!alloc) 1814 continue; 1815 1816 if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine 1817 continue; 1818 1819 if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error 1820 uint64_t addr; 1821 1822 do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 1823 &context->parity_scratch[i << Vcb->sector_shift], 1824 Vcb->superblock.sector_size); 1825 1826 bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; 1827 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off << Vcb->sector_shift); 1828 1829 context->stripes[parity].rewrite = true; 1830 1831 log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, false, true, true); 1832 log_device_error(Vcb, c->devices[parity], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1833 } else if (num_errors == 1) { 1834 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off << Vcb->sector_shift); 1835 1836 if (RtlCheckBit(&context->is_tree, bad_off)) { 1837 tree_header* th; 1838 1839 do_xor(&context->parity_scratch[i << Vcb->sector_shift], 1840 &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 1841 Vcb->superblock.node_size); 1842 1843 th = (tree_header*)&context->parity_scratch[i << Vcb->sector_shift]; 1844 1845 if (check_tree_checksum(Vcb, th) && th->address == addr) { 1846 RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 1847 &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.node_size); 1848 1849 context->stripes[bad_stripe].rewrite = true; 1850 1851 RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1); 1852 1853 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, true, false); 1854 } else 1855 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, false, false); 1856 } else { 1857 uint8_t hash[MAX_HASH_SIZE]; 1858 1859 do_xor(&context->parity_scratch[i << Vcb->sector_shift], 1860 &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 1861 Vcb->superblock.sector_size); 1862 1863 get_sector_csum(Vcb, &context->parity_scratch[i << Vcb->sector_shift], hash); 1864 1865 if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * bad_off), Vcb->csum_size) == Vcb->csum_size) { 1866 RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 1867 &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.sector_size); 1868 1869 context->stripes[bad_stripe].rewrite = true; 1870 1871 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, true, false); 1872 } else 1873 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, false, false); 1874 } 1875 } else { 1876 stripe = (parity + 1) % c->chunk_item->num_stripes; 1877 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; 1878 1879 while (stripe != parity) { 1880 if (RtlCheckBit(&context->alloc, off)) { 1881 if (RtlCheckBit(&context->stripes[stripe].error, i)) { 1882 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift); 1883 1884 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false); 1885 } 1886 } 1887 1888 off += sectors_per_stripe; 1889 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1890 } 1891 } 1892 } 1893 } 1894 1895 static void scrub_raid6_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, uint64_t stripe_start, uint64_t bit_start, 1896 uint64_t num, uint16_t missing_devices) { 1897 ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift), off; 1898 uint16_t stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes; 1899 uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes; 1900 uint64_t stripeoff; 1901 1902 stripe = (parity1 + 2) % c->chunk_item->num_stripes; 1903 off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2); 1904 stripeoff = num * sectors_per_stripe; 1905 1906 if (c->devices[parity1]->devobj) 1907 RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); 1908 1909 if (c->devices[parity2]->devobj) 1910 RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length); 1911 1912 while (stripe != parity1) { 1913 RtlClearAllBits(&context->stripes[stripe].error); 1914 1915 for (ULONG i = 0; i < sectors_per_stripe; i++) { 1916 if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { 1917 if (RtlCheckBit(&context->is_tree, off)) { 1918 tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff << Vcb->sector_shift]; 1919 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift); 1920 1921 if (!check_tree_checksum(Vcb, th) || th->address != addr) { 1922 RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size >> Vcb->sector_shift); 1923 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1924 1925 if (missing_devices == 2) 1926 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false); 1927 } 1928 1929 off += Vcb->superblock.node_size >> Vcb->sector_shift; 1930 stripeoff += Vcb->superblock.node_size >> Vcb->sector_shift; 1931 i += (Vcb->superblock.node_size >> Vcb->sector_shift) - 1; 1932 1933 continue; 1934 } else if (RtlCheckBit(&context->has_csum, off)) { 1935 uint8_t hash[MAX_HASH_SIZE]; 1936 1937 get_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff << Vcb->sector_shift), hash); 1938 1939 if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * off), Vcb->csum_size) != Vcb->csum_size) { 1940 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift); 1941 1942 RtlSetBit(&context->stripes[stripe].error, i); 1943 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1944 1945 if (missing_devices == 2) 1946 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false); 1947 } 1948 } 1949 } 1950 1951 off++; 1952 stripeoff++; 1953 } 1954 1955 if (c->devices[parity1]->devobj) 1956 do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length); 1957 1958 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1959 stripeoff = num * sectors_per_stripe; 1960 } 1961 1962 RtlClearAllBits(&context->stripes[parity1].error); 1963 1964 if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) { 1965 // check parity 1 1966 1967 for (ULONG i = 0; i < sectors_per_stripe; i++) { 1968 ULONG o, j; 1969 1970 o = i << Vcb->sector_shift; 1971 for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE 1972 if (context->parity_scratch[o] != 0) { 1973 RtlSetBit(&context->stripes[parity1].error, i); 1974 break; 1975 } 1976 o++; 1977 } 1978 } 1979 } 1980 1981 RtlClearAllBits(&context->stripes[parity2].error); 1982 1983 if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) { 1984 // check parity 2 1985 1986 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 1987 1988 while (stripe != parity2) { 1989 galois_double(context->parity_scratch2, (uint32_t)c->chunk_item->stripe_length); 1990 do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length); 1991 1992 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 1993 } 1994 1995 for (ULONG i = 0; i < sectors_per_stripe; i++) { 1996 if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 1997 &context->parity_scratch2[i << Vcb->sector_shift], Vcb->superblock.sector_size) != Vcb->superblock.sector_size) 1998 RtlSetBit(&context->stripes[parity2].error, i); 1999 } 2000 } 2001 2002 if (missing_devices == 2) 2003 return; 2004 2005 // log and fix errors 2006 2007 for (ULONG i = 0; i < sectors_per_stripe; i++) { 2008 ULONG num_errors = 0; 2009 uint64_t bad_stripe1 = 0, bad_stripe2 = 0; 2010 ULONG bad_off1 = 0, bad_off2 = 0; 2011 bool alloc = false; 2012 2013 stripe = (parity1 + 2) % c->chunk_item->num_stripes; 2014 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2015 2016 while (stripe != parity1) { 2017 if (RtlCheckBit(&context->alloc, off)) { 2018 alloc = true; 2019 2020 if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) { 2021 if (num_errors == 0) { 2022 bad_stripe1 = stripe; 2023 bad_off1 = off; 2024 } else if (num_errors == 1) { 2025 bad_stripe2 = stripe; 2026 bad_off2 = off; 2027 } 2028 num_errors++; 2029 } 2030 } 2031 2032 off += sectors_per_stripe; 2033 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2034 } 2035 2036 if (!alloc) 2037 continue; 2038 2039 if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine 2040 continue; 2041 2042 if (num_errors == 0) { // parity error 2043 uint64_t addr; 2044 2045 if (RtlCheckBit(&context->stripes[parity1].error, i)) { 2046 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2047 &context->parity_scratch[i << Vcb->sector_shift], 2048 Vcb->superblock.sector_size); 2049 2050 bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2051 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift); 2052 2053 context->stripes[parity1].rewrite = true; 2054 2055 log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true); 2056 log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2057 } 2058 2059 if (RtlCheckBit(&context->stripes[parity2].error, i)) { 2060 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2061 &context->parity_scratch2[i << Vcb->sector_shift], 2062 Vcb->superblock.sector_size); 2063 2064 bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2065 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift); 2066 2067 context->stripes[parity2].rewrite = true; 2068 2069 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true); 2070 log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2071 } 2072 } else if (num_errors == 1) { 2073 uint32_t len; 2074 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift); 2075 uint8_t* scratch; 2076 2077 len = RtlCheckBit(&context->is_tree, bad_off1) ? Vcb->superblock.node_size : Vcb->superblock.sector_size; 2078 2079 scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG); 2080 if (!scratch) { 2081 ERR("out of memory\n"); 2082 return; 2083 } 2084 2085 RtlZeroMemory(scratch, len); 2086 2087 do_xor(&context->parity_scratch[i << Vcb->sector_shift], 2088 &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2089 2090 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2091 2092 if (c->devices[parity2]->devobj) { 2093 uint16_t stripe_num, bad_stripe_num = 0; 2094 2095 stripe_num = c->chunk_item->num_stripes - 3; 2096 while (stripe != parity2) { 2097 galois_double(scratch, len); 2098 2099 if (stripe != bad_stripe1) 2100 do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2101 else 2102 bad_stripe_num = stripe_num; 2103 2104 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2105 stripe_num--; 2106 } 2107 2108 do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2109 2110 if (bad_stripe_num != 0) 2111 galois_divpower(scratch, (uint8_t)bad_stripe_num, len); 2112 } 2113 2114 if (RtlCheckBit(&context->is_tree, bad_off1)) { 2115 uint8_t hash1[MAX_HASH_SIZE]; 2116 uint8_t hash2[MAX_HASH_SIZE]; 2117 tree_header *th1 = NULL, *th2 = NULL; 2118 2119 if (c->devices[parity1]->devobj) { 2120 th1 = (tree_header*)&context->parity_scratch[i << Vcb->sector_shift]; 2121 get_tree_checksum(Vcb, th1, hash1); 2122 } 2123 2124 if (c->devices[parity2]->devobj) { 2125 th2 = (tree_header*)scratch; 2126 get_tree_checksum(Vcb, th2, hash2); 2127 } 2128 2129 if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, th1, Vcb->csum_size) == Vcb->csum_size && th1->address == addr) || 2130 (c->devices[parity2]->devobj && RtlCompareMemory(hash2, th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr)) { 2131 if (!c->devices[parity1]->devobj || RtlCompareMemory(hash1, th1, Vcb->csum_size) != Vcb->csum_size || th1->address != addr) { 2132 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2133 scratch, Vcb->superblock.node_size); 2134 2135 if (c->devices[parity1]->devobj) { 2136 // fix parity 1 2137 2138 stripe = (parity1 + 2) % c->chunk_item->num_stripes; 2139 2140 RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2141 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2142 Vcb->superblock.node_size); 2143 2144 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2145 2146 while (stripe != parity1) { 2147 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2148 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2149 Vcb->superblock.node_size); 2150 2151 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2152 } 2153 2154 context->stripes[parity1].rewrite = true; 2155 2156 log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true); 2157 log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2158 } 2159 } else { 2160 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2161 &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.node_size); 2162 2163 if (!c->devices[parity2]->devobj || RtlCompareMemory(hash2, th2, Vcb->csum_size) != Vcb->csum_size || th2->address != addr) { 2164 // fix parity 2 2165 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2166 2167 if (c->devices[parity2]->devobj) { 2168 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2169 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2170 Vcb->superblock.node_size); 2171 2172 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2173 2174 while (stripe != parity2) { 2175 galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], Vcb->superblock.node_size); 2176 2177 do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2178 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2179 Vcb->superblock.node_size); 2180 2181 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2182 } 2183 2184 context->stripes[parity2].rewrite = true; 2185 2186 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true); 2187 log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2188 } 2189 } 2190 } 2191 2192 context->stripes[bad_stripe1].rewrite = true; 2193 2194 RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1); 2195 2196 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false); 2197 } else 2198 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false); 2199 } else { 2200 uint8_t hash1[MAX_HASH_SIZE]; 2201 uint8_t hash2[MAX_HASH_SIZE]; 2202 2203 if (c->devices[parity1]->devobj) 2204 get_sector_csum(Vcb, &context->parity_scratch[i << Vcb->sector_shift], hash1); 2205 2206 if (c->devices[parity2]->devobj) 2207 get_sector_csum(Vcb, scratch, hash2); 2208 2209 if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) || 2210 (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size)) { 2211 if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) { 2212 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2213 scratch, Vcb->superblock.sector_size); 2214 2215 if (c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 2216 // fix parity 1 2217 2218 stripe = (parity1 + 2) % c->chunk_item->num_stripes; 2219 2220 RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2221 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2222 Vcb->superblock.sector_size); 2223 2224 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2225 2226 while (stripe != parity1) { 2227 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2228 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2229 Vcb->superblock.sector_size); 2230 2231 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2232 } 2233 2234 context->stripes[parity1].rewrite = true; 2235 2236 log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true); 2237 log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2238 } 2239 } else { 2240 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2241 &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.sector_size); 2242 2243 if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 2244 // fix parity 2 2245 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2246 2247 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2248 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2249 Vcb->superblock.sector_size); 2250 2251 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2252 2253 while (stripe != parity2) { 2254 galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], Vcb->superblock.sector_size); 2255 2256 do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2257 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2258 Vcb->superblock.sector_size); 2259 2260 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2261 } 2262 2263 context->stripes[parity2].rewrite = true; 2264 2265 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true); 2266 log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 2267 } 2268 } 2269 2270 context->stripes[bad_stripe1].rewrite = true; 2271 2272 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false); 2273 } else 2274 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false); 2275 } 2276 2277 ExFreePool(scratch); 2278 } else if (num_errors == 2 && missing_devices == 0) { 2279 uint16_t x = 0, y = 0, k; 2280 uint64_t addr; 2281 uint32_t len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size; 2282 uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; 2283 uint32_t j; 2284 2285 stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1); 2286 2287 // put qxy in parity_scratch 2288 // put pxy in parity_scratch2 2289 2290 k = c->chunk_item->num_stripes - 3; 2291 if (stripe == bad_stripe1 || stripe == bad_stripe2) { 2292 RtlZeroMemory(&context->parity_scratch[i << Vcb->sector_shift], len); 2293 RtlZeroMemory(&context->parity_scratch2[i << Vcb->sector_shift], len); 2294 2295 if (stripe == bad_stripe1) 2296 x = k; 2297 else 2298 y = k; 2299 } else { 2300 RtlCopyMemory(&context->parity_scratch[i << Vcb->sector_shift], 2301 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2302 RtlCopyMemory(&context->parity_scratch2[i << Vcb->sector_shift], 2303 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2304 } 2305 2306 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2307 2308 k--; 2309 do { 2310 galois_double(&context->parity_scratch[i << Vcb->sector_shift], len); 2311 2312 if (stripe != bad_stripe1 && stripe != bad_stripe2) { 2313 do_xor(&context->parity_scratch[i << Vcb->sector_shift], 2314 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2315 do_xor(&context->parity_scratch2[i << Vcb->sector_shift], 2316 &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2317 } else if (stripe == bad_stripe1) 2318 x = k; 2319 else if (stripe == bad_stripe2) 2320 y = k; 2321 2322 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1); 2323 k--; 2324 } while (stripe != parity2); 2325 2326 gyx = gpow2(y > x ? (y-x) : (255-x+y)); 2327 gx = gpow2(255-x); 2328 2329 denom = gdiv(1, gyx ^ 1); 2330 a = gmul(gyx, denom); 2331 b = gmul(gx, denom); 2332 2333 p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)]; 2334 q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)]; 2335 pxy = &context->parity_scratch2[i << Vcb->sector_shift]; 2336 qxy = &context->parity_scratch[i << Vcb->sector_shift]; 2337 2338 for (j = 0; j < len; j++) { 2339 *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); 2340 2341 p++; 2342 q++; 2343 pxy++; 2344 qxy++; 2345 } 2346 2347 do_xor(&context->parity_scratch2[i << Vcb->sector_shift], &context->parity_scratch[i << Vcb->sector_shift], len); 2348 do_xor(&context->parity_scratch2[i << Vcb->sector_shift], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len); 2349 2350 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift); 2351 2352 if (RtlCheckBit(&context->is_tree, bad_off1)) { 2353 tree_header* th = (tree_header*)&context->parity_scratch[i << Vcb->sector_shift]; 2354 2355 if (check_tree_checksum(Vcb, th) && th->address == addr) { 2356 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2357 &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.node_size); 2358 2359 context->stripes[bad_stripe1].rewrite = true; 2360 2361 RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1); 2362 2363 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false); 2364 } else 2365 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false); 2366 } else { 2367 if (check_sector_csum(Vcb, &context->parity_scratch[i << Vcb->sector_shift], (uint8_t*)context->csum + (Vcb->csum_size * bad_off1))) { 2368 RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2369 &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.sector_size); 2370 2371 context->stripes[bad_stripe1].rewrite = true; 2372 2373 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false); 2374 } else 2375 log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false); 2376 } 2377 2378 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 << Vcb->sector_shift); 2379 2380 if (RtlCheckBit(&context->is_tree, bad_off2)) { 2381 tree_header* th = (tree_header*)&context->parity_scratch2[i << Vcb->sector_shift]; 2382 2383 if (check_tree_checksum(Vcb, th) && th->address == addr) { 2384 RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2385 &context->parity_scratch2[i << Vcb->sector_shift], Vcb->superblock.node_size); 2386 2387 context->stripes[bad_stripe2].rewrite = true; 2388 2389 RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1); 2390 2391 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, true, false); 2392 } else 2393 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, false, false); 2394 } else { 2395 if (check_sector_csum(Vcb, &context->parity_scratch2[i << Vcb->sector_shift], (uint8_t*)context->csum + (Vcb->csum_size * bad_off2))) { 2396 RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], 2397 &context->parity_scratch2[i << Vcb->sector_shift], Vcb->superblock.sector_size); 2398 2399 context->stripes[bad_stripe2].rewrite = true; 2400 2401 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, true, false); 2402 } else 2403 log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, false, false); 2404 } 2405 } else { 2406 stripe = (parity2 + 1) % c->chunk_item->num_stripes; 2407 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i; 2408 2409 while (stripe != parity1) { 2410 if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { 2411 if (RtlCheckBit(&context->stripes[stripe].error, i)) { 2412 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift); 2413 2414 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false); 2415 } 2416 } 2417 2418 off += sectors_per_stripe; 2419 stripe = (stripe + 1) % c->chunk_item->num_stripes; 2420 } 2421 } 2422 } 2423 } 2424 2425 static NTSTATUS scrub_chunk_raid56_stripe_run(device_extension* Vcb, chunk* c, uint64_t stripe_start, uint64_t stripe_end) { 2426 NTSTATUS Status; 2427 KEY searchkey; 2428 traverse_ptr tp; 2429 bool b; 2430 uint64_t run_start, run_end, full_stripe_len, stripe; 2431 uint32_t max_read, num_sectors; 2432 ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1; 2433 scrub_context_raid56 context; 2434 uint16_t i; 2435 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 2436 2437 TRACE("(%p, %p, %I64x, %I64x)\n", Vcb, c, stripe_start, stripe_end); 2438 2439 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; 2440 run_start = c->offset + (stripe_start * full_stripe_len); 2441 run_end = c->offset + ((stripe_end + 1) * full_stripe_len); 2442 2443 searchkey.obj_id = run_start; 2444 searchkey.obj_type = TYPE_METADATA_ITEM; 2445 searchkey.offset = 0xffffffffffffffff; 2446 2447 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 2448 if (!NT_SUCCESS(Status)) { 2449 ERR("find_item returned %08lx\n", Status); 2450 return Status; 2451 } 2452 2453 num_sectors = (uint32_t)(((stripe_end - stripe_start + 1) * full_stripe_len) >> Vcb->sector_shift); 2454 arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG)); 2455 2456 allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2457 if (!allocarr) { 2458 ERR("out of memory\n"); 2459 return STATUS_INSUFFICIENT_RESOURCES; 2460 } 2461 2462 treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2463 if (!treearr) { 2464 ERR("out of memory\n"); 2465 ExFreePool(allocarr); 2466 return STATUS_INSUFFICIENT_RESOURCES; 2467 } 2468 2469 RtlInitializeBitMap(&context.alloc, allocarr, num_sectors); 2470 RtlClearAllBits(&context.alloc); 2471 2472 RtlInitializeBitMap(&context.is_tree, treearr, num_sectors); 2473 RtlClearAllBits(&context.is_tree); 2474 2475 context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); 2476 if (!context.parity_scratch) { 2477 ERR("out of memory\n"); 2478 ExFreePool(allocarr); 2479 ExFreePool(treearr); 2480 return STATUS_INSUFFICIENT_RESOURCES; 2481 } 2482 2483 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2484 csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2485 if (!csumarr) { 2486 ERR("out of memory\n"); 2487 ExFreePool(allocarr); 2488 ExFreePool(treearr); 2489 ExFreePool(context.parity_scratch); 2490 return STATUS_INSUFFICIENT_RESOURCES; 2491 } 2492 2493 RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors); 2494 RtlClearAllBits(&context.has_csum); 2495 2496 context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * Vcb->csum_size, ALLOC_TAG); 2497 if (!context.csum) { 2498 ERR("out of memory\n"); 2499 ExFreePool(allocarr); 2500 ExFreePool(treearr); 2501 ExFreePool(context.parity_scratch); 2502 ExFreePool(csumarr); 2503 return STATUS_INSUFFICIENT_RESOURCES; 2504 } 2505 } 2506 2507 if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2508 context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); 2509 if (!context.parity_scratch2) { 2510 ERR("out of memory\n"); 2511 ExFreePool(allocarr); 2512 ExFreePool(treearr); 2513 ExFreePool(context.parity_scratch); 2514 2515 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2516 ExFreePool(csumarr); 2517 ExFreePool(context.csum); 2518 } 2519 2520 return STATUS_INSUFFICIENT_RESOURCES; 2521 } 2522 } 2523 2524 do { 2525 traverse_ptr next_tp; 2526 2527 if (tp.item->key.obj_id >= run_end) 2528 break; 2529 2530 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM) { 2531 uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset; 2532 2533 if (tp.item->key.obj_id + size > run_start) { 2534 uint64_t extent_start = max(run_start, tp.item->key.obj_id); 2535 uint64_t extent_end = min(tp.item->key.obj_id + size, run_end); 2536 bool extent_is_tree = false; 2537 2538 RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) >> Vcb->sector_shift), (ULONG)((extent_end - extent_start) >> Vcb->sector_shift)); 2539 2540 if (tp.item->key.obj_type == TYPE_METADATA_ITEM) 2541 extent_is_tree = true; 2542 else { 2543 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 2544 2545 if (tp.item->size < sizeof(EXTENT_ITEM)) { 2546 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 2547 Status = STATUS_INTERNAL_ERROR; 2548 goto end; 2549 } 2550 2551 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 2552 extent_is_tree = true; 2553 } 2554 2555 if (extent_is_tree) 2556 RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) >> Vcb->sector_shift), (ULONG)((extent_end - extent_start) >> Vcb->sector_shift)); 2557 else if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2558 traverse_ptr tp2; 2559 bool b2; 2560 2561 searchkey.obj_id = EXTENT_CSUM_ID; 2562 searchkey.obj_type = TYPE_EXTENT_CSUM; 2563 searchkey.offset = extent_start; 2564 2565 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL); 2566 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 2567 ERR("find_item returned %08lx\n", Status); 2568 goto end; 2569 } 2570 2571 do { 2572 traverse_ptr next_tp2; 2573 2574 if (tp2.item->key.offset >= extent_end) 2575 break; 2576 2577 if (tp2.item->key.offset >= extent_start) { 2578 uint64_t csum_start = max(extent_start, tp2.item->key.offset); 2579 uint64_t csum_end = min(extent_end, tp2.item->key.offset + (((uint64_t)tp2.item->size << Vcb->sector_shift) / Vcb->csum_size)); 2580 2581 RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) >> Vcb->sector_shift), (ULONG)((csum_end - csum_start) >> Vcb->sector_shift)); 2582 2583 RtlCopyMemory((uint8_t*)context.csum + (((csum_start - run_start) * Vcb->csum_size) >> Vcb->sector_shift), 2584 tp2.item->data + (((csum_start - tp2.item->key.offset) * Vcb->csum_size) >> Vcb->sector_shift), 2585 (ULONG)(((csum_end - csum_start) * Vcb->csum_size) >> Vcb->sector_shift)); 2586 } 2587 2588 b2 = find_next_item(Vcb, &tp2, &next_tp2, false, NULL); 2589 2590 if (b2) 2591 tp2 = next_tp2; 2592 } while (b2); 2593 } 2594 } 2595 } 2596 2597 b = find_next_item(Vcb, &tp, &next_tp, false, NULL); 2598 2599 if (b) 2600 tp = next_tp; 2601 } while (b); 2602 2603 context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); 2604 if (!context.stripes) { 2605 ERR("out of memory\n"); 2606 Status = STATUS_INSUFFICIENT_RESOURCES; 2607 goto end; 2608 } 2609 2610 max_read = (uint32_t)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time 2611 2612 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2613 context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG); 2614 if (!context.stripes[i].buf) { 2615 uint64_t j; 2616 2617 ERR("out of memory\n"); 2618 2619 for (j = 0; j < i; j++) { 2620 ExFreePool(context.stripes[j].buf); 2621 } 2622 ExFreePool(context.stripes); 2623 2624 Status = STATUS_INSUFFICIENT_RESOURCES; 2625 goto end; 2626 } 2627 2628 context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length >> Vcb->sector_shift) / 8) + 1, sizeof(ULONG)), ALLOC_TAG); 2629 if (!context.stripes[i].errorarr) { 2630 uint64_t j; 2631 2632 ERR("out of memory\n"); 2633 2634 ExFreePool(context.stripes[i].buf); 2635 2636 for (j = 0; j < i; j++) { 2637 ExFreePool(context.stripes[j].buf); 2638 } 2639 ExFreePool(context.stripes); 2640 2641 Status = STATUS_INSUFFICIENT_RESOURCES; 2642 goto end; 2643 } 2644 2645 RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift)); 2646 2647 context.stripes[i].context = &context; 2648 context.stripes[i].rewrite = false; 2649 } 2650 2651 stripe = stripe_start; 2652 2653 Status = STATUS_SUCCESS; 2654 2655 chunk_lock_range(Vcb, c, run_start, run_end - run_start); 2656 2657 do { 2658 ULONG read_stripes; 2659 uint16_t missing_devices = 0; 2660 bool need_wait = false; 2661 2662 if (max_read < stripe_end + 1 - stripe) 2663 read_stripes = max_read; 2664 else 2665 read_stripes = (ULONG)(stripe_end + 1 - stripe); 2666 2667 context.stripes_left = c->chunk_item->num_stripes; 2668 2669 // read megabyte by megabyte 2670 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2671 if (c->devices[i]->devobj) { 2672 PIO_STACK_LOCATION IrpSp; 2673 2674 context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false); 2675 2676 if (!context.stripes[i].Irp) { 2677 ERR("IoAllocateIrp failed\n"); 2678 Status = STATUS_INSUFFICIENT_RESOURCES; 2679 goto end3; 2680 } 2681 2682 context.stripes[i].Irp->MdlAddress = NULL; 2683 2684 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2685 IrpSp->MajorFunction = IRP_MJ_READ; 2686 IrpSp->FileObject = c->devices[i]->fileobj; 2687 2688 if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2689 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG); 2690 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2691 ERR("out of memory\n"); 2692 Status = STATUS_INSUFFICIENT_RESOURCES; 2693 goto end3; 2694 } 2695 2696 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2697 2698 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 2699 } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) { 2700 context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), false, false, NULL); 2701 if (!context.stripes[i].Irp->MdlAddress) { 2702 ERR("IoAllocateMdl failed\n"); 2703 Status = STATUS_INSUFFICIENT_RESOURCES; 2704 goto end3; 2705 } 2706 2707 Status = STATUS_SUCCESS; 2708 2709 _SEH2_TRY { 2710 MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); 2711 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2712 Status = _SEH2_GetExceptionCode(); 2713 } _SEH2_END; 2714 2715 if (!NT_SUCCESS(Status)) { 2716 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 2717 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 2718 goto end3; 2719 } 2720 } else 2721 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 2722 2723 context.stripes[i].offset = stripe * c->chunk_item->stripe_length; 2724 2725 IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length); 2726 IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset; 2727 2728 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2729 context.stripes[i].missing = false; 2730 2731 IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], true, true, true); 2732 2733 Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length; 2734 need_wait = true; 2735 } else { 2736 context.stripes[i].Irp = NULL; 2737 context.stripes[i].missing = true; 2738 missing_devices++; 2739 InterlockedDecrement(&context.stripes_left); 2740 } 2741 } 2742 2743 if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) { 2744 ERR("too many missing devices (%u, maximum 1)\n", missing_devices); 2745 Status = STATUS_UNEXPECTED_IO_ERROR; 2746 goto end3; 2747 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) { 2748 ERR("too many missing devices (%u, maximum 2)\n", missing_devices); 2749 Status = STATUS_UNEXPECTED_IO_ERROR; 2750 goto end3; 2751 } 2752 2753 if (need_wait) { 2754 KeInitializeEvent(&context.Event, NotificationEvent, false); 2755 2756 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2757 if (c->devices[i]->devobj) 2758 IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp); 2759 } 2760 2761 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 2762 } 2763 2764 // return an error if any of the stripes returned an error 2765 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2766 if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) { 2767 Status = context.stripes[i].iosb.Status; 2768 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS); 2769 goto end3; 2770 } 2771 } 2772 2773 if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2774 for (i = 0; i < read_stripes; i++) { 2775 scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices); 2776 } 2777 } else { 2778 for (i = 0; i < read_stripes; i++) { 2779 scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices); 2780 } 2781 } 2782 stripe += read_stripes; 2783 2784 end3: 2785 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2786 if (context.stripes[i].Irp) { 2787 if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) { 2788 MmUnlockPages(context.stripes[i].Irp->MdlAddress); 2789 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 2790 } 2791 IoFreeIrp(context.stripes[i].Irp); 2792 context.stripes[i].Irp = NULL; 2793 2794 if (context.stripes[i].rewrite) { 2795 Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + context.stripes[i].offset, 2796 context.stripes[i].buf, (uint32_t)(read_stripes * c->chunk_item->stripe_length)); 2797 2798 if (!NT_SUCCESS(Status)) { 2799 ERR("write_data_phys returned %08lx\n", Status); 2800 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); 2801 goto end2; 2802 } 2803 } 2804 } 2805 } 2806 2807 if (!NT_SUCCESS(Status)) 2808 break; 2809 } while (stripe < stripe_end); 2810 2811 end2: 2812 chunk_unlock_range(Vcb, c, run_start, run_end - run_start); 2813 2814 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2815 ExFreePool(context.stripes[i].buf); 2816 ExFreePool(context.stripes[i].errorarr); 2817 } 2818 ExFreePool(context.stripes); 2819 2820 end: 2821 ExFreePool(treearr); 2822 ExFreePool(allocarr); 2823 ExFreePool(context.parity_scratch); 2824 2825 if (c->chunk_item->type & BLOCK_FLAG_RAID6) 2826 ExFreePool(context.parity_scratch2); 2827 2828 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2829 ExFreePool(csumarr); 2830 ExFreePool(context.csum); 2831 } 2832 2833 return Status; 2834 } 2835 2836 static NTSTATUS scrub_chunk_raid56(device_extension* Vcb, chunk* c, uint64_t* offset, bool* changed) { 2837 NTSTATUS Status; 2838 KEY searchkey; 2839 traverse_ptr tp; 2840 bool b; 2841 uint64_t full_stripe_len, stripe, stripe_start = 0, stripe_end = 0, total_data = 0; 2842 ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1; 2843 2844 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; 2845 stripe = (*offset - c->offset) / full_stripe_len; 2846 2847 *offset = c->offset + (stripe * full_stripe_len); 2848 2849 searchkey.obj_id = *offset; 2850 searchkey.obj_type = TYPE_METADATA_ITEM; 2851 searchkey.offset = 0xffffffffffffffff; 2852 2853 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 2854 if (!NT_SUCCESS(Status)) { 2855 ERR("find_item returned %08lx\n", Status); 2856 return Status; 2857 } 2858 2859 *changed = false; 2860 2861 do { 2862 traverse_ptr next_tp; 2863 2864 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 2865 break; 2866 2867 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { 2868 uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset; 2869 2870 TRACE("%I64x\n", tp.item->key.obj_id); 2871 2872 if (size < Vcb->superblock.sector_size) { 2873 ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size); 2874 return STATUS_INTERNAL_ERROR; 2875 } 2876 2877 stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len; 2878 2879 if (*changed) { 2880 if (stripe > stripe_end + 1) { 2881 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end); 2882 if (!NT_SUCCESS(Status)) { 2883 ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status); 2884 return Status; 2885 } 2886 2887 stripe_start = stripe; 2888 } 2889 } else 2890 stripe_start = stripe; 2891 2892 stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len; 2893 2894 *changed = true; 2895 2896 total_data += size; 2897 num_extents++; 2898 2899 // only do so much at a time 2900 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB 2901 break; 2902 } 2903 2904 b = find_next_item(Vcb, &tp, &next_tp, false, NULL); 2905 2906 if (b) 2907 tp = next_tp; 2908 } while (b); 2909 2910 if (*changed) { 2911 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end); 2912 if (!NT_SUCCESS(Status)) { 2913 ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status); 2914 return Status; 2915 } 2916 2917 *offset = c->offset + ((stripe_end + 1) * full_stripe_len); 2918 } 2919 2920 return STATUS_SUCCESS; 2921 } 2922 2923 static NTSTATUS scrub_chunk(device_extension* Vcb, chunk* c, uint64_t* offset, bool* changed) { 2924 NTSTATUS Status; 2925 KEY searchkey; 2926 traverse_ptr tp; 2927 bool b = false, tree_run = false; 2928 ULONG type, num_extents = 0; 2929 uint64_t total_data = 0, tree_run_start = 0, tree_run_end = 0; 2930 2931 TRACE("chunk %I64x\n", c->offset); 2932 2933 ExAcquireResourceSharedLite(&Vcb->tree_lock, true); 2934 2935 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) 2936 type = BLOCK_FLAG_DUPLICATE; 2937 else if (c->chunk_item->type & BLOCK_FLAG_RAID0) 2938 type = BLOCK_FLAG_RAID0; 2939 else if (c->chunk_item->type & BLOCK_FLAG_RAID1) 2940 type = BLOCK_FLAG_DUPLICATE; 2941 else if (c->chunk_item->type & BLOCK_FLAG_RAID10) 2942 type = BLOCK_FLAG_RAID10; 2943 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) { 2944 Status = scrub_chunk_raid56(Vcb, c, offset, changed); 2945 goto end; 2946 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2947 Status = scrub_chunk_raid56(Vcb, c, offset, changed); 2948 goto end; 2949 } else if (c->chunk_item->type & BLOCK_FLAG_RAID1C3) 2950 type = BLOCK_FLAG_DUPLICATE; 2951 else if (c->chunk_item->type & BLOCK_FLAG_RAID1C4) 2952 type = BLOCK_FLAG_DUPLICATE; 2953 else // SINGLE 2954 type = BLOCK_FLAG_DUPLICATE; 2955 2956 searchkey.obj_id = *offset; 2957 searchkey.obj_type = TYPE_METADATA_ITEM; 2958 searchkey.offset = 0xffffffffffffffff; 2959 2960 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 2961 if (!NT_SUCCESS(Status)) { 2962 ERR("error - find_item returned %08lx\n", Status); 2963 goto end; 2964 } 2965 2966 do { 2967 traverse_ptr next_tp; 2968 2969 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size) 2970 break; 2971 2972 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) { 2973 uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset; 2974 bool is_tree; 2975 void* csum = NULL; 2976 RTL_BITMAP bmp; 2977 ULONG* bmparr = NULL, bmplen; 2978 2979 TRACE("%I64x\n", tp.item->key.obj_id); 2980 2981 is_tree = false; 2982 2983 if (tp.item->key.obj_type == TYPE_METADATA_ITEM) 2984 is_tree = true; 2985 else { 2986 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 2987 2988 if (tp.item->size < sizeof(EXTENT_ITEM)) { 2989 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 2990 Status = STATUS_INTERNAL_ERROR; 2991 goto end; 2992 } 2993 2994 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 2995 is_tree = true; 2996 } 2997 2998 if (size < Vcb->superblock.sector_size) { 2999 ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size); 3000 Status = STATUS_INTERNAL_ERROR; 3001 goto end; 3002 } 3003 3004 // load csum 3005 if (!is_tree) { 3006 traverse_ptr tp2; 3007 3008 csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((Vcb->csum_size * size) >> Vcb->sector_shift), ALLOC_TAG); 3009 if (!csum) { 3010 ERR("out of memory\n"); 3011 Status = STATUS_INSUFFICIENT_RESOURCES; 3012 goto end; 3013 } 3014 3015 bmplen = (ULONG)(size >> Vcb->sector_shift); 3016 3017 bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align((bmplen >> 3) + 1, sizeof(ULONG))), ALLOC_TAG); 3018 if (!bmparr) { 3019 ERR("out of memory\n"); 3020 ExFreePool(csum); 3021 Status = STATUS_INSUFFICIENT_RESOURCES; 3022 goto end; 3023 } 3024 3025 RtlInitializeBitMap(&bmp, bmparr, bmplen); 3026 RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum 3027 3028 searchkey.obj_id = EXTENT_CSUM_ID; 3029 searchkey.obj_type = TYPE_EXTENT_CSUM; 3030 searchkey.offset = tp.item->key.obj_id; 3031 3032 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL); 3033 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 3034 ERR("find_item returned %08lx\n", Status); 3035 ExFreePool(csum); 3036 ExFreePool(bmparr); 3037 goto end; 3038 } 3039 3040 if (Status != STATUS_NOT_FOUND) { 3041 do { 3042 traverse_ptr next_tp2; 3043 3044 if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) { 3045 if (tp2.item->key.offset >= tp.item->key.obj_id + size) 3046 break; 3047 else if (tp2.item->size >= Vcb->csum_size && tp2.item->key.offset + (((uint64_t)tp2.item->size << Vcb->sector_shift) / Vcb->csum_size) >= tp.item->key.obj_id) { 3048 uint64_t cs = max(tp.item->key.obj_id, tp2.item->key.offset); 3049 uint64_t ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (((uint64_t)tp2.item->size << Vcb->sector_shift) / Vcb->csum_size)); 3050 3051 RtlCopyMemory((uint8_t*)csum + (((cs - tp.item->key.obj_id) * Vcb->csum_size) >> Vcb->sector_shift), 3052 tp2.item->data + (((cs - tp2.item->key.offset) * Vcb->csum_size) >> Vcb->sector_shift), 3053 (ULONG)(((ce - cs) * Vcb->csum_size) >> Vcb->sector_shift)); 3054 3055 RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) >> Vcb->sector_shift), (ULONG)((ce - cs) >> Vcb->sector_shift)); 3056 3057 if (ce == tp.item->key.obj_id + size) 3058 break; 3059 } 3060 } 3061 3062 if (find_next_item(Vcb, &tp2, &next_tp2, false, NULL)) 3063 tp2 = next_tp2; 3064 else 3065 break; 3066 } while (true); 3067 } 3068 } 3069 3070 if (tree_run) { 3071 if (!is_tree || tp.item->key.obj_id > tree_run_end) { 3072 Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL); 3073 if (!NT_SUCCESS(Status)) { 3074 ERR("scrub_extent returned %08lx\n", Status); 3075 goto end; 3076 } 3077 3078 if (!is_tree) 3079 tree_run = false; 3080 else { 3081 tree_run_start = tp.item->key.obj_id; 3082 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; 3083 } 3084 } else 3085 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; 3086 } else if (is_tree) { 3087 tree_run = true; 3088 tree_run_start = tp.item->key.obj_id; 3089 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size; 3090 } 3091 3092 if (!is_tree) { 3093 Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp, bmplen); 3094 if (!NT_SUCCESS(Status)) { 3095 ERR("scrub_data_extent returned %08lx\n", Status); 3096 ExFreePool(csum); 3097 ExFreePool(bmparr); 3098 goto end; 3099 } 3100 3101 ExFreePool(csum); 3102 ExFreePool(bmparr); 3103 } 3104 3105 *offset = tp.item->key.obj_id + size; 3106 *changed = true; 3107 3108 total_data += size; 3109 num_extents++; 3110 3111 // only do so much at a time 3112 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB 3113 break; 3114 } 3115 3116 b = find_next_item(Vcb, &tp, &next_tp, false, NULL); 3117 3118 if (b) 3119 tp = next_tp; 3120 } while (b); 3121 3122 if (tree_run) { 3123 Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL); 3124 if (!NT_SUCCESS(Status)) { 3125 ERR("scrub_extent returned %08lx\n", Status); 3126 goto end; 3127 } 3128 } 3129 3130 Status = STATUS_SUCCESS; 3131 3132 end: 3133 ExReleaseResourceLite(&Vcb->tree_lock); 3134 3135 return Status; 3136 } 3137 3138 _Function_class_(KSTART_ROUTINE) 3139 static void __stdcall scrub_thread(void* context) { 3140 device_extension* Vcb = context; 3141 LIST_ENTRY chunks, *le; 3142 NTSTATUS Status; 3143 LARGE_INTEGER time; 3144 3145 KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, false); 3146 3147 InitializeListHead(&chunks); 3148 3149 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true); 3150 3151 if (Vcb->need_write && !Vcb->readonly) 3152 Status = do_write(Vcb, NULL); 3153 else 3154 Status = STATUS_SUCCESS; 3155 3156 free_trees(Vcb); 3157 3158 if (!NT_SUCCESS(Status)) { 3159 ExReleaseResourceLite(&Vcb->tree_lock); 3160 ERR("do_write returned %08lx\n", Status); 3161 Vcb->scrub.error = Status; 3162 goto end; 3163 } 3164 3165 ExConvertExclusiveToSharedLite(&Vcb->tree_lock); 3166 3167 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 3168 3169 KeQuerySystemTime(&Vcb->scrub.start_time); 3170 Vcb->scrub.finish_time.QuadPart = 0; 3171 Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart; 3172 Vcb->scrub.duration.QuadPart = 0; 3173 Vcb->scrub.total_chunks = 0; 3174 Vcb->scrub.chunks_left = 0; 3175 Vcb->scrub.data_scrubbed = 0; 3176 Vcb->scrub.num_errors = 0; 3177 3178 while (!IsListEmpty(&Vcb->scrub.errors)) { 3179 scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry); 3180 ExFreePool(err); 3181 } 3182 3183 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true); 3184 3185 le = Vcb->chunks.Flink; 3186 while (le != &Vcb->chunks) { 3187 chunk* c = CONTAINING_RECORD(le, chunk, list_entry); 3188 3189 acquire_chunk_lock(c, Vcb); 3190 3191 if (!c->readonly) { 3192 InsertTailList(&chunks, &c->list_entry_balance); 3193 Vcb->scrub.total_chunks++; 3194 Vcb->scrub.chunks_left++; 3195 } 3196 3197 release_chunk_lock(c, Vcb); 3198 3199 le = le->Flink; 3200 } 3201 3202 ExReleaseResourceLite(&Vcb->chunk_lock); 3203 3204 ExReleaseResource(&Vcb->scrub.stats_lock); 3205 3206 ExReleaseResourceLite(&Vcb->tree_lock); 3207 3208 while (!IsListEmpty(&chunks)) { 3209 chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance); 3210 uint64_t offset = c->offset; 3211 bool changed; 3212 3213 c->reloc = true; 3214 3215 KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL); 3216 3217 if (!Vcb->scrub.stopping) { 3218 do { 3219 changed = false; 3220 3221 Status = scrub_chunk(Vcb, c, &offset, &changed); 3222 if (!NT_SUCCESS(Status)) { 3223 ERR("scrub_chunk returned %08lx\n", Status); 3224 Vcb->scrub.stopping = true; 3225 Vcb->scrub.error = Status; 3226 break; 3227 } 3228 3229 if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping) 3230 break; 3231 3232 KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL); 3233 } while (changed); 3234 } 3235 3236 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 3237 3238 if (!Vcb->scrub.stopping) 3239 Vcb->scrub.chunks_left--; 3240 3241 if (IsListEmpty(&chunks)) 3242 KeQuerySystemTime(&Vcb->scrub.finish_time); 3243 3244 ExReleaseResource(&Vcb->scrub.stats_lock); 3245 3246 c->reloc = false; 3247 c->list_entry_balance.Flink = NULL; 3248 } 3249 3250 KeQuerySystemTime(&time); 3251 Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart; 3252 3253 end: 3254 ZwClose(Vcb->scrub.thread); 3255 Vcb->scrub.thread = NULL; 3256 3257 KeSetEvent(&Vcb->scrub.finished, 0, false); 3258 } 3259 3260 NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3261 NTSTATUS Status; 3262 OBJECT_ATTRIBUTES oa; 3263 3264 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3265 return STATUS_PRIVILEGE_NOT_HELD; 3266 3267 if (Vcb->locked) { 3268 WARN("cannot start scrub while locked\n"); 3269 return STATUS_DEVICE_NOT_READY; 3270 } 3271 3272 if (Vcb->balance.thread) { 3273 WARN("cannot start scrub while balance running\n"); 3274 return STATUS_DEVICE_NOT_READY; 3275 } 3276 3277 if (Vcb->scrub.thread) { 3278 WARN("scrub already running\n"); 3279 return STATUS_DEVICE_NOT_READY; 3280 } 3281 3282 if (Vcb->readonly) 3283 return STATUS_MEDIA_WRITE_PROTECTED; 3284 3285 Vcb->scrub.stopping = false; 3286 Vcb->scrub.paused = false; 3287 Vcb->scrub.error = STATUS_SUCCESS; 3288 KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused); 3289 3290 InitializeObjectAttributes(&oa, NULL, OBJ_KERNEL_HANDLE, NULL, NULL); 3291 3292 Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, &oa, NULL, NULL, scrub_thread, Vcb); 3293 if (!NT_SUCCESS(Status)) { 3294 ERR("PsCreateSystemThread returned %08lx\n", Status); 3295 return Status; 3296 } 3297 3298 return STATUS_SUCCESS; 3299 } 3300 3301 NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length) { 3302 btrfs_query_scrub* bqs = (btrfs_query_scrub*)data; 3303 ULONG len; 3304 NTSTATUS Status; 3305 LIST_ENTRY* le; 3306 btrfs_scrub_error* bse = NULL; 3307 3308 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3309 return STATUS_PRIVILEGE_NOT_HELD; 3310 3311 if (length < offsetof(btrfs_query_scrub, errors)) 3312 return STATUS_BUFFER_TOO_SMALL; 3313 3314 ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, true); 3315 3316 if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0) 3317 bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING; 3318 else 3319 bqs->status = BTRFS_SCRUB_STOPPED; 3320 3321 bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart; 3322 bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart; 3323 bqs->chunks_left = Vcb->scrub.chunks_left; 3324 bqs->total_chunks = Vcb->scrub.total_chunks; 3325 bqs->data_scrubbed = Vcb->scrub.data_scrubbed; 3326 3327 bqs->duration = Vcb->scrub.duration.QuadPart; 3328 3329 if (bqs->status == BTRFS_SCRUB_RUNNING) { 3330 LARGE_INTEGER time; 3331 3332 KeQuerySystemTime(&time); 3333 bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart; 3334 } 3335 3336 bqs->error = Vcb->scrub.error; 3337 3338 bqs->num_errors = Vcb->scrub.num_errors; 3339 3340 len = length - offsetof(btrfs_query_scrub, errors); 3341 3342 le = Vcb->scrub.errors.Flink; 3343 while (le != &Vcb->scrub.errors) { 3344 scrub_error* err = CONTAINING_RECORD(le, scrub_error, list_entry); 3345 ULONG errlen; 3346 3347 if (err->is_metadata) 3348 errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY); 3349 else 3350 errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length; 3351 3352 if (len < errlen) { 3353 Status = STATUS_BUFFER_OVERFLOW; 3354 goto end; 3355 } 3356 3357 if (!bse) 3358 bse = &bqs->errors; 3359 else { 3360 ULONG lastlen; 3361 3362 if (bse->is_metadata) 3363 lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY); 3364 else 3365 lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length; 3366 3367 bse->next_entry = lastlen; 3368 bse = (btrfs_scrub_error*)(((uint8_t*)bse) + lastlen); 3369 } 3370 3371 bse->next_entry = 0; 3372 bse->address = err->address; 3373 bse->device = err->device; 3374 bse->recovered = err->recovered; 3375 bse->is_metadata = err->is_metadata; 3376 bse->parity = err->parity; 3377 3378 if (err->is_metadata) { 3379 bse->metadata.root = err->metadata.root; 3380 bse->metadata.level = err->metadata.level; 3381 bse->metadata.firstitem = err->metadata.firstitem; 3382 } else { 3383 bse->data.subvol = err->data.subvol; 3384 bse->data.offset = err->data.offset; 3385 bse->data.filename_length = err->data.filename_length; 3386 RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length); 3387 } 3388 3389 len -= errlen; 3390 le = le->Flink; 3391 } 3392 3393 Status = STATUS_SUCCESS; 3394 3395 end: 3396 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 3397 3398 return Status; 3399 } 3400 3401 NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3402 LARGE_INTEGER time; 3403 3404 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3405 return STATUS_PRIVILEGE_NOT_HELD; 3406 3407 if (!Vcb->scrub.thread) 3408 return STATUS_DEVICE_NOT_READY; 3409 3410 if (Vcb->scrub.paused) 3411 return STATUS_DEVICE_NOT_READY; 3412 3413 Vcb->scrub.paused = true; 3414 KeClearEvent(&Vcb->scrub.event); 3415 3416 KeQuerySystemTime(&time); 3417 Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart; 3418 3419 return STATUS_SUCCESS; 3420 } 3421 3422 NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3423 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3424 return STATUS_PRIVILEGE_NOT_HELD; 3425 3426 if (!Vcb->scrub.thread) 3427 return STATUS_DEVICE_NOT_READY; 3428 3429 if (!Vcb->scrub.paused) 3430 return STATUS_DEVICE_NOT_READY; 3431 3432 Vcb->scrub.paused = false; 3433 KeSetEvent(&Vcb->scrub.event, 0, false); 3434 3435 KeQuerySystemTime(&Vcb->scrub.resume_time); 3436 3437 return STATUS_SUCCESS; 3438 } 3439 3440 NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) { 3441 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode)) 3442 return STATUS_PRIVILEGE_NOT_HELD; 3443 3444 if (!Vcb->scrub.thread) 3445 return STATUS_DEVICE_NOT_READY; 3446 3447 Vcb->scrub.paused = false; 3448 Vcb->scrub.stopping = true; 3449 KeSetEvent(&Vcb->scrub.event, 0, false); 3450 3451 return STATUS_SUCCESS; 3452 } 3453