1 /* Copyright (c) Mark Harmstone 2017 2 * 3 * This file is part of WinBtrfs. 4 * 5 * WinBtrfs is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU Lesser General Public Licence as published by 7 * the Free Software Foundation, either version 3 of the Licence, or 8 * (at your option) any later version. 9 * 10 * WinBtrfs is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public Licence for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public Licence 16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "btrfs_drv.h" 19 20 #define SCRUB_UNIT 0x100000 // 1 MB 21 22 struct _scrub_context; 23 24 typedef struct { 25 struct _scrub_context* context; 26 PIRP Irp; 27 uint64_t start; 28 uint32_t length; 29 IO_STATUS_BLOCK iosb; 30 uint8_t* buf; 31 bool csum_error; 32 void* bad_csums; 33 } scrub_context_stripe; 34 35 typedef struct _scrub_context { 36 KEVENT Event; 37 scrub_context_stripe* stripes; 38 LONG stripes_left; 39 } scrub_context; 40 41 typedef struct { 42 ANSI_STRING name; 43 bool orig_subvol; 44 LIST_ENTRY list_entry; 45 } path_part; 46 47 static void log_file_checksum_error(device_extension* Vcb, uint64_t addr, uint64_t devid, uint64_t subvol, uint64_t inode, uint64_t offset) { 48 LIST_ENTRY *le, parts; 49 root* r = NULL; 50 KEY searchkey; 51 traverse_ptr tp; 52 uint64_t dir; 53 bool orig_subvol = true, not_in_tree = false; 54 ANSI_STRING fn; 55 scrub_error* err; 56 NTSTATUS Status; 57 ULONG utf16len; 58 59 le = Vcb->roots.Flink; 60 while (le != &Vcb->roots) { 61 root* r2 = CONTAINING_RECORD(le, root, list_entry); 62 63 if (r2->id == subvol) { 64 r = r2; 65 break; 66 } 67 68 le = le->Flink; 69 } 70 71 if (!r) { 72 ERR("could not find subvol %I64x\n", subvol); 73 return; 74 } 75 76 
InitializeListHead(&parts); 77 78 dir = inode; 79 80 while (true) { 81 if (dir == r->root_item.objid) { 82 if (r == Vcb->root_fileref->fcb->subvol) 83 break; 84 85 searchkey.obj_id = r->id; 86 searchkey.obj_type = TYPE_ROOT_BACKREF; 87 searchkey.offset = 0xffffffffffffffff; 88 89 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, NULL); 90 if (!NT_SUCCESS(Status)) { 91 ERR("find_item returned %08lx\n", Status); 92 goto end; 93 } 94 95 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) { 96 ROOT_REF* rr = (ROOT_REF*)tp.item->data; 97 path_part* pp; 98 99 if (tp.item->size < sizeof(ROOT_REF)) { 100 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF)); 101 goto end; 102 } 103 104 if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) { 105 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 106 tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n); 107 goto end; 108 } 109 110 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); 111 if (!pp) { 112 ERR("out of memory\n"); 113 goto end; 114 } 115 116 pp->name.Buffer = rr->name; 117 pp->name.Length = pp->name.MaximumLength = rr->n; 118 pp->orig_subvol = false; 119 120 InsertTailList(&parts, &pp->list_entry); 121 122 r = NULL; 123 124 le = Vcb->roots.Flink; 125 while (le != &Vcb->roots) { 126 root* r2 = CONTAINING_RECORD(le, root, list_entry); 127 128 if (r2->id == tp.item->key.offset) { 129 r = r2; 130 break; 131 } 132 133 le = le->Flink; 134 } 135 136 if (!r) { 137 ERR("could not find subvol %I64x\n", tp.item->key.offset); 138 goto end; 139 } 140 141 dir = rr->dir; 142 orig_subvol = false; 143 } else { 144 not_in_tree = true; 145 break; 146 } 147 } else { 148 searchkey.obj_id = dir; 149 searchkey.obj_type = TYPE_INODE_EXTREF; 150 searchkey.offset = 0xffffffffffffffff; 151 
152 Status = find_item(Vcb, r, &tp, &searchkey, false, NULL); 153 if (!NT_SUCCESS(Status)) { 154 ERR("find_item returned %08lx\n", Status); 155 goto end; 156 } 157 158 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) { 159 INODE_REF* ir = (INODE_REF*)tp.item->data; 160 path_part* pp; 161 162 if (tp.item->size < sizeof(INODE_REF)) { 163 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF)); 164 goto end; 165 } 166 167 if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) { 168 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 169 tp.item->size, offsetof(INODE_REF, name[0]) + ir->n); 170 goto end; 171 } 172 173 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG); 174 if (!pp) { 175 ERR("out of memory\n"); 176 goto end; 177 } 178 179 pp->name.Buffer = ir->name; 180 pp->name.Length = pp->name.MaximumLength = ir->n; 181 pp->orig_subvol = orig_subvol; 182 183 InsertTailList(&parts, &pp->list_entry); 184 185 if (dir == tp.item->key.offset) 186 break; 187 188 dir = tp.item->key.offset; 189 } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) { 190 INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data; 191 path_part* pp; 192 193 if (tp.item->size < sizeof(INODE_EXTREF)) { 194 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 195 tp.item->size, sizeof(INODE_EXTREF)); 196 goto end; 197 } 198 199 if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) { 200 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 201 tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n); 202 goto end; 203 } 204 205 pp = ExAllocatePoolWithTag(PagedPool, 
sizeof(path_part), ALLOC_TAG); 206 if (!pp) { 207 ERR("out of memory\n"); 208 goto end; 209 } 210 211 pp->name.Buffer = ier->name; 212 pp->name.Length = pp->name.MaximumLength = ier->n; 213 pp->orig_subvol = orig_subvol; 214 215 InsertTailList(&parts, &pp->list_entry); 216 217 if (dir == ier->dir) 218 break; 219 220 dir = ier->dir; 221 } else { 222 ERR("could not find INODE_REF for inode %I64x in subvol %I64x\n", dir, r->id); 223 goto end; 224 } 225 } 226 } 227 228 fn.MaximumLength = 0; 229 230 if (not_in_tree) { 231 le = parts.Blink; 232 while (le != &parts) { 233 path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); 234 LIST_ENTRY* le2 = le->Blink; 235 236 if (pp->orig_subvol) 237 break; 238 239 RemoveTailList(&parts); 240 ExFreePool(pp); 241 242 le = le2; 243 } 244 } 245 246 le = parts.Flink; 247 while (le != &parts) { 248 path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); 249 250 fn.MaximumLength += pp->name.Length + 1; 251 252 le = le->Flink; 253 } 254 255 fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG); 256 if (!fn.Buffer) { 257 ERR("out of memory\n"); 258 goto end; 259 } 260 261 fn.Length = 0; 262 263 le = parts.Blink; 264 while (le != &parts) { 265 path_part* pp = CONTAINING_RECORD(le, path_part, list_entry); 266 267 fn.Buffer[fn.Length] = '\\'; 268 fn.Length++; 269 270 RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length); 271 fn.Length += pp->name.Length; 272 273 le = le->Blink; 274 } 275 276 if (not_in_tree) 277 ERR("subvol %I64x, %.*s, offset %I64x\n", subvol, fn.Length, fn.Buffer, offset); 278 else 279 ERR("%.*s, offset %I64x\n", fn.Length, fn.Buffer, offset); 280 281 Status = utf8_to_utf16(NULL, 0, &utf16len, fn.Buffer, fn.Length); 282 if (!NT_SUCCESS(Status)) { 283 ERR("utf8_to_utf16 1 returned %08lx\n", Status); 284 ExFreePool(fn.Buffer); 285 goto end; 286 } 287 288 err = ExAllocatePoolWithTag(PagedPool, offsetof(scrub_error, data.filename[0]) + utf16len, ALLOC_TAG); 289 if (!err) { 290 
ERR("out of memory\n"); 291 ExFreePool(fn.Buffer); 292 goto end; 293 } 294 295 err->address = addr; 296 err->device = devid; 297 err->recovered = false; 298 err->is_metadata = false; 299 err->parity = false; 300 301 err->data.subvol = not_in_tree ? subvol : 0; 302 err->data.offset = offset; 303 err->data.filename_length = (uint16_t)utf16len; 304 305 Status = utf8_to_utf16(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length); 306 if (!NT_SUCCESS(Status)) { 307 ERR("utf8_to_utf16 2 returned %08lx\n", Status); 308 ExFreePool(fn.Buffer); 309 ExFreePool(err); 310 goto end; 311 } 312 313 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 314 315 Vcb->scrub.num_errors++; 316 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 317 318 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 319 320 ExFreePool(fn.Buffer); 321 322 end: 323 while (!IsListEmpty(&parts)) { 324 path_part* pp = CONTAINING_RECORD(RemoveHeadList(&parts), path_part, list_entry); 325 326 ExFreePool(pp); 327 } 328 } 329 330 static void log_file_checksum_error_shared(device_extension* Vcb, uint64_t treeaddr, uint64_t addr, uint64_t devid, uint64_t extent) { 331 tree_header* tree; 332 NTSTATUS Status; 333 leaf_node* ln; 334 ULONG i; 335 336 tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); 337 if (!tree) { 338 ERR("out of memory\n"); 339 return; 340 } 341 342 Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority); 343 if (!NT_SUCCESS(Status)) { 344 ERR("read_data returned %08lx\n", Status); 345 goto end; 346 } 347 348 if (tree->level != 0) { 349 ERR("tree level was %x, expected 0\n", tree->level); 350 goto end; 351 } 352 353 ln = (leaf_node*)&tree[1]; 354 355 for (i = 0; i < tree->num_items; i++) { 356 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) { 357 EXTENT_DATA* ed = (EXTENT_DATA*)((uint8_t*)tree + 
sizeof(tree_header) + ln[i].offset); 358 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data; 359 360 if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr) 361 log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent); 362 } 363 } 364 365 end: 366 ExFreePool(tree); 367 } 368 369 static void log_tree_checksum_error(device_extension* Vcb, uint64_t addr, uint64_t devid, uint64_t root, uint8_t level, KEY* firstitem) { 370 scrub_error* err; 371 372 err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); 373 if (!err) { 374 ERR("out of memory\n"); 375 return; 376 } 377 378 err->address = addr; 379 err->device = devid; 380 err->recovered = false; 381 err->is_metadata = true; 382 err->parity = false; 383 384 err->metadata.root = root; 385 err->metadata.level = level; 386 387 if (firstitem) { 388 ERR("root %I64x, level %u, first item (%I64x,%x,%I64x)\n", root, level, firstitem->obj_id, 389 firstitem->obj_type, firstitem->offset); 390 391 err->metadata.firstitem = *firstitem; 392 } else { 393 ERR("root %I64x, level %u\n", root, level); 394 395 RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY)); 396 } 397 398 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 399 400 Vcb->scrub.num_errors++; 401 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 402 403 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 404 } 405 406 static void log_tree_checksum_error_shared(device_extension* Vcb, uint64_t offset, uint64_t address, uint64_t devid) { 407 tree_header* tree; 408 NTSTATUS Status; 409 internal_node* in; 410 ULONG i; 411 412 tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG); 413 if (!tree) { 414 ERR("out of memory\n"); 415 return; 416 } 417 418 Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority); 419 if (!NT_SUCCESS(Status)) { 420 ERR("read_data returned %08lx\n", Status); 421 
goto end; 422 } 423 424 if (tree->level == 0) { 425 ERR("tree level was 0\n"); 426 goto end; 427 } 428 429 in = (internal_node*)&tree[1]; 430 431 for (i = 0; i < tree->num_items; i++) { 432 if (in[i].address == address) { 433 log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key); 434 break; 435 } 436 } 437 438 end: 439 ExFreePool(tree); 440 } 441 442 static void log_unrecoverable_error(device_extension* Vcb, uint64_t address, uint64_t devid) { 443 KEY searchkey; 444 traverse_ptr tp; 445 NTSTATUS Status; 446 EXTENT_ITEM* ei; 447 EXTENT_ITEM2* ei2 = NULL; 448 uint8_t* ptr; 449 ULONG len; 450 uint64_t rc; 451 452 // FIXME - still log even if rest of this function fails 453 454 searchkey.obj_id = address; 455 searchkey.obj_type = TYPE_METADATA_ITEM; 456 searchkey.offset = 0xffffffffffffffff; 457 458 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 459 if (!NT_SUCCESS(Status)) { 460 ERR("find_item returned %08lx\n", Status); 461 return; 462 } 463 464 if ((tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM) || 465 tp.item->key.obj_id >= address + Vcb->superblock.sector_size || 466 (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) || 467 (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address) 468 ) 469 return; 470 471 if (tp.item->size < sizeof(EXTENT_ITEM)) { 472 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 473 return; 474 } 475 476 ei = (EXTENT_ITEM*)tp.item->data; 477 ptr = (uint8_t*)&ei[1]; 478 len = tp.item->size - sizeof(EXTENT_ITEM); 479 480 if (tp.item->key.obj_id == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { 481 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) { 482 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least 
%Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 483 tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); 484 return; 485 } 486 487 ei2 = (EXTENT_ITEM2*)ptr; 488 489 ptr += sizeof(EXTENT_ITEM2); 490 len -= sizeof(EXTENT_ITEM2); 491 } 492 493 rc = 0; 494 495 while (len > 0) { 496 uint8_t type = *ptr; 497 498 ptr++; 499 len--; 500 501 if (type == TYPE_TREE_BLOCK_REF) { 502 TREE_BLOCK_REF* tbr; 503 504 if (len < sizeof(TREE_BLOCK_REF)) { 505 ERR("TREE_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(TREE_BLOCK_REF), len); 506 break; 507 } 508 509 tbr = (TREE_BLOCK_REF*)ptr; 510 511 log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL); 512 513 rc++; 514 515 ptr += sizeof(TREE_BLOCK_REF); 516 len -= sizeof(TREE_BLOCK_REF); 517 } else if (type == TYPE_EXTENT_DATA_REF) { 518 EXTENT_DATA_REF* edr; 519 520 if (len < sizeof(EXTENT_DATA_REF)) { 521 ERR("EXTENT_DATA_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(EXTENT_DATA_REF), len); 522 break; 523 } 524 525 edr = (EXTENT_DATA_REF*)ptr; 526 527 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); 528 529 rc += edr->count; 530 531 ptr += sizeof(EXTENT_DATA_REF); 532 len -= sizeof(EXTENT_DATA_REF); 533 } else if (type == TYPE_SHARED_BLOCK_REF) { 534 SHARED_BLOCK_REF* sbr; 535 536 if (len < sizeof(SHARED_BLOCK_REF)) { 537 ERR("SHARED_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(SHARED_BLOCK_REF), len); 538 break; 539 } 540 541 sbr = (SHARED_BLOCK_REF*)ptr; 542 543 log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid); 544 545 rc++; 546 547 ptr += sizeof(SHARED_BLOCK_REF); 548 len -= sizeof(SHARED_BLOCK_REF); 549 } else if (type == TYPE_SHARED_DATA_REF) { 550 SHARED_DATA_REF* sdr; 551 552 if (len < sizeof(SHARED_DATA_REF)) { 553 ERR("SHARED_DATA_REF takes up %Iu bytes, but only %lu remaining\n", 
sizeof(SHARED_DATA_REF), len); 554 break; 555 } 556 557 sdr = (SHARED_DATA_REF*)ptr; 558 559 log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, tp.item->key.obj_id); 560 561 rc += sdr->count; 562 563 ptr += sizeof(SHARED_DATA_REF); 564 len -= sizeof(SHARED_DATA_REF); 565 } else { 566 ERR("unknown extent type %x\n", type); 567 break; 568 } 569 } 570 571 if (rc < ei->refcount) { 572 do { 573 traverse_ptr next_tp; 574 575 if (find_next_item(Vcb, &tp, &next_tp, false, NULL)) 576 tp = next_tp; 577 else 578 break; 579 580 if (tp.item->key.obj_id == address) { 581 if (tp.item->key.obj_type == TYPE_TREE_BLOCK_REF) 582 log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL); 583 else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) { 584 EXTENT_DATA_REF* edr; 585 586 if (tp.item->size < sizeof(EXTENT_DATA_REF)) { 587 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, 588 tp.item->size, sizeof(EXTENT_DATA_REF)); 589 break; 590 } 591 592 edr = (EXTENT_DATA_REF*)tp.item->data; 593 594 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id); 595 } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF) 596 log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid); 597 else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF) 598 log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id); 599 } else 600 break; 601 } while (true); 602 } 603 } 604 605 static void log_error(device_extension* Vcb, uint64_t addr, uint64_t devid, bool metadata, bool recoverable, bool parity) { 606 if (recoverable) { 607 scrub_error* err; 608 609 if (parity) { 610 ERR("recovering from parity error at %I64x on device %I64x\n", addr, devid); 611 } else { 612 if (metadata) 613 ERR("recovering from metadata checksum error at %I64x on 
device %I64x\n", addr, devid); 614 else 615 ERR("recovering from data checksum error at %I64x on device %I64x\n", addr, devid); 616 } 617 618 err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG); 619 if (!err) { 620 ERR("out of memory\n"); 621 return; 622 } 623 624 err->address = addr; 625 err->device = devid; 626 err->recovered = true; 627 err->is_metadata = metadata; 628 err->parity = parity; 629 630 if (metadata) 631 RtlZeroMemory(&err->metadata, sizeof(err->metadata)); 632 else 633 RtlZeroMemory(&err->data, sizeof(err->data)); 634 635 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true); 636 637 Vcb->scrub.num_errors++; 638 InsertTailList(&Vcb->scrub.errors, &err->list_entry); 639 640 ExReleaseResourceLite(&Vcb->scrub.stats_lock); 641 } else { 642 if (metadata) 643 ERR("unrecoverable metadata checksum error at %I64x\n", addr); 644 else 645 ERR("unrecoverable data checksum error at %I64x\n", addr); 646 647 log_unrecoverable_error(Vcb, addr, devid); 648 } 649 } 650 651 _Function_class_(IO_COMPLETION_ROUTINE) 652 static NTSTATUS __stdcall scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 653 scrub_context_stripe* stripe = conptr; 654 scrub_context* context = (scrub_context*)stripe->context; 655 ULONG left = InterlockedDecrement(&context->stripes_left); 656 657 UNUSED(DeviceObject); 658 659 stripe->iosb = Irp->IoStatus; 660 661 if (left == 0) 662 KeSetEvent(&context->Event, 0, false); 663 664 return STATUS_MORE_PROCESSING_REQUIRED; 665 } 666 667 static NTSTATUS scrub_extent_dup(device_extension* Vcb, chunk* c, uint64_t offset, void* csum, scrub_context* context) { 668 NTSTATUS Status; 669 bool csum_error = false; 670 ULONG i; 671 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 672 uint16_t present_devices = 0; 673 674 if (csum) { 675 ULONG good_stripe = 0xffffffff; 676 677 for (i = 0; i < c->chunk_item->num_stripes; i++) { 678 if (c->devices[i]->devobj) { 679 present_devices++; 680 681 // if first 
stripe is okay, we only need to check that the others are identical to it 682 if (good_stripe != 0xffffffff) { 683 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 684 context->stripes[good_stripe].length) != context->stripes[i].length) { 685 context->stripes[i].csum_error = true; 686 csum_error = true; 687 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 688 } 689 } else { 690 Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum); 691 if (Status == STATUS_CRC_ERROR) { 692 context->stripes[i].csum_error = true; 693 csum_error = true; 694 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 695 } else if (!NT_SUCCESS(Status)) { 696 ERR("check_csum returned %08lx\n", Status); 697 return Status; 698 } else 699 good_stripe = i; 700 } 701 } 702 } 703 } else { 704 ULONG good_stripe = 0xffffffff; 705 706 for (i = 0; i < c->chunk_item->num_stripes; i++) { 707 ULONG j; 708 709 if (c->devices[i]->devobj) { 710 // if first stripe is okay, we only need to check that the others are identical to it 711 if (good_stripe != 0xffffffff) { 712 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf, 713 context->stripes[good_stripe].length) != context->stripes[i].length) { 714 context->stripes[i].csum_error = true; 715 csum_error = true; 716 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 717 } 718 } else { 719 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 720 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 721 722 if (!check_tree_checksum(Vcb, th) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) { 723 context->stripes[i].csum_error = true; 724 csum_error = true; 725 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 726 } 727 } 728 729 if (!context->stripes[i].csum_error) 730 good_stripe 
= i; 731 } 732 } 733 } 734 } 735 736 if (!csum_error) 737 return STATUS_SUCCESS; 738 739 // handle checksum error 740 741 for (i = 0; i < c->chunk_item->num_stripes; i++) { 742 if (context->stripes[i].csum_error) { 743 if (csum) { 744 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * Vcb->csum_size / Vcb->superblock.sector_size, ALLOC_TAG); 745 if (!context->stripes[i].bad_csums) { 746 ERR("out of memory\n"); 747 return STATUS_INSUFFICIENT_RESOURCES; 748 } 749 750 do_calc_job(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums); 751 } else { 752 ULONG j; 753 754 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * Vcb->csum_size / Vcb->superblock.node_size, ALLOC_TAG); 755 if (!context->stripes[i].bad_csums) { 756 ERR("out of memory\n"); 757 return STATUS_INSUFFICIENT_RESOURCES; 758 } 759 760 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 761 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 762 763 get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[i].bad_csums + (Vcb->csum_size * j)); 764 } 765 } 766 } 767 } 768 769 if (present_devices > 1) { 770 ULONG good_stripe = 0xffffffff; 771 772 for (i = 0; i < c->chunk_item->num_stripes; i++) { 773 if (c->devices[i]->devobj && !context->stripes[i].csum_error) { 774 good_stripe = i; 775 break; 776 } 777 } 778 779 if (good_stripe != 0xffffffff) { 780 // log 781 782 for (i = 0; i < c->chunk_item->num_stripes; i++) { 783 if (context->stripes[i].csum_error) { 784 ULONG j; 785 786 if (csum) { 787 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { 788 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j + Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 789 uint64_t addr = offset + UInt32x32To64(j, 
Vcb->superblock.sector_size); 790 791 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false); 792 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 793 } 794 } 795 } else { 796 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 797 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 798 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 799 800 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) { 801 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false); 802 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 803 } 804 } 805 } 806 } 807 } 808 809 // write good data over bad 810 811 for (i = 0; i < c->chunk_item->num_stripes; i++) { 812 if (context->stripes[i].csum_error && !c->devices[i]->readonly) { 813 Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset, 814 context->stripes[good_stripe].buf, context->stripes[i].length); 815 816 if (!NT_SUCCESS(Status)) { 817 ERR("write_data_phys returned %08lx\n", Status); 818 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS); 819 return Status; 820 } 821 } 822 } 823 824 return STATUS_SUCCESS; 825 } 826 827 // if csum errors on all stripes, check sector by sector 828 829 for (i = 0; i < c->chunk_item->num_stripes; i++) { 830 ULONG j; 831 832 if (c->devices[i]->devobj) { 833 if (csum) { 834 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { 835 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 836 ULONG k; 837 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 838 bool recovered = false; 839 840 for (k = 0; k < c->chunk_item->num_stripes; k++) { 841 
if (i != k && c->devices[k]->devobj && 842 RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size), 843 (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) { 844 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false); 845 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 846 847 RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size), 848 context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 849 850 recovered = true; 851 break; 852 } 853 } 854 855 if (!recovered) { 856 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false); 857 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 858 } 859 } 860 } 861 } else { 862 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 863 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 864 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 865 866 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) { 867 ULONG k; 868 bool recovered = false; 869 870 for (k = 0; k < c->chunk_item->num_stripes; k++) { 871 if (i != k && c->devices[k]->devobj) { 872 tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size]; 873 874 if (RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) { 875 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false); 876 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 877 878 RtlCopyMemory(th, th2, Vcb->superblock.node_size); 879 880 recovered = true; 881 break; 882 } 883 } 884 } 885 886 if (!recovered) { 887 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false); 888 log_device_error(Vcb, 
c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 889 } 890 } 891 } 892 } 893 } 894 } 895 896 // write good data over bad 897 898 for (i = 0; i < c->chunk_item->num_stripes; i++) { 899 if (c->devices[i]->devobj && !c->devices[i]->readonly) { 900 Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset, 901 context->stripes[i].buf, context->stripes[i].length); 902 if (!NT_SUCCESS(Status)) { 903 ERR("write_data_phys returned %08lx\n", Status); 904 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 905 return Status; 906 } 907 } 908 } 909 910 return STATUS_SUCCESS; 911 } 912 913 for (i = 0; i < c->chunk_item->num_stripes; i++) { 914 if (c->devices[i]->devobj) { 915 ULONG j; 916 917 if (csum) { 918 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) { 919 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j + Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) { 920 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size); 921 922 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false); 923 } 924 } 925 } else { 926 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) { 927 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size]; 928 uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size); 929 930 if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) 931 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false); 932 } 933 } 934 } 935 } 936 937 return STATUS_SUCCESS; 938 } 939 940 static NTSTATUS scrub_extent_raid0(device_extension* Vcb, chunk* c, uint64_t offset, uint32_t length, uint16_t startoffstripe, void* csum, scrub_context* context) { 941 ULONG j; 942 uint16_t stripe; 943 uint32_t pos, *stripeoff; 944 945 pos = 
0; 946 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes, ALLOC_TAG); 947 if (!stripeoff) { 948 ERR("out of memory\n"); 949 return STATUS_INSUFFICIENT_RESOURCES; 950 } 951 952 RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes); 953 954 stripe = startoffstripe; 955 while (pos < length) { 956 uint32_t readlen; 957 958 if (pos == 0) 959 readlen = (uint32_t)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length)); 960 else 961 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 962 963 if (csum) { 964 for (j = 0; j < readlen; j += Vcb->superblock.sector_size) { 965 if (!check_sector_csum(Vcb, context->stripes[stripe].buf + stripeoff[stripe], (uint8_t*)csum + (pos * Vcb->csum_size / Vcb->superblock.sector_size))) { 966 uint64_t addr = offset + pos; 967 968 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false); 969 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 970 } 971 972 pos += Vcb->superblock.sector_size; 973 stripeoff[stripe] += Vcb->superblock.sector_size; 974 } 975 } else { 976 for (j = 0; j < readlen; j += Vcb->superblock.node_size) { 977 tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]); 978 uint64_t addr = offset + pos; 979 980 if (!check_tree_checksum(Vcb, th) || th->address != addr) { 981 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false); 982 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 983 } 984 985 pos += Vcb->superblock.node_size; 986 stripeoff[stripe] += Vcb->superblock.node_size; 987 } 988 } 989 990 stripe = (stripe + 1) % c->chunk_item->num_stripes; 991 } 992 993 ExFreePool(stripeoff); 994 995 return STATUS_SUCCESS; 996 } 997 998 static NTSTATUS scrub_extent_raid10(device_extension* Vcb, chunk* c, uint64_t offset, uint32_t length, uint16_t 
startoffstripe, void* csum, scrub_context* context) { 999 ULONG j; 1000 uint16_t stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1); 1001 uint32_t pos, *stripeoff; 1002 bool csum_error = false; 1003 NTSTATUS Status; 1004 1005 pos = 0; 1006 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG); 1007 if (!stripeoff) { 1008 ERR("out of memory\n"); 1009 return STATUS_INSUFFICIENT_RESOURCES; 1010 } 1011 1012 RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes); 1013 1014 stripe = startoffstripe; 1015 while (pos < length) { 1016 uint32_t readlen; 1017 1018 if (pos == 0) 1019 readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length, 1020 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1021 else 1022 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 1023 1024 if (csum) { 1025 ULONG good_stripe = 0xffffffff; 1026 uint16_t k; 1027 1028 for (k = 0; k < sub_stripes; k++) { 1029 if (c->devices[(stripe * sub_stripes) + k]->devobj) { 1030 // if first stripe is okay, we only need to check that the others are identical to it 1031 if (good_stripe != 0xffffffff) { 1032 if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe], 1033 context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe], 1034 readlen) != readlen) { 1035 context->stripes[(stripe * sub_stripes) + k].csum_error = true; 1036 csum_error = true; 1037 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1038 } 1039 } else { 1040 for (j = 0; j < readlen; j += Vcb->superblock.sector_size) { 1041 if (!check_sector_csum(Vcb, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, 1042 (uint8_t*)csum + ((pos + j) * Vcb->csum_size / Vcb->superblock.sector_size))) { 1043 csum_error = true; 1044 context->stripes[(stripe 
* sub_stripes) + k].csum_error = true; 1045 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1046 break; 1047 } 1048 } 1049 1050 if (!context->stripes[(stripe * sub_stripes) + k].csum_error) 1051 good_stripe = k; 1052 } 1053 } 1054 } 1055 1056 pos += readlen; 1057 stripeoff[stripe] += readlen; 1058 } else { 1059 ULONG good_stripe = 0xffffffff; 1060 uint16_t k; 1061 1062 for (k = 0; k < sub_stripes; k++) { 1063 if (c->devices[(stripe * sub_stripes) + k]->devobj) { 1064 // if first stripe is okay, we only need to check that the others are identical to it 1065 if (good_stripe != 0xffffffff) { 1066 if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe], 1067 context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe], 1068 readlen) != readlen) { 1069 context->stripes[(stripe * sub_stripes) + k].csum_error = true; 1070 csum_error = true; 1071 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1072 } 1073 } else { 1074 for (j = 0; j < readlen; j += Vcb->superblock.node_size) { 1075 tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j); 1076 uint64_t addr = offset + pos + j; 1077 1078 if (!check_tree_checksum(Vcb, th) || th->address != addr) { 1079 csum_error = true; 1080 context->stripes[(stripe * sub_stripes) + k].csum_error = true; 1081 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1082 break; 1083 } 1084 } 1085 1086 if (!context->stripes[(stripe * sub_stripes) + k].csum_error) 1087 good_stripe = k; 1088 } 1089 } 1090 } 1091 1092 pos += readlen; 1093 stripeoff[stripe] += readlen; 1094 } 1095 1096 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1097 } 1098 1099 if (!csum_error) { 1100 Status = STATUS_SUCCESS; 1101 goto end; 1102 } 1103 1104 for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) { 
1105 ULONG goodstripe = 0xffffffff; 1106 uint16_t k; 1107 bool hasbadstripe = false; 1108 1109 if (context->stripes[j].length == 0) 1110 continue; 1111 1112 for (k = 0; k < sub_stripes; k++) { 1113 if (c->devices[j + k]->devobj) { 1114 if (!context->stripes[j + k].csum_error) 1115 goodstripe = k; 1116 else 1117 hasbadstripe = true; 1118 } 1119 } 1120 1121 if (hasbadstripe) { 1122 if (goodstripe != 0xffffffff) { 1123 for (k = 0; k < sub_stripes; k++) { 1124 if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) { 1125 uint32_t so = 0; 1126 bool recovered = false; 1127 1128 pos = 0; 1129 1130 stripe = startoffstripe; 1131 while (pos < length) { 1132 uint32_t readlen; 1133 1134 if (pos == 0) 1135 readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length, 1136 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1137 else 1138 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 1139 1140 if (stripe == j / sub_stripes) { 1141 if (csum) { 1142 ULONG l; 1143 1144 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { 1145 if (RtlCompareMemory(context->stripes[j + k].buf + so, 1146 context->stripes[j + goodstripe].buf + so, 1147 Vcb->superblock.sector_size) != Vcb->superblock.sector_size) { 1148 uint64_t addr = offset + pos; 1149 1150 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false); 1151 1152 recovered = true; 1153 } 1154 1155 pos += Vcb->superblock.sector_size; 1156 so += Vcb->superblock.sector_size; 1157 } 1158 } else { 1159 ULONG l; 1160 1161 for (l = 0; l < readlen; l += Vcb->superblock.node_size) { 1162 if (RtlCompareMemory(context->stripes[j + k].buf + so, 1163 context->stripes[j + goodstripe].buf + so, 1164 Vcb->superblock.node_size) != Vcb->superblock.node_size) { 1165 uint64_t addr = offset + pos; 1166 1167 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false); 1168 1169 recovered = true; 1170 } 1171 1172 pos 
+= Vcb->superblock.node_size; 1173 so += Vcb->superblock.node_size; 1174 } 1175 } 1176 } else 1177 pos += readlen; 1178 1179 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1180 } 1181 1182 if (recovered) { 1183 // write good data over bad 1184 1185 if (!c->devices[j + k]->readonly) { 1186 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1187 1188 Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset, 1189 context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length); 1190 1191 if (!NT_SUCCESS(Status)) { 1192 ERR("write_data_phys returned %08lx\n", Status); 1193 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); 1194 goto end; 1195 } 1196 } 1197 } 1198 } 1199 } 1200 } else { 1201 uint32_t so = 0; 1202 bool recovered = false; 1203 1204 if (csum) { 1205 for (k = 0; k < sub_stripes; k++) { 1206 if (c->devices[j + k]->devobj) { 1207 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * Vcb->csum_size / Vcb->superblock.sector_size, 1208 ALLOC_TAG); 1209 if (!context->stripes[j + k].bad_csums) { 1210 ERR("out of memory\n"); 1211 Status = STATUS_INSUFFICIENT_RESOURCES; 1212 goto end; 1213 } 1214 1215 do_calc_job(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums); 1216 } 1217 } 1218 } else { 1219 for (k = 0; k < sub_stripes; k++) { 1220 if (c->devices[j + k]->devobj) { 1221 ULONG l; 1222 1223 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * Vcb->csum_size / Vcb->superblock.node_size, 1224 ALLOC_TAG); 1225 if (!context->stripes[j + k].bad_csums) { 1226 ERR("out of memory\n"); 1227 Status = STATUS_INSUFFICIENT_RESOURCES; 1228 goto end; 1229 } 1230 1231 for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) { 1232 tree_header* th = 
(tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size]; 1233 1234 get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[j + k].bad_csums + (Vcb->csum_size * l)); 1235 } 1236 } 1237 } 1238 } 1239 1240 pos = 0; 1241 1242 stripe = startoffstripe; 1243 while (pos < length) { 1244 uint32_t readlen; 1245 1246 if (pos == 0) 1247 readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length, 1248 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length)); 1249 else 1250 readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length); 1251 1252 if (stripe == j / sub_stripes) { 1253 ULONG l; 1254 1255 if (csum) { 1256 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) { 1257 bool has_error = false; 1258 1259 goodstripe = 0xffffffff; 1260 for (k = 0; k < sub_stripes; k++) { 1261 if (c->devices[j + k]->devobj) { 1262 if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.sector_size), 1263 (uint8_t*)csum + (pos * Vcb->csum_size / Vcb->superblock.sector_size), 1264 Vcb->csum_size) != Vcb->csum_size) { 1265 has_error = true; 1266 } else 1267 goodstripe = k; 1268 } 1269 } 1270 1271 if (has_error) { 1272 if (goodstripe != 0xffffffff) { 1273 for (k = 0; k < sub_stripes; k++) { 1274 if (c->devices[j + k]->devobj && 1275 RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.sector_size), 1276 (uint8_t*)csum + (pos * Vcb->csum_size / Vcb->superblock.sector_size), 1277 Vcb->csum_size) != Vcb->csum_size) { 1278 uint64_t addr = offset + pos; 1279 1280 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false); 1281 1282 recovered = true; 1283 1284 RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so, 1285 Vcb->superblock.sector_size); 1286 } 1287 } 1288 } else { 1289 uint64_t addr = offset + pos; 1290 1291 for (k = 0; k < sub_stripes; k++) { 1292 if 
(c->devices[j + j]->devobj) { 1293 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, false, false); 1294 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1295 } 1296 } 1297 } 1298 } 1299 1300 pos += Vcb->superblock.sector_size; 1301 so += Vcb->superblock.sector_size; 1302 } 1303 } else { 1304 for (l = 0; l < readlen; l += Vcb->superblock.node_size) { 1305 for (k = 0; k < sub_stripes; k++) { 1306 if (c->devices[j + k]->devobj) { 1307 tree_header* th = (tree_header*)&context->stripes[j + k].buf[so]; 1308 uint64_t addr = offset + pos; 1309 1310 if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) { 1311 ULONG m; 1312 1313 recovered = false; 1314 1315 for (m = 0; m < sub_stripes; m++) { 1316 if (m != k) { 1317 tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so]; 1318 1319 if (RtlCompareMemory((uint8_t*)context->stripes[j + m].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) { 1320 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false); 1321 1322 RtlCopyMemory(th, th2, Vcb->superblock.node_size); 1323 1324 recovered = true; 1325 break; 1326 } else 1327 log_device_error(Vcb, c->devices[j + m], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1328 } 1329 } 1330 1331 if (!recovered) 1332 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, false, false); 1333 } 1334 } 1335 } 1336 1337 pos += Vcb->superblock.node_size; 1338 so += Vcb->superblock.node_size; 1339 } 1340 } 1341 } else 1342 pos += readlen; 1343 1344 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes); 1345 } 1346 1347 if (recovered) { 1348 // write good data over bad 1349 1350 for (k = 0; k < sub_stripes; k++) { 1351 if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) { 1352 CHUNK_ITEM_STRIPE* cis = 
(CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1353 1354 Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset, 1355 context->stripes[j + k].buf, context->stripes[j + k].length); 1356 1357 if (!NT_SUCCESS(Status)) { 1358 ERR("write_data_phys returned %08lx\n", Status); 1359 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS); 1360 goto end; 1361 } 1362 } 1363 } 1364 } 1365 } 1366 } 1367 } 1368 1369 Status = STATUS_SUCCESS; 1370 1371 end: 1372 ExFreePool(stripeoff); 1373 1374 return Status; 1375 } 1376 1377 static NTSTATUS scrub_extent(device_extension* Vcb, chunk* c, ULONG type, uint64_t offset, uint32_t size, void* csum) { 1378 ULONG i; 1379 scrub_context context; 1380 CHUNK_ITEM_STRIPE* cis; 1381 NTSTATUS Status; 1382 uint16_t startoffstripe, num_missing, allowed_missing; 1383 1384 TRACE("(%p, %p, %lx, %I64x, %x, %p)\n", Vcb, c, type, offset, size, csum); 1385 1386 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); 1387 if (!context.stripes) { 1388 ERR("out of memory\n"); 1389 Status = STATUS_INSUFFICIENT_RESOURCES; 1390 goto end; 1391 } 1392 1393 RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes); 1394 1395 context.stripes_left = 0; 1396 1397 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 1398 1399 if (type == BLOCK_FLAG_RAID0) { 1400 uint64_t startoff, endoff; 1401 uint16_t endoffstripe; 1402 1403 get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); 1404 get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); 1405 1406 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1407 if (startoffstripe > i) 1408 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 1409 else if 
(startoffstripe == i) 1410 context.stripes[i].start = startoff; 1411 else 1412 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); 1413 1414 if (endoffstripe > i) 1415 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start); 1416 else if (endoffstripe == i) 1417 context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start); 1418 else 1419 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start); 1420 } 1421 1422 allowed_missing = 0; 1423 } else if (type == BLOCK_FLAG_RAID10) { 1424 uint64_t startoff, endoff; 1425 uint16_t endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1); 1426 1427 get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe); 1428 get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe); 1429 1430 if ((c->chunk_item->num_stripes % sub_stripes) != 0) { 1431 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes); 1432 Status = STATUS_INTERNAL_ERROR; 1433 goto end; 1434 } 1435 1436 startoffstripe *= sub_stripes; 1437 endoffstripe *= sub_stripes; 1438 1439 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) { 1440 if (startoffstripe > i) 1441 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length; 1442 else if (startoffstripe == i) 1443 context.stripes[i].start = startoff; 1444 else 1445 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length); 1446 1447 if (endoffstripe > i) 1448 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start); 
1449 else if (endoffstripe == i) 1450 context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start); 1451 else 1452 context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start); 1453 1454 for (j = 1; j < sub_stripes; j++) { 1455 context.stripes[i+j].start = context.stripes[i].start; 1456 context.stripes[i+j].length = context.stripes[i].length; 1457 } 1458 } 1459 1460 startoffstripe /= sub_stripes; 1461 allowed_missing = 1; 1462 } else 1463 allowed_missing = c->chunk_item->num_stripes - 1; 1464 1465 num_missing = 0; 1466 1467 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1468 PIO_STACK_LOCATION IrpSp; 1469 1470 context.stripes[i].context = (struct _scrub_context*)&context; 1471 1472 if (type == BLOCK_FLAG_DUPLICATE) { 1473 context.stripes[i].start = offset - c->offset; 1474 context.stripes[i].length = size; 1475 } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) { 1476 ERR("unexpected chunk type %lx\n", type); 1477 Status = STATUS_INTERNAL_ERROR; 1478 goto end; 1479 } 1480 1481 if (!c->devices[i]->devobj) { 1482 num_missing++; 1483 1484 if (num_missing > allowed_missing) { 1485 ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing); 1486 Status = STATUS_INTERNAL_ERROR; 1487 goto end; 1488 } 1489 } else if (context.stripes[i].length > 0) { 1490 context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG); 1491 1492 if (!context.stripes[i].buf) { 1493 ERR("out of memory\n"); 1494 Status = STATUS_INSUFFICIENT_RESOURCES; 1495 goto end; 1496 } 1497 1498 context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false); 1499 1500 if (!context.stripes[i].Irp) { 1501 ERR("IoAllocateIrp failed\n"); 1502 Status = STATUS_INSUFFICIENT_RESOURCES; 1503 goto end; 1504 } 1505 1506 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 1507 IrpSp->MajorFunction = IRP_MJ_READ; 1508 
IrpSp->FileObject = c->devices[i]->fileobj; 1509 1510 if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) { 1511 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG); 1512 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 1513 ERR("out of memory\n"); 1514 Status = STATUS_INSUFFICIENT_RESOURCES; 1515 goto end; 1516 } 1517 1518 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 1519 1520 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 1521 } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) { 1522 context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, false, false, NULL); 1523 if (!context.stripes[i].Irp->MdlAddress) { 1524 ERR("IoAllocateMdl failed\n"); 1525 Status = STATUS_INSUFFICIENT_RESOURCES; 1526 goto end; 1527 } 1528 1529 Status = STATUS_SUCCESS; 1530 1531 _SEH2_TRY { 1532 MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess); 1533 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1534 Status = _SEH2_GetExceptionCode(); 1535 } _SEH2_END; 1536 1537 if (!NT_SUCCESS(Status)) { 1538 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 1539 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 1540 context.stripes[i].Irp->MdlAddress = NULL; 1541 goto end; 1542 } 1543 } else 1544 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 1545 1546 IrpSp->Parameters.Read.Length = context.stripes[i].length; 1547 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset; 1548 1549 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 1550 1551 IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], true, true, true); 1552 1553 context.stripes_left++; 1554 1555 Vcb->scrub.data_scrubbed += context.stripes[i].length; 1556 } 1557 } 1558 1559 if (context.stripes_left == 0) { 1560 ERR("error 
- not reading any stripes\n"); 1561 Status = STATUS_INTERNAL_ERROR; 1562 goto end; 1563 } 1564 1565 KeInitializeEvent(&context.Event, NotificationEvent, false); 1566 1567 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1568 if (c->devices[i]->devobj && context.stripes[i].length > 0) 1569 IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp); 1570 } 1571 1572 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 1573 1574 // return an error if any of the stripes returned an error 1575 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1576 if (!NT_SUCCESS(context.stripes[i].iosb.Status)) { 1577 Status = context.stripes[i].iosb.Status; 1578 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS); 1579 goto end; 1580 } 1581 } 1582 1583 if (type == BLOCK_FLAG_DUPLICATE) { 1584 Status = scrub_extent_dup(Vcb, c, offset, csum, &context); 1585 if (!NT_SUCCESS(Status)) { 1586 ERR("scrub_extent_dup returned %08lx\n", Status); 1587 goto end; 1588 } 1589 } else if (type == BLOCK_FLAG_RAID0) { 1590 Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context); 1591 if (!NT_SUCCESS(Status)) { 1592 ERR("scrub_extent_raid0 returned %08lx\n", Status); 1593 goto end; 1594 } 1595 } else if (type == BLOCK_FLAG_RAID10) { 1596 Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context); 1597 if (!NT_SUCCESS(Status)) { 1598 ERR("scrub_extent_raid10 returned %08lx\n", Status); 1599 goto end; 1600 } 1601 } 1602 1603 end: 1604 if (context.stripes) { 1605 for (i = 0; i < c->chunk_item->num_stripes; i++) { 1606 if (context.stripes[i].Irp) { 1607 if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) { 1608 MmUnlockPages(context.stripes[i].Irp->MdlAddress); 1609 IoFreeMdl(context.stripes[i].Irp->MdlAddress); 1610 } 1611 IoFreeIrp(context.stripes[i].Irp); 1612 } 1613 1614 if (context.stripes[i].buf) 1615 ExFreePool(context.stripes[i].buf); 1616 1617 if (context.stripes[i].bad_csums) 
1618 ExFreePool(context.stripes[i].bad_csums); 1619 } 1620 1621 ExFreePool(context.stripes); 1622 } 1623 1624 return Status; 1625 } 1626 1627 static NTSTATUS scrub_data_extent(device_extension* Vcb, chunk* c, uint64_t offset, ULONG type, void* csum, RTL_BITMAP* bmp, ULONG bmplen) { 1628 NTSTATUS Status; 1629 ULONG runlength, index; 1630 1631 runlength = RtlFindFirstRunClear(bmp, &index); 1632 1633 while (runlength != 0) { 1634 if (index >= bmplen) 1635 break; 1636 1637 if (index + runlength >= bmplen) { 1638 runlength = bmplen - index; 1639 1640 if (runlength == 0) 1641 break; 1642 } 1643 1644 do { 1645 ULONG rl; 1646 1647 if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT) 1648 rl = SCRUB_UNIT / Vcb->superblock.sector_size; 1649 else 1650 rl = runlength; 1651 1652 Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), 1653 rl * Vcb->superblock.sector_size, (uint8_t*)csum + (index * Vcb->csum_size)); 1654 if (!NT_SUCCESS(Status)) { 1655 ERR("scrub_data_extent_dup returned %08lx\n", Status); 1656 return Status; 1657 } 1658 1659 runlength -= rl; 1660 index += rl; 1661 } while (runlength > 0); 1662 1663 runlength = RtlFindNextForwardRunClear(bmp, index, &index); 1664 } 1665 1666 return STATUS_SUCCESS; 1667 } 1668 1669 typedef struct { 1670 uint8_t* buf; 1671 PIRP Irp; 1672 void* context; 1673 IO_STATUS_BLOCK iosb; 1674 uint64_t offset; 1675 bool rewrite, missing; 1676 RTL_BITMAP error; 1677 ULONG* errorarr; 1678 } scrub_context_raid56_stripe; 1679 1680 typedef struct { 1681 scrub_context_raid56_stripe* stripes; 1682 LONG stripes_left; 1683 KEVENT Event; 1684 RTL_BITMAP alloc; 1685 RTL_BITMAP has_csum; 1686 RTL_BITMAP is_tree; 1687 void* csum; 1688 uint8_t* parity_scratch; 1689 uint8_t* parity_scratch2; 1690 } scrub_context_raid56; 1691 1692 _Function_class_(IO_COMPLETION_ROUTINE) 1693 static NTSTATUS __stdcall scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 1694 
scrub_context_raid56_stripe* stripe = conptr; 1695 scrub_context_raid56* context = (scrub_context_raid56*)stripe->context; 1696 LONG left = InterlockedDecrement(&context->stripes_left); 1697 1698 UNUSED(DeviceObject); 1699 1700 stripe->iosb = Irp->IoStatus; 1701 1702 if (left == 0) 1703 KeSetEvent(&context->Event, 0, false); 1704 1705 return STATUS_MORE_PROCESSING_REQUIRED; 1706 } 1707 1708 static void scrub_raid5_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, uint64_t stripe_start, uint64_t bit_start, 1709 uint64_t num, uint16_t missing_devices) { 1710 ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off; 1711 uint16_t stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes; 1712 uint64_t stripeoff; 1713 1714 stripe = (parity + 1) % c->chunk_item->num_stripes; 1715 off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1); 1716 stripeoff = num * sectors_per_stripe; 1717 1718 if (missing_devices == 0) 1719 RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); 1720 1721 while (stripe != parity) { 1722 RtlClearAllBits(&context->stripes[stripe].error); 1723 1724 for (i = 0; i < sectors_per_stripe; i++) { 1725 if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) { 1726 if (RtlCheckBit(&context->is_tree, off)) { 1727 tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size]; 1728 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); 1729 1730 if (!check_tree_checksum(Vcb, th) || th->address != addr) { 1731 RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size); 1732 log_device_error(Vcb, c->devices[stripe], 
BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1733 1734 if (missing_devices > 0) 1735 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false); 1736 } 1737 1738 off += Vcb->superblock.node_size / Vcb->superblock.sector_size; 1739 stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size; 1740 i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1; 1741 1742 continue; 1743 } else if (RtlCheckBit(&context->has_csum, off)) { 1744 if (!check_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), (uint8_t*)context->csum + (Vcb->csum_size * off))) { 1745 RtlSetBit(&context->stripes[stripe].error, i); 1746 log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1747 1748 if (missing_devices > 0) { 1749 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size); 1750 1751 log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false); 1752 } 1753 } 1754 } 1755 } 1756 1757 off++; 1758 stripeoff++; 1759 } 1760 1761 if (missing_devices == 0) 1762 do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length); 1763 1764 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1765 stripeoff = num * sectors_per_stripe; 1766 } 1767 1768 // check parity 1769 1770 if (missing_devices == 0) { 1771 RtlClearAllBits(&context->stripes[parity].error); 1772 1773 for (i = 0; i < sectors_per_stripe; i++) { 1774 ULONG o, j; 1775 1776 o = i * Vcb->superblock.sector_size; 1777 for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE 1778 if (context->parity_scratch[o] != 0) { 1779 RtlSetBit(&context->stripes[parity].error, i); 1780 break; 1781 } 1782 o++; 1783 } 1784 } 1785 } 1786 1787 // log and fix errors 1788 1789 if (missing_devices > 0) 1790 return; 1791 1792 for (i = 0; i < sectors_per_stripe; i++) { 1793 ULONG 
num_errors = 0, bad_off; 1794 uint64_t bad_stripe; 1795 bool alloc = false; 1796 1797 stripe = (parity + 1) % c->chunk_item->num_stripes; 1798 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; 1799 1800 while (stripe != parity) { 1801 if (RtlCheckBit(&context->alloc, off)) { 1802 alloc = true; 1803 1804 if (RtlCheckBit(&context->stripes[stripe].error, i)) { 1805 bad_stripe = stripe; 1806 bad_off = off; 1807 num_errors++; 1808 } 1809 } 1810 1811 off += sectors_per_stripe; 1812 stripe = (stripe + 1) % c->chunk_item->num_stripes; 1813 } 1814 1815 if (!alloc) 1816 continue; 1817 1818 if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine 1819 continue; 1820 1821 if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error 1822 uint64_t addr; 1823 1824 do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 1825 &context->parity_scratch[i * Vcb->superblock.sector_size], 1826 Vcb->superblock.sector_size); 1827 1828 bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i; 1829 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size); 1830 1831 context->stripes[parity].rewrite = true; 1832 1833 log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, false, true, true); 1834 log_device_error(Vcb, c->devices[parity], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1835 } else if (num_errors == 1) { 1836 uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size); 1837 1838 if (RtlCheckBit(&context->is_tree, bad_off)) { 1839 tree_header* th; 1840 1841 do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], 1842 &context->stripes[bad_stripe].buf[(num * 
c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 1843 Vcb->superblock.node_size); 1844 1845 th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size]; 1846 1847 if (check_tree_checksum(Vcb, th) && th->address == addr) { 1848 RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 1849 &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size); 1850 1851 context->stripes[bad_stripe].rewrite = true; 1852 1853 RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1); 1854 1855 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, true, false); 1856 } else 1857 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, false, false); 1858 } else { 1859 uint8_t hash[MAX_HASH_SIZE]; 1860 1861 do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size], 1862 &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 1863 Vcb->superblock.sector_size); 1864 1865 get_sector_csum(Vcb, &context->parity_scratch[i * Vcb->superblock.sector_size], hash); 1866 1867 if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * bad_off), Vcb->csum_size) == Vcb->csum_size) { 1868 RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], 1869 &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size); 1870 1871 context->stripes[bad_stripe].rewrite = true; 1872 1873 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, true, false); 1874 } else 1875 log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, false, false); 1876 } 1877 } else { 1878 stripe = (parity + 1) % c->chunk_item->num_stripes; 1879 off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * 
(c->chunk_item->num_stripes - 1)) + i;

            // Tail of the preceding function (scrub_raid5_stripe), whose start is
            // outside this chunk: walk the data stripes of the row and log every
            // allocated sector whose error bit is set.  NOTE(review): the enclosing
            // condition is not visible here — presumably the path where errors can
            // only be reported, not repaired; confirm against the full function.
            while (stripe != parity) {
                if (RtlCheckBit(&context->alloc, off)) {
                    if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                        uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
                    }
                }

                off += sectors_per_stripe;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }
        }
    }
}

// Scrub one row (one stripe_length per device) of a RAID6 chunk.
//
// context holds the data read from every device for the current batch, plus
// the run-wide alloc/is_tree/has_csum bitmaps and checksum array.  num is the
// row's index within the batch; stripe_start/bit_start locate it in the chunk;
// missing_devices is how many member devices have no device object.
//
// The function:
//   1) verifies every allocated sector on each data stripe (tree-block header
//      checksum + address, or data checksum), recording failures in that
//      stripe's error bitmap, and folds the data stripes into scratch buffers
//      so that P (xor parity) and Q (Galois-field parity) can be checked;
//   2) marks parity sectors that disagree with the recomputed values;
//   3) for each sector column with at most two failures (and enough devices
//      present), reconstructs the bad data from P and/or Q, marks the affected
//      stripes for rewrite, and logs each error as fixed or unfixable.
// With two devices missing there is no redundancy left, so only step 1's
// logging happens.
static void scrub_raid6_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, uint64_t stripe_start, uint64_t bit_start,
                               uint64_t num, uint16_t missing_devices) {
    ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
    uint16_t stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
    uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
    uint64_t stripeoff;

    stripe = (parity1 + 2) % c->chunk_item->num_stripes;
    off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
    stripeoff = num * sectors_per_stripe;

    // Seed the scratch buffers: parity_scratch starts as the P stripe as read
    // (data stripes are xored in below, so a clean row leaves all zeroes);
    // parity_scratch2 will accumulate a freshly computed Q.
    if (c->devices[parity1]->devobj)
        RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

    if (c->devices[parity2]->devobj)
        RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);

    // Pass 1: checksum every allocated sector of each data stripe and xor the
    // stripe into parity_scratch.
    while (stripe != parity1) {
        RtlClearAllBits(&context->stripes[stripe].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                if (RtlCheckBit(&context->is_tree, off)) {
                    // Metadata: verify the tree block's own checksum, and that the
                    // address stored in its header matches where we read it from.
                    tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
                    uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                    if (!check_tree_checksum(Vcb, th) || th->address != addr) {
                        RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        // Two devices missing: nothing can be repaired below, so
                        // log the error as unrecoverable right here.
                        if (missing_devices == 2)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
                    }

                    // A tree block spans several sectors — skip over the rest of it.
                    off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;

                    continue;
                } else if (RtlCheckBit(&context->has_csum, off)) {
                    // Data sector with a checksum recorded in the csum tree.
                    uint8_t hash[MAX_HASH_SIZE];

                    get_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), hash);

                    if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * off), Vcb->csum_size) != Vcb->csum_size) {
                        uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        RtlSetBit(&context->stripes[stripe].error, i);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices == 2)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
                    }
                }
            }

            off++;
            stripeoff++;
        }

        if (c->devices[parity1]->devobj)
            do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
        stripeoff = num * sectors_per_stripe;
    }

    RtlClearAllBits(&context->stripes[parity1].error);

    if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
        // check parity 1
        // P xor (all data stripes) should be all zeroes; any non-zero byte marks
        // the whole sector as a parity-1 error.
        for (i = 0; i < sectors_per_stripe; i++) {
            ULONG o, j;

            o = i * Vcb->superblock.sector_size;
            for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
                if (context->parity_scratch[o] != 0) {
                    RtlSetBit(&context->stripes[parity1].error, i);
                    break;
                }
                o++;
            }
        }
    }

    RtlClearAllBits(&context->stripes[parity2].error);

    if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
        // check parity 2
        // Recompute Q from the data stripes (highest-coefficient stripe first,
        // doubling in GF(2^8) between each) and compare against what was read.
        stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

        while (stripe != parity2) {
            galois_double(context->parity_scratch2, (uint32_t)c->chunk_item->stripe_length);
            do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);

            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
        }

        for (i = 0; i < sectors_per_stripe; i++) {
            if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                 &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size)
                RtlSetBit(&context->stripes[parity2].error, i);
        }
    }

    // With two devices gone there is no redundancy left to repair with; errors
    // were already logged in pass 1.
    if (missing_devices == 2)
        return;

    // log and fix errors

    for (i = 0; i < sectors_per_stripe; i++) {
        ULONG num_errors = 0;
        uint64_t bad_stripe1, bad_stripe2;
        ULONG bad_off1, bad_off2;
        bool alloc = false;

        // Count failures (checksum errors or missing devices) for this sector
        // column across the data stripes, remembering the first two.
        stripe = (parity1 + 2) % c->chunk_item->num_stripes;
        off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;

        while (stripe != parity1) {
            if (RtlCheckBit(&context->alloc, off)) {
                alloc = true;

                if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
                    if (num_errors == 0) {
                        bad_stripe1 = stripe;
                        bad_off1 = off;
                    } else if (num_errors == 1) {
                        bad_stripe2 = stripe;
                        bad_off2 = off;
                    }
                    num_errors++;
                }
            }

            off += sectors_per_stripe;
            stripe = (stripe + 1) % c->chunk_item->num_stripes;
        }

        if (!alloc)
            continue;

        if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
            continue;

        if (num_errors == 0) { // parity error
            uint64_t addr;

            // Data is good but a parity stripe disagrees: rebuild the bad parity
            // sector from the scratch buffers and schedule a rewrite.
            if (RtlCheckBit(&context->stripes[parity1].error, i)) {
                // parity_scratch currently holds P ^ (data), i.e. the correction
                // term; xoring it into P yields the recomputed parity.
                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       &context->parity_scratch[i * Vcb->superblock.sector_size],
                       Vcb->superblock.sector_size);

                bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
                addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

                context->stripes[parity1].rewrite = true;

                log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
                log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
            }

            if (RtlCheckBit(&context->stripes[parity2].error, i)) {
                // parity_scratch2 holds the freshly recomputed Q.
                RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                              &context->parity_scratch2[i * Vcb->superblock.sector_size],
                              Vcb->superblock.sector_size);

                bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
                addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

                context->stripes[parity2].rewrite = true;

                log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
                log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
            }
        } else if (num_errors == 1) {
            // One bad data sector: it can be rebuilt from P (candidate already in
            // parity_scratch after the xor below) and, independently, from Q
            // (built into scratch).  Whichever candidate passes its checksum wins,
            // and the stale parity stripe, if any, is recomputed as well.
            uint32_t len;
            uint16_t stripe_num, bad_stripe_num;
            uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
            uint8_t* scratch;

            len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size;

            scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return;
            }

            RtlZeroMemory(scratch, len);

            // Turn the P-residue into the P-based reconstruction candidate by
            // xoring back in the bad stripe's (corrupt) contents.
            do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                   &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

            if (c->devices[parity2]->devobj) {
                // Build the Q-based candidate: fold all good data stripes and Q
                // into scratch, then divide out the bad stripe's coefficient.
                stripe_num = c->chunk_item->num_stripes - 3;
                while (stripe != parity2) {
                    galois_double(scratch, len);

                    if (stripe != bad_stripe1)
                        do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                    else
                        bad_stripe_num = stripe_num;

                    stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                    stripe_num--;
                }

                do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

                if (bad_stripe_num != 0)
                    galois_divpower(scratch, (uint8_t)bad_stripe_num, len);
            }

            if (RtlCheckBit(&context->is_tree, bad_off1)) {
                // Metadata sector: validate each candidate as a tree block (the
                // checksum sits at the start of the header, hence comparing
                // against th1/th2 directly).
                uint8_t hash1[MAX_HASH_SIZE];
                uint8_t hash2[MAX_HASH_SIZE];
                tree_header *th1 = NULL, *th2 = NULL;

                if (c->devices[parity1]->devobj) {
                    th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                    get_tree_checksum(Vcb, th1, hash1);
                }

                if (c->devices[parity2]->devobj) {
                    th2 = (tree_header*)scratch;
                    get_tree_checksum(Vcb, th2, hash2);
                }

                if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, th1, Vcb->csum_size) == Vcb->csum_size && th1->address == addr) ||
                    (c->devices[parity2]->devobj && RtlCompareMemory(hash2, th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr)) {
                    if (!c->devices[parity1]->devobj || RtlCompareMemory(hash1, th1, Vcb->csum_size) != Vcb->csum_size || th1->address != addr) {
                        // Only the Q-based candidate is good: take it, then
                        // recompute P from the now-correct data if P is present.
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      scratch, Vcb->superblock.node_size);

                        if (c->devices[parity1]->devobj) {
                            // fix parity 1

                            stripe = (parity1 + 2) % c->chunk_item->num_stripes;

                            RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.node_size);

                            stripe = (stripe + 1) % c->chunk_item->num_stripes;

                            while (stripe != parity1) {
                                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.node_size);

                                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                            }

                            context->stripes[parity1].rewrite = true;

                            log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
                            log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        // The P-based candidate is good: take it, then recompute Q
                        // from the now-correct data if Q was stale.
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                        if (!c->devices[parity2]->devobj || RtlCompareMemory(hash2, th2, Vcb->csum_size) != Vcb->csum_size || th2->address != addr) {
                            // fix parity 2
                            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

                            if (c->devices[parity2]->devobj) {
                                RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                              Vcb->superblock.node_size);

                                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

                                while (stripe != parity2) {
                                    galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size);

                                    do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                           Vcb->superblock.node_size);

                                    stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                                }

                                context->stripes[parity2].rewrite = true;

                                log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
                                log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                            }
                        }
                    }

                    context->stripes[bad_stripe1].rewrite = true;

                    // The remaining sectors of this tree block are covered by the
                    // repair; clear their error bits so they aren't re-reported.
                    RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
            } else {
                // Data sector: validate candidates against the stored checksum.
                uint8_t hash1[MAX_HASH_SIZE];
                uint8_t hash2[MAX_HASH_SIZE];

                if (c->devices[parity1]->devobj)
                    get_sector_csum(Vcb, &context->parity_scratch[i * Vcb->superblock.sector_size], hash1);

                if (c->devices[parity2]->devobj)
                    get_sector_csum(Vcb, scratch, hash2);

                if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) ||
                    (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size)) {
                    if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) {
                        // Q-based candidate is good: take it, and recompute P if
                        // P disagreed.
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      scratch, Vcb->superblock.sector_size);

                        if (c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
                            // fix parity 1

                            stripe = (parity1 + 2) % c->chunk_item->num_stripes;

                            RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.sector_size);

                            stripe = (stripe + 1) % c->chunk_item->num_stripes;

                            while (stripe != parity1) {
                                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.sector_size);

                                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                            }

                            context->stripes[parity1].rewrite = true;

                            log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
                            log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        // P-based candidate is good: take it, and recompute Q if
                        // Q disagreed.
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                        if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
                            // fix parity 2
                            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

                            RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.sector_size);

                            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

                            while (stripe != parity2) {
                                galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);

                                do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.sector_size);

                                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                            }

                            context->stripes[parity2].rewrite = true;

                            log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
                            log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }

                    context->stripes[bad_stripe1].rewrite = true;

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
            }

            ExFreePool(scratch);
        } else if (num_errors == 2 && missing_devices == 0) {
            // Two bad data sectors: standard RAID6 two-failure recovery using
            // both P and Q (cf. Anvin, "The mathematics of RAID-6").  x and y are
            // the Galois coefficients of the two bad stripes.
            uint16_t x, y, k;
            uint64_t addr;
            uint32_t len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
            uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
            uint32_t j;

            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

            // put qxy in parity_scratch
            // put pxy in parity_scratch2

            k = c->chunk_item->num_stripes - 3;
            if (stripe == bad_stripe1 || stripe == bad_stripe2) {
                RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
                RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len);

                if (stripe == bad_stripe1)
                    x = k;
                else
                    y = k;
            } else {
                RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size],
                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size],
                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
            }

            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

            k--;
            do {
                galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len);

                if (stripe != bad_stripe1 && stripe != bad_stripe2) {
                    do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                    do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size],
                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                } else if (stripe == bad_stripe1)
                    x = k;
                else if (stripe == bad_stripe2)
                    y = k;

                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                k--;
            } while (stripe != parity2);

            gyx = gpow2(y > x ? (y-x) : (255-x+y));
            gx = gpow2(255-x);

            denom = gdiv(1, gyx ^ 1);
            a = gmul(gyx, denom);
            b = gmul(gx, denom);

            p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
            q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
            pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size];
            qxy = &context->parity_scratch[i * Vcb->superblock.sector_size];

            // Solve byte-by-byte: qxy becomes the reconstruction of bad stripe 1,
            // and the xor below turns parity_scratch2 into bad stripe 2.
            for (j = 0; j < len; j++) {
                *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

                p++;
                q++;
                pxy++;
                qxy++;
            }

            do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len);
            do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

            // Verify and apply the reconstruction of bad stripe 1.
            if (RtlCheckBit(&context->is_tree, bad_off1)) {
                tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];

                if (check_tree_checksum(Vcb, th) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe1].rewrite = true;

                    RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
            } else {
                if (check_sector_csum(Vcb, &context->parity_scratch[i * Vcb->superblock.sector_size], (uint8_t*)context->csum + (Vcb->csum_size * bad_off1))) {
                    RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe1].rewrite = true;

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
            }

            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size);

            // Verify and apply the reconstruction of bad stripe 2.
            if (RtlCheckBit(&context->is_tree, bad_off2)) {
                tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size];

                if (check_tree_checksum(Vcb, th) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe2].rewrite = true;

                    RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, false, false);
            } else {
                if (check_sector_csum(Vcb, &context->parity_scratch2[i * Vcb->superblock.sector_size], (uint8_t*)context->csum + (Vcb->csum_size * bad_off2))) {
                    RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe2].rewrite = true;

                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, true, false);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, false, false);
            }
        } else {
            // More failures than the parity can correct (or failures while a
            // device is missing): just log each bad sector.
            stripe = (parity2 + 1) % c->chunk_item->num_stripes;
            off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;

            while (stripe != parity1) {
                if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                    if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                        uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
                    }
                }

                off += sectors_per_stripe;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }
        }
    }
}

// Scrub a contiguous run [stripe_start, stripe_end] of full stripes of a
// RAID5/6 chunk: build allocation/tree/checksum bitmaps for the run from the
// extent and checksum trees, read the run from every member device a megabyte
// at a time, then verify and repair each row via scrub_raid5_stripe() /
// scrub_raid6_stripe(), rewriting any stripes they flagged.
static NTSTATUS scrub_chunk_raid56_stripe_run(device_extension* Vcb, chunk* c, uint64_t stripe_start, uint64_t stripe_end) {
    NTSTATUS Status;
    KEY searchkey;
    traverse_ptr tp;
    bool b;
    uint64_t run_start, run_end, full_stripe_len, stripe;
    uint32_t max_read, num_sectors;
    ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ?
2 : 1; 2434 scrub_context_raid56 context; 2435 uint16_t i; 2436 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1]; 2437 2438 TRACE("(%p, %p, %I64x, %I64x)\n", Vcb, c, stripe_start, stripe_end); 2439 2440 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length; 2441 run_start = c->offset + (stripe_start * full_stripe_len); 2442 run_end = c->offset + ((stripe_end + 1) * full_stripe_len); 2443 2444 searchkey.obj_id = run_start; 2445 searchkey.obj_type = TYPE_METADATA_ITEM; 2446 searchkey.offset = 0xffffffffffffffff; 2447 2448 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL); 2449 if (!NT_SUCCESS(Status)) { 2450 ERR("find_item returned %08lx\n", Status); 2451 return Status; 2452 } 2453 2454 num_sectors = (uint32_t)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size); 2455 arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG)); 2456 2457 allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2458 if (!allocarr) { 2459 ERR("out of memory\n"); 2460 return STATUS_INSUFFICIENT_RESOURCES; 2461 } 2462 2463 treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2464 if (!treearr) { 2465 ERR("out of memory\n"); 2466 ExFreePool(allocarr); 2467 return STATUS_INSUFFICIENT_RESOURCES; 2468 } 2469 2470 RtlInitializeBitMap(&context.alloc, allocarr, num_sectors); 2471 RtlClearAllBits(&context.alloc); 2472 2473 RtlInitializeBitMap(&context.is_tree, treearr, num_sectors); 2474 RtlClearAllBits(&context.is_tree); 2475 2476 context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); 2477 if (!context.parity_scratch) { 2478 ERR("out of memory\n"); 2479 ExFreePool(allocarr); 2480 ExFreePool(treearr); 2481 return STATUS_INSUFFICIENT_RESOURCES; 2482 } 2483 2484 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2485 csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG); 2486 if (!csumarr) { 2487 ERR("out of 
memory\n"); 2488 ExFreePool(allocarr); 2489 ExFreePool(treearr); 2490 ExFreePool(context.parity_scratch); 2491 return STATUS_INSUFFICIENT_RESOURCES; 2492 } 2493 2494 RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors); 2495 RtlClearAllBits(&context.has_csum); 2496 2497 context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * Vcb->csum_size, ALLOC_TAG); 2498 if (!context.csum) { 2499 ERR("out of memory\n"); 2500 ExFreePool(allocarr); 2501 ExFreePool(treearr); 2502 ExFreePool(context.parity_scratch); 2503 ExFreePool(csumarr); 2504 return STATUS_INSUFFICIENT_RESOURCES; 2505 } 2506 } 2507 2508 if (c->chunk_item->type & BLOCK_FLAG_RAID6) { 2509 context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG); 2510 if (!context.parity_scratch2) { 2511 ERR("out of memory\n"); 2512 ExFreePool(allocarr); 2513 ExFreePool(treearr); 2514 ExFreePool(context.parity_scratch); 2515 2516 if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2517 ExFreePool(csumarr); 2518 ExFreePool(context.csum); 2519 } 2520 2521 return STATUS_INSUFFICIENT_RESOURCES; 2522 } 2523 } 2524 2525 do { 2526 traverse_ptr next_tp; 2527 2528 if (tp.item->key.obj_id >= run_end) 2529 break; 2530 2531 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM) { 2532 uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? 
Vcb->superblock.node_size : tp.item->key.offset; 2533 2534 if (tp.item->key.obj_id + size > run_start) { 2535 uint64_t extent_start = max(run_start, tp.item->key.obj_id); 2536 uint64_t extent_end = min(tp.item->key.obj_id + size, run_end); 2537 bool extent_is_tree = false; 2538 2539 RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size)); 2540 2541 if (tp.item->key.obj_type == TYPE_METADATA_ITEM) 2542 extent_is_tree = true; 2543 else { 2544 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data; 2545 2546 if (tp.item->size < sizeof(EXTENT_ITEM)) { 2547 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM)); 2548 Status = STATUS_INTERNAL_ERROR; 2549 goto end; 2550 } 2551 2552 if (ei->flags & EXTENT_ITEM_TREE_BLOCK) 2553 extent_is_tree = true; 2554 } 2555 2556 if (extent_is_tree) 2557 RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size)); 2558 else if (c->chunk_item->type & BLOCK_FLAG_DATA) { 2559 traverse_ptr tp2; 2560 bool b2; 2561 2562 searchkey.obj_id = EXTENT_CSUM_ID; 2563 searchkey.obj_type = TYPE_EXTENT_CSUM; 2564 searchkey.offset = extent_start; 2565 2566 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL); 2567 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) { 2568 ERR("find_item returned %08lx\n", Status); 2569 goto end; 2570 } 2571 2572 do { 2573 traverse_ptr next_tp2; 2574 2575 if (tp2.item->key.offset >= extent_end) 2576 break; 2577 2578 if (tp2.item->key.offset >= extent_start) { 2579 uint64_t csum_start = max(extent_start, tp2.item->key.offset); 2580 uint64_t csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / Vcb->csum_size)); 2581 2582 RtlSetBits(&context.has_csum, 
(ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size)); 2583 2584 RtlCopyMemory((uint8_t*)context.csum + ((csum_start - run_start) * Vcb->csum_size / Vcb->superblock.sector_size), 2585 tp2.item->data + ((csum_start - tp2.item->key.offset) * Vcb->csum_size / Vcb->superblock.sector_size), 2586 (ULONG)((csum_end - csum_start) * Vcb->csum_size / Vcb->superblock.sector_size)); 2587 } 2588 2589 b2 = find_next_item(Vcb, &tp2, &next_tp2, false, NULL); 2590 2591 if (b2) 2592 tp2 = next_tp2; 2593 } while (b2); 2594 } 2595 } 2596 } 2597 2598 b = find_next_item(Vcb, &tp, &next_tp, false, NULL); 2599 2600 if (b) 2601 tp = next_tp; 2602 } while (b); 2603 2604 context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG); 2605 if (!context.stripes) { 2606 ERR("out of memory\n"); 2607 Status = STATUS_INSUFFICIENT_RESOURCES; 2608 goto end; 2609 } 2610 2611 max_read = (uint32_t)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time 2612 2613 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2614 context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG); 2615 if (!context.stripes[i].buf) { 2616 uint64_t j; 2617 2618 ERR("out of memory\n"); 2619 2620 for (j = 0; j < i; j++) { 2621 ExFreePool(context.stripes[j].buf); 2622 } 2623 ExFreePool(context.stripes); 2624 2625 Status = STATUS_INSUFFICIENT_RESOURCES; 2626 goto end; 2627 } 2628 2629 context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG); 2630 if (!context.stripes[i].errorarr) { 2631 uint64_t j; 2632 2633 ERR("out of memory\n"); 2634 2635 ExFreePool(context.stripes[i].buf); 2636 2637 for (j = 0; j < i; j++) { 2638 ExFreePool(context.stripes[j].buf); 
2639 } 2640 ExFreePool(context.stripes); 2641 2642 Status = STATUS_INSUFFICIENT_RESOURCES; 2643 goto end; 2644 } 2645 2646 RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size)); 2647 2648 context.stripes[i].context = &context; 2649 context.stripes[i].rewrite = false; 2650 } 2651 2652 stripe = stripe_start; 2653 2654 Status = STATUS_SUCCESS; 2655 2656 chunk_lock_range(Vcb, c, run_start, run_end - run_start); 2657 2658 do { 2659 ULONG read_stripes; 2660 uint16_t missing_devices = 0; 2661 bool need_wait = false; 2662 2663 if (max_read < stripe_end + 1 - stripe) 2664 read_stripes = max_read; 2665 else 2666 read_stripes = (ULONG)(stripe_end + 1 - stripe); 2667 2668 context.stripes_left = c->chunk_item->num_stripes; 2669 2670 // read megabyte by megabyte 2671 for (i = 0; i < c->chunk_item->num_stripes; i++) { 2672 if (c->devices[i]->devobj) { 2673 PIO_STACK_LOCATION IrpSp; 2674 2675 context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false); 2676 2677 if (!context.stripes[i].Irp) { 2678 ERR("IoAllocateIrp failed\n"); 2679 Status = STATUS_INSUFFICIENT_RESOURCES; 2680 goto end3; 2681 } 2682 2683 context.stripes[i].Irp->MdlAddress = NULL; 2684 2685 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2686 IrpSp->MajorFunction = IRP_MJ_READ; 2687 IrpSp->FileObject = c->devices[i]->fileobj; 2688 2689 if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2690 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG); 2691 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2692 ERR("out of memory\n"); 2693 Status = STATUS_INSUFFICIENT_RESOURCES; 2694 goto end3; 2695 } 2696 2697 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2698 2699 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf; 2700 } else if 
(c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
                    // Direct-I/O device: describe the read buffer with an MDL and lock
                    // its pages, since the lower driver will transfer into it directly.
                    context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), false, false, NULL);
                    if (!context.stripes[i].Irp->MdlAddress) {
                        ERR("IoAllocateMdl failed\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end3;
                    }

                    Status = STATUS_SUCCESS;

                    // MmProbeAndLockPages raises an exception on failure instead of
                    // returning a status, so catch it and convert it back to one.
                    _SEH2_TRY {
                        MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
                    } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
                        Status = _SEH2_GetExceptionCode();
                    } _SEH2_END;

                    if (!NT_SUCCESS(Status)) {
                        ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
                        IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                        goto end3;
                    }
                } else
                    context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;

                context.stripes[i].offset = stripe * c->chunk_item->stripe_length;

                IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
                IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;

                context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
                context.stripes[i].missing = false;

                // NOTE(review): the completion routine presumably decrements
                // context.stripes_left and signals context.Event when it reaches
                // zero - confirm against scrub_read_completion_raid56.
                IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], true, true, true);

                Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
                need_wait = true;
            } else {
                // Device is absent: mark the stripe missing so it is skipped below,
                // and drop it from the count the completion routines wait on.
                context.stripes[i].Irp = NULL;
                context.stripes[i].missing = true;
                missing_devices++;
                InterlockedDecrement(&context.stripes_left);
            }
        }

        // RAID5 can reconstruct with at most one missing device, RAID6 with at
        // most two; beyond that the stripe run cannot be scrubbed at all.
        if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
            ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
            Status = STATUS_UNEXPECTED_IO_ERROR;
            goto end3;
        } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
            ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
            Status = STATUS_UNEXPECTED_IO_ERROR;
            goto end3;
        }

        // Fire off all the reads at once, then block until the last one completes.
        if (need_wait) {
            KeInitializeEvent(&context.Event, NotificationEvent, false);

            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                if (c->devices[i]->devobj)
                    IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
            }

            KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
        }

        // return an error if any of the stripes returned an error
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
                Status = context.stripes[i].iosb.Status;
                log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
                goto end3;
            }
        }

        // Verify each full stripe that was just read, using the profile-specific
        // parity routine.
        if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
            for (i = 0; i < read_stripes; i++) {
                scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
            }
        } else {
            for (i = 0; i < read_stripes; i++) {
                scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
            }
        }
        stripe += read_stripes;

end3:
        // Tear down the per-stripe IRPs and MDLs, then flush any stripes that
        // the scrub routines flagged for rewrite back to their devices.
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (context.stripes[i].Irp) {
                if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
                    MmUnlockPages(context.stripes[i].Irp->MdlAddress);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                }
                IoFreeIrp(context.stripes[i].Irp);
                context.stripes[i].Irp = NULL;

                if (context.stripes[i].rewrite) {
                    Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + context.stripes[i].offset,
                                             context.stripes[i].buf, (uint32_t)(read_stripes * c->chunk_item->stripe_length));

                    if (!NT_SUCCESS(Status)) {
                        ERR("write_data_phys returned %08lx\n", Status);
                        log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS);
                        goto end2;
                    }
                }
            }
        }

        if (!NT_SUCCESS(Status))
            break;
    } while (stripe < stripe_end);

end2:
    chunk_unlock_range(Vcb, c, run_start, run_end - run_start);

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        ExFreePool(context.stripes[i].buf);
        ExFreePool(context.stripes[i].errorarr);
    }
    ExFreePool(context.stripes);

end:
    ExFreePool(treearr);
    ExFreePool(allocarr);
    ExFreePool(context.parity_scratch);

    // parity_scratch2 is only allocated for RAID6 (second parity stripe).
    if (c->chunk_item->type & BLOCK_FLAG_RAID6)
        ExFreePool(context.parity_scratch2);

    // csum arrays are only allocated for data chunks.
    if (c->chunk_item->type & BLOCK_FLAG_DATA) {
        ExFreePool(csumarr);
        ExFreePool(context.csum);
    }

    return Status;
}

// Scrub the RAID5/6 chunk c, resuming from *offset.  Walks the extent tree
// for extents inside the chunk, coalesces them into runs of consecutive full
// stripes, and hands each run to scrub_chunk_raid56_stripe_run.  At most 64
// extents or 128 MB are handled per call: *offset is advanced past what was
// covered and *changed is set to true if anything was scrubbed, so the
// caller loops until the whole chunk is done.
static NTSTATUS scrub_chunk_raid56(device_extension* Vcb, chunk* c, uint64_t* offset, bool* changed) {
    NTSTATUS Status;
    KEY searchkey;
    traverse_ptr tp;
    bool b;
    uint64_t full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0;
    ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;

    // Data stripes per full stripe = total stripes minus parity stripes.
    full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
    stripe = (*offset - c->offset) / full_stripe_len;

    // Round the resume point down to a full-stripe boundary.
    *offset = c->offset + (stripe * full_stripe_len);

    searchkey.obj_id = *offset;
    searchkey.obj_type = TYPE_METADATA_ITEM;
    searchkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
    if (!NT_SUCCESS(Status)) {
        ERR("find_item returned %08lx\n", Status);
        return Status;
    }

    *changed = false;

    do {
        traverse_ptr next_tp;

        // Stop once we have walked past the end of this chunk.
        if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
            break;

        if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
            // For METADATA_ITEM the key offset is the tree level, not a size,
            // so the extent size is the fixed node size.
            uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;

            TRACE("%I64x\n", tp.item->key.obj_id);

            if (size < Vcb->superblock.sector_size) {
                ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size);
                return STATUS_INTERNAL_ERROR;
            }

            stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len;

            if (*changed) {
                // A gap of more than one full stripe ends the current run:
                // scrub it and start a new run at this extent's stripe.
                if (stripe > stripe_end + 1) {
                    Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
                    if (!NT_SUCCESS(Status)) {
                        ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status);
                        return Status;
                    }

                    stripe_start = stripe;
                }
            } else
                stripe_start = stripe;

            stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len;

            *changed = true;

            total_data += size;
            num_extents++;

            // only do so much at a time
            if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
                break;
        }

        b = find_next_item(Vcb, &tp, &next_tp, false, NULL);

        if (b)
            tp = next_tp;
    } while (b);

    // Scrub whatever run was still open when the walk ended, and advance the
    // resume offset past it.
    if (*changed) {
        Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status);
            return Status;
        }

        *offset = c->offset + ((stripe_end + 1) * full_stripe_len);
    }

    return STATUS_SUCCESS;
}

// Scrub one chunk, resuming from *offset.  RAID5/6 chunks are dispatched to
// scrub_chunk_raid56; for the other profiles this walks the extent tree,
// loads data checksums from the checksum tree, batches contiguous tree
// blocks into runs for scrub_extent, and passes data extents to
// scrub_data_extent.  Like the RAID5/6 path it stops after 64 extents or
// 128 MB, advancing *offset and setting *changed so the caller can loop.
static NTSTATUS scrub_chunk(device_extension* Vcb, chunk* c, uint64_t* offset, bool* changed) {
    NTSTATUS Status;
    KEY searchkey;
    traverse_ptr tp;
    bool b = false, tree_run = false;
    ULONG type, num_extents = 0;
    uint64_t total_data = 0, tree_run_start, tree_run_end;

    TRACE("chunk %I64x\n", c->offset);

    ExAcquireResourceSharedLite(&Vcb->tree_lock, true);

    // Collapse the redundancy profile down to the three cases the extent
    // scrubbers distinguish; everything mirror-like is treated as DUPLICATE.
    if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
        type = BLOCK_FLAG_DUPLICATE;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
        type = BLOCK_FLAG_RAID0;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
        type = BLOCK_FLAG_DUPLICATE;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
        type = BLOCK_FLAG_RAID10;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
        Status = scrub_chunk_raid56(Vcb, c, offset, changed);
        goto end;
    } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
        Status = scrub_chunk_raid56(Vcb, c, offset, changed);
        goto end;
    } else if (c->chunk_item->type & BLOCK_FLAG_RAID1C3)
        type = BLOCK_FLAG_DUPLICATE;
    else if (c->chunk_item->type & BLOCK_FLAG_RAID1C4)
        type = BLOCK_FLAG_DUPLICATE;
    else // SINGLE
        type = BLOCK_FLAG_DUPLICATE;

    searchkey.obj_id = *offset;
    searchkey.obj_type = TYPE_METADATA_ITEM;
    searchkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08lx\n", Status);
        goto end;
    }

    do {
        traverse_ptr next_tp;

        // Stop once we have walked past the end of this chunk.
        if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
            break;

        if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
            // METADATA_ITEM keys encode the tree level in the offset, so the
            // extent size is the fixed node size rather than the key offset.
            uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
            bool is_tree;
            void* csum = NULL;
            RTL_BITMAP bmp;
            ULONG* bmparr = NULL, bmplen;

            TRACE("%I64x\n", tp.item->key.obj_id);

            is_tree = false;

            // An extent is a tree block either via a METADATA_ITEM key or via
            // the TREE_BLOCK flag on an old-style EXTENT_ITEM.
            if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
                is_tree = true;
            else {
                EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;

                if (tp.item->size < sizeof(EXTENT_ITEM)) {
                    ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
                    Status = STATUS_INTERNAL_ERROR;
                    goto end;
                }

                if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
                    is_tree = true;
            }

            if (size < Vcb->superblock.sector_size) {
                ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size);
                Status = STATUS_INTERNAL_ERROR;
                goto end;
            }

            // load csum
            if (!is_tree) {
                traverse_ptr tp2;

                // One checksum per sector; the bitmap tracks which sectors we
                // actually found a checksum for.
                csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(Vcb->csum_size * size / Vcb->superblock.sector_size), ALLOC_TAG);
                if (!csum) {
                    ERR("out of memory\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                bmplen = (ULONG)(size / Vcb->superblock.sector_size);

                bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align((bmplen >> 3) + 1, sizeof(ULONG))), ALLOC_TAG);
                if (!bmparr) {
                    ERR("out of memory\n");
                    ExFreePool(csum);
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                RtlInitializeBitMap(&bmp, bmparr, bmplen);
                RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum

                searchkey.obj_id = EXTENT_CSUM_ID;
                searchkey.obj_type = TYPE_EXTENT_CSUM;
                searchkey.offset = tp.item->key.obj_id;

                Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL);
                if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
                    ERR("find_item returned %08lx\n", Status);
                    ExFreePool(csum);
                    ExFreePool(bmparr);
                    goto end;
                }

                // Walk the checksum items overlapping this extent, copying the
                // overlapping portion of each into csum and clearing the
                // corresponding bitmap bits.
                if (Status != STATUS_NOT_FOUND) {
                    do {
                        traverse_ptr next_tp2;

                        if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) {
                            if (tp2.item->key.offset >= tp.item->key.obj_id + size)
                                break;
                            else if (tp2.item->size >= Vcb->csum_size && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / Vcb->csum_size) >= tp.item->key.obj_id) {
                                // cs/ce: start and end of the overlap between
                                // this csum item and the extent.
                                uint64_t cs = max(tp.item->key.obj_id, tp2.item->key.offset);
                                uint64_t ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / Vcb->csum_size));

                                RtlCopyMemory((uint8_t*)csum + ((cs - tp.item->key.obj_id) * Vcb->csum_size / Vcb->superblock.sector_size),
                                              tp2.item->data + ((cs - tp2.item->key.offset) * Vcb->csum_size / Vcb->superblock.sector_size),
                                              (ULONG)((ce - cs) * Vcb->csum_size / Vcb->superblock.sector_size));

                                RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));

                                if (ce == tp.item->key.obj_id + size)
                                    break;
                            }
                        }

                        if (find_next_item(Vcb, &tp2, &next_tp2, false, NULL))
                            tp2 = next_tp2;
                        else
                            break;
                    } while (true);
                }
            }

            // Tree blocks are batched into contiguous runs and scrubbed in one
            // go; a data extent, or a tree block beyond the current run's end,
            // flushes the open run first.
            if (tree_run) {
                if (!is_tree || tp.item->key.obj_id > tree_run_end) {
                    Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
                    if (!NT_SUCCESS(Status)) {
                        ERR("scrub_extent returned %08lx\n", Status);
                        goto end;
                    }

                    if (!is_tree)
                        tree_run = false;
                    else {
                        tree_run_start = tp.item->key.obj_id;
                        tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
                    }
                } else
                    tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
            } else if (is_tree) {
                tree_run = true;
                tree_run_start = tp.item->key.obj_id;
                tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
            }

            if (!is_tree) {
                Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp, bmplen);
                if (!NT_SUCCESS(Status)) {
                    ERR("scrub_data_extent returned %08lx\n", Status);
                    ExFreePool(csum);
                    ExFreePool(bmparr);
                    goto end;
                }

                ExFreePool(csum);
                ExFreePool(bmparr);
            }

            *offset = tp.item->key.obj_id + size;
            *changed = true;

            total_data += size;
            num_extents++;

            // only do so much at a time
            if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
                break;
        }

        b = find_next_item(Vcb, &tp, &next_tp, false, NULL);

        if (b)
            tp = next_tp;
    } while (b);

    // Flush any tree run still open when the walk ended.
    if (tree_run) {
        Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent returned %08lx\n", Status);
            goto end;
        }
    }

    Status = STATUS_SUCCESS;

end:
    ExReleaseResourceLite(&Vcb->tree_lock);

    return Status;
}

// Worker thread started by start_scrub: flushes pending writes, resets the
// scrub statistics, snapshots the list of writable chunks, then scrubs them
// one by one, honouring the pause event and stop flag between steps.
_Function_class_(KSTART_ROUTINE)
static void __stdcall scrub_thread(void* context) {
    device_extension* Vcb = context;
    LIST_ENTRY chunks, *le;
    NTSTATUS Status;
    LARGE_INTEGER time;

    KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, false);

    InitializeListHead(&chunks);

    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);

    // Commit any dirty metadata before scrubbing, so we check what is
    // actually on disk.
    if (Vcb->need_write && !Vcb->readonly)
        Status = do_write(Vcb, NULL);
    else
        Status = STATUS_SUCCESS;

    free_trees(Vcb);
    if (!NT_SUCCESS(Status)) {
        ExReleaseResourceLite(&Vcb->tree_lock);
        ERR("do_write returned %08lx\n", Status);
        Vcb->scrub.error = Status;
        goto end;
    }

    ExConvertExclusiveToSharedLite(&Vcb->tree_lock);

    ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);

    // Reset the statistics for this scrub run and discard errors left over
    // from a previous one.
    KeQuerySystemTime(&Vcb->scrub.start_time);
    Vcb->scrub.finish_time.QuadPart = 0;
    Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart;
    Vcb->scrub.duration.QuadPart = 0;
    Vcb->scrub.total_chunks = 0;
    Vcb->scrub.chunks_left = 0;
    Vcb->scrub.data_scrubbed = 0;
    Vcb->scrub.num_errors = 0;

    while (!IsListEmpty(&Vcb->scrub.errors)) {
        scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry);
        ExFreePool(err);
    }

    ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);

    // Snapshot the writable chunks onto a private list.  Scrub reuses each
    // chunk's list_entry_balance link, which is safe because start_scrub
    // refuses to run while a balance is in progress.
    le = Vcb->chunks.Flink;
    while (le != &Vcb->chunks) {
        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);

        acquire_chunk_lock(c, Vcb);

        if (!c->readonly) {
            InsertTailList(&chunks, &c->list_entry_balance);
            Vcb->scrub.total_chunks++;
            Vcb->scrub.chunks_left++;
        }

        release_chunk_lock(c, Vcb);

        le = le->Flink;
    }

    ExReleaseResourceLite(&Vcb->chunk_lock);

    ExReleaseResource(&Vcb->scrub.stats_lock);

    ExReleaseResourceLite(&Vcb->tree_lock);

    while (!IsListEmpty(&chunks)) {
        chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
        uint64_t offset = c->offset;
        bool changed;

        // NOTE(review): reloc presumably marks the chunk as in use by scrub,
        // as during relocation - confirm against the balance code.
        c->reloc = true;

        // Blocks here while paused (pause_scrub clears the event).
        KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);

        if (!Vcb->scrub.stopping) {
            // scrub_chunk only does a bounded amount of work per call, so
            // keep calling it until the chunk is finished or we are stopped.
            do {
                changed = false;

                Status = scrub_chunk(Vcb, c, &offset, &changed);
                if (!NT_SUCCESS(Status)) {
                    ERR("scrub_chunk returned %08lx\n", Status);
                    Vcb->scrub.stopping = true;
                    Vcb->scrub.error = Status;
                    break;
                }

                if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
                    break;

                KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
            } while (changed);
        }

        ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);

        if (!Vcb->scrub.stopping)
            Vcb->scrub.chunks_left--;

        if (IsListEmpty(&chunks))
            KeQuerySystemTime(&Vcb->scrub.finish_time);

        ExReleaseResource(&Vcb->scrub.stats_lock);

        c->reloc = false;
        c->list_entry_balance.Flink = NULL;
    }

    KeQuerySystemTime(&time);
    Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;

end:
    ZwClose(Vcb->scrub.thread);
    Vcb->scrub.thread = NULL;

    KeSetEvent(&Vcb->scrub.finished, 0, false);
}

// Start a scrub of the volume on a new system thread.  Fails if the caller
// lacks SeManageVolumePrivilege, the volume is locked or read-only, or a
// balance or another scrub is already running.
NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
    NTSTATUS Status;
    OBJECT_ATTRIBUTES oa;

    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (Vcb->locked) {
        WARN("cannot start scrub while locked\n");
        return STATUS_DEVICE_NOT_READY;
    }

    if (Vcb->balance.thread) {
        WARN("cannot start scrub while balance running\n");
        return STATUS_DEVICE_NOT_READY;
    }

    if (Vcb->scrub.thread) {
        WARN("scrub already running\n");
        return STATUS_DEVICE_NOT_READY;
    }

    if (Vcb->readonly)
        return STATUS_MEDIA_WRITE_PROTECTED;

    Vcb->scrub.stopping = false;
    Vcb->scrub.paused = false;
    Vcb->scrub.error = STATUS_SUCCESS;
    // paused was just cleared, so the pause event starts signalled and the
    // thread runs immediately.
    KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused);

    InitializeObjectAttributes(&oa, NULL, OBJ_KERNEL_HANDLE, NULL, NULL);

    Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, &oa, NULL, NULL, scrub_thread, Vcb);
    if (!NT_SUCCESS(Status)) {
        ERR("PsCreateSystemThread returned %08lx\n", Status);
        return Status;
    }

    return STATUS_SUCCESS;
}

// Report scrub status and statistics into the caller's buffer, followed by
// the list of logged errors as variable-length btrfs_scrub_error records
// chained via next_entry.  Returns STATUS_BUFFER_OVERFLOW if the buffer is
// too small to hold every error record.
NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length) {
    btrfs_query_scrub* bqs = (btrfs_query_scrub*)data;
    ULONG len;
    NTSTATUS Status;
    LIST_ENTRY* le;
    btrfs_scrub_error* bse = NULL;

    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (length < offsetof(btrfs_query_scrub, errors))
        return STATUS_BUFFER_TOO_SMALL;

    ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, true);

    if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
        bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING;
    else
        bqs->status = BTRFS_SCRUB_STOPPED;

    bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart;
    bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart;
    bqs->chunks_left = Vcb->scrub.chunks_left;
    bqs->total_chunks = Vcb->scrub.total_chunks;
    bqs->data_scrubbed = Vcb->scrub.data_scrubbed;

    bqs->duration = Vcb->scrub.duration.QuadPart;

    // duration only accumulates on pause/stop, so add the current running
    // interval for a live scrub.
    if (bqs->status == BTRFS_SCRUB_RUNNING) {
        LARGE_INTEGER time;

        KeQuerySystemTime(&time);
        bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
    }

    bqs->error = Vcb->scrub.error;

    bqs->num_errors = Vcb->scrub.num_errors;

    // len = space remaining for error records.
    len = length - offsetof(btrfs_query_scrub, errors);

    le = Vcb->scrub.errors.Flink;
    while (le != &Vcb->scrub.errors) {
        scrub_error* err = CONTAINING_RECORD(le, scrub_error, list_entry);
        ULONG errlen;

        // Records are variable-length: metadata errors carry a fixed KEY,
        // data errors carry a variable-length filename.
        if (err->is_metadata)
            errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
        else
            errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;

        if (len < errlen) {
            Status = STATUS_BUFFER_OVERFLOW;
            goto end;
        }

        // First record starts at bqs->errors; subsequent ones are appended
        // immediately after the previous record, linked via next_entry.
        if (!bse)
            bse = &bqs->errors;
        else {
            ULONG lastlen;

            if (bse->is_metadata)
                lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
            else
                lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;

            bse->next_entry = lastlen;
            bse = (btrfs_scrub_error*)(((uint8_t*)bse) + lastlen);
        }

        bse->next_entry = 0;
        bse->address = err->address;
        bse->device = err->device;
        bse->recovered = err->recovered;
        bse->is_metadata = err->is_metadata;
        bse->parity = err->parity;

        if (err->is_metadata) {
            bse->metadata.root = err->metadata.root;
            bse->metadata.level = err->metadata.level;
            bse->metadata.firstitem = err->metadata.firstitem;
        } else {
            bse->data.subvol = err->data.subvol;
            bse->data.offset = err->data.offset;
            bse->data.filename_length = err->data.filename_length;
            RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
        }

        len -= errlen;
        le = le->Flink;
    }

    Status = STATUS_SUCCESS;

end:
    ExReleaseResourceLite(&Vcb->scrub.stats_lock);

    return Status;
}

// Pause a running scrub: clears the event the worker thread waits on, and
// banks the elapsed running time into duration.
NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
    LARGE_INTEGER time;

    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (!Vcb->scrub.thread)
        return STATUS_DEVICE_NOT_READY;

    if (Vcb->scrub.paused)
        return STATUS_DEVICE_NOT_READY;

    Vcb->scrub.paused = true;
    KeClearEvent(&Vcb->scrub.event);

    KeQuerySystemTime(&time);
    Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;

    return STATUS_SUCCESS;
}

// Resume a paused scrub: signals the worker thread's event and restarts the
// running-time clock.
NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (!Vcb->scrub.thread)
        return STATUS_DEVICE_NOT_READY;

    if (!Vcb->scrub.paused)
        return STATUS_DEVICE_NOT_READY;

    Vcb->scrub.paused = false;
    KeSetEvent(&Vcb->scrub.event, 0, false);

    KeQuerySystemTime(&Vcb->scrub.resume_time);

    return STATUS_SUCCESS;
}

// Request that a running scrub stop: sets the stopping flag and signals the
// event so a paused worker thread wakes up and notices it.
NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    if (!Vcb->scrub.thread)
        return STATUS_DEVICE_NOT_READY;

    Vcb->scrub.paused = false;
    Vcb->scrub.stopping = true;
    KeSetEvent(&Vcb->scrub.event, 0, false);

    return STATUS_SUCCESS;
}