/* Copyright (c) Mark Harmstone 2016-17
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */

#include "btrfs_drv.h"
#include "xxhash.h"
#include "crc32c.h"

/* Outcome of the read I/O issued to a single stripe. */
enum read_data_status {
    ReadDataStatus_Pending,
    ReadDataStatus_Success,
    ReadDataStatus_Error,
    ReadDataStatus_MissingDevice,
    ReadDataStatus_Skip
};

struct read_data_context;

/* Per-stripe state for one device's portion of a multi-stripe read. */
typedef struct {
    struct read_data_context* context;  // back-pointer to the owning read
    uint16_t stripenum;
    bool rewrite;
    PIRP Irp;
    IO_STATUS_BLOCK iosb;               // filled in by the completion routine
    enum read_data_status status;
    PMDL mdl;
    uint64_t stripestart;               // byte offset of this read within the stripe
    uint64_t stripeend;
} read_data_stripe;

/* Shared state for a read spanning several stripes; stripes_left counts
 * outstanding IRPs and Event is signalled when it reaches zero. */
typedef struct {
    KEVENT Event;
    NTSTATUS Status;
    chunk* c;
    uint64_t address;                   // logical btrfs address being read
    uint32_t buflen;
    LONG num_stripes, stripes_left;
    uint64_t type;                      // chunk type flags (RAID level)
    uint32_t sector_size;
    uint16_t firstoff, startoffstripe, sectors_per_stripe;
    void* csum;                         // expected checksums, or NULL if unchecked
    bool tree;                          // true when reading a metadata tree block
    read_data_stripe* stripes;
    uint8_t* va;
} read_data_context;

extern bool diskacc;
extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
extern tCcCopyReadEx fCcCopyReadEx;
extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;

#define LZO_PAGE_SIZE 4096

/* Completion routine for per-stripe read IRPs: records the per-stripe result
 * and wakes the waiting thread once the last stripe has completed. */
_Function_class_(IO_COMPLETION_ROUTINE)
static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID
conptr) {
    read_data_stripe* stripe = conptr;
    read_data_context* context = (read_data_context*)stripe->context;

    UNUSED(DeviceObject);

    stripe->iosb = Irp->IoStatus;

    if (NT_SUCCESS(Irp->IoStatus.Status))
        stripe->status = ReadDataStatus_Success;
    else
        stripe->status = ReadDataStatus_Error;

    // Last stripe to finish signals the waiter.
    if (InterlockedDecrement(&context->stripes_left) == 0)
        KeSetEvent(&context->Event, 0, false);

    // We own the IRP; stop the I/O manager's completion processing here.
    return STATUS_MORE_PROCESSING_REQUIRED;
}

/* Verify `sectors` sectors of `data` against the expected checksums in `csum`.
 * Returns STATUS_SUCCESS if all match, STATUS_CRC_ERROR on any mismatch,
 * or STATUS_INSUFFICIENT_RESOURCES if the scratch buffer cannot be allocated. */
NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, void* csum) {
    void* csum2;

    csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
    if (!csum2) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // Compute the checksums of all sectors into csum2.
    do_calc_job(Vcb, data, sectors, csum2);

    if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
        ExFreePool(csum2);
        return STATUS_CRC_ERROR;
    }

    ExFreePool(csum2);

    return STATUS_SUCCESS;
}

/* Compute the checksum of a tree block into `csum`, using the algorithm the
 * superblock specifies. The checksum covers the node from fs_uuid onwards,
 * i.e. everything except the stored csum field itself. */
void get_tree_checksum(device_extension* Vcb, tree_header* th, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *(uint32_t*)csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
            break;

        case CSUM_TYPE_XXHASH:
            *(uint64_t*)csum = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
            break;

        case CSUM_TYPE_SHA256:
            calc_sha256(csum, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
            break;

        case CSUM_TYPE_BLAKE2:
            blake2b(csum, BLAKE2_HASH_SIZE, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
            break;
    }
}

/* Verify a tree block's stored checksum; returns true if it is valid. */
bool check_tree_checksum(device_extension* Vcb, tree_header* th) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            if (crc32 == *((uint32_t*)th->csum))
                return true;

            WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));

            break;
        }

        case CSUM_TYPE_XXHASH: {
            uint64_t hash = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);

            if (hash == *((uint64_t*)th->csum))
                return true;

            WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));

            break;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t hash[SHA256_HASH_SIZE];

            calc_sha256(hash, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            // NOTE(review): comparing against th relies on csum being the first
            // member of tree_header, so &th->csum == th — confirm against header.
            if (RtlCompareMemory(hash, th, SHA256_HASH_SIZE) == SHA256_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t hash[BLAKE2_HASH_SIZE];

            blake2b(hash, sizeof(hash), (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            // Same first-member aliasing as the SHA256 case above.
            if (RtlCompareMemory(hash, th, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }
    }

    // Unknown checksum type, or mismatch.
    return false;
}

/* Compute the checksum of one data sector of `buf` into `csum`. */
void get_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
            break;

        case CSUM_TYPE_XXHASH:
            *(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
            break;

        case CSUM_TYPE_SHA256:
            calc_sha256(csum, buf, Vcb->superblock.sector_size);
            break;

        case CSUM_TYPE_BLAKE2:
            blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
            break;
    }
}

/* Check one sector of `buf` against the expected checksum `csum`;
 * returns true if it matches (false for unknown checksum types). */
bool check_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);

            return *(uint32_t*)csum == crc32;
        }

        case
CSUM_TYPE_XXHASH: {
            uint64_t hash = XXH64(buf, Vcb->superblock.sector_size, 0);

            return *(uint64_t*)csum == hash;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t hash[SHA256_HASH_SIZE];

            calc_sha256(hash, buf, Vcb->superblock.sector_size);

            return RtlCompareMemory(hash, csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t hash[BLAKE2_HASH_SIZE];

            blake2b(hash, sizeof(hash), buf, Vcb->superblock.sector_size);

            return RtlCompareMemory(hash, csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE;
        }
    }

    // Unknown checksum type.
    return false;
}

/* Verify (and if necessary repair) data already read from a DUP/RAID1-style
 * chunk, where every stripe holds an identical copy. On checksum or
 * generation failure, the other mirrors are tried in turn and, if a good
 * copy is found, the bad copy is rewritten in place. */
static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
                              device** devices, uint64_t generation) {
    bool checksum_error = false;
    uint16_t j, stripe = 0;
    NTSTATUS Status;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];

    // Find the stripe the data was actually read from; bail on a hard I/O error.
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            break;
        }
    }

    if (context->stripes[stripe].status != ReadDataStatus_Success)
        return STATUS_INTERNAL_ERROR;

    if (context->tree) {
        // Metadata: validate the node's address, checksum, and generation.
        tree_header* th = (tree_header*)buf;

        if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && th->generation != generation) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        Status = check_csum(Vcb, buf,
(ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    }

    if (!checksum_error)
        return STATUS_SUCCESS;

    // Only one copy exists - nothing to recover from.
    if (ci->num_stripes == 1)
        return STATUS_CRC_ERROR;

    if (context->tree) {
        // Metadata: try each other mirror for a good copy of the whole node.
        tree_header* t2;
        bool recovered = false;

        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe && devices[j] && devices[j]->devobj) {
                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
                                        Vcb->superblock.node_size, (uint8_t*)t2, false);
                if (!NT_SUCCESS(Status)) {
                    WARN("sync_read_phys returned %08lx\n", Status);
                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                } else {
                    bool checksum_error = !check_tree_checksum(Vcb, t2);

                    if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
                        RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
                        ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
                        recovered = true;

                        if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
                                                     t2, Vcb->superblock.node_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08lx\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }

                        break;
                    } else if (t2->address != addr || checksum_error)
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                    else
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        // Data: recover sector-by-sector from the other mirrors.
        ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information >> Vcb->sector_shift;
        uint8_t* sector;
        void* ptr = context->csum;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (ULONG i = 0; i < sectors; i++) {
            if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
                bool recovered = false;

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe && devices[j] && devices[j]->devobj) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
                                                cis[j].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
                                                Vcb->superblock.sector_size, sector, false);
                        if (!NT_SUCCESS(Status)) {
                            WARN("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                        } else {
                            if (check_sector_csum(Vcb, sector, ptr)) {
                                RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
                                ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
                                recovered = true;

                                if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
                                                             cis[stripe].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
                                                             sector, Vcb->superblock.sector_size);
                                    if (!NT_SUCCESS(Status)) {
                                        WARN("write_data_phys returned %08lx\n", Status);
                                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                    }
                                }

                                break;
                            } else
                                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }

            ptr = (uint8_t*)ptr + Vcb->csum_size;
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}

/* Verify data already read from a RAID0 chunk. There is no redundancy, so
 * any checksum/address/generation failure is unrecoverable; we work out
 * which device held the bad sector so its error counters can be updated. */
static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
                                CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
    for (uint16_t i = 0; i < ci->num_stripes; i++) {
        if (context->stripes[i].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", i, context->stripes[i].iosb.Status);
            log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[i].iosb.Status;
        }
    }

    if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
        tree_header* th = (tree_header*)buf;
        bool checksum_error = !check_tree_checksum(Vcb, th);

        if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
            uint64_t off;
            uint16_t stripe;

            // Map the logical address back to its stripe for error attribution.
            get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);

            ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

            if (checksum_error) {
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (addr != th->address) {
                WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return
STATUS_CRC_ERROR; 426 } else if (generation != 0 && generation != th->generation) { 427 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation); 428 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 429 return STATUS_CRC_ERROR; 430 } 431 } 432 } else if (context->csum) { 433 NTSTATUS Status; 434 435 Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum); 436 437 if (Status == STATUS_CRC_ERROR) { 438 void* ptr = context->csum; 439 440 for (uint32_t i = 0; i < length >> Vcb->sector_shift; i++) { 441 if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) { 442 uint64_t off; 443 uint16_t stripe; 444 445 get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length, ci->num_stripes, &off, &stripe); 446 447 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id); 448 449 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 450 451 return Status; 452 } 453 454 ptr = (uint8_t*)ptr + Vcb->csum_size; 455 } 456 457 return Status; 458 } else if (!NT_SUCCESS(Status)) { 459 ERR("check_csum returned %08lx\n", Status); 460 return Status; 461 } 462 } 463 464 return STATUS_SUCCESS; 465 } 466 467 static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, 468 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) { 469 uint16_t stripe = 0; 470 NTSTATUS Status; 471 bool checksum_error = false; 472 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 473 474 for (uint16_t j = 0; j < ci->num_stripes; j++) { 475 if (context->stripes[j].status == ReadDataStatus_Error) { 476 WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status); 477 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 478 return context->stripes[j].iosb.Status; 479 } else if (context->stripes[j].status == ReadDataStatus_Success) 
480 stripe = j; 481 } 482 483 if (context->tree) { 484 tree_header* th = (tree_header*)buf; 485 486 if (!check_tree_checksum(Vcb, th)) { 487 checksum_error = true; 488 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 489 } else if (addr != th->address) { 490 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr); 491 checksum_error = true; 492 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 493 } else if (generation != 0 && generation != th->generation) { 494 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation); 495 checksum_error = true; 496 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 497 } 498 } else if (context->csum) { 499 Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum); 500 501 if (Status == STATUS_CRC_ERROR) 502 checksum_error = true; 503 else if (!NT_SUCCESS(Status)) { 504 ERR("check_csum returned %08lx\n", Status); 505 return Status; 506 } 507 } 508 509 if (!checksum_error) 510 return STATUS_SUCCESS; 511 512 if (context->tree) { 513 tree_header* t2; 514 uint64_t off; 515 uint16_t badsubstripe = 0; 516 bool recovered = false; 517 518 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 519 if (!t2) { 520 ERR("out of memory\n"); 521 return STATUS_INSUFFICIENT_RESOURCES; 522 } 523 524 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe); 525 526 stripe *= ci->sub_stripes; 527 528 for (uint16_t j = 0; j < ci->sub_stripes; j++) { 529 if (context->stripes[stripe + j].status == ReadDataStatus_Success) { 530 badsubstripe = j; 531 break; 532 } 533 } 534 535 for (uint16_t j = 0; j < ci->sub_stripes; j++) { 536 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) { 537 Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + 
j].offset + off, 538 Vcb->superblock.node_size, (uint8_t*)t2, false); 539 if (!NT_SUCCESS(Status)) { 540 WARN("sync_read_phys returned %08lx\n", Status); 541 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS); 542 } else { 543 bool checksum_error = !check_tree_checksum(Vcb, t2); 544 545 if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) { 546 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 547 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id); 548 recovered = true; 549 550 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad 551 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj, 552 cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size); 553 if (!NT_SUCCESS(Status)) { 554 WARN("write_data_phys returned %08lx\n", Status); 555 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 556 } 557 } 558 559 break; 560 } else if (t2->address != addr || checksum_error) 561 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 562 else 563 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS); 564 } 565 } 566 } 567 568 if (!recovered) { 569 ERR("unrecoverable checksum error at %I64x\n", addr); 570 ExFreePool(t2); 571 return STATUS_CRC_ERROR; 572 } 573 574 ExFreePool(t2); 575 } else { 576 ULONG sectors = length >> Vcb->sector_shift; 577 uint8_t* sector; 578 void* ptr = context->csum; 579 580 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 581 if (!sector) { 582 ERR("out of memory\n"); 583 return STATUS_INSUFFICIENT_RESOURCES; 584 } 585 586 for (ULONG i = 0; i < sectors; i++) { 587 if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) { 588 uint64_t off; 589 uint16_t stripe2, 
badsubstripe = 0; 590 bool recovered = false; 591 592 get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length, 593 ci->num_stripes / ci->sub_stripes, &off, &stripe2); 594 595 stripe2 *= ci->sub_stripes; 596 597 for (uint16_t j = 0; j < ci->sub_stripes; j++) { 598 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) { 599 badsubstripe = j; 600 break; 601 } 602 } 603 604 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 605 606 for (uint16_t j = 0; j < ci->sub_stripes; j++) { 607 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) { 608 Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off, 609 Vcb->superblock.sector_size, sector, false); 610 if (!NT_SUCCESS(Status)) { 611 WARN("sync_read_phys returned %08lx\n", Status); 612 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS); 613 } else { 614 if (check_sector_csum(Vcb, sector, ptr)) { 615 RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size); 616 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe2 + j]->devitem.dev_id); 617 recovered = true; 618 619 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad 620 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj, 621 cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size); 622 if (!NT_SUCCESS(Status)) { 623 WARN("write_data_phys returned %08lx\n", Status); 624 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS); 625 } 626 } 627 628 break; 629 } else 630 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 631 } 632 } 633 } 
                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }

            ptr = (uint8_t*)ptr + Vcb->csum_size;
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}

/* Verify (and if necessary repair) data already read from a RAID5 chunk.
 * Uncommitted partial stripes are first overlaid from the in-memory cache.
 * On checksum/generation failure, the missing data is reconstructed by
 * XORing the remaining stripes with parity, then written back if possible.
 * In degraded mode a reconstruction is attempted even without a checksum. */
static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
                                device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
    NTSTATUS Status;
    bool checksum_error = false;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
    uint16_t j, stripe = 0;
    bool no_success = true;

    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            no_success = false;
        }
    }

    if (c) { // check partial stripes
        // Overlay any not-yet-flushed partial-stripe data that overlaps this
        // read, so the caller sees the latest contents.
        LIST_ENTRY* le;
        uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;

        ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);

        le = c->partial_stripes.Flink;
        while (le != &c->partial_stripes) {
            partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

            if (ps->address + ps_length > addr && ps->address < addr + length) {
                ULONG runlength, index;

                // Clear bits in the bitmap mark sectors with valid cached data.
                runlength = RtlFindFirstRunClear(&ps->bmp, &index);

                while (runlength != 0) {
                    if (index >= ps->bmplen)
                        break;

                    if (index + runlength >= ps->bmplen) {
                        runlength = ps->bmplen - index;

                        if (runlength == 0)
                            break;
                    }

                    uint64_t runstart = ps->address + (index << Vcb->sector_shift);
                    uint64_t runend = runstart + (runlength << Vcb->sector_shift);
                    uint64_t start = max(runstart, addr);
                    uint64_t end = min(runend, addr + length);

                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break;

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        tree_header* th = (tree_header*)buf;

        if (addr != th->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = true;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    } else if (degraded)
        checksum_error = true;   // no csum to check, but a device is missing: must reconstruct

    if (!checksum_error)
        return STATUS_SUCCESS;

    if (context->tree) {
        uint16_t parity;
        uint64_t off;
        bool recovered = false, first = true, failed = false;
        uint8_t* t2;

        // Double buffer: first half accumulates the XOR reconstruction,
        // second half receives each stripe as it is read.
        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);

        // Parity rotates across stripes per row; map data stripe to physical stripe.
        parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

        stripe = (parity + stripe + 1) % ci->num_stripes;

        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe) {
                if (devices[j] && devices[j]->devobj) {
                    if (first) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        first = false;
                    } else {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
                    }
                } else {
                    // Two unavailable stripes: RAID5 cannot reconstruct.
                    failed = true;
                    break;
                }
            }
        }

        if (!failed) {
            tree_header* t3 = (tree_header*)t2;

            if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
                RtlCopyMemory(buf, t2, Vcb->superblock.node_size);

                if (!degraded)
                    ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

                recovered = true;

                if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
                    if (!NT_SUCCESS(Status)) {
                        WARN("write_data_phys returned %08lx\n", Status);
                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                    }
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        ULONG sectors = length >> Vcb->sector_shift;
        uint8_t* sector;
        void* ptr = context->csum;   // NULL when reading without checksums (e.g. degraded)

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (ULONG i = 0; i < sectors; i++) {
            uint16_t parity;
            uint64_t off;

            get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
                             ci->num_stripes - 1, &off, &stripe);

            parity = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

            stripe = (parity + stripe + 1) % ci->num_stripes;

            // Reconstruct if the device is missing, or if the sector fails its checksum.
            if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
                bool recovered = false, first = true, failed = false;

                if (devices[stripe] && devices[stripe]->devobj)
                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe) {
                        if (devices[j] && devices[j]->devobj) {
                            if (first) {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08lx\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                first = false;
                            } else {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
                                                        sector + Vcb->superblock.sector_size, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08lx\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
                            }
                        } else {
                            failed = true;
                            break;
                        }
                    }
                }

                if (!failed) {
                    // Without a checksum (degraded path) we must trust the XOR result.
                    if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
                        RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);

                        if (!degraded)
                            ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);

                        recovered = true;

                        if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
                                                     sector, Vcb->superblock.sector_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08lx\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }

            if (ptr)
                ptr = (uint8_t*)ptr + Vcb->csum_size;
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}

/* Reconstruct one or two missing stripes of a RAID6 row using Galois-field
 * arithmetic. `sectors` holds all num_stripes stripes laid out contiguously
 * (data stripes, then p, then q); out receives the rebuilt stripe(s). */
void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        uint16_t missing = missing1 == (num_stripes - 2) ?
missing2 : missing1;
        uint16_t stripe;

        stripe = num_stripes - 3;

        // Horner-style accumulation of q' over the surviving data stripes,
        // doubling (multiplying by the generator) at each step.
        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        do {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        } while (stripe > 0);

        // XOR in q itself (last stripe), then divide out the generator power.
        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        if (missing != 0)
            galois_divpower(out, (uint8_t)missing, sector_size);
    } else { // reconstruct from p and q
        uint16_t x = missing1, y = missing2, stripe;
        uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
        uint32_t j;

        stripe = num_stripes - 3;

        // qxy accumulates the q-syndrome over surviving stripes,
        // pxy the p-syndrome; both exclude the two missing stripes.
        pxy = out + sector_size;
        qxy = out;

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        do {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            }
        } while (stripe > 0);

        // GF(2^8) coefficients for solving the two-unknown linear system.
        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        p = sectors + ((num_stripes - 2) * sector_size);
        q = sectors + ((num_stripes - 1) * sector_size);

        // First missing stripe (x), byte by byte.
        for (j = 0; j < sector_size; j++) {
            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

            p++;
            q++;
            pxy++;
            qxy++;
        }

        // Second missing stripe (y) follows from p: Dy = Dx ^ Pxy ^ P.
        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}

/* Verify (and if necessary repair) data already read from a RAID6 chunk.
 * Mirrors read_data_raid5 but with two parity stripes, so up to two failed
 * stripes per row can be reconstructed via raid6_recover2.
 * (Function continues beyond this chunk of the file.) */
static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
                                device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
    NTSTATUS Status;
    bool checksum_error = false;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
    uint16_t stripe = 0, j;
    bool no_success = true;

    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);

            // devices[j] may be NULL in degraded mode.
            if (devices[j])
                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            no_success = false;
        }
    }

    if (c) { // check partial stripes
        // Overlay any not-yet-flushed partial-stripe data overlapping this read.
        LIST_ENTRY* le;
        uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;

        ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);

        le = c->partial_stripes.Flink;
        while (le != &c->partial_stripes) {
            partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

            if (ps->address + ps_length > addr && ps->address < addr + length) {
                ULONG runlength, index;

                runlength = RtlFindFirstRunClear(&ps->bmp, &index);

                while (runlength != 0) {
                    if (index >= ps->bmplen)
                        break;

                    if (index + runlength >= ps->bmplen) {
                        runlength = ps->bmplen - index;

                        if (runlength == 0)
                            break;
                    }

                    uint64_t runstart = ps->address + (index << Vcb->sector_shift);
                    uint64_t runend = runstart + (runlength << Vcb->sector_shift);
                    uint64_t start = max(runstart, addr);
                    uint64_t end = min(runend, addr + length);

                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break;

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        tree_header* th = (tree_header*)buf;

        if (addr != th->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = true;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = true;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    } else if (degraded)
        checksum_error = true;

    if (!checksum_error)
        return STATUS_SUCCESS;

    if (context->tree) {
        uint8_t* sector;
        uint16_t k, physstripe, parity1, parity2, error_stripe = 0;
        uint64_t off;
        bool recovered = false, failed = false;
        ULONG num_errors = 0;

        sector =
ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG); 1099 if (!sector) { 1100 ERR("out of memory\n"); 1101 return STATUS_INSUFFICIENT_RESOURCES; 1102 } 1103 1104 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe); 1105 1106 parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1107 parity2 = (parity1 + 1) % ci->num_stripes; 1108 1109 physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1110 1111 j = (parity2 + 1) % ci->num_stripes; 1112 1113 for (k = 0; k < ci->num_stripes - 1; k++) { 1114 if (j != physstripe) { 1115 if (devices[j] && devices[j]->devobj) { 1116 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, 1117 sector + (k * Vcb->superblock.node_size), false); 1118 if (!NT_SUCCESS(Status)) { 1119 ERR("sync_read_phys returned %08lx\n", Status); 1120 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1121 num_errors++; 1122 error_stripe = k; 1123 1124 if (num_errors > 1) { 1125 failed = true; 1126 break; 1127 } 1128 } 1129 } else { 1130 num_errors++; 1131 error_stripe = k; 1132 1133 if (num_errors > 1) { 1134 failed = true; 1135 break; 1136 } 1137 } 1138 } 1139 1140 j = (j + 1) % ci->num_stripes; 1141 } 1142 1143 if (!failed) { 1144 if (num_errors == 0) { 1145 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size)); 1146 1147 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), 1148 Vcb->superblock.node_size); 1149 1150 for (j = 0; j < ci->num_stripes - 2; j++) { 1151 if (j != stripe) 1152 do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size); 1153 } 1154 1155 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) { 1156 
RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1157 1158 if (devices[physstripe] && devices[physstripe]->devobj) 1159 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id); 1160 1161 recovered = true; 1162 1163 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1164 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1165 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1166 if (!NT_SUCCESS(Status)) { 1167 WARN("write_data_phys returned %08lx\n", Status); 1168 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1169 } 1170 } 1171 } 1172 } 1173 1174 if (!recovered) { 1175 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size)); 1176 bool read_q = false; 1177 1178 if (devices[parity2] && devices[parity2]->devobj) { 1179 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off, 1180 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false); 1181 if (!NT_SUCCESS(Status)) { 1182 ERR("sync_read_phys returned %08lx\n", Status); 1183 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1184 } else 1185 read_q = true; 1186 } 1187 1188 if (read_q) { 1189 if (num_errors == 1) { 1190 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1191 1192 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) 1193 recovered = true; 1194 } else { 1195 for (j = 0; j < ci->num_stripes - 1; j++) { 1196 if (j != stripe) { 1197 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * 
Vcb->superblock.node_size)); 1198 1199 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) { 1200 recovered = true; 1201 error_stripe = j; 1202 break; 1203 } 1204 } 1205 } 1206 } 1207 } 1208 1209 if (recovered) { 1210 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1211 1212 if (devices[physstripe] && devices[physstripe]->devobj) 1213 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id); 1214 1215 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1216 1217 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1218 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1219 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1220 if (!NT_SUCCESS(Status)) { 1221 WARN("write_data_phys returned %08lx\n", Status); 1222 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1223 } 1224 } 1225 1226 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1227 if (error_stripe == ci->num_stripes - 2) { 1228 ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id); 1229 1230 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1231 1232 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size); 1233 1234 for (j = 0; j < ci->num_stripes - 2; j++) { 1235 if (j == stripe) { 1236 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size), 1237 Vcb->superblock.node_size); 1238 } else { 1239 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * 
Vcb->superblock.node_size), 1240 Vcb->superblock.node_size); 1241 } 1242 } 1243 } else { 1244 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length), 1245 devices[error_stripe_phys]->devitem.dev_id); 1246 1247 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1248 1249 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size), 1250 sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size); 1251 } 1252 } 1253 1254 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1255 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off, 1256 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1257 if (!NT_SUCCESS(Status)) { 1258 WARN("write_data_phys returned %08lx\n", Status); 1259 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 1260 } 1261 } 1262 } 1263 } 1264 } 1265 1266 if (!recovered) { 1267 ERR("unrecoverable checksum error at %I64x\n", addr); 1268 ExFreePool(sector); 1269 return STATUS_CRC_ERROR; 1270 } 1271 1272 ExFreePool(sector); 1273 } else { 1274 ULONG sectors = length >> Vcb->sector_shift; 1275 uint8_t* sector; 1276 void* ptr = context->csum; 1277 1278 sector = ExAllocatePoolWithTag(NonPagedPool, (ci->num_stripes + 2) << Vcb->sector_shift, ALLOC_TAG); 1279 if (!sector) { 1280 ERR("out of memory\n"); 1281 return STATUS_INSUFFICIENT_RESOURCES; 1282 } 1283 1284 for (ULONG i = 0; i < sectors; i++) { 1285 uint64_t off; 1286 uint16_t physstripe, parity1, parity2; 1287 1288 get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length, 1289 ci->num_stripes - 2, &off, &stripe); 1290 1291 parity1 = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 
2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1292 parity2 = (parity1 + 1) % ci->num_stripes; 1293 1294 physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1295 1296 if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) { 1297 uint16_t error_stripe = 0; 1298 bool recovered = false, failed = false; 1299 ULONG num_errors = 0; 1300 1301 if (devices[physstripe] && devices[physstripe]->devobj) 1302 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS); 1303 1304 j = (parity2 + 1) % ci->num_stripes; 1305 1306 for (uint16_t k = 0; k < ci->num_stripes - 1; k++) { 1307 if (j != physstripe) { 1308 if (devices[j] && devices[j]->devobj) { 1309 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, 1310 sector + ((ULONG)k << Vcb->sector_shift), false); 1311 if (!NT_SUCCESS(Status)) { 1312 ERR("sync_read_phys returned %08lx\n", Status); 1313 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1314 num_errors++; 1315 error_stripe = k; 1316 1317 if (num_errors > 1) { 1318 failed = true; 1319 break; 1320 } 1321 } 1322 } else { 1323 num_errors++; 1324 error_stripe = k; 1325 1326 if (num_errors > 1) { 1327 failed = true; 1328 break; 1329 } 1330 } 1331 } 1332 1333 j = (j + 1) % ci->num_stripes; 1334 } 1335 1336 if (!failed) { 1337 if (num_errors == 0) { 1338 RtlCopyMemory(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size); 1339 1340 for (j = 0; j < ci->num_stripes - 2; j++) { 1341 if (j != stripe) 1342 do_xor(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift), Vcb->superblock.sector_size); 1343 } 1344 1345 if (!ptr || check_sector_csum(Vcb, sector + ((unsigned int)stripe << Vcb->sector_shift), ptr)) { 1346 RtlCopyMemory(buf + (i << 
Vcb->sector_shift), sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size); 1347 1348 if (devices[physstripe] && devices[physstripe]->devobj) 1349 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), 1350 devices[physstripe]->devitem.dev_id); 1351 1352 recovered = true; 1353 1354 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1355 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1356 sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size); 1357 if (!NT_SUCCESS(Status)) { 1358 WARN("write_data_phys returned %08lx\n", Status); 1359 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1360 } 1361 } 1362 } 1363 } 1364 1365 if (!recovered) { 1366 bool read_q = false; 1367 1368 if (devices[parity2] && devices[parity2]->devobj) { 1369 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off, 1370 Vcb->superblock.sector_size, sector + ((unsigned int)(ci->num_stripes - 1) << Vcb->sector_shift), false); 1371 if (!NT_SUCCESS(Status)) { 1372 ERR("sync_read_phys returned %08lx\n", Status); 1373 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS); 1374 } else 1375 read_q = true; 1376 } 1377 1378 if (read_q) { 1379 if (num_errors == 1) { 1380 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift)); 1381 1382 if (!devices[physstripe] || !devices[physstripe]->devobj) 1383 recovered = true; 1384 else 1385 recovered = check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr); 1386 } else { 1387 for (j = 0; j < ci->num_stripes - 1; j++) { 1388 if (j != stripe) { 1389 raid6_recover2(sector, ci->num_stripes, 
Vcb->superblock.sector_size, stripe, j, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift)); 1390 1391 if (check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr)) { 1392 recovered = true; 1393 error_stripe = j; 1394 break; 1395 } 1396 } 1397 } 1398 } 1399 } 1400 1401 if (recovered) { 1402 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1403 1404 if (devices[physstripe] && devices[physstripe]->devobj) 1405 ERR("recovering from checksum error at %I64x, device %I64x\n", 1406 addr + ((uint64_t)i << Vcb->sector_shift), devices[physstripe]->devitem.dev_id); 1407 1408 RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size); 1409 1410 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1411 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1412 sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size); 1413 if (!NT_SUCCESS(Status)) { 1414 WARN("write_data_phys returned %08lx\n", Status); 1415 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1416 } 1417 } 1418 1419 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1420 if (error_stripe == ci->num_stripes - 2) { 1421 ERR("recovering from parity error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), 1422 devices[error_stripe_phys]->devitem.dev_id); 1423 1424 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1425 1426 RtlZeroMemory(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size); 1427 1428 for (j = 0; j < ci->num_stripes - 2; j++) { 1429 if (j == stripe) { 1430 do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector 
+ ((unsigned int)ci->num_stripes << Vcb->sector_shift), 1431 Vcb->superblock.sector_size); 1432 } else { 1433 do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift), 1434 Vcb->superblock.sector_size); 1435 } 1436 } 1437 } else { 1438 ERR("recovering from checksum error at %I64x, device %I64x\n", 1439 addr + ((uint64_t)i << Vcb->sector_shift) + ((error_stripe - stripe) * ci->stripe_length), 1440 devices[error_stripe_phys]->devitem.dev_id); 1441 1442 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1443 1444 RtlCopyMemory(sector + ((unsigned int)error_stripe << Vcb->sector_shift), 1445 sector + ((unsigned int)(ci->num_stripes + 1) << Vcb->sector_shift), Vcb->superblock.sector_size); 1446 } 1447 } 1448 1449 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1450 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off, 1451 sector + ((unsigned int)error_stripe << Vcb->sector_shift), Vcb->superblock.sector_size); 1452 if (!NT_SUCCESS(Status)) { 1453 WARN("write_data_phys returned %08lx\n", Status); 1454 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 1455 } 1456 } 1457 } 1458 } 1459 } 1460 1461 if (!recovered) { 1462 ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift)); 1463 ExFreePool(sector); 1464 return STATUS_CRC_ERROR; 1465 } 1466 } 1467 1468 if (ptr) 1469 ptr = (uint8_t*)ptr + Vcb->csum_size; 1470 } 1471 1472 ExFreePool(sector); 1473 } 1474 1475 return STATUS_SUCCESS; 1476 } 1477 1478 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) void* csum, 1479 _In_ bool is_tree, _Out_writes_bytes_(length) 
uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read, 1480 _In_ ULONG priority) { 1481 CHUNK_ITEM* ci; 1482 CHUNK_ITEM_STRIPE* cis; 1483 read_data_context context; 1484 uint64_t type, offset, total_reading = 0; 1485 NTSTATUS Status; 1486 device** devices = NULL; 1487 uint16_t i, startoffstripe, allowed_missing, missing_devices = 0; 1488 uint8_t* dummypage = NULL; 1489 PMDL dummy_mdl = NULL; 1490 bool need_to_wait; 1491 uint64_t lockaddr, locklen; 1492 1493 if (Vcb->log_to_phys_loaded) { 1494 if (!c) { 1495 c = get_chunk_from_address(Vcb, addr); 1496 1497 if (!c) { 1498 ERR("get_chunk_from_address failed\n"); 1499 return STATUS_INTERNAL_ERROR; 1500 } 1501 } 1502 1503 ci = c->chunk_item; 1504 offset = c->offset; 1505 devices = c->devices; 1506 1507 if (pc) 1508 *pc = c; 1509 } else { 1510 LIST_ENTRY* le = Vcb->sys_chunks.Flink; 1511 1512 ci = NULL; 1513 1514 c = NULL; 1515 while (le != &Vcb->sys_chunks) { 1516 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); 1517 1518 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { 1519 CHUNK_ITEM* chunk_item = sc->data; 1520 1521 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { 1522 ci = chunk_item; 1523 offset = sc->key.offset; 1524 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; 1525 1526 devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); 1527 if (!devices) { 1528 ERR("out of memory\n"); 1529 return STATUS_INSUFFICIENT_RESOURCES; 1530 } 1531 1532 for (i = 0; i < ci->num_stripes; i++) { 1533 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); 1534 } 1535 1536 break; 1537 } 1538 } 1539 1540 le = le->Flink; 1541 } 1542 1543 if (!ci) { 1544 ERR("could not find chunk for %I64x in bootstrap\n", addr); 1545 return STATUS_INTERNAL_ERROR; 1546 } 1547 1548 if (pc) 1549 *pc = NULL; 1550 } 1551 1552 if (ci->type & BLOCK_FLAG_DUPLICATE) { 1553 
type = BLOCK_FLAG_DUPLICATE; 1554 allowed_missing = ci->num_stripes - 1; 1555 } else if (ci->type & BLOCK_FLAG_RAID0) { 1556 type = BLOCK_FLAG_RAID0; 1557 allowed_missing = 0; 1558 } else if (ci->type & BLOCK_FLAG_RAID1) { 1559 type = BLOCK_FLAG_DUPLICATE; 1560 allowed_missing = 1; 1561 } else if (ci->type & BLOCK_FLAG_RAID10) { 1562 type = BLOCK_FLAG_RAID10; 1563 allowed_missing = 1; 1564 } else if (ci->type & BLOCK_FLAG_RAID5) { 1565 type = BLOCK_FLAG_RAID5; 1566 allowed_missing = 1; 1567 } else if (ci->type & BLOCK_FLAG_RAID6) { 1568 type = BLOCK_FLAG_RAID6; 1569 allowed_missing = 2; 1570 } else if (ci->type & BLOCK_FLAG_RAID1C3) { 1571 type = BLOCK_FLAG_DUPLICATE; 1572 allowed_missing = 2; 1573 } else if (ci->type & BLOCK_FLAG_RAID1C4) { 1574 type = BLOCK_FLAG_DUPLICATE; 1575 allowed_missing = 3; 1576 } else { // SINGLE 1577 type = BLOCK_FLAG_DUPLICATE; 1578 allowed_missing = 0; 1579 } 1580 1581 cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 1582 1583 RtlZeroMemory(&context, sizeof(read_data_context)); 1584 KeInitializeEvent(&context.Event, NotificationEvent, false); 1585 1586 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); 1587 if (!context.stripes) { 1588 ERR("out of memory\n"); 1589 return STATUS_INSUFFICIENT_RESOURCES; 1590 } 1591 1592 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) { 1593 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen); 1594 chunk_lock_range(Vcb, c, lockaddr, locklen); 1595 } 1596 1597 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes); 1598 1599 context.buflen = length; 1600 context.num_stripes = ci->num_stripes; 1601 context.stripes_left = context.num_stripes; 1602 context.sector_size = Vcb->superblock.sector_size; 1603 context.csum = csum; 1604 context.tree = is_tree; 1605 context.type = type; 1606 1607 if (type == BLOCK_FLAG_RAID0) { 1608 uint64_t startoff, endoff; 1609 uint16_t endoffstripe, stripe; 1610 uint32_t *stripeoff, pos; 1611 
PMDL master_mdl; 1612 PFN_NUMBER* pfns; 1613 1614 // FIXME - test this still works if page size isn't the same as sector size 1615 1616 // This relies on the fact that MDLs are followed in memory by the page file numbers, 1617 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0 1618 // data for you without doing a memcpy yourself. 1619 // MDLs are officially opaque, so this might very well break in future versions of Windows. 1620 1621 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); 1622 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); 1623 1624 if (file_read) { 1625 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL 1626 // with duplicated dummy PFNs, which confuse check_csum. Ah well. 1627 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested. 1628 1629 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1630 1631 if (!context.va) { 1632 ERR("out of memory\n"); 1633 Status = STATUS_INSUFFICIENT_RESOURCES; 1634 goto exit; 1635 } 1636 } else 1637 context.va = buf; 1638 1639 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1640 if (!master_mdl) { 1641 ERR("out of memory\n"); 1642 Status = STATUS_INSUFFICIENT_RESOURCES; 1643 goto exit; 1644 } 1645 1646 Status = STATUS_SUCCESS; 1647 1648 _SEH2_TRY { 1649 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1650 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1651 Status = _SEH2_GetExceptionCode(); 1652 } _SEH2_END; 1653 1654 if (!NT_SUCCESS(Status)) { 1655 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 1656 IoFreeMdl(master_mdl); 1657 goto exit; 1658 } 1659 1660 pfns = (PFN_NUMBER*)(master_mdl + 1); 1661 1662 for (i = 0; i < ci->num_stripes; i++) { 1663 if (startoffstripe > i) 1664 context.stripes[i].stripestart = startoff - (startoff % 
ci->stripe_length) + ci->stripe_length; 1665 else if (startoffstripe == i) 1666 context.stripes[i].stripestart = startoff; 1667 else 1668 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length); 1669 1670 if (endoffstripe > i) 1671 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1672 else if (endoffstripe == i) 1673 context.stripes[i].stripeend = endoff + 1; 1674 else 1675 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length); 1676 1677 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 1678 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL); 1679 1680 if (!context.stripes[i].mdl) { 1681 ERR("IoAllocateMdl failed\n"); 1682 MmUnlockPages(master_mdl); 1683 IoFreeMdl(master_mdl); 1684 Status = STATUS_INSUFFICIENT_RESOURCES; 1685 goto exit; 1686 } 1687 } 1688 } 1689 1690 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 1691 if (!stripeoff) { 1692 ERR("out of memory\n"); 1693 MmUnlockPages(master_mdl); 1694 IoFreeMdl(master_mdl); 1695 Status = STATUS_INSUFFICIENT_RESOURCES; 1696 goto exit; 1697 } 1698 1699 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 1700 1701 pos = 0; 1702 stripe = startoffstripe; 1703 while (pos < length) { 1704 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 1705 1706 if (pos == 0) { 1707 uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)); 1708 1709 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1710 1711 stripeoff[stripe] += readlen; 1712 pos += readlen; 1713 } else if (length - pos < ci->stripe_length) { 1714 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * 
sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1715 1716 pos = length; 1717 } else { 1718 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1719 1720 stripeoff[stripe] += (uint32_t)ci->stripe_length; 1721 pos += (uint32_t)ci->stripe_length; 1722 } 1723 1724 stripe = (stripe + 1) % ci->num_stripes; 1725 } 1726 1727 MmUnlockPages(master_mdl); 1728 IoFreeMdl(master_mdl); 1729 1730 ExFreePool(stripeoff); 1731 } else if (type == BLOCK_FLAG_RAID10) { 1732 uint64_t startoff, endoff; 1733 uint16_t endoffstripe, j, stripe; 1734 ULONG orig_ls; 1735 PMDL master_mdl; 1736 PFN_NUMBER* pfns; 1737 uint32_t* stripeoff, pos; 1738 read_data_stripe** stripes; 1739 1740 if (c) 1741 orig_ls = c->last_stripe; 1742 else 1743 orig_ls = 0; 1744 1745 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); 1746 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); 1747 1748 if ((ci->num_stripes % ci->sub_stripes) != 0) { 1749 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); 1750 Status = STATUS_INTERNAL_ERROR; 1751 goto exit; 1752 } 1753 1754 if (file_read) { 1755 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1756 1757 if (!context.va) { 1758 ERR("out of memory\n"); 1759 Status = STATUS_INSUFFICIENT_RESOURCES; 1760 goto exit; 1761 } 1762 } else 1763 context.va = buf; 1764 1765 context.firstoff = (uint16_t)((startoff % ci->stripe_length) >> Vcb->sector_shift); 1766 context.startoffstripe = startoffstripe; 1767 context.sectors_per_stripe = (uint16_t)(ci->stripe_length >> Vcb->sector_shift); 1768 1769 startoffstripe *= ci->sub_stripes; 1770 endoffstripe *= ci->sub_stripes; 1771 1772 if (c) 1773 c->last_stripe = (orig_ls + 1) % ci->sub_stripes; 1774 1775 master_mdl = 
IoAllocateMdl(context.va, length, false, false, NULL); 1776 if (!master_mdl) { 1777 ERR("out of memory\n"); 1778 Status = STATUS_INSUFFICIENT_RESOURCES; 1779 goto exit; 1780 } 1781 1782 Status = STATUS_SUCCESS; 1783 1784 _SEH2_TRY { 1785 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1786 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1787 Status = _SEH2_GetExceptionCode(); 1788 } _SEH2_END; 1789 1790 if (!NT_SUCCESS(Status)) { 1791 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 1792 IoFreeMdl(master_mdl); 1793 goto exit; 1794 } 1795 1796 pfns = (PFN_NUMBER*)(master_mdl + 1); 1797 1798 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1799 if (!stripes) { 1800 ERR("out of memory\n"); 1801 MmUnlockPages(master_mdl); 1802 IoFreeMdl(master_mdl); 1803 Status = STATUS_INSUFFICIENT_RESOURCES; 1804 goto exit; 1805 } 1806 1807 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes); 1808 1809 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { 1810 uint64_t sstart, send; 1811 bool stripeset = false; 1812 1813 if (startoffstripe > i) 1814 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1815 else if (startoffstripe == i) 1816 sstart = startoff; 1817 else 1818 sstart = startoff - (startoff % ci->stripe_length); 1819 1820 if (endoffstripe > i) 1821 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1822 else if (endoffstripe == i) 1823 send = endoff + 1; 1824 else 1825 send = endoff - (endoff % ci->stripe_length); 1826 1827 for (j = 0; j < ci->sub_stripes; j++) { 1828 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) { 1829 context.stripes[i+j].stripestart = sstart; 1830 context.stripes[i+j].stripeend = send; 1831 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1832 1833 if (sstart != send) { 1834 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL); 
1835 1836 if (!context.stripes[i+j].mdl) { 1837 ERR("IoAllocateMdl failed\n"); 1838 MmUnlockPages(master_mdl); 1839 IoFreeMdl(master_mdl); 1840 Status = STATUS_INSUFFICIENT_RESOURCES; 1841 goto exit; 1842 } 1843 } 1844 1845 stripeset = true; 1846 } else 1847 context.stripes[i+j].status = ReadDataStatus_Skip; 1848 } 1849 1850 if (!stripeset) { 1851 for (j = 0; j < ci->sub_stripes; j++) { 1852 if (devices[i+j] && devices[i+j]->devobj) { 1853 context.stripes[i+j].stripestart = sstart; 1854 context.stripes[i+j].stripeend = send; 1855 context.stripes[i+j].status = ReadDataStatus_Pending; 1856 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1857 1858 if (sstart != send) { 1859 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL); 1860 1861 if (!context.stripes[i+j].mdl) { 1862 ERR("IoAllocateMdl failed\n"); 1863 MmUnlockPages(master_mdl); 1864 IoFreeMdl(master_mdl); 1865 Status = STATUS_INSUFFICIENT_RESOURCES; 1866 goto exit; 1867 } 1868 } 1869 1870 stripeset = true; 1871 break; 1872 } 1873 } 1874 1875 if (!stripeset) { 1876 ERR("could not find stripe to read\n"); 1877 Status = STATUS_DEVICE_NOT_READY; 1878 goto exit; 1879 } 1880 } 1881 } 1882 1883 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1884 if (!stripeoff) { 1885 ERR("out of memory\n"); 1886 MmUnlockPages(master_mdl); 1887 IoFreeMdl(master_mdl); 1888 Status = STATUS_INSUFFICIENT_RESOURCES; 1889 goto exit; 1890 } 1891 1892 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes); 1893 1894 pos = 0; 1895 stripe = startoffstripe / ci->sub_stripes; 1896 while (pos < length) { 1897 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1); 1898 1899 if (pos == 0) { 1900 uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart, 1901 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length)); 1902 1903 RtlCopyMemory(stripe_pfns, 
pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1904 1905 stripeoff[stripe] += readlen; 1906 pos += readlen; 1907 } else if (length - pos < ci->stripe_length) { 1908 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1909 1910 pos = length; 1911 } else { 1912 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1913 1914 stripeoff[stripe] += (ULONG)ci->stripe_length; 1915 pos += (ULONG)ci->stripe_length; 1916 } 1917 1918 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); 1919 } 1920 1921 MmUnlockPages(master_mdl); 1922 IoFreeMdl(master_mdl); 1923 1924 ExFreePool(stripeoff); 1925 ExFreePool(stripes); 1926 } else if (type == BLOCK_FLAG_DUPLICATE) { 1927 uint64_t orig_ls; 1928 1929 if (c) 1930 orig_ls = i = c->last_stripe; 1931 else 1932 orig_ls = i = 0; 1933 1934 while (!devices[i] || !devices[i]->devobj) { 1935 i = (i + 1) % ci->num_stripes; 1936 1937 if (i == orig_ls) { 1938 ERR("no devices available to service request\n"); 1939 Status = STATUS_DEVICE_NOT_READY; 1940 goto exit; 1941 } 1942 } 1943 1944 if (c) 1945 c->last_stripe = (i + 1) % ci->num_stripes; 1946 1947 context.stripes[i].stripestart = addr - offset; 1948 context.stripes[i].stripeend = context.stripes[i].stripestart + length; 1949 1950 if (file_read) { 1951 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1952 1953 if (!context.va) { 1954 ERR("out of memory\n"); 1955 Status = STATUS_INSUFFICIENT_RESOURCES; 1956 goto exit; 1957 } 1958 1959 context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1960 if (!context.stripes[i].mdl) { 1961 ERR("IoAllocateMdl failed\n"); 1962 Status = STATUS_INSUFFICIENT_RESOURCES; 1963 goto exit; 1964 } 1965 1966 MmBuildMdlForNonPagedPool(context.stripes[i].mdl); 1967 } else { 1968 context.stripes[i].mdl = IoAllocateMdl(buf, length, 
false, false, NULL); 1969 1970 if (!context.stripes[i].mdl) { 1971 ERR("IoAllocateMdl failed\n"); 1972 Status = STATUS_INSUFFICIENT_RESOURCES; 1973 goto exit; 1974 } 1975 1976 Status = STATUS_SUCCESS; 1977 1978 _SEH2_TRY { 1979 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess); 1980 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1981 Status = _SEH2_GetExceptionCode(); 1982 } _SEH2_END; 1983 1984 if (!NT_SUCCESS(Status)) { 1985 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 1986 goto exit; 1987 } 1988 } 1989 } else if (type == BLOCK_FLAG_RAID5) { 1990 uint64_t startoff, endoff; 1991 uint16_t endoffstripe, parity; 1992 uint32_t *stripeoff, pos; 1993 PMDL master_mdl; 1994 PFN_NUMBER *pfns, dummy = 0; 1995 bool need_dummy = false; 1996 1997 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); 1998 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); 1999 2000 if (file_read) { 2001 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2002 2003 if (!context.va) { 2004 ERR("out of memory\n"); 2005 Status = STATUS_INSUFFICIENT_RESOURCES; 2006 goto exit; 2007 } 2008 } else 2009 context.va = buf; 2010 2011 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 2012 if (!master_mdl) { 2013 ERR("out of memory\n"); 2014 Status = STATUS_INSUFFICIENT_RESOURCES; 2015 goto exit; 2016 } 2017 2018 Status = STATUS_SUCCESS; 2019 2020 _SEH2_TRY { 2021 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2022 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2023 Status = _SEH2_GetExceptionCode(); 2024 } _SEH2_END; 2025 2026 if (!NT_SUCCESS(Status)) { 2027 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 2028 IoFreeMdl(master_mdl); 2029 goto exit; 2030 } 2031 2032 pfns = (PFN_NUMBER*)(master_mdl + 1); 2033 2034 pos = 0; 2035 while (pos < length) { 2036 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * 
ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2037 2038 if (pos == 0) { 2039 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2040 ULONG skip, readlen; 2041 2042 i = startoffstripe; 2043 while (stripe != parity) { 2044 if (i == startoffstripe) { 2045 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length))); 2046 2047 context.stripes[stripe].stripestart = startoff; 2048 context.stripes[stripe].stripeend = startoff + readlen; 2049 2050 pos += readlen; 2051 2052 if (pos == length) 2053 break; 2054 } else { 2055 readlen = min(length - pos, (ULONG)ci->stripe_length); 2056 2057 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2058 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2059 2060 pos += readlen; 2061 2062 if (pos == length) 2063 break; 2064 } 2065 2066 i++; 2067 stripe = (stripe + 1) % ci->num_stripes; 2068 } 2069 2070 if (pos == length) 2071 break; 2072 2073 for (i = 0; i < startoffstripe; i++) { 2074 uint16_t stripe2 = (parity + i + 1) % ci->num_stripes; 2075 2076 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2077 } 2078 2079 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2080 2081 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) { 2082 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1); 2083 2084 for (i = 0; i < ci->num_stripes; i++) { 2085 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2086 } 2087 2088 pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length); 2089 need_dummy = true; 2090 } 2091 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2092 for (i = 0; i < ci->num_stripes; i++) { 2093 
context.stripes[i].stripeend += ci->stripe_length; 2094 } 2095 2096 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1)); 2097 need_dummy = true; 2098 } else { 2099 uint16_t stripe = (parity + 1) % ci->num_stripes; 2100 2101 i = 0; 2102 while (stripe != parity) { 2103 if (endoffstripe == i) { 2104 context.stripes[stripe].stripeend = endoff + 1; 2105 break; 2106 } else if (endoffstripe > i) 2107 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2108 2109 i++; 2110 stripe = (stripe + 1) % ci->num_stripes; 2111 } 2112 2113 break; 2114 } 2115 } 2116 2117 for (i = 0; i < ci->num_stripes; i++) { 2118 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2119 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), 2120 false, false, NULL); 2121 2122 if (!context.stripes[i].mdl) { 2123 ERR("IoAllocateMdl failed\n"); 2124 MmUnlockPages(master_mdl); 2125 IoFreeMdl(master_mdl); 2126 Status = STATUS_INSUFFICIENT_RESOURCES; 2127 goto exit; 2128 } 2129 } 2130 } 2131 2132 if (need_dummy) { 2133 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2134 if (!dummypage) { 2135 ERR("out of memory\n"); 2136 MmUnlockPages(master_mdl); 2137 IoFreeMdl(master_mdl); 2138 Status = STATUS_INSUFFICIENT_RESOURCES; 2139 goto exit; 2140 } 2141 2142 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL); 2143 if (!dummy_mdl) { 2144 ERR("IoAllocateMdl failed\n"); 2145 MmUnlockPages(master_mdl); 2146 IoFreeMdl(master_mdl); 2147 Status = STATUS_INSUFFICIENT_RESOURCES; 2148 goto exit; 2149 } 2150 2151 MmBuildMdlForNonPagedPool(dummy_mdl); 2152 2153 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2154 } 2155 2156 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 2157 if (!stripeoff) { 2158 ERR("out of memory\n"); 2159 MmUnlockPages(master_mdl); 2160 IoFreeMdl(master_mdl); 2161 Status = 
STATUS_INSUFFICIENT_RESOURCES; 2162 goto exit; 2163 } 2164 2165 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 2166 2167 pos = 0; 2168 2169 while (pos < length) { 2170 PFN_NUMBER* stripe_pfns; 2171 2172 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2173 2174 if (pos == 0) { 2175 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2176 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2177 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2178 2179 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2180 2181 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2182 2183 stripeoff[stripe] = readlen; 2184 pos += readlen; 2185 2186 stripe = (stripe + 1) % ci->num_stripes; 2187 2188 while (stripe != parity) { 2189 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2190 readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2191 2192 if (readlen == 0) 2193 break; 2194 2195 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2196 2197 stripeoff[stripe] = readlen; 2198 pos += readlen; 2199 2200 stripe = (stripe + 1) % ci->num_stripes; 2201 } 2202 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2203 uint16_t stripe = (parity + 1) % ci->num_stripes; 2204 ULONG k; 2205 2206 while (stripe != parity) { 2207 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2208 2209 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2210 2211 stripeoff[stripe] += (uint32_t)ci->stripe_length; 2212 pos += (uint32_t)ci->stripe_length; 2213 2214 stripe = (stripe + 1) % ci->num_stripes; 
2215 } 2216 2217 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1); 2218 2219 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2220 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy; 2221 stripeoff[parity] += PAGE_SIZE; 2222 } 2223 } else { 2224 uint16_t stripe = (parity + 1) % ci->num_stripes; 2225 uint32_t readlen; 2226 2227 while (pos < length) { 2228 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2229 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2230 2231 if (readlen == 0) 2232 break; 2233 2234 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2235 2236 stripeoff[stripe] += readlen; 2237 pos += readlen; 2238 2239 stripe = (stripe + 1) % ci->num_stripes; 2240 } 2241 } 2242 } 2243 2244 MmUnlockPages(master_mdl); 2245 IoFreeMdl(master_mdl); 2246 2247 ExFreePool(stripeoff); 2248 } else if (type == BLOCK_FLAG_RAID6) { 2249 uint64_t startoff, endoff; 2250 uint16_t endoffstripe, parity1; 2251 uint32_t *stripeoff, pos; 2252 PMDL master_mdl; 2253 PFN_NUMBER *pfns, dummy = 0; 2254 bool need_dummy = false; 2255 2256 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); 2257 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); 2258 2259 if (file_read) { 2260 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2261 2262 if (!context.va) { 2263 ERR("out of memory\n"); 2264 Status = STATUS_INSUFFICIENT_RESOURCES; 2265 goto exit; 2266 } 2267 } else 2268 context.va = buf; 2269 2270 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 2271 if (!master_mdl) { 2272 ERR("out of memory\n"); 2273 Status = STATUS_INSUFFICIENT_RESOURCES; 2274 goto exit; 2275 } 2276 2277 Status = STATUS_SUCCESS; 2278 2279 _SEH2_TRY { 2280 
MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2281 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2282 Status = _SEH2_GetExceptionCode(); 2283 } _SEH2_END; 2284 2285 if (!NT_SUCCESS(Status)) { 2286 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 2287 IoFreeMdl(master_mdl); 2288 goto exit; 2289 } 2290 2291 pfns = (PFN_NUMBER*)(master_mdl + 1); 2292 2293 pos = 0; 2294 while (pos < length) { 2295 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2296 2297 if (pos == 0) { 2298 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2; 2299 ULONG skip, readlen; 2300 2301 i = startoffstripe; 2302 while (stripe != parity1) { 2303 if (i == startoffstripe) { 2304 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length)); 2305 2306 context.stripes[stripe].stripestart = startoff; 2307 context.stripes[stripe].stripeend = startoff + readlen; 2308 2309 pos += readlen; 2310 2311 if (pos == length) 2312 break; 2313 } else { 2314 readlen = min(length - pos, (ULONG)ci->stripe_length); 2315 2316 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2317 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2318 2319 pos += readlen; 2320 2321 if (pos == length) 2322 break; 2323 } 2324 2325 i++; 2326 stripe = (stripe + 1) % ci->num_stripes; 2327 } 2328 2329 if (pos == length) 2330 break; 2331 2332 for (i = 0; i < startoffstripe; i++) { 2333 uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes; 2334 2335 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2336 } 2337 2338 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2339 2340 parity2 = (parity1 + 1) % ci->num_stripes; 2341 context.stripes[parity2].stripestart = 
context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2342 2343 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) { 2344 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1); 2345 2346 for (i = 0; i < ci->num_stripes; i++) { 2347 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2348 } 2349 2350 pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length); 2351 need_dummy = true; 2352 } 2353 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2354 for (i = 0; i < ci->num_stripes; i++) { 2355 context.stripes[i].stripeend += ci->stripe_length; 2356 } 2357 2358 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2)); 2359 need_dummy = true; 2360 } else { 2361 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2362 2363 i = 0; 2364 while (stripe != parity1) { 2365 if (endoffstripe == i) { 2366 context.stripes[stripe].stripeend = endoff + 1; 2367 break; 2368 } else if (endoffstripe > i) 2369 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2370 2371 i++; 2372 stripe = (stripe + 1) % ci->num_stripes; 2373 } 2374 2375 break; 2376 } 2377 } 2378 2379 for (i = 0; i < ci->num_stripes; i++) { 2380 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2381 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL); 2382 2383 if (!context.stripes[i].mdl) { 2384 ERR("IoAllocateMdl failed\n"); 2385 MmUnlockPages(master_mdl); 2386 IoFreeMdl(master_mdl); 2387 Status = STATUS_INSUFFICIENT_RESOURCES; 2388 goto exit; 2389 } 2390 } 2391 } 2392 2393 if (need_dummy) { 2394 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2395 if (!dummypage) { 2396 ERR("out of memory\n"); 2397 MmUnlockPages(master_mdl); 2398 
IoFreeMdl(master_mdl); 2399 Status = STATUS_INSUFFICIENT_RESOURCES; 2400 goto exit; 2401 } 2402 2403 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL); 2404 if (!dummy_mdl) { 2405 ERR("IoAllocateMdl failed\n"); 2406 MmUnlockPages(master_mdl); 2407 IoFreeMdl(master_mdl); 2408 Status = STATUS_INSUFFICIENT_RESOURCES; 2409 goto exit; 2410 } 2411 2412 MmBuildMdlForNonPagedPool(dummy_mdl); 2413 2414 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2415 } 2416 2417 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 2418 if (!stripeoff) { 2419 ERR("out of memory\n"); 2420 MmUnlockPages(master_mdl); 2421 IoFreeMdl(master_mdl); 2422 Status = STATUS_INSUFFICIENT_RESOURCES; 2423 goto exit; 2424 } 2425 2426 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 2427 2428 pos = 0; 2429 2430 while (pos < length) { 2431 PFN_NUMBER* stripe_pfns; 2432 2433 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2434 2435 if (pos == 0) { 2436 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes; 2437 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2438 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2439 2440 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2441 2442 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2443 2444 stripeoff[stripe] = readlen; 2445 pos += readlen; 2446 2447 stripe = (stripe + 1) % ci->num_stripes; 2448 2449 while (stripe != parity1) { 2450 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2451 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2452 2453 if (readlen == 0) 2454 break; 2455 2456 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> 
PAGE_SHIFT); 2457 2458 stripeoff[stripe] = readlen; 2459 pos += readlen; 2460 2461 stripe = (stripe + 1) % ci->num_stripes; 2462 } 2463 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2464 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2465 uint16_t parity2 = (parity1 + 1) % ci->num_stripes; 2466 ULONG k; 2467 2468 while (stripe != parity1) { 2469 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2470 2471 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2472 2473 stripeoff[stripe] += (uint32_t)ci->stripe_length; 2474 pos += (uint32_t)ci->stripe_length; 2475 2476 stripe = (stripe + 1) % ci->num_stripes; 2477 } 2478 2479 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1); 2480 2481 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2482 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy; 2483 stripeoff[parity1] += PAGE_SIZE; 2484 } 2485 2486 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1); 2487 2488 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2489 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy; 2490 stripeoff[parity2] += PAGE_SIZE; 2491 } 2492 } else { 2493 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2494 uint32_t readlen; 2495 2496 while (pos < length) { 2497 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2498 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2499 2500 if (readlen == 0) 2501 break; 2502 2503 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2504 2505 stripeoff[stripe] += readlen; 2506 pos += readlen; 2507 2508 stripe = (stripe + 1) % ci->num_stripes; 2509 } 2510 } 2511 } 2512 2513 MmUnlockPages(master_mdl); 2514 IoFreeMdl(master_mdl); 2515 2516 ExFreePool(stripeoff); 2517 
} 2518 2519 context.address = addr; 2520 2521 for (i = 0; i < ci->num_stripes; i++) { 2522 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) { 2523 context.stripes[i].status = ReadDataStatus_MissingDevice; 2524 context.stripes_left--; 2525 2526 if (!devices[i] || !devices[i]->devobj) 2527 missing_devices++; 2528 } 2529 } 2530 2531 if (missing_devices > allowed_missing) { 2532 ERR("not enough devices to service request (%u missing)\n", missing_devices); 2533 Status = STATUS_UNEXPECTED_IO_ERROR; 2534 goto exit; 2535 } 2536 2537 for (i = 0; i < ci->num_stripes; i++) { 2538 PIO_STACK_LOCATION IrpSp; 2539 2540 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) { 2541 context.stripes[i].context = (struct read_data_context*)&context; 2542 2543 if (type == BLOCK_FLAG_RAID10) { 2544 context.stripes[i].stripenum = i / ci->sub_stripes; 2545 } 2546 2547 if (!Irp) { 2548 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false); 2549 2550 if (!context.stripes[i].Irp) { 2551 ERR("IoAllocateIrp failed\n"); 2552 Status = STATUS_INSUFFICIENT_RESOURCES; 2553 goto exit; 2554 } 2555 } else { 2556 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); 2557 2558 if (!context.stripes[i].Irp) { 2559 ERR("IoMakeAssociatedIrp failed\n"); 2560 Status = STATUS_INSUFFICIENT_RESOURCES; 2561 goto exit; 2562 } 2563 } 2564 2565 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2566 IrpSp->MajorFunction = IRP_MJ_READ; 2567 IrpSp->MinorFunction = IRP_MN_NORMAL; 2568 IrpSp->FileObject = devices[i]->fileobj; 2569 2570 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2571 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG); 2572 if 
(!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2573 ERR("out of memory\n"); 2574 Status = STATUS_INSUFFICIENT_RESOURCES; 2575 goto exit; 2576 } 2577 2578 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2579 2580 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2581 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) 2582 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl; 2583 else 2584 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2585 2586 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart); 2587 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset; 2588 2589 total_reading += IrpSp->Parameters.Read.Length; 2590 2591 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2592 2593 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true); 2594 2595 context.stripes[i].status = ReadDataStatus_Pending; 2596 } 2597 } 2598 2599 need_to_wait = false; 2600 for (i = 0; i < ci->num_stripes; i++) { 2601 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) { 2602 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp); 2603 need_to_wait = true; 2604 } 2605 } 2606 2607 if (need_to_wait) 2608 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 2609 2610 if (diskacc) 2611 fFsRtlUpdateDiskCounters(total_reading, 0); 2612 2613 // check if any of the devices return a "user-induced" error 2614 2615 for (i = 0; i < ci->num_stripes; i++) { 2616 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) { 2617 Status = context.stripes[i].iosb.Status; 2618 goto exit; 2619 } 2620 } 2621 2622 if (type == BLOCK_FLAG_RAID0) { 2623 Status = 
read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2624 if (!NT_SUCCESS(Status)) { 2625 ERR("read_data_raid0 returned %08lx\n", Status); 2626 2627 if (file_read) 2628 ExFreePool(context.va); 2629 2630 goto exit; 2631 } 2632 2633 if (file_read) { 2634 RtlCopyMemory(buf, context.va, length); 2635 ExFreePool(context.va); 2636 } 2637 } else if (type == BLOCK_FLAG_RAID10) { 2638 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2639 2640 if (!NT_SUCCESS(Status)) { 2641 ERR("read_data_raid10 returned %08lx\n", Status); 2642 2643 if (file_read) 2644 ExFreePool(context.va); 2645 2646 goto exit; 2647 } 2648 2649 if (file_read) { 2650 RtlCopyMemory(buf, context.va, length); 2651 ExFreePool(context.va); 2652 } 2653 } else if (type == BLOCK_FLAG_DUPLICATE) { 2654 Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation); 2655 if (!NT_SUCCESS(Status)) { 2656 ERR("read_data_dup returned %08lx\n", Status); 2657 2658 if (file_read) 2659 ExFreePool(context.va); 2660 2661 goto exit; 2662 } 2663 2664 if (file_read) { 2665 RtlCopyMemory(buf, context.va, length); 2666 ExFreePool(context.va); 2667 } 2668 } else if (type == BLOCK_FLAG_RAID5) { 2669 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false); 2670 if (!NT_SUCCESS(Status)) { 2671 ERR("read_data_raid5 returned %08lx\n", Status); 2672 2673 if (file_read) 2674 ExFreePool(context.va); 2675 2676 goto exit; 2677 } 2678 2679 if (file_read) { 2680 RtlCopyMemory(buf, context.va, length); 2681 ExFreePool(context.va); 2682 } 2683 } else if (type == BLOCK_FLAG_RAID6) { 2684 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? 
/* continuation: final argument of the read_data_raid6() call begun above */
true : false);
        if (!NT_SUCCESS(Status)) {
            ERR("read_data_raid6 returned %08lx\n", Status);

            if (file_read)
                ExFreePool(context.va);

            goto exit;
        }

        // for file reads we staged into a bounce buffer; copy out and free it
        if (file_read) {
            RtlCopyMemory(buf, context.va, length);
            ExFreePool(context.va);
        }
    }

exit:
    // Common cleanup path for every exit from read_data, success or failure.

    // RAID5/6 reads lock the chunk range up front; release it here
    if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
        chunk_unlock_range(Vcb, c, lockaddr, locklen);

    // dummy page/MDL are only allocated when parity stripes needed padding
    if (dummy_mdl)
        IoFreeMdl(dummy_mdl);

    if (dummypage)
        ExFreePool(dummypage);

    // tear down per-stripe MDLs (unlocking first if the pages were probed
    // and locked) and any IRPs that were allocated for the stripes
    for (i = 0; i < ci->num_stripes; i++) {
        if (context.stripes[i].mdl) {
            if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
                MmUnlockPages(context.stripes[i].mdl);

            IoFreeMdl(context.stripes[i].mdl);
        }

        if (context.stripes[i].Irp)
            IoFreeIrp(context.stripes[i].Irp);
    }

    ExFreePool(context.stripes);

    // NOTE(review): devices appears to be owned by this call only when the
    // log-to-phys map isn't loaded — confirm against the allocation site
    if (!Vcb->log_to_phys_loaded)
        ExFreePool(devices);

    return Status;
}

/* Reads from an alternate data stream, whose entire contents are held
 * in memory in fcb->adsdata. Copies up to length bytes starting at byte
 * offset start into data, clamping to the end of the stream, and reports
 * the number of bytes copied via pbr (if non-NULL).
 *
 * Returns STATUS_END_OF_FILE if start is at or beyond the end of the
 * stream, STATUS_SUCCESS otherwise (including zero-length reads). */
__attribute__((nonnull(1, 2)))
NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
    ULONG readlen;

    TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr);

    if (pbr) *pbr = 0;

    if (start >= fcb->adsdata.Length) {
        TRACE("tried to read beyond end of stream\n");
        return STATUS_END_OF_FILE;
    }

    if (length == 0) {
        WARN("tried to read zero bytes\n");
        return STATUS_SUCCESS;
    }

    // clamp the read to the bytes remaining in the stream
    if (start + length < fcb->adsdata.Length)
        readlen = length;
    else
        readlen = fcb->adsdata.Length - (ULONG)start;

    if (readlen > 0)
        RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);

    if (pbr) *pbr = readlen;

    return STATUS_SUCCESS;
}

/* One on-disk extent making up part of a read_part. */
typedef struct {
    uint64_t off;           // offset of the read within this extent's data
    uint64_t ed_size;       // EXTENT_DATA2 size
    uint64_t ed_offset;     // EXTENT_DATA2 offset
    uint64_t ed_num_bytes;  // EXTENT_DATA2 num_bytes
} read_part_extent;

typedef struct {
/* (fields of read_part, whose typedef struct { opener is above) */
    LIST_ENTRY list_entry;
    uint64_t addr;              // disk address the read starts at (sector-aligned)
    chunk* c;                   // chunk containing addr
    uint32_t read;              // bytes of file data this part contributes
    uint32_t to_read;           // bytes to read from disk (sector-aligned)
    void* csum;
    bool csum_free;             // true if csum was allocated for this part
    uint8_t* buf;               // destination buffer for the disk read
    bool buf_free;              // true if buf is a pool allocation we must free
    uint32_t bumpoff;           // offset into buf where wanted data begins,
                                // when addr had to be rounded down to a sector
    bool mdl;                   // true if reading via the IRP's MDL
    void* data;
    uint8_t compression;        // BTRFS_COMPRESSION_* for this extent run
    unsigned int num_extents;   // number of entries in extents[]
    read_part_extent extents[1];    // trailing variable-length array,
                                    // sized by num_extents
} read_part;

/* A queued decompression job and where its output goes.
 * NOTE(review): cj is presumably a work item handed to the calc-thread
 * machinery — confirm against calc_job's definition/usage. */
typedef struct {
    LIST_ENTRY list_entry;
    calc_job* cj;
    void* decomp;           // decompression destination buffer
    void* data;             // compressed source data
    unsigned int offset;
    size_t length;
} comp_calc_job;

/* Reads length bytes of file data starting at byte offset start into data,
 * walking fcb->extents: gaps between extents are zero-filled, inline
 * extents are copied (decompressing zlib/lzo/zstd if needed), and regular
 * extents are gathered into read_parts for disk reads. Bytes read are
 * reported via pbr (if non-NULL).
 *
 * Returns STATUS_END_OF_FILE if start is at or beyond the file size. */
__attribute__((nonnull(1, 2)))
NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
    NTSTATUS Status;
    uint32_t bytes_read = 0;
    uint64_t last_end;
    LIST_ENTRY* le;
    POOL_TYPE pool_type;
    LIST_ENTRY read_parts, calc_jobs;

    TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);

    if (pbr)
        *pbr = 0;

    if (start >= fcb->inode_item.st_size) {
        WARN("Tried to read beyond end of file\n");
        return STATUS_END_OF_FILE;
    }

    InitializeListHead(&read_parts);
    InitializeListHead(&calc_jobs);

    // paging-file reads must use nonpaged pool
    pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ?
NonPagedPool : PagedPool; 2818 2819 le = fcb->extents.Flink; 2820 2821 last_end = start; 2822 2823 while (le != &fcb->extents) { 2824 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 2825 2826 if (!ext->ignore) { 2827 EXTENT_DATA* ed = &ext->extent_data; 2828 uint64_t len; 2829 2830 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) 2831 len = ((EXTENT_DATA2*)ed->data)->num_bytes; 2832 else 2833 len = ed->decoded_size; 2834 2835 if (ext->offset + len <= start) { 2836 last_end = ext->offset + len; 2837 goto nextitem; 2838 } 2839 2840 if (ext->offset > last_end && ext->offset > start + bytes_read) { 2841 uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end)); 2842 2843 RtlZeroMemory(data + bytes_read, read); 2844 bytes_read += read; 2845 length -= read; 2846 } 2847 2848 if (length == 0 || ext->offset > start + bytes_read + length) 2849 break; 2850 2851 if (ed->encryption != BTRFS_ENCRYPTION_NONE) { 2852 WARN("Encryption not supported\n"); 2853 Status = STATUS_NOT_IMPLEMENTED; 2854 goto exit; 2855 } 2856 2857 if (ed->encoding != BTRFS_ENCODING_NONE) { 2858 WARN("Other encodings not supported\n"); 2859 Status = STATUS_NOT_IMPLEMENTED; 2860 goto exit; 2861 } 2862 2863 switch (ed->type) { 2864 case EXTENT_TYPE_INLINE: 2865 { 2866 uint64_t off = start + bytes_read - ext->offset; 2867 uint32_t read; 2868 2869 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2870 read = (uint32_t)min(min(len, ext->datalen) - off, length); 2871 2872 RtlCopyMemory(data + bytes_read, &ed->data[off], read); 2873 } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) { 2874 uint8_t* decomp; 2875 bool decomp_alloc; 2876 uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]); 2877 2878 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) { 2879 ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size); 2880 Status = 
STATUS_INTERNAL_ERROR; 2881 goto exit; 2882 } 2883 2884 read = (uint32_t)min(ed->decoded_size - off, length); 2885 2886 if (off > 0) { 2887 decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG); 2888 if (!decomp) { 2889 ERR("out of memory\n"); 2890 Status = STATUS_INSUFFICIENT_RESOURCES; 2891 goto exit; 2892 } 2893 2894 decomp_alloc = true; 2895 } else { 2896 decomp = data + bytes_read; 2897 decomp_alloc = false; 2898 } 2899 2900 if (ed->compression == BTRFS_COMPRESSION_ZLIB) { 2901 Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off)); 2902 if (!NT_SUCCESS(Status)) { 2903 ERR("zlib_decompress returned %08lx\n", Status); 2904 if (decomp_alloc) ExFreePool(decomp); 2905 goto exit; 2906 } 2907 } else if (ed->compression == BTRFS_COMPRESSION_LZO) { 2908 if (inlen < sizeof(uint32_t)) { 2909 ERR("extent data was truncated\n"); 2910 Status = STATUS_INTERNAL_ERROR; 2911 if (decomp_alloc) ExFreePool(decomp); 2912 goto exit; 2913 } else 2914 inlen -= sizeof(uint32_t); 2915 2916 Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t)); 2917 if (!NT_SUCCESS(Status)) { 2918 ERR("lzo_decompress returned %08lx\n", Status); 2919 if (decomp_alloc) ExFreePool(decomp); 2920 goto exit; 2921 } 2922 } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) { 2923 Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off)); 2924 if (!NT_SUCCESS(Status)) { 2925 ERR("zstd_decompress returned %08lx\n", Status); 2926 if (decomp_alloc) ExFreePool(decomp); 2927 goto exit; 2928 } 2929 } 2930 2931 if (decomp_alloc) { 2932 RtlCopyMemory(data + bytes_read, decomp + off, read); 2933 ExFreePool(decomp); 2934 } 2935 } else { 2936 ERR("unhandled compression type %x\n", ed->compression); 2937 Status = STATUS_NOT_IMPLEMENTED; 2938 goto exit; 2939 } 2940 2941 bytes_read += read; 2942 length -= read; 2943 2944 break; 2945 } 2946 2947 case EXTENT_TYPE_REGULAR: 2948 { 2949 EXTENT_DATA2* ed2 = 
(EXTENT_DATA2*)ed->data; 2950 read_part* rp; 2951 2952 rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG); 2953 if (!rp) { 2954 ERR("out of memory\n"); 2955 Status = STATUS_INSUFFICIENT_RESOURCES; 2956 goto exit; 2957 } 2958 2959 rp->mdl = (Irp && Irp->MdlAddress) ? true : false; 2960 rp->extents[0].off = start + bytes_read - ext->offset; 2961 rp->bumpoff = 0; 2962 rp->num_extents = 1; 2963 rp->csum_free = false; 2964 2965 rp->read = (uint32_t)(len - rp->extents[0].off); 2966 if (rp->read > length) rp->read = (uint32_t)length; 2967 2968 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2969 rp->addr = ed2->address + ed2->offset + rp->extents[0].off; 2970 rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size); 2971 2972 if (rp->addr & (fcb->Vcb->superblock.sector_size - 1)) { 2973 rp->bumpoff = rp->addr & (fcb->Vcb->superblock.sector_size - 1); 2974 rp->addr -= rp->bumpoff; 2975 rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size); 2976 } 2977 } else { 2978 rp->addr = ed2->address; 2979 rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size); 2980 } 2981 2982 if (ed->compression == BTRFS_COMPRESSION_NONE && (start & (fcb->Vcb->superblock.sector_size - 1)) == 0 && 2983 (length & (fcb->Vcb->superblock.sector_size - 1)) == 0) { 2984 rp->buf = data + bytes_read; 2985 rp->buf_free = false; 2986 } else { 2987 rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG); 2988 rp->buf_free = true; 2989 2990 if (!rp->buf) { 2991 ERR("out of memory\n"); 2992 Status = STATUS_INSUFFICIENT_RESOURCES; 2993 ExFreePool(rp); 2994 goto exit; 2995 } 2996 2997 rp->mdl = false; 2998 } 2999 3000 rp->c = get_chunk_from_address(fcb->Vcb, rp->addr); 3001 3002 if (!rp->c) { 3003 ERR("get_chunk_from_address(%I64x) failed\n", rp->addr); 3004 3005 if (rp->buf_free) 3006 ExFreePool(rp->buf); 3007 3008 ExFreePool(rp); 3009 3010 Status = STATUS_INTERNAL_ERROR; 3011 goto exit; 3012 } 
3013 3014 if (ext->csum) { 3015 if (ed->compression == BTRFS_COMPRESSION_NONE) { 3016 rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off >> fcb->Vcb->sector_shift)); 3017 } else 3018 rp->csum = ext->csum; 3019 } else 3020 rp->csum = NULL; 3021 3022 rp->data = data + bytes_read; 3023 rp->compression = ed->compression; 3024 rp->extents[0].ed_offset = ed2->offset; 3025 rp->extents[0].ed_size = ed2->size; 3026 rp->extents[0].ed_num_bytes = ed2->num_bytes; 3027 3028 InsertTailList(&read_parts, &rp->list_entry); 3029 3030 bytes_read += rp->read; 3031 length -= rp->read; 3032 3033 break; 3034 } 3035 3036 case EXTENT_TYPE_PREALLOC: 3037 { 3038 uint64_t off = start + bytes_read - ext->offset; 3039 uint32_t read = (uint32_t)(len - off); 3040 3041 if (read > length) read = (uint32_t)length; 3042 3043 RtlZeroMemory(data + bytes_read, read); 3044 3045 bytes_read += read; 3046 length -= read; 3047 3048 break; 3049 } 3050 3051 default: 3052 WARN("Unsupported extent data type %u\n", ed->type); 3053 Status = STATUS_NOT_IMPLEMENTED; 3054 goto exit; 3055 } 3056 3057 last_end = ext->offset + len; 3058 3059 if (length == 0) 3060 break; 3061 } 3062 3063 nextitem: 3064 le = le->Flink; 3065 } 3066 3067 if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list 3068 read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry); 3069 3070 le = read_parts.Flink->Flink; 3071 while (le != &read_parts) { 3072 LIST_ENTRY* le2 = le->Flink; 3073 read_part* rp = CONTAINING_RECORD(le, read_part, list_entry); 3074 3075 // merge together runs 3076 if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read && 3077 rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) { 3078 read_part* rp2; 3079 3080 rp2 = ExAllocatePoolWithTag(pool_type, 
offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG); 3081 3082 rp2->addr = last_rp->addr; 3083 rp2->c = last_rp->c; 3084 rp2->read = last_rp->read + rp->read; 3085 rp2->to_read = last_rp->to_read + rp->to_read; 3086 rp2->csum_free = false; 3087 3088 if (last_rp->csum) { 3089 uint32_t sectors = (last_rp->to_read + rp->to_read) >> fcb->Vcb->sector_shift; 3090 3091 rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG); 3092 if (!rp2->csum) { 3093 ERR("out of memory\n"); 3094 ExFreePool(rp2); 3095 Status = STATUS_INSUFFICIENT_RESOURCES; 3096 goto exit; 3097 } 3098 3099 RtlCopyMemory(rp2->csum, last_rp->csum, (last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift); 3100 RtlCopyMemory((uint8_t*)rp2->csum + ((last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), rp->csum, 3101 (rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift); 3102 3103 rp2->csum_free = true; 3104 } else 3105 rp2->csum = NULL; 3106 3107 rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG); 3108 if (!rp2->buf) { 3109 ERR("out of memory\n"); 3110 3111 if (rp2->csum) 3112 ExFreePool(rp2->csum); 3113 3114 ExFreePool(rp2); 3115 Status = STATUS_INSUFFICIENT_RESOURCES; 3116 goto exit; 3117 } 3118 3119 rp2->buf_free = true; 3120 rp2->bumpoff = 0; 3121 rp2->mdl = false; 3122 rp2->data = last_rp->data; 3123 rp2->compression = last_rp->compression; 3124 rp2->num_extents = last_rp->num_extents + 1; 3125 3126 RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent)); 3127 RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent)); 3128 3129 InsertHeadList(le->Blink, &rp2->list_entry); 3130 3131 if (rp->buf_free) 3132 ExFreePool(rp->buf); 3133 3134 if (rp->csum_free) 3135 ExFreePool(rp->csum); 3136 3137 RemoveEntryList(&rp->list_entry); 3138 3139 ExFreePool(rp); 3140 3141 if (last_rp->buf_free) 3142 
ExFreePool(last_rp->buf); 3143 3144 if (last_rp->csum_free) 3145 ExFreePool(last_rp->csum); 3146 3147 RemoveEntryList(&last_rp->list_entry); 3148 3149 ExFreePool(last_rp); 3150 3151 last_rp = rp2; 3152 } else 3153 last_rp = rp; 3154 3155 le = le2; 3156 } 3157 } 3158 3159 le = read_parts.Flink; 3160 while (le != &read_parts) { 3161 read_part* rp = CONTAINING_RECORD(le, read_part, list_entry); 3162 3163 Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl, 3164 fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 3165 if (!NT_SUCCESS(Status)) { 3166 ERR("read_data returned %08lx\n", Status); 3167 goto exit; 3168 } 3169 3170 if (rp->compression == BTRFS_COMPRESSION_NONE) { 3171 if (rp->buf_free) 3172 RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read); 3173 } else { 3174 uint8_t* buf = rp->buf; 3175 3176 for (unsigned int i = 0; i < rp->num_extents; i++) { 3177 uint8_t *decomp = NULL, *buf2; 3178 ULONG outlen, inlen, off2; 3179 uint32_t inpageoff = 0; 3180 comp_calc_job* ccj; 3181 3182 off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off); 3183 buf2 = buf; 3184 inlen = (ULONG)rp->extents[i].ed_size; 3185 3186 if (rp->compression == BTRFS_COMPRESSION_LZO) { 3187 ULONG inoff = sizeof(uint32_t); 3188 3189 inlen -= sizeof(uint32_t); 3190 3191 // If reading a few sectors in, skip to the interesting bit 3192 while (off2 > LZO_PAGE_SIZE) { 3193 uint32_t partlen; 3194 3195 if (inlen < sizeof(uint32_t)) 3196 break; 3197 3198 partlen = *(uint32_t*)(buf2 + inoff); 3199 3200 if (partlen < inlen) { 3201 off2 -= LZO_PAGE_SIZE; 3202 inoff += partlen + sizeof(uint32_t); 3203 inlen -= partlen + sizeof(uint32_t); 3204 3205 if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t)) 3206 inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE; 3207 } else 3208 break; 3209 } 3210 3211 buf2 = &buf2[inoff]; 3212 inpageoff = inoff % LZO_PAGE_SIZE; 3213 } 3214 3215 /* Previous versions 
of this code decompressed directly into the destination buffer, 3216 * but unfortunately that can't be relied on - Windows likes to use dummy pages sometimes 3217 * when mmap-ing, which breaks the backtracking used by e.g. zstd. */ 3218 3219 if (off2 != 0) 3220 outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off)); 3221 else 3222 outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off)); 3223 3224 decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG); 3225 if (!decomp) { 3226 ERR("out of memory\n"); 3227 Status = STATUS_INSUFFICIENT_RESOURCES; 3228 goto exit; 3229 } 3230 3231 ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG); 3232 if (!ccj) { 3233 ERR("out of memory\n"); 3234 3235 ExFreePool(decomp); 3236 3237 Status = STATUS_INSUFFICIENT_RESOURCES; 3238 goto exit; 3239 } 3240 3241 ccj->data = rp->data; 3242 ccj->decomp = decomp; 3243 3244 ccj->offset = off2; 3245 ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off); 3246 3247 Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp, outlen, 3248 inpageoff, &ccj->cj); 3249 if (!NT_SUCCESS(Status)) { 3250 ERR("add_calc_job_decomp returned %08lx\n", Status); 3251 3252 ExFreePool(decomp); 3253 ExFreePool(ccj); 3254 3255 goto exit; 3256 } 3257 3258 InsertTailList(&calc_jobs, &ccj->list_entry); 3259 3260 buf += rp->extents[i].ed_size; 3261 rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off; 3262 rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off); 3263 } 3264 } 3265 3266 le = le->Flink; 3267 } 3268 3269 if (length > 0 && start + bytes_read < fcb->inode_item.st_size) { 3270 uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length); 3271 3272 RtlZeroMemory(data + bytes_read, read); 3273 3274 bytes_read += read; 3275 length -= read; 3276 } 3277 3278 Status = STATUS_SUCCESS; 3279 3280 
while (!IsListEmpty(&calc_jobs)) { 3281 comp_calc_job* ccj = CONTAINING_RECORD(RemoveTailList(&calc_jobs), comp_calc_job, list_entry); 3282 3283 calc_thread_main(fcb->Vcb, ccj->cj); 3284 3285 KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL); 3286 3287 if (!NT_SUCCESS(ccj->cj->Status)) 3288 Status = ccj->cj->Status; 3289 3290 RtlCopyMemory(ccj->data, (uint8_t*)ccj->decomp + ccj->offset, ccj->length); 3291 ExFreePool(ccj->decomp); 3292 3293 ExFreePool(ccj); 3294 } 3295 3296 if (pbr) 3297 *pbr = bytes_read; 3298 3299 exit: 3300 while (!IsListEmpty(&read_parts)) { 3301 read_part* rp = CONTAINING_RECORD(RemoveHeadList(&read_parts), read_part, list_entry); 3302 3303 if (rp->buf_free) 3304 ExFreePool(rp->buf); 3305 3306 if (rp->csum_free) 3307 ExFreePool(rp->csum); 3308 3309 ExFreePool(rp); 3310 } 3311 3312 while (!IsListEmpty(&calc_jobs)) { 3313 comp_calc_job* ccj = CONTAINING_RECORD(RemoveHeadList(&calc_jobs), comp_calc_job, list_entry); 3314 3315 KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL); 3316 3317 if (ccj->decomp) 3318 ExFreePool(ccj->decomp); 3319 3320 ExFreePool(ccj->cj); 3321 3322 ExFreePool(ccj); 3323 } 3324 3325 return Status; 3326 } 3327 3328 NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) { 3329 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3330 PFILE_OBJECT FileObject = IrpSp->FileObject; 3331 fcb* fcb = FileObject->FsContext; 3332 uint8_t* data = NULL; 3333 ULONG length = IrpSp->Parameters.Read.Length, addon = 0; 3334 uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart; 3335 3336 *bytes_read = 0; 3337 3338 if (!fcb || !fcb->Vcb || !fcb->subvol) 3339 return STATUS_INTERNAL_ERROR; 3340 3341 TRACE("fcb = %p\n", fcb); 3342 TRACE("offset = %I64x, length = %lx\n", start, length); 3343 TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? 
"true" : "false"); 3344 3345 if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY) 3346 return STATUS_INVALID_DEVICE_REQUEST; 3347 3348 if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) { 3349 WARN("tried to read locked region\n"); 3350 return STATUS_FILE_LOCK_CONFLICT; 3351 } 3352 3353 if (length == 0) { 3354 TRACE("tried to read zero bytes\n"); 3355 return STATUS_SUCCESS; 3356 } 3357 3358 if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) { 3359 TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart); 3360 return STATUS_END_OF_FILE; 3361 } 3362 3363 TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size); 3364 3365 if (!(Irp->Flags & IRP_NOCACHE) && IrpSp->MinorFunction & IRP_MN_MDL) { 3366 NTSTATUS Status = STATUS_SUCCESS; 3367 3368 _SEH2_TRY { 3369 if (!FileObject->PrivateCacheMap) { 3370 CC_FILE_SIZES ccfs; 3371 3372 ccfs.AllocationSize = fcb->Header.AllocationSize; 3373 ccfs.FileSize = fcb->Header.FileSize; 3374 ccfs.ValidDataLength = fcb->Header.ValidDataLength; 3375 3376 init_file_cache(FileObject, &ccfs); 3377 } 3378 3379 CcMdlRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus); 3380 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 3381 Status = _SEH2_GetExceptionCode(); 3382 } _SEH2_END; 3383 3384 if (NT_SUCCESS(Status)) { 3385 Status = Irp->IoStatus.Status; 3386 Irp->IoStatus.Information += addon; 3387 *bytes_read = (ULONG)Irp->IoStatus.Information; 3388 } else 3389 ERR("EXCEPTION - %08lx\n", Status); 3390 3391 return Status; 3392 } 3393 3394 data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? 
HighPagePriority : NormalPagePriority); 3395 3396 if (Irp->MdlAddress && !data) { 3397 ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); 3398 return STATUS_INSUFFICIENT_RESOURCES; 3399 } 3400 3401 if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) { 3402 length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3403 RtlZeroMemory(data, length); 3404 Irp->IoStatus.Information = *bytes_read = length; 3405 return STATUS_SUCCESS; 3406 } 3407 3408 if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) { 3409 addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3410 RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon); 3411 length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start); 3412 } 3413 3414 if (!(Irp->Flags & IRP_NOCACHE)) { 3415 NTSTATUS Status = STATUS_SUCCESS; 3416 3417 _SEH2_TRY { 3418 if (!FileObject->PrivateCacheMap) { 3419 CC_FILE_SIZES ccfs; 3420 3421 ccfs.AllocationSize = fcb->Header.AllocationSize; 3422 ccfs.FileSize = fcb->Header.FileSize; 3423 ccfs.ValidDataLength = fcb->Header.ValidDataLength; 3424 3425 init_file_cache(FileObject, &ccfs); 3426 } 3427 3428 if (fCcCopyReadEx) { 3429 TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, 3430 length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread); 3431 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart); 3432 if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) { 3433 TRACE("CcCopyReadEx could not wait\n"); 3434 3435 IoMarkIrpPending(Irp); 3436 return STATUS_PENDING; 3437 } 3438 TRACE("CcCopyReadEx finished\n"); 3439 } else { 3440 TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", 
FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus); 3441 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart); 3442 if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) { 3443 TRACE("CcCopyRead could not wait\n"); 3444 3445 IoMarkIrpPending(Irp); 3446 return STATUS_PENDING; 3447 } 3448 TRACE("CcCopyRead finished\n"); 3449 } 3450 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 3451 Status = _SEH2_GetExceptionCode(); 3452 } _SEH2_END; 3453 3454 if (NT_SUCCESS(Status)) { 3455 Status = Irp->IoStatus.Status; 3456 Irp->IoStatus.Information += addon; 3457 *bytes_read = (ULONG)Irp->IoStatus.Information; 3458 } else 3459 ERR("EXCEPTION - %08lx\n", Status); 3460 3461 return Status; 3462 } else { 3463 NTSTATUS Status; 3464 3465 if (!wait) { 3466 IoMarkIrpPending(Irp); 3467 return STATUS_PENDING; 3468 } 3469 3470 if (fcb->ads) { 3471 Status = read_stream(fcb, data, start, length, bytes_read); 3472 3473 if (!NT_SUCCESS(Status)) 3474 ERR("read_stream returned %08lx\n", Status); 3475 } else { 3476 Status = read_file(fcb, data, start, length, bytes_read, Irp); 3477 3478 if (!NT_SUCCESS(Status)) 3479 ERR("read_file returned %08lx\n", Status); 3480 } 3481 3482 *bytes_read += addon; 3483 TRACE("read %lu bytes\n", *bytes_read); 3484 3485 Irp->IoStatus.Information = *bytes_read; 3486 3487 if (diskacc && Status != STATUS_PENDING) { 3488 PETHREAD thread = NULL; 3489 3490 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) 3491 thread = Irp->Tail.Overlay.Thread; 3492 else if (!IoIsSystemThread(PsGetCurrentThread())) 3493 thread = PsGetCurrentThread(); 3494 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) 3495 thread = PsGetCurrentThread(); 3496 3497 if (thread) 3498 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); 3499 } 3500 3501 
return Status; 3502 } 3503 } 3504 3505 _Dispatch_type_(IRP_MJ_READ) 3506 _Function_class_(DRIVER_DISPATCH) 3507 NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { 3508 device_extension* Vcb = DeviceObject->DeviceExtension; 3509 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3510 PFILE_OBJECT FileObject = IrpSp->FileObject; 3511 ULONG bytes_read = 0; 3512 NTSTATUS Status; 3513 bool top_level; 3514 fcb* fcb; 3515 ccb* ccb; 3516 bool acquired_fcb_lock = false, wait; 3517 3518 FsRtlEnterFileSystem(); 3519 3520 top_level = is_top_level(Irp); 3521 3522 TRACE("read\n"); 3523 3524 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { 3525 Status = vol_read(DeviceObject, Irp); 3526 goto exit2; 3527 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { 3528 Status = STATUS_INVALID_PARAMETER; 3529 goto end; 3530 } 3531 3532 Irp->IoStatus.Information = 0; 3533 3534 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) { 3535 CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress); 3536 3537 Irp->MdlAddress = NULL; 3538 Status = STATUS_SUCCESS; 3539 3540 goto exit; 3541 } 3542 3543 fcb = FileObject->FsContext; 3544 3545 if (!fcb) { 3546 ERR("fcb was NULL\n"); 3547 Status = STATUS_INVALID_PARAMETER; 3548 goto exit; 3549 } 3550 3551 ccb = FileObject->FsContext2; 3552 3553 if (!ccb) { 3554 ERR("ccb was NULL\n"); 3555 Status = STATUS_INVALID_PARAMETER; 3556 goto exit; 3557 } 3558 3559 if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) { 3560 WARN("insufficient privileges\n"); 3561 Status = STATUS_ACCESS_DENIED; 3562 goto exit; 3563 } 3564 3565 if (fcb == Vcb->volume_fcb) { 3566 TRACE("reading volume FCB\n"); 3567 3568 IoSkipCurrentIrpStackLocation(Irp); 3569 3570 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); 3571 3572 goto exit2; 3573 } 3574 3575 if (!(Irp->Flags & IRP_PAGING_IO)) 3576 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL); 3577 3578 wait = IoIsOperationSynchronous(Irp); 3579 3580 // Don't offload jobs when doing paging IO - otherwise 
this can lead to 3581 // deadlocks in CcCopyRead. 3582 if (Irp->Flags & IRP_PAGING_IO) 3583 wait = true; 3584 3585 if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) { 3586 IO_STATUS_BLOCK iosb; 3587 3588 CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb); 3589 if (!NT_SUCCESS(iosb.Status)) { 3590 ERR("CcFlushCache returned %08lx\n", iosb.Status); 3591 return iosb.Status; 3592 } 3593 } 3594 3595 if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { 3596 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { 3597 Status = STATUS_PENDING; 3598 IoMarkIrpPending(Irp); 3599 goto exit; 3600 } 3601 3602 acquired_fcb_lock = true; 3603 } 3604 3605 Status = do_read(Irp, wait, &bytes_read); 3606 3607 if (acquired_fcb_lock) 3608 ExReleaseResourceLite(fcb->Header.Resource); 3609 3610 exit: 3611 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO)) 3612 FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0); 3613 3614 end: 3615 Irp->IoStatus.Status = Status; 3616 3617 TRACE("Irp->IoStatus.Status = %08lx\n", Irp->IoStatus.Status); 3618 TRACE("Irp->IoStatus.Information = %Iu\n", Irp->IoStatus.Information); 3619 TRACE("returning %08lx\n", Status); 3620 3621 if (Status != STATUS_PENDING) 3622 IoCompleteRequest(Irp, IO_NO_INCREMENT); 3623 else { 3624 if (!add_thread_job(Vcb, Irp)) 3625 Status = do_read_job(Irp); 3626 } 3627 3628 exit2: 3629 if (top_level) 3630 IoSetTopLevelIrp(NULL); 3631 3632 FsRtlExitFileSystem(); 3633 3634 return Status; 3635 } 3636