/* Copyright (c) Mark Harmstone 2016-17
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */

#include "btrfs_drv.h"
#include "xxhash.h"
#include "crc32c.h"

// Per-stripe outcome of an asynchronous read, set by read_data_completion.
enum read_data_status {
    ReadDataStatus_Pending,
    ReadDataStatus_Success,
    ReadDataStatus_Error,
    ReadDataStatus_MissingDevice,
    ReadDataStatus_Skip
};

struct read_data_context;

// One in-flight read against a single chunk stripe (one device).
typedef struct {
    struct read_data_context* context; // back-pointer to the owning read
    uint16_t stripenum;
    bool rewrite;
    PIRP Irp;
    IO_STATUS_BLOCK iosb;              // copied from the IRP on completion
    enum read_data_status status;
    PMDL mdl;
    uint64_t stripestart;              // byte offset of this read within the stripe
    uint64_t stripeend;
} read_data_stripe;

// State shared by all stripes of one logical read; the issuer waits on Event
// until stripes_left reaches zero.
typedef struct {
    KEVENT Event;
    NTSTATUS Status;
    chunk* c;
    uint64_t address;                  // logical btrfs address being read
    uint32_t buflen;
    LONG num_stripes, stripes_left;    // stripes_left decremented atomically per completion
    uint64_t type;                     // chunk type (DUP/RAID0/RAID10/RAID5/RAID6...)
    uint32_t sector_size;
    uint16_t firstoff, startoffstripe, sectors_per_stripe;
    void* csum;                        // expected checksums, NULL if none (e.g. nodatasum)
    bool tree;                         // true when reading a metadata tree block
    read_data_stripe* stripes;
    uint8_t* va;
} read_data_context;

extern bool diskacc;
extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
extern tCcCopyReadEx fCcCopyReadEx;
extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;

#define LZO_PAGE_SIZE 4096

// IRP completion routine for a stripe read: record the IO status, mark the
// stripe Success/Error, and signal the context event once the last stripe
// finishes. Returns STATUS_MORE_PROCESSING_REQUIRED so the IRP stays with us.
_Function_class_(IO_COMPLETION_ROUTINE)
static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
    read_data_stripe* stripe = conptr;
    read_data_context* context = (read_data_context*)stripe->context;

    UNUSED(DeviceObject);

    stripe->iosb = Irp->IoStatus;

    if (NT_SUCCESS(Irp->IoStatus.Status))
        stripe->status = ReadDataStatus_Success;
    else
        stripe->status = ReadDataStatus_Error;

    // last stripe to complete wakes the waiting thread
    if (InterlockedDecrement(&context->stripes_left) == 0)
        KeSetEvent(&context->Event, 0, false);

    return STATUS_MORE_PROCESSING_REQUIRED;
}

// Verify `sectors` consecutive data sectors in `data` against the expected
// checksum array `csum` (Vcb->csum_size bytes per sector).
// Returns STATUS_CRC_ERROR on mismatch, STATUS_SUCCESS otherwise.
NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, void* csum) {
    void* csum2;

    csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
    if (!csum2) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // compute checksums for all sectors in one pass (may be parallelised)
    do_calc_job(Vcb, data, sectors, csum2);

    if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
        ExFreePool(csum2);
        return STATUS_CRC_ERROR;
    }

    ExFreePool(csum2);

    return STATUS_SUCCESS;
}

// Compute the checksum of a tree block into `csum`, using the csum type from
// the superblock. The hash covers everything after the stored csum field,
// i.e. from fs_uuid to the end of the node.
void get_tree_checksum(device_extension* Vcb, tree_header* th, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *(uint32_t*)csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
            break;

        case CSUM_TYPE_XXHASH:
            *(uint64_t*)csum = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
            break;

        case CSUM_TYPE_SHA256:
            calc_sha256(csum, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
            break;

        case CSUM_TYPE_BLAKE2:
            blake2b(csum, BLAKE2_HASH_SIZE, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
            break;
    }
}

// Check a tree block's stored checksum against a freshly computed one.
// Returns true if they match, false (after logging) otherwise.
bool check_tree_checksum(device_extension* Vcb, tree_header* th) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            if (crc32 == *((uint32_t*)th->csum))
                return true;

            WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));

            break;
        }

        case CSUM_TYPE_XXHASH: {
            uint64_t hash = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);

            if (hash == *((uint64_t*)th->csum))
                return true;

            WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));

            break;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t hash[SHA256_HASH_SIZE];

            calc_sha256(hash, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            // NOTE(review): comparing against th itself — assumes csum is the
            // first member of tree_header, so &th->csum == th; confirm layout.
            if (RtlCompareMemory(hash, th, SHA256_HASH_SIZE) == SHA256_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t hash[BLAKE2_HASH_SIZE];

            blake2b(hash, sizeof(hash), (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            // NOTE(review): same layout assumption as the SHA256 case above.
            if (RtlCompareMemory(hash, th, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }
    }

    return false;
}

// Compute the checksum of one data sector into `csum`.
void get_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
            break;

        case CSUM_TYPE_XXHASH:
            *(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
            break;

        case CSUM_TYPE_SHA256:
            calc_sha256(csum, buf, Vcb->superblock.sector_size);
            break;

        case CSUM_TYPE_BLAKE2:
            blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
            break;
    }
}

// Compare one data sector against its expected checksum; true on match.
bool check_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);

            return *(uint32_t*)csum == crc32;
        }

        case CSUM_TYPE_XXHASH: {
            uint64_t hash = XXH64(buf, Vcb->superblock.sector_size, 0);

            return *(uint64_t*)csum == hash;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t hash[SHA256_HASH_SIZE];

            calc_sha256(hash, buf, Vcb->superblock.sector_size);

            return RtlCompareMemory(hash, csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t hash[BLAKE2_HASH_SIZE];

            blake2b(hash, sizeof(hash), buf, Vcb->superblock.sector_size);

            return RtlCompareMemory(hash, csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE;
        }
    }

    // unknown csum type
    return false;
}

// Post-read verification for DUP/RAID1-style chunks: pick the first stripe
// that read successfully, verify its checksum (tree header or per-sector),
// and on error try the mirror copies, rewriting good data over the bad copy
// when the volume is writable.
static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
                              device** devices, uint64_t generation) {
    ULONG i;
    bool checksum_error = false;
    uint16_t j, stripe = 0;
    NTSTATUS Status;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];

    // find the first stripe that succeeded; any hard I/O error aborts the read
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            break;
        }
    }

    if (context->stripes[stripe].status != ReadDataStatus_Success)
        return STATUS_INTERNAL_ERROR;

    if (context->tree) {
        tree_header* th = (tree_header*)buf;

        // metadata: verify address, checksum, and (if requested) generation
        if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && th->generation != generation) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        // data: verify every sector actually read against the expected csums
        Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    }

    if (!checksum_error)
        return STATUS_SUCCESS;

    // only one copy — nothing to recover from
    if (ci->num_stripes == 1)
        return STATUS_CRC_ERROR;

    if (context->tree) {
        tree_header* t2;
        bool recovered = false;

        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // try each mirror in turn until one yields a valid tree block
        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe && devices[j] && devices[j]->devobj) {
                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
                                        Vcb->superblock.node_size, (uint8_t*)t2, false);
                if (!NT_SUCCESS(Status)) {
                    WARN("sync_read_phys returned %08lx\n", Status);
                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                } else {
                    bool checksum_error = !check_tree_checksum(Vcb, t2);

                    if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
                        RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
                        ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
                        recovered = true;

                        if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
                                                     t2, Vcb->superblock.node_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08lx\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }

                        break;
                    } else if (t2->address != addr || checksum_error)
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                    else
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
        uint8_t* sector;
        void* ptr = context->csum;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // recover each bad sector individually from the mirror copies
        for (i = 0; i < sectors; i++) {
            if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) {
                bool recovered = false;

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe && devices[j] && devices[j]->devobj) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
                                                cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                Vcb->superblock.sector_size, sector, false);
                        if (!NT_SUCCESS(Status)) {
                            WARN("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                        } else {
                            if (check_sector_csum(Vcb, sector, ptr)) {
                                RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
                                ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
                                recovered = true;

                                if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
                                                             cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                             sector, Vcb->superblock.sector_size);
                                    if (!NT_SUCCESS(Status)) {
                                        WARN("write_data_phys returned %08lx\n", Status);
                                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                    }
                                }

                                break;
                            } else
                                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }

            ptr = (uint8_t*)ptr + Vcb->csum_size;
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}

// Post-read verification for RAID0 chunks. No redundancy exists, so a
// checksum failure can only be reported, never repaired.
static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
                                CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
    uint64_t i;

    for (i = 0; i < ci->num_stripes; i++) {
        if (context->stripes[i].status == ReadDataStatus_Error) {
            WARN("stripe %I64u returned error %08lx\n", i, context->stripes[i].iosb.Status);
            log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[i].iosb.Status;
        }
    }

    if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
        tree_header* th = (tree_header*)buf;
        bool checksum_error = !check_tree_checksum(Vcb, th);

        if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
            uint64_t off;
            uint16_t stripe;

            // map the logical address back to a stripe so the right device is blamed
            get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);

            ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

            if (checksum_error) {
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (addr != th->address) {
                WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 428 return STATUS_CRC_ERROR; 429 } else if (generation != 0 && generation != th->generation) { 430 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation); 431 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 432 return STATUS_CRC_ERROR; 433 } 434 } 435 } else if (context->csum) { 436 NTSTATUS Status; 437 438 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 439 440 if (Status == STATUS_CRC_ERROR) { 441 void* ptr = context->csum; 442 443 for (i = 0; i < length / Vcb->superblock.sector_size; i++) { 444 if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) { 445 uint64_t off; 446 uint16_t stripe; 447 448 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe); 449 450 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id); 451 452 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 453 454 return Status; 455 } 456 457 ptr = (uint8_t*)ptr + Vcb->csum_size; 458 } 459 460 return Status; 461 } else if (!NT_SUCCESS(Status)) { 462 ERR("check_csum returned %08lx\n", Status); 463 return Status; 464 } 465 } 466 467 return STATUS_SUCCESS; 468 } 469 470 static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, 471 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) { 472 uint64_t i; 473 uint16_t j, stripe; 474 NTSTATUS Status; 475 bool checksum_error = false; 476 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 477 478 for (j = 0; j < ci->num_stripes; j++) { 479 if (context->stripes[j].status == ReadDataStatus_Error) { 480 WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status); 481 log_device_error(Vcb, devices[j], 
BTRFS_DEV_STAT_READ_ERRORS); 482 return context->stripes[j].iosb.Status; 483 } else if (context->stripes[j].status == ReadDataStatus_Success) 484 stripe = j; 485 } 486 487 if (context->tree) { 488 tree_header* th = (tree_header*)buf; 489 490 if (!check_tree_checksum(Vcb, th)) { 491 checksum_error = true; 492 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 493 } else if (addr != th->address) { 494 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr); 495 checksum_error = true; 496 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 497 } else if (generation != 0 && generation != th->generation) { 498 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation); 499 checksum_error = true; 500 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 501 } 502 } else if (context->csum) { 503 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 504 505 if (Status == STATUS_CRC_ERROR) 506 checksum_error = true; 507 else if (!NT_SUCCESS(Status)) { 508 ERR("check_csum returned %08lx\n", Status); 509 return Status; 510 } 511 } 512 513 if (!checksum_error) 514 return STATUS_SUCCESS; 515 516 if (context->tree) { 517 tree_header* t2; 518 uint64_t off; 519 uint16_t badsubstripe = 0; 520 bool recovered = false; 521 522 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 523 if (!t2) { 524 ERR("out of memory\n"); 525 return STATUS_INSUFFICIENT_RESOURCES; 526 } 527 528 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe); 529 530 stripe *= ci->sub_stripes; 531 532 for (j = 0; j < ci->sub_stripes; j++) { 533 if (context->stripes[stripe + j].status == ReadDataStatus_Success) { 534 badsubstripe = j; 535 break; 536 } 537 } 538 539 for (j = 0; j < ci->sub_stripes; j++) { 540 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] 
&& devices[stripe + j]->devobj) { 541 Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off, 542 Vcb->superblock.node_size, (uint8_t*)t2, false); 543 if (!NT_SUCCESS(Status)) { 544 WARN("sync_read_phys returned %08lx\n", Status); 545 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS); 546 } else { 547 bool checksum_error = !check_tree_checksum(Vcb, t2); 548 549 if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) { 550 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 551 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id); 552 recovered = true; 553 554 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad 555 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj, 556 cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size); 557 if (!NT_SUCCESS(Status)) { 558 WARN("write_data_phys returned %08lx\n", Status); 559 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 560 } 561 } 562 563 break; 564 } else if (t2->address != addr || checksum_error) 565 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 566 else 567 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS); 568 } 569 } 570 } 571 572 if (!recovered) { 573 ERR("unrecoverable checksum error at %I64x\n", addr); 574 ExFreePool(t2); 575 return STATUS_CRC_ERROR; 576 } 577 578 ExFreePool(t2); 579 } else { 580 ULONG sectors = length / Vcb->superblock.sector_size; 581 uint8_t* sector; 582 void* ptr = context->csum; 583 584 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 585 if (!sector) { 586 ERR("out of memory\n"); 587 return STATUS_INSUFFICIENT_RESOURCES; 588 } 589 590 for (i = 
0; i < sectors; i++) { 591 if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) { 592 uint64_t off; 593 uint16_t stripe2, badsubstripe = 0; 594 bool recovered = false; 595 596 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 597 ci->num_stripes / ci->sub_stripes, &off, &stripe2); 598 599 stripe2 *= ci->sub_stripes; 600 601 for (j = 0; j < ci->sub_stripes; j++) { 602 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) { 603 badsubstripe = j; 604 break; 605 } 606 } 607 608 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 609 610 for (j = 0; j < ci->sub_stripes; j++) { 611 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) { 612 Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off, 613 Vcb->superblock.sector_size, sector, false); 614 if (!NT_SUCCESS(Status)) { 615 WARN("sync_read_phys returned %08lx\n", Status); 616 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS); 617 } else { 618 if (check_sector_csum(Vcb, sector, ptr)) { 619 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 620 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id); 621 recovered = true; 622 623 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad 624 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj, 625 cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size); 626 if (!NT_SUCCESS(Status)) { 627 WARN("write_data_phys returned %08lx\n", Status); 628 log_device_error(Vcb, devices[stripe2 + badsubstripe], 
BTRFS_DEV_STAT_READ_ERRORS); 629 } 630 } 631 632 break; 633 } else 634 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 635 } 636 } 637 } 638 639 if (!recovered) { 640 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 641 ExFreePool(sector); 642 return STATUS_CRC_ERROR; 643 } 644 } 645 646 ptr = (uint8_t*)ptr + Vcb->csum_size; 647 } 648 649 ExFreePool(sector); 650 } 651 652 return STATUS_SUCCESS; 653 } 654 655 static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci, 656 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) { 657 ULONG i; 658 NTSTATUS Status; 659 bool checksum_error = false; 660 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 661 uint16_t j, stripe; 662 bool no_success = true; 663 664 for (j = 0; j < ci->num_stripes; j++) { 665 if (context->stripes[j].status == ReadDataStatus_Error) { 666 WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status); 667 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 668 return context->stripes[j].iosb.Status; 669 } else if (context->stripes[j].status == ReadDataStatus_Success) { 670 stripe = j; 671 no_success = false; 672 } 673 } 674 675 if (c) { // check partial stripes 676 LIST_ENTRY* le; 677 uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length; 678 679 ExAcquireResourceSharedLite(&c->partial_stripes_lock, true); 680 681 le = c->partial_stripes.Flink; 682 while (le != &c->partial_stripes) { 683 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 684 685 if (ps->address + ps_length > addr && ps->address < addr + length) { 686 ULONG runlength, index; 687 688 runlength = RtlFindFirstRunClear(&ps->bmp, &index); 689 690 while (runlength != 0) { 691 #ifdef __REACTOS__ 692 uint64_t runstart, runend, start, end; 693 #endif 694 if (index >= ps->bmplen) 695 break; 
                    if (index + runlength >= ps->bmplen) {
                        runlength = ps->bmplen - index;

                        if (runlength == 0)
                            break;
                    }

#ifndef __REACTOS__
                    uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
                    uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
                    uint64_t start = max(runstart, addr);
                    uint64_t end = min(runend, addr + length);
#else
                    runstart = ps->address + (index * Vcb->superblock.sector_size);
                    runend = runstart + (runlength * Vcb->superblock.sector_size);
                    start = max(runstart, addr);
                    end = min(runend, addr + length);
#endif

                    // copy the cached (clear-bitmap) range over what was read from disk
                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break;

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        tree_header* th = (tree_header*)buf;

        // metadata: verify address, checksum and generation; only log against a
        // device when we actually know which stripe succeeded and aren't degraded
        if (addr != th->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = true;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    } else if (degraded)
        checksum_error = true; // no csums to check, but degraded reads always need reconstruction

    if (!checksum_error)
        return STATUS_SUCCESS;

    if (context->tree) {
        uint16_t parity;
        uint64_t off;
        bool recovered = false, first = true, failed = false;
        uint8_t* t2;

        // two node-sized buffers: accumulator + scratch for each subsequent read
        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);

        // rotating parity: work out which physical stripe holds parity for this row
        parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

        stripe = (parity + stripe + 1) % ci->num_stripes;

        // XOR together every other stripe (including parity) to rebuild `stripe`
        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe) {
                if (devices[j] && devices[j]->devobj) {
                    if (first) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        first = false;
                    } else {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
                    }
                } else {
                    // a second missing device makes RAID5 reconstruction impossible
                    failed = true;
                    break;
                }
            }
        }

        if (!failed) {
            tree_header* t3 = (tree_header*)t2;

            if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
                RtlCopyMemory(buf, t2, Vcb->superblock.node_size);

                if (!degraded)
                    ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

                recovered = true;

                if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
                    if (!NT_SUCCESS(Status)) {
                        WARN("write_data_phys returned %08lx\n", Status);
                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                    }
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        ULONG sectors = length / Vcb->superblock.sector_size;
        uint8_t* sector;
        void* ptr = context->csum; // NULL when there are no checksums (degraded-only path)

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            uint16_t parity;
            uint64_t off;

            get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
                             ci->num_stripes - 1, &off, &stripe);

            parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

            stripe = (parity + stripe + 1) % ci->num_stripes;

            // reconstruct when the stripe's device is missing, or its csum fails
            if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr))) {
                bool recovered = false, first = true, failed = false;

                if (devices[stripe] && devices[stripe]->devobj)
                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe) {
                        if (devices[j] && devices[j]->devobj) {
                            if (first) {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08lx\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                first = false;
                            } else {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
                                                        sector + Vcb->superblock.sector_size, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08lx\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
                            }
                        } else {
                            failed = true;
                            break;
                        }
                    }
                }

                if (!failed) {
                    // without csums we must accept the XOR-reconstructed sector as-is
                    if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
                        RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);

                        if (!degraded)
                            ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);

                        recovered = true;

                        if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
                                                     sector, Vcb->superblock.sector_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08lx\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }

            if (ptr)
                ptr = (uint8_t*)ptr + Vcb->csum_size;
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}

// Rebuild two missing RAID6 stripes (missing1/missing2) from the remaining
// data plus P (XOR parity) and Q (Reed-Solomon syndrome) stripes.
// `sectors` holds all stripes contiguously; `out` receives the rebuilt data
// (one sector if reconstructing from Q, two if reconstructing from P and Q).
// The arithmetic is GF(2^8) per "The mathematics of RAID-6" (H. P. Anvin).
void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
        uint16_t stripe;

        stripe = num_stripes - 3;

        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        // Horner-style accumulation of the Q syndrome over the surviving data
        do {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        } while (stripe > 0);

        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        if (missing != 0)
            galois_divpower(out, (uint8_t)missing, sector_size);
    } else { // reconstruct from p and q
        uint16_t x, y, stripe;
        uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
        uint32_t j;

        stripe = num_stripes - 3;

        pxy = out + sector_size; // partial P (XOR of surviving data)
        qxy = out;               // partial Q (syndrome of surviving data)

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);

            if (stripe == missing1)
                x = stripe;
            else
                y = stripe;
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        do {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            } else if (stripe == missing1)
                x = stripe;
            else if (stripe == missing2)
                y = stripe;
        } while (stripe > 0);

        // GF(2^8) coefficients for solving the two-unknown linear system
        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        p = sectors + ((num_stripes - 2) * sector_size);
        q = sectors + ((num_stripes - 1) * sector_size);

        for (j = 0; j < sector_size; j++) {
            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

            p++;
            q++;
            pxy++;
            qxy++;
        }

        // second missing stripe = P ^ first recovered stripe ^ remaining data XOR
        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}

// Post-read verification for RAID6 chunks: up to two failed stripes can be
// reconstructed via raid6_recover2. (Definition continues beyond this chunk.)
static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
                                device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
    NTSTATUS Status;
    ULONG i;
    bool checksum_error = false;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
    uint16_t stripe, j;
    bool no_success = true;

    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);

            if (devices[j])
                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            no_success = false;
        }
    }

    if (c) { // check partial stripes
        LIST_ENTRY* le;
        uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;

        ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);

        // overlay any not-yet-flushed partial-stripe data onto the buffer
        le = c->partial_stripes.Flink;
        while (le != &c->partial_stripes) {
            partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

            if (ps->address + ps_length > addr && ps->address < addr + length) {
                ULONG runlength, index;

                runlength = RtlFindFirstRunClear(&ps->bmp, &index);

                while (runlength != 0) {
#ifdef __REACTOS__
                    uint64_t runstart, runend, start, end;
#endif
                    if (index >= ps->bmplen)
                        break;

                    if (index + runlength >= ps->bmplen) {
                        runlength = ps->bmplen - index;

                        if (runlength == 0)
                            break;
                    }

#ifndef __REACTOS__
                    uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
                    uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
                    uint64_t start = max(runstart, addr);
                    uint64_t end = min(runend, addr + length);
#else
                    runstart = ps->address + (index * Vcb->superblock.sector_size);
                    runend = runstart + (runlength * Vcb->superblock.sector_size);
                    start = max(runstart, addr);
                    end = min(runend, addr + length);
#endif

                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break;

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        tree_header* th = (tree_header*)buf;

        if (addr != th->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = true;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = true;
        } else if (!NT_SUCCESS(Status)) {
ERR("check_csum returned %08lx\n", Status); 1117 return Status; 1118 } 1119 } else if (degraded) 1120 checksum_error = true; 1121 1122 if (!checksum_error) 1123 return STATUS_SUCCESS; 1124 1125 if (context->tree) { 1126 uint8_t* sector; 1127 uint16_t k, physstripe, parity1, parity2, error_stripe; 1128 uint64_t off; 1129 bool recovered = false, failed = false; 1130 ULONG num_errors = 0; 1131 1132 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG); 1133 if (!sector) { 1134 ERR("out of memory\n"); 1135 return STATUS_INSUFFICIENT_RESOURCES; 1136 } 1137 1138 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe); 1139 1140 parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1141 parity2 = (parity1 + 1) % ci->num_stripes; 1142 1143 physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1144 1145 j = (parity2 + 1) % ci->num_stripes; 1146 1147 for (k = 0; k < ci->num_stripes - 1; k++) { 1148 if (j != physstripe) { 1149 if (devices[j] && devices[j]->devobj) { 1150 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, 1151 sector + (k * Vcb->superblock.node_size), false); 1152 if (!NT_SUCCESS(Status)) { 1153 ERR("sync_read_phys returned %08lx\n", Status); 1154 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1155 num_errors++; 1156 error_stripe = k; 1157 1158 if (num_errors > 1) { 1159 failed = true; 1160 break; 1161 } 1162 } 1163 } else { 1164 num_errors++; 1165 error_stripe = k; 1166 1167 if (num_errors > 1) { 1168 failed = true; 1169 break; 1170 } 1171 } 1172 } 1173 1174 j = (j + 1) % ci->num_stripes; 1175 } 1176 1177 if (!failed) { 1178 if (num_errors == 0) { 1179 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size)); 1180 1181 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * 
Vcb->superblock.node_size), 1182 Vcb->superblock.node_size); 1183 1184 for (j = 0; j < ci->num_stripes - 2; j++) { 1185 if (j != stripe) 1186 do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size); 1187 } 1188 1189 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) { 1190 RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1191 1192 if (devices[physstripe] && devices[physstripe]->devobj) 1193 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id); 1194 1195 recovered = true; 1196 1197 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1198 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1199 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1200 if (!NT_SUCCESS(Status)) { 1201 WARN("write_data_phys returned %08lx\n", Status); 1202 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1203 } 1204 } 1205 } 1206 } 1207 1208 if (!recovered) { 1209 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size)); 1210 bool read_q = false; 1211 1212 if (devices[parity2] && devices[parity2]->devobj) { 1213 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off, 1214 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false); 1215 if (!NT_SUCCESS(Status)) { 1216 ERR("sync_read_phys returned %08lx\n", Status); 1217 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1218 } else 1219 read_q = true; 1220 } 1221 1222 if (read_q) { 1223 if (num_errors == 1) { 1224 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, 
sector + (ci->num_stripes * Vcb->superblock.node_size)); 1225 1226 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) 1227 recovered = true; 1228 } else { 1229 for (j = 0; j < ci->num_stripes - 1; j++) { 1230 if (j != stripe) { 1231 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1232 1233 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) { 1234 recovered = true; 1235 error_stripe = j; 1236 break; 1237 } 1238 } 1239 } 1240 } 1241 } 1242 1243 if (recovered) { 1244 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1245 1246 if (devices[physstripe] && devices[physstripe]->devobj) 1247 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id); 1248 1249 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1250 1251 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1252 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1253 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1254 if (!NT_SUCCESS(Status)) { 1255 WARN("write_data_phys returned %08lx\n", Status); 1256 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1257 } 1258 } 1259 1260 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1261 if (error_stripe == ci->num_stripes - 2) { 1262 ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id); 1263 1264 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1265 1266 RtlZeroMemory(sector + ((ci->num_stripes - 2) * 
Vcb->superblock.node_size), Vcb->superblock.node_size); 1267 1268 for (j = 0; j < ci->num_stripes - 2; j++) { 1269 if (j == stripe) { 1270 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size), 1271 Vcb->superblock.node_size); 1272 } else { 1273 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), 1274 Vcb->superblock.node_size); 1275 } 1276 } 1277 } else { 1278 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length), 1279 devices[error_stripe_phys]->devitem.dev_id); 1280 1281 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1282 1283 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size), 1284 sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size); 1285 } 1286 } 1287 1288 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1289 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off, 1290 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1291 if (!NT_SUCCESS(Status)) { 1292 WARN("write_data_phys returned %08lx\n", Status); 1293 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 1294 } 1295 } 1296 } 1297 } 1298 } 1299 1300 if (!recovered) { 1301 ERR("unrecoverable checksum error at %I64x\n", addr); 1302 ExFreePool(sector); 1303 return STATUS_CRC_ERROR; 1304 } 1305 1306 ExFreePool(sector); 1307 } else { 1308 ULONG sectors = length / Vcb->superblock.sector_size; 1309 uint8_t* sector; 1310 void* ptr = context->csum; 1311 1312 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG); 1313 if (!sector) { 1314 
ERR("out of memory\n"); 1315 return STATUS_INSUFFICIENT_RESOURCES; 1316 } 1317 1318 for (i = 0; i < sectors; i++) { 1319 uint64_t off; 1320 uint16_t physstripe, parity1, parity2; 1321 1322 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 1323 ci->num_stripes - 2, &off, &stripe); 1324 1325 parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1326 parity2 = (parity1 + 1) % ci->num_stripes; 1327 1328 physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1329 1330 if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr))) { 1331 uint16_t k, error_stripe; 1332 bool recovered = false, failed = false; 1333 ULONG num_errors = 0; 1334 1335 if (devices[physstripe] && devices[physstripe]->devobj) 1336 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS); 1337 1338 j = (parity2 + 1) % ci->num_stripes; 1339 1340 for (k = 0; k < ci->num_stripes - 1; k++) { 1341 if (j != physstripe) { 1342 if (devices[j] && devices[j]->devobj) { 1343 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, 1344 sector + (k * Vcb->superblock.sector_size), false); 1345 if (!NT_SUCCESS(Status)) { 1346 ERR("sync_read_phys returned %08lx\n", Status); 1347 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1348 num_errors++; 1349 error_stripe = k; 1350 1351 if (num_errors > 1) { 1352 failed = true; 1353 break; 1354 } 1355 } 1356 } else { 1357 num_errors++; 1358 error_stripe = k; 1359 1360 if (num_errors > 1) { 1361 failed = true; 1362 break; 1363 } 1364 } 1365 } 1366 1367 j = (j + 1) % ci->num_stripes; 1368 } 1369 1370 if (!failed) { 1371 if (num_errors == 0) { 1372 RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * 
Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1373 1374 for (j = 0; j < ci->num_stripes - 2; j++) { 1375 if (j != stripe) 1376 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1377 } 1378 1379 if (!ptr || check_sector_csum(Vcb, sector + (stripe * Vcb->superblock.sector_size), ptr)) { 1380 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1381 1382 if (devices[physstripe] && devices[physstripe]->devobj) 1383 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), 1384 devices[physstripe]->devitem.dev_id); 1385 1386 recovered = true; 1387 1388 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1389 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1390 sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1391 if (!NT_SUCCESS(Status)) { 1392 WARN("write_data_phys returned %08lx\n", Status); 1393 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1394 } 1395 } 1396 } 1397 } 1398 1399 if (!recovered) { 1400 bool read_q = false; 1401 1402 if (devices[parity2] && devices[parity2]->devobj) { 1403 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off, 1404 Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), false); 1405 if (!NT_SUCCESS(Status)) { 1406 ERR("sync_read_phys returned %08lx\n", Status); 1407 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS); 1408 } else 1409 read_q = true; 1410 } 1411 1412 if (read_q) { 1413 if (num_errors == 1) { 1414 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, 
sector + (ci->num_stripes * Vcb->superblock.sector_size)); 1415 1416 if (!devices[physstripe] || !devices[physstripe]->devobj) 1417 recovered = true; 1418 else 1419 recovered = check_sector_csum(Vcb, sector + (ci->num_stripes * Vcb->superblock.sector_size), ptr); 1420 } else { 1421 for (j = 0; j < ci->num_stripes - 1; j++) { 1422 if (j != stripe) { 1423 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size)); 1424 1425 if (check_sector_csum(Vcb, sector + (ci->num_stripes * Vcb->superblock.sector_size), ptr)) { 1426 recovered = true; 1427 error_stripe = j; 1428 break; 1429 } 1430 } 1431 } 1432 } 1433 } 1434 1435 if (recovered) { 1436 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1437 1438 if (devices[physstripe] && devices[physstripe]->devobj) 1439 ERR("recovering from checksum error at %I64x, device %I64x\n", 1440 addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id); 1441 1442 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1443 1444 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1445 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1446 sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1447 if (!NT_SUCCESS(Status)) { 1448 WARN("write_data_phys returned %08lx\n", Status); 1449 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1450 } 1451 } 1452 1453 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1454 if (error_stripe == ci->num_stripes - 2) { 1455 ERR("recovering from parity error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), 1456 
devices[error_stripe_phys]->devitem.dev_id); 1457 1458 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1459 1460 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1461 1462 for (j = 0; j < ci->num_stripes - 2; j++) { 1463 if (j == stripe) { 1464 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), 1465 Vcb->superblock.sector_size); 1466 } else { 1467 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), 1468 Vcb->superblock.sector_size); 1469 } 1470 } 1471 } else { 1472 ERR("recovering from checksum error at %I64x, device %I64x\n", 1473 addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length), 1474 devices[error_stripe_phys]->devitem.dev_id); 1475 1476 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1477 1478 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size), 1479 sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1480 } 1481 } 1482 1483 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1484 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off, 1485 sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1486 if (!NT_SUCCESS(Status)) { 1487 WARN("write_data_phys returned %08lx\n", Status); 1488 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 1489 } 1490 } 1491 } 1492 } 1493 } 1494 1495 if (!recovered) { 1496 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 1497 ExFreePool(sector); 1498 return 
STATUS_CRC_ERROR; 1499 } 1500 } 1501 1502 if (ptr) 1503 ptr = (uint8_t*)ptr + Vcb->csum_size; 1504 } 1505 1506 ExFreePool(sector); 1507 } 1508 1509 return STATUS_SUCCESS; 1510 } 1511 1512 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) void* csum, 1513 _In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read, 1514 _In_ ULONG priority) { 1515 CHUNK_ITEM* ci; 1516 CHUNK_ITEM_STRIPE* cis; 1517 read_data_context context; 1518 uint64_t type, offset, total_reading = 0; 1519 NTSTATUS Status; 1520 device** devices = NULL; 1521 uint16_t i, startoffstripe, allowed_missing, missing_devices = 0; 1522 uint8_t* dummypage = NULL; 1523 PMDL dummy_mdl = NULL; 1524 bool need_to_wait; 1525 uint64_t lockaddr, locklen; 1526 1527 if (Vcb->log_to_phys_loaded) { 1528 if (!c) { 1529 c = get_chunk_from_address(Vcb, addr); 1530 1531 if (!c) { 1532 ERR("get_chunk_from_address failed\n"); 1533 return STATUS_INTERNAL_ERROR; 1534 } 1535 } 1536 1537 ci = c->chunk_item; 1538 offset = c->offset; 1539 devices = c->devices; 1540 1541 if (pc) 1542 *pc = c; 1543 } else { 1544 LIST_ENTRY* le = Vcb->sys_chunks.Flink; 1545 1546 ci = NULL; 1547 1548 c = NULL; 1549 while (le != &Vcb->sys_chunks) { 1550 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); 1551 1552 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { 1553 CHUNK_ITEM* chunk_item = sc->data; 1554 1555 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { 1556 ci = chunk_item; 1557 offset = sc->key.offset; 1558 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; 1559 1560 devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); 1561 if (!devices) { 1562 ERR("out of memory\n"); 1563 return STATUS_INSUFFICIENT_RESOURCES; 
1564 } 1565 1566 for (i = 0; i < ci->num_stripes; i++) { 1567 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); 1568 } 1569 1570 break; 1571 } 1572 } 1573 1574 le = le->Flink; 1575 } 1576 1577 if (!ci) { 1578 ERR("could not find chunk for %I64x in bootstrap\n", addr); 1579 return STATUS_INTERNAL_ERROR; 1580 } 1581 1582 if (pc) 1583 *pc = NULL; 1584 } 1585 1586 if (ci->type & BLOCK_FLAG_DUPLICATE) { 1587 type = BLOCK_FLAG_DUPLICATE; 1588 allowed_missing = ci->num_stripes - 1; 1589 } else if (ci->type & BLOCK_FLAG_RAID0) { 1590 type = BLOCK_FLAG_RAID0; 1591 allowed_missing = 0; 1592 } else if (ci->type & BLOCK_FLAG_RAID1) { 1593 type = BLOCK_FLAG_DUPLICATE; 1594 allowed_missing = 1; 1595 } else if (ci->type & BLOCK_FLAG_RAID10) { 1596 type = BLOCK_FLAG_RAID10; 1597 allowed_missing = 1; 1598 } else if (ci->type & BLOCK_FLAG_RAID5) { 1599 type = BLOCK_FLAG_RAID5; 1600 allowed_missing = 1; 1601 } else if (ci->type & BLOCK_FLAG_RAID6) { 1602 type = BLOCK_FLAG_RAID6; 1603 allowed_missing = 2; 1604 } else if (ci->type & BLOCK_FLAG_RAID1C3) { 1605 type = BLOCK_FLAG_DUPLICATE; 1606 allowed_missing = 2; 1607 } else if (ci->type & BLOCK_FLAG_RAID1C4) { 1608 type = BLOCK_FLAG_DUPLICATE; 1609 allowed_missing = 3; 1610 } else { // SINGLE 1611 type = BLOCK_FLAG_DUPLICATE; 1612 allowed_missing = 0; 1613 } 1614 1615 cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 1616 1617 RtlZeroMemory(&context, sizeof(read_data_context)); 1618 KeInitializeEvent(&context.Event, NotificationEvent, false); 1619 1620 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); 1621 if (!context.stripes) { 1622 ERR("out of memory\n"); 1623 return STATUS_INSUFFICIENT_RESOURCES; 1624 } 1625 1626 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) { 1627 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen); 1628 chunk_lock_range(Vcb, c, lockaddr, locklen); 1629 } 1630 1631 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * 
ci->num_stripes); 1632 1633 context.buflen = length; 1634 context.num_stripes = ci->num_stripes; 1635 context.stripes_left = context.num_stripes; 1636 context.sector_size = Vcb->superblock.sector_size; 1637 context.csum = csum; 1638 context.tree = is_tree; 1639 context.type = type; 1640 1641 if (type == BLOCK_FLAG_RAID0) { 1642 uint64_t startoff, endoff; 1643 uint16_t endoffstripe, stripe; 1644 uint32_t *stripeoff, pos; 1645 PMDL master_mdl; 1646 PFN_NUMBER* pfns; 1647 1648 // FIXME - test this still works if page size isn't the same as sector size 1649 1650 // This relies on the fact that MDLs are followed in memory by the page file numbers, 1651 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0 1652 // data for you without doing a memcpy yourself. 1653 // MDLs are officially opaque, so this might very well break in future versions of Windows. 1654 1655 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); 1656 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); 1657 1658 if (file_read) { 1659 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL 1660 // with duplicated dummy PFNs, which confuse check_csum. Ah well. 1661 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested. 
1662 1663 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1664 1665 if (!context.va) { 1666 ERR("out of memory\n"); 1667 Status = STATUS_INSUFFICIENT_RESOURCES; 1668 goto exit; 1669 } 1670 } else 1671 context.va = buf; 1672 1673 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1674 if (!master_mdl) { 1675 ERR("out of memory\n"); 1676 Status = STATUS_INSUFFICIENT_RESOURCES; 1677 goto exit; 1678 } 1679 1680 Status = STATUS_SUCCESS; 1681 1682 _SEH2_TRY { 1683 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1684 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1685 Status = _SEH2_GetExceptionCode(); 1686 } _SEH2_END; 1687 1688 if (!NT_SUCCESS(Status)) { 1689 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 1690 IoFreeMdl(master_mdl); 1691 goto exit; 1692 } 1693 1694 pfns = (PFN_NUMBER*)(master_mdl + 1); 1695 1696 for (i = 0; i < ci->num_stripes; i++) { 1697 if (startoffstripe > i) 1698 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1699 else if (startoffstripe == i) 1700 context.stripes[i].stripestart = startoff; 1701 else 1702 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length); 1703 1704 if (endoffstripe > i) 1705 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1706 else if (endoffstripe == i) 1707 context.stripes[i].stripeend = endoff + 1; 1708 else 1709 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length); 1710 1711 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 1712 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL); 1713 1714 if (!context.stripes[i].mdl) { 1715 ERR("IoAllocateMdl failed\n"); 1716 MmUnlockPages(master_mdl); 1717 IoFreeMdl(master_mdl); 1718 Status = STATUS_INSUFFICIENT_RESOURCES; 1719 goto exit; 1720 } 1721 } 1722 } 1723 1724 stripeoff = 
ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 1725 if (!stripeoff) { 1726 ERR("out of memory\n"); 1727 MmUnlockPages(master_mdl); 1728 IoFreeMdl(master_mdl); 1729 Status = STATUS_INSUFFICIENT_RESOURCES; 1730 goto exit; 1731 } 1732 1733 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 1734 1735 pos = 0; 1736 stripe = startoffstripe; 1737 while (pos < length) { 1738 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 1739 1740 if (pos == 0) { 1741 uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)); 1742 1743 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1744 1745 stripeoff[stripe] += readlen; 1746 pos += readlen; 1747 } else if (length - pos < ci->stripe_length) { 1748 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1749 1750 pos = length; 1751 } else { 1752 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1753 1754 stripeoff[stripe] += (uint32_t)ci->stripe_length; 1755 pos += (uint32_t)ci->stripe_length; 1756 } 1757 1758 stripe = (stripe + 1) % ci->num_stripes; 1759 } 1760 1761 MmUnlockPages(master_mdl); 1762 IoFreeMdl(master_mdl); 1763 1764 ExFreePool(stripeoff); 1765 } else if (type == BLOCK_FLAG_RAID10) { 1766 uint64_t startoff, endoff; 1767 uint16_t endoffstripe, j, stripe; 1768 ULONG orig_ls; 1769 PMDL master_mdl; 1770 PFN_NUMBER* pfns; 1771 uint32_t* stripeoff, pos; 1772 read_data_stripe** stripes; 1773 1774 if (c) 1775 orig_ls = c->last_stripe; 1776 else 1777 orig_ls = 0; 1778 1779 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); 1780 get_raid0_offset(addr + length - 
offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); 1781 1782 if ((ci->num_stripes % ci->sub_stripes) != 0) { 1783 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); 1784 Status = STATUS_INTERNAL_ERROR; 1785 goto exit; 1786 } 1787 1788 if (file_read) { 1789 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1790 1791 if (!context.va) { 1792 ERR("out of memory\n"); 1793 Status = STATUS_INSUFFICIENT_RESOURCES; 1794 goto exit; 1795 } 1796 } else 1797 context.va = buf; 1798 1799 context.firstoff = (uint16_t)((startoff % ci->stripe_length) / Vcb->superblock.sector_size); 1800 context.startoffstripe = startoffstripe; 1801 context.sectors_per_stripe = (uint16_t)(ci->stripe_length / Vcb->superblock.sector_size); 1802 1803 startoffstripe *= ci->sub_stripes; 1804 endoffstripe *= ci->sub_stripes; 1805 1806 if (c) 1807 c->last_stripe = (orig_ls + 1) % ci->sub_stripes; 1808 1809 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1810 if (!master_mdl) { 1811 ERR("out of memory\n"); 1812 Status = STATUS_INSUFFICIENT_RESOURCES; 1813 goto exit; 1814 } 1815 1816 Status = STATUS_SUCCESS; 1817 1818 _SEH2_TRY { 1819 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1820 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1821 Status = _SEH2_GetExceptionCode(); 1822 } _SEH2_END; 1823 1824 if (!NT_SUCCESS(Status)) { 1825 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 1826 IoFreeMdl(master_mdl); 1827 goto exit; 1828 } 1829 1830 pfns = (PFN_NUMBER*)(master_mdl + 1); 1831 1832 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1833 if (!stripes) { 1834 ERR("out of memory\n"); 1835 MmUnlockPages(master_mdl); 1836 IoFreeMdl(master_mdl); 1837 Status = STATUS_INSUFFICIENT_RESOURCES; 1838 goto exit; 1839 } 1840 1841 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * 
ci->num_stripes / ci->sub_stripes); 1842 1843 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { 1844 uint64_t sstart, send; 1845 bool stripeset = false; 1846 1847 if (startoffstripe > i) 1848 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1849 else if (startoffstripe == i) 1850 sstart = startoff; 1851 else 1852 sstart = startoff - (startoff % ci->stripe_length); 1853 1854 if (endoffstripe > i) 1855 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1856 else if (endoffstripe == i) 1857 send = endoff + 1; 1858 else 1859 send = endoff - (endoff % ci->stripe_length); 1860 1861 for (j = 0; j < ci->sub_stripes; j++) { 1862 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) { 1863 context.stripes[i+j].stripestart = sstart; 1864 context.stripes[i+j].stripeend = send; 1865 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1866 1867 if (sstart != send) { 1868 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL); 1869 1870 if (!context.stripes[i+j].mdl) { 1871 ERR("IoAllocateMdl failed\n"); 1872 MmUnlockPages(master_mdl); 1873 IoFreeMdl(master_mdl); 1874 Status = STATUS_INSUFFICIENT_RESOURCES; 1875 goto exit; 1876 } 1877 } 1878 1879 stripeset = true; 1880 } else 1881 context.stripes[i+j].status = ReadDataStatus_Skip; 1882 } 1883 1884 if (!stripeset) { 1885 for (j = 0; j < ci->sub_stripes; j++) { 1886 if (devices[i+j] && devices[i+j]->devobj) { 1887 context.stripes[i+j].stripestart = sstart; 1888 context.stripes[i+j].stripeend = send; 1889 context.stripes[i+j].status = ReadDataStatus_Pending; 1890 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1891 1892 if (sstart != send) { 1893 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL); 1894 1895 if (!context.stripes[i+j].mdl) { 1896 ERR("IoAllocateMdl failed\n"); 1897 MmUnlockPages(master_mdl); 1898 IoFreeMdl(master_mdl); 1899 Status = STATUS_INSUFFICIENT_RESOURCES; 1900 goto 
exit; 1901 } 1902 } 1903 1904 stripeset = true; 1905 break; 1906 } 1907 } 1908 1909 if (!stripeset) { 1910 ERR("could not find stripe to read\n"); 1911 Status = STATUS_DEVICE_NOT_READY; 1912 goto exit; 1913 } 1914 } 1915 } 1916 1917 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1918 if (!stripeoff) { 1919 ERR("out of memory\n"); 1920 MmUnlockPages(master_mdl); 1921 IoFreeMdl(master_mdl); 1922 Status = STATUS_INSUFFICIENT_RESOURCES; 1923 goto exit; 1924 } 1925 1926 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes); 1927 1928 pos = 0; 1929 stripe = startoffstripe / ci->sub_stripes; 1930 while (pos < length) { 1931 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1); 1932 1933 if (pos == 0) { 1934 uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart, 1935 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length)); 1936 1937 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1938 1939 stripeoff[stripe] += readlen; 1940 pos += readlen; 1941 } else if (length - pos < ci->stripe_length) { 1942 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1943 1944 pos = length; 1945 } else { 1946 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1947 1948 stripeoff[stripe] += (ULONG)ci->stripe_length; 1949 pos += (ULONG)ci->stripe_length; 1950 } 1951 1952 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); 1953 } 1954 1955 MmUnlockPages(master_mdl); 1956 IoFreeMdl(master_mdl); 1957 1958 ExFreePool(stripeoff); 1959 ExFreePool(stripes); 1960 } else if (type == BLOCK_FLAG_DUPLICATE) { 1961 uint64_t orig_ls; 1962 1963 if (c) 1964 orig_ls = i = c->last_stripe; 1965 else 1966 orig_ls = i = 0; 1967 1968 while 
(!devices[i] || !devices[i]->devobj) { 1969 i = (i + 1) % ci->num_stripes; 1970 1971 if (i == orig_ls) { 1972 ERR("no devices available to service request\n"); 1973 Status = STATUS_DEVICE_NOT_READY; 1974 goto exit; 1975 } 1976 } 1977 1978 if (c) 1979 c->last_stripe = (i + 1) % ci->num_stripes; 1980 1981 context.stripes[i].stripestart = addr - offset; 1982 context.stripes[i].stripeend = context.stripes[i].stripestart + length; 1983 1984 if (file_read) { 1985 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1986 1987 if (!context.va) { 1988 ERR("out of memory\n"); 1989 Status = STATUS_INSUFFICIENT_RESOURCES; 1990 goto exit; 1991 } 1992 1993 context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1994 if (!context.stripes[i].mdl) { 1995 ERR("IoAllocateMdl failed\n"); 1996 Status = STATUS_INSUFFICIENT_RESOURCES; 1997 goto exit; 1998 } 1999 2000 MmBuildMdlForNonPagedPool(context.stripes[i].mdl); 2001 } else { 2002 context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL); 2003 2004 if (!context.stripes[i].mdl) { 2005 ERR("IoAllocateMdl failed\n"); 2006 Status = STATUS_INSUFFICIENT_RESOURCES; 2007 goto exit; 2008 } 2009 2010 Status = STATUS_SUCCESS; 2011 2012 _SEH2_TRY { 2013 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess); 2014 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2015 Status = _SEH2_GetExceptionCode(); 2016 } _SEH2_END; 2017 2018 if (!NT_SUCCESS(Status)) { 2019 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 2020 goto exit; 2021 } 2022 } 2023 } else if (type == BLOCK_FLAG_RAID5) { 2024 uint64_t startoff, endoff; 2025 uint16_t endoffstripe, parity; 2026 uint32_t *stripeoff, pos; 2027 PMDL master_mdl; 2028 PFN_NUMBER *pfns, dummy; 2029 bool need_dummy = false; 2030 2031 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); 2032 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, 
&endoffstripe); 2033 2034 if (file_read) { 2035 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2036 2037 if (!context.va) { 2038 ERR("out of memory\n"); 2039 Status = STATUS_INSUFFICIENT_RESOURCES; 2040 goto exit; 2041 } 2042 } else 2043 context.va = buf; 2044 2045 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 2046 if (!master_mdl) { 2047 ERR("out of memory\n"); 2048 Status = STATUS_INSUFFICIENT_RESOURCES; 2049 goto exit; 2050 } 2051 2052 Status = STATUS_SUCCESS; 2053 2054 _SEH2_TRY { 2055 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2056 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2057 Status = _SEH2_GetExceptionCode(); 2058 } _SEH2_END; 2059 2060 if (!NT_SUCCESS(Status)) { 2061 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 2062 IoFreeMdl(master_mdl); 2063 goto exit; 2064 } 2065 2066 pfns = (PFN_NUMBER*)(master_mdl + 1); 2067 2068 pos = 0; 2069 while (pos < length) { 2070 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2071 2072 if (pos == 0) { 2073 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2074 ULONG skip, readlen; 2075 2076 i = startoffstripe; 2077 while (stripe != parity) { 2078 if (i == startoffstripe) { 2079 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length))); 2080 2081 context.stripes[stripe].stripestart = startoff; 2082 context.stripes[stripe].stripeend = startoff + readlen; 2083 2084 pos += readlen; 2085 2086 if (pos == length) 2087 break; 2088 } else { 2089 readlen = min(length - pos, (ULONG)ci->stripe_length); 2090 2091 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2092 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2093 2094 pos += readlen; 2095 2096 if (pos == length) 2097 break; 2098 } 2099 2100 i++; 2101 stripe = (stripe + 1) % ci->num_stripes; 2102 } 2103 2104 if (pos == length) 
2105 break; 2106 2107 for (i = 0; i < startoffstripe; i++) { 2108 uint16_t stripe2 = (parity + i + 1) % ci->num_stripes; 2109 2110 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2111 } 2112 2113 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2114 2115 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) { 2116 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1); 2117 2118 for (i = 0; i < ci->num_stripes; i++) { 2119 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2120 } 2121 2122 pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length); 2123 need_dummy = true; 2124 } 2125 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2126 for (i = 0; i < ci->num_stripes; i++) { 2127 context.stripes[i].stripeend += ci->stripe_length; 2128 } 2129 2130 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1)); 2131 need_dummy = true; 2132 } else { 2133 uint16_t stripe = (parity + 1) % ci->num_stripes; 2134 2135 i = 0; 2136 while (stripe != parity) { 2137 if (endoffstripe == i) { 2138 context.stripes[stripe].stripeend = endoff + 1; 2139 break; 2140 } else if (endoffstripe > i) 2141 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2142 2143 i++; 2144 stripe = (stripe + 1) % ci->num_stripes; 2145 } 2146 2147 break; 2148 } 2149 } 2150 2151 for (i = 0; i < ci->num_stripes; i++) { 2152 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2153 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), 2154 false, false, NULL); 2155 2156 if (!context.stripes[i].mdl) { 2157 ERR("IoAllocateMdl failed\n"); 2158 
MmUnlockPages(master_mdl); 2159 IoFreeMdl(master_mdl); 2160 Status = STATUS_INSUFFICIENT_RESOURCES; 2161 goto exit; 2162 } 2163 } 2164 } 2165 2166 if (need_dummy) { 2167 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2168 if (!dummypage) { 2169 ERR("out of memory\n"); 2170 MmUnlockPages(master_mdl); 2171 IoFreeMdl(master_mdl); 2172 Status = STATUS_INSUFFICIENT_RESOURCES; 2173 goto exit; 2174 } 2175 2176 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL); 2177 if (!dummy_mdl) { 2178 ERR("IoAllocateMdl failed\n"); 2179 MmUnlockPages(master_mdl); 2180 IoFreeMdl(master_mdl); 2181 Status = STATUS_INSUFFICIENT_RESOURCES; 2182 goto exit; 2183 } 2184 2185 MmBuildMdlForNonPagedPool(dummy_mdl); 2186 2187 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2188 } 2189 2190 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 2191 if (!stripeoff) { 2192 ERR("out of memory\n"); 2193 MmUnlockPages(master_mdl); 2194 IoFreeMdl(master_mdl); 2195 Status = STATUS_INSUFFICIENT_RESOURCES; 2196 goto exit; 2197 } 2198 2199 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 2200 2201 pos = 0; 2202 2203 while (pos < length) { 2204 PFN_NUMBER* stripe_pfns; 2205 2206 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2207 2208 if (pos == 0) { 2209 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2210 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2211 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2212 2213 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2214 2215 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2216 2217 stripeoff[stripe] = readlen; 2218 pos += readlen; 2219 2220 stripe = (stripe + 1) % ci->num_stripes; 2221 2222 while (stripe != parity) { 2223 
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2224 readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2225 2226 if (readlen == 0) 2227 break; 2228 2229 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2230 2231 stripeoff[stripe] = readlen; 2232 pos += readlen; 2233 2234 stripe = (stripe + 1) % ci->num_stripes; 2235 } 2236 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2237 uint16_t stripe = (parity + 1) % ci->num_stripes; 2238 ULONG k; 2239 2240 while (stripe != parity) { 2241 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2242 2243 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2244 2245 stripeoff[stripe] += (uint32_t)ci->stripe_length; 2246 pos += (uint32_t)ci->stripe_length; 2247 2248 stripe = (stripe + 1) % ci->num_stripes; 2249 } 2250 2251 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1); 2252 2253 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2254 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy; 2255 stripeoff[parity] += PAGE_SIZE; 2256 } 2257 } else { 2258 uint16_t stripe = (parity + 1) % ci->num_stripes; 2259 uint32_t readlen; 2260 2261 while (pos < length) { 2262 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2263 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2264 2265 if (readlen == 0) 2266 break; 2267 2268 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2269 2270 stripeoff[stripe] += readlen; 2271 pos += readlen; 2272 2273 stripe = (stripe + 1) % ci->num_stripes; 2274 } 2275 } 2276 } 2277 2278 MmUnlockPages(master_mdl); 2279 IoFreeMdl(master_mdl); 2280 2281 
ExFreePool(stripeoff); 2282 } else if (type == BLOCK_FLAG_RAID6) { 2283 uint64_t startoff, endoff; 2284 uint16_t endoffstripe, parity1; 2285 uint32_t *stripeoff, pos; 2286 PMDL master_mdl; 2287 PFN_NUMBER *pfns, dummy; 2288 bool need_dummy = false; 2289 2290 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); 2291 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); 2292 2293 if (file_read) { 2294 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2295 2296 if (!context.va) { 2297 ERR("out of memory\n"); 2298 Status = STATUS_INSUFFICIENT_RESOURCES; 2299 goto exit; 2300 } 2301 } else 2302 context.va = buf; 2303 2304 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 2305 if (!master_mdl) { 2306 ERR("out of memory\n"); 2307 Status = STATUS_INSUFFICIENT_RESOURCES; 2308 goto exit; 2309 } 2310 2311 Status = STATUS_SUCCESS; 2312 2313 _SEH2_TRY { 2314 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2315 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2316 Status = _SEH2_GetExceptionCode(); 2317 } _SEH2_END; 2318 2319 if (!NT_SUCCESS(Status)) { 2320 ERR("MmProbeAndLockPages threw exception %08lx\n", Status); 2321 IoFreeMdl(master_mdl); 2322 goto exit; 2323 } 2324 2325 pfns = (PFN_NUMBER*)(master_mdl + 1); 2326 2327 pos = 0; 2328 while (pos < length) { 2329 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2330 2331 if (pos == 0) { 2332 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2; 2333 ULONG skip, readlen; 2334 2335 i = startoffstripe; 2336 while (stripe != parity1) { 2337 if (i == startoffstripe) { 2338 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length)); 2339 2340 context.stripes[stripe].stripestart = startoff; 2341 context.stripes[stripe].stripeend = startoff + readlen; 2342 2343 pos += 
readlen; 2344 2345 if (pos == length) 2346 break; 2347 } else { 2348 readlen = min(length - pos, (ULONG)ci->stripe_length); 2349 2350 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2351 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2352 2353 pos += readlen; 2354 2355 if (pos == length) 2356 break; 2357 } 2358 2359 i++; 2360 stripe = (stripe + 1) % ci->num_stripes; 2361 } 2362 2363 if (pos == length) 2364 break; 2365 2366 for (i = 0; i < startoffstripe; i++) { 2367 uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes; 2368 2369 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2370 } 2371 2372 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2373 2374 parity2 = (parity1 + 1) % ci->num_stripes; 2375 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2376 2377 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) { 2378 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1); 2379 2380 for (i = 0; i < ci->num_stripes; i++) { 2381 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2382 } 2383 2384 pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length); 2385 need_dummy = true; 2386 } 2387 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2388 for (i = 0; i < ci->num_stripes; i++) { 2389 context.stripes[i].stripeend += ci->stripe_length; 2390 } 2391 2392 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2)); 2393 need_dummy = true; 2394 } else { 2395 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2396 2397 i = 0; 2398 while (stripe != parity1) { 2399 if (endoffstripe == i) { 2400 
context.stripes[stripe].stripeend = endoff + 1; 2401 break; 2402 } else if (endoffstripe > i) 2403 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2404 2405 i++; 2406 stripe = (stripe + 1) % ci->num_stripes; 2407 } 2408 2409 break; 2410 } 2411 } 2412 2413 for (i = 0; i < ci->num_stripes; i++) { 2414 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2415 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL); 2416 2417 if (!context.stripes[i].mdl) { 2418 ERR("IoAllocateMdl failed\n"); 2419 MmUnlockPages(master_mdl); 2420 IoFreeMdl(master_mdl); 2421 Status = STATUS_INSUFFICIENT_RESOURCES; 2422 goto exit; 2423 } 2424 } 2425 } 2426 2427 if (need_dummy) { 2428 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2429 if (!dummypage) { 2430 ERR("out of memory\n"); 2431 MmUnlockPages(master_mdl); 2432 IoFreeMdl(master_mdl); 2433 Status = STATUS_INSUFFICIENT_RESOURCES; 2434 goto exit; 2435 } 2436 2437 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL); 2438 if (!dummy_mdl) { 2439 ERR("IoAllocateMdl failed\n"); 2440 MmUnlockPages(master_mdl); 2441 IoFreeMdl(master_mdl); 2442 Status = STATUS_INSUFFICIENT_RESOURCES; 2443 goto exit; 2444 } 2445 2446 MmBuildMdlForNonPagedPool(dummy_mdl); 2447 2448 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2449 } 2450 2451 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 2452 if (!stripeoff) { 2453 ERR("out of memory\n"); 2454 MmUnlockPages(master_mdl); 2455 IoFreeMdl(master_mdl); 2456 Status = STATUS_INSUFFICIENT_RESOURCES; 2457 goto exit; 2458 } 2459 2460 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 2461 2462 pos = 0; 2463 2464 while (pos < length) { 2465 PFN_NUMBER* stripe_pfns; 2466 2467 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % 
ci->num_stripes; 2468 2469 if (pos == 0) { 2470 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes; 2471 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2472 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2473 2474 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2475 2476 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2477 2478 stripeoff[stripe] = readlen; 2479 pos += readlen; 2480 2481 stripe = (stripe + 1) % ci->num_stripes; 2482 2483 while (stripe != parity1) { 2484 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2485 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2486 2487 if (readlen == 0) 2488 break; 2489 2490 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2491 2492 stripeoff[stripe] = readlen; 2493 pos += readlen; 2494 2495 stripe = (stripe + 1) % ci->num_stripes; 2496 } 2497 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2498 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2499 uint16_t parity2 = (parity1 + 1) % ci->num_stripes; 2500 ULONG k; 2501 2502 while (stripe != parity1) { 2503 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2504 2505 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2506 2507 stripeoff[stripe] += (uint32_t)ci->stripe_length; 2508 pos += (uint32_t)ci->stripe_length; 2509 2510 stripe = (stripe + 1) % ci->num_stripes; 2511 } 2512 2513 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1); 2514 2515 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2516 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy; 2517 stripeoff[parity1] += PAGE_SIZE; 2518 } 2519 2520 
stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1); 2521 2522 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2523 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy; 2524 stripeoff[parity2] += PAGE_SIZE; 2525 } 2526 } else { 2527 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2528 uint32_t readlen; 2529 2530 while (pos < length) { 2531 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2532 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2533 2534 if (readlen == 0) 2535 break; 2536 2537 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2538 2539 stripeoff[stripe] += readlen; 2540 pos += readlen; 2541 2542 stripe = (stripe + 1) % ci->num_stripes; 2543 } 2544 } 2545 } 2546 2547 MmUnlockPages(master_mdl); 2548 IoFreeMdl(master_mdl); 2549 2550 ExFreePool(stripeoff); 2551 } 2552 2553 context.address = addr; 2554 2555 for (i = 0; i < ci->num_stripes; i++) { 2556 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) { 2557 context.stripes[i].status = ReadDataStatus_MissingDevice; 2558 context.stripes_left--; 2559 2560 if (!devices[i] || !devices[i]->devobj) 2561 missing_devices++; 2562 } 2563 } 2564 2565 if (missing_devices > allowed_missing) { 2566 ERR("not enough devices to service request (%u missing)\n", missing_devices); 2567 Status = STATUS_UNEXPECTED_IO_ERROR; 2568 goto exit; 2569 } 2570 2571 for (i = 0; i < ci->num_stripes; i++) { 2572 PIO_STACK_LOCATION IrpSp; 2573 2574 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) { 2575 context.stripes[i].context = (struct read_data_context*)&context; 2576 2577 if (type == BLOCK_FLAG_RAID10) { 2578 context.stripes[i].stripenum = i / ci->sub_stripes; 2579 } 2580 
2581 if (!Irp) { 2582 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false); 2583 2584 if (!context.stripes[i].Irp) { 2585 ERR("IoAllocateIrp failed\n"); 2586 Status = STATUS_INSUFFICIENT_RESOURCES; 2587 goto exit; 2588 } 2589 } else { 2590 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); 2591 2592 if (!context.stripes[i].Irp) { 2593 ERR("IoMakeAssociatedIrp failed\n"); 2594 Status = STATUS_INSUFFICIENT_RESOURCES; 2595 goto exit; 2596 } 2597 } 2598 2599 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2600 IrpSp->MajorFunction = IRP_MJ_READ; 2601 IrpSp->MinorFunction = IRP_MN_NORMAL; 2602 IrpSp->FileObject = devices[i]->fileobj; 2603 2604 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2605 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG); 2606 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2607 ERR("out of memory\n"); 2608 Status = STATUS_INSUFFICIENT_RESOURCES; 2609 goto exit; 2610 } 2611 2612 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2613 2614 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2615 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) 2616 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl; 2617 else 2618 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2619 2620 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart); 2621 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset; 2622 2623 total_reading += IrpSp->Parameters.Read.Length; 2624 2625 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2626 2627 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], 
true, true, true); 2628 2629 context.stripes[i].status = ReadDataStatus_Pending; 2630 } 2631 } 2632 2633 need_to_wait = false; 2634 for (i = 0; i < ci->num_stripes; i++) { 2635 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) { 2636 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp); 2637 need_to_wait = true; 2638 } 2639 } 2640 2641 if (need_to_wait) 2642 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 2643 2644 if (diskacc) 2645 fFsRtlUpdateDiskCounters(total_reading, 0); 2646 2647 // check if any of the devices return a "user-induced" error 2648 2649 for (i = 0; i < ci->num_stripes; i++) { 2650 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) { 2651 Status = context.stripes[i].iosb.Status; 2652 goto exit; 2653 } 2654 } 2655 2656 if (type == BLOCK_FLAG_RAID0) { 2657 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2658 if (!NT_SUCCESS(Status)) { 2659 ERR("read_data_raid0 returned %08lx\n", Status); 2660 2661 if (file_read) 2662 ExFreePool(context.va); 2663 2664 goto exit; 2665 } 2666 2667 if (file_read) { 2668 RtlCopyMemory(buf, context.va, length); 2669 ExFreePool(context.va); 2670 } 2671 } else if (type == BLOCK_FLAG_RAID10) { 2672 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2673 2674 if (!NT_SUCCESS(Status)) { 2675 ERR("read_data_raid10 returned %08lx\n", Status); 2676 2677 if (file_read) 2678 ExFreePool(context.va); 2679 2680 goto exit; 2681 } 2682 2683 if (file_read) { 2684 RtlCopyMemory(buf, context.va, length); 2685 ExFreePool(context.va); 2686 } 2687 } else if (type == BLOCK_FLAG_DUPLICATE) { 2688 Status = read_data_dup(Vcb, file_read ? 
context.va : buf, addr, &context, ci, devices, generation); 2689 if (!NT_SUCCESS(Status)) { 2690 ERR("read_data_dup returned %08lx\n", Status); 2691 2692 if (file_read) 2693 ExFreePool(context.va); 2694 2695 goto exit; 2696 } 2697 2698 if (file_read) { 2699 RtlCopyMemory(buf, context.va, length); 2700 ExFreePool(context.va); 2701 } 2702 } else if (type == BLOCK_FLAG_RAID5) { 2703 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false); 2704 if (!NT_SUCCESS(Status)) { 2705 ERR("read_data_raid5 returned %08lx\n", Status); 2706 2707 if (file_read) 2708 ExFreePool(context.va); 2709 2710 goto exit; 2711 } 2712 2713 if (file_read) { 2714 RtlCopyMemory(buf, context.va, length); 2715 ExFreePool(context.va); 2716 } 2717 } else if (type == BLOCK_FLAG_RAID6) { 2718 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? 
true : false); 2719 if (!NT_SUCCESS(Status)) { 2720 ERR("read_data_raid6 returned %08lx\n", Status); 2721 2722 if (file_read) 2723 ExFreePool(context.va); 2724 2725 goto exit; 2726 } 2727 2728 if (file_read) { 2729 RtlCopyMemory(buf, context.va, length); 2730 ExFreePool(context.va); 2731 } 2732 } 2733 2734 exit: 2735 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) 2736 chunk_unlock_range(Vcb, c, lockaddr, locklen); 2737 2738 if (dummy_mdl) 2739 IoFreeMdl(dummy_mdl); 2740 2741 if (dummypage) 2742 ExFreePool(dummypage); 2743 2744 for (i = 0; i < ci->num_stripes; i++) { 2745 if (context.stripes[i].mdl) { 2746 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED) 2747 MmUnlockPages(context.stripes[i].mdl); 2748 2749 IoFreeMdl(context.stripes[i].mdl); 2750 } 2751 2752 if (context.stripes[i].Irp) 2753 IoFreeIrp(context.stripes[i].Irp); 2754 } 2755 2756 ExFreePool(context.stripes); 2757 2758 if (!Vcb->log_to_phys_loaded) 2759 ExFreePool(devices); 2760 2761 return Status; 2762 } 2763 2764 NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) { 2765 ULONG readlen; 2766 2767 TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr); 2768 2769 if (pbr) *pbr = 0; 2770 2771 if (start >= fcb->adsdata.Length) { 2772 TRACE("tried to read beyond end of stream\n"); 2773 return STATUS_END_OF_FILE; 2774 } 2775 2776 if (length == 0) { 2777 WARN("tried to read zero bytes\n"); 2778 return STATUS_SUCCESS; 2779 } 2780 2781 if (start + length < fcb->adsdata.Length) 2782 readlen = length; 2783 else 2784 readlen = fcb->adsdata.Length - (ULONG)start; 2785 2786 if (readlen > 0) 2787 RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen); 2788 2789 if (pbr) *pbr = readlen; 2790 2791 return STATUS_SUCCESS; 2792 } 2793 2794 typedef struct { 2795 uint64_t off; 2796 uint64_t ed_size; 2797 uint64_t ed_offset; 2798 uint64_t ed_num_bytes; 2799 } read_part_extent; 2800 2801 typedef struct { 2802 LIST_ENTRY list_entry; 2803 uint64_t addr; 
2804 chunk* c; 2805 uint32_t read; 2806 uint32_t to_read; 2807 void* csum; 2808 bool csum_free; 2809 uint8_t* buf; 2810 bool buf_free; 2811 uint32_t bumpoff; 2812 bool mdl; 2813 void* data; 2814 uint8_t compression; 2815 unsigned int num_extents; 2816 read_part_extent extents[1]; 2817 } read_part; 2818 2819 typedef struct { 2820 LIST_ENTRY list_entry; 2821 calc_job* cj; 2822 void* decomp; 2823 void* data; 2824 unsigned int offset; 2825 size_t length; 2826 } comp_calc_job; 2827 2828 NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) { 2829 NTSTATUS Status; 2830 uint32_t bytes_read = 0; 2831 uint64_t last_end; 2832 LIST_ENTRY* le; 2833 POOL_TYPE pool_type; 2834 LIST_ENTRY read_parts, calc_jobs; 2835 2836 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr); 2837 2838 if (pbr) 2839 *pbr = 0; 2840 2841 if (start >= fcb->inode_item.st_size) { 2842 WARN("Tried to read beyond end of file\n"); 2843 return STATUS_END_OF_FILE; 2844 } 2845 2846 InitializeListHead(&read_parts); 2847 InitializeListHead(&calc_jobs); 2848 2849 pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool; 2850 2851 le = fcb->extents.Flink; 2852 2853 last_end = start; 2854 2855 while (le != &fcb->extents) { 2856 uint64_t len; 2857 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 2858 2859 if (!ext->ignore) { 2860 EXTENT_DATA* ed = &ext->extent_data; 2861 EXTENT_DATA2* ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL; 2862 2863 len = ed2 ? 
ed2->num_bytes : ed->decoded_size; 2864 2865 if (ext->offset + len <= start) { 2866 last_end = ext->offset + len; 2867 goto nextitem; 2868 } 2869 2870 if (ext->offset > last_end && ext->offset > start + bytes_read) { 2871 uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end)); 2872 2873 RtlZeroMemory(data + bytes_read, read); 2874 bytes_read += read; 2875 length -= read; 2876 } 2877 2878 if (length == 0 || ext->offset > start + bytes_read + length) 2879 break; 2880 2881 if (ed->encryption != BTRFS_ENCRYPTION_NONE) { 2882 WARN("Encryption not supported\n"); 2883 Status = STATUS_NOT_IMPLEMENTED; 2884 goto exit; 2885 } 2886 2887 if (ed->encoding != BTRFS_ENCODING_NONE) { 2888 WARN("Other encodings not supported\n"); 2889 Status = STATUS_NOT_IMPLEMENTED; 2890 goto exit; 2891 } 2892 2893 switch (ed->type) { 2894 case EXTENT_TYPE_INLINE: 2895 { 2896 uint64_t off = start + bytes_read - ext->offset; 2897 uint32_t read; 2898 2899 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2900 read = (uint32_t)min(min(len, ext->datalen) - off, length); 2901 2902 RtlCopyMemory(data + bytes_read, &ed->data[off], read); 2903 } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) { 2904 uint8_t* decomp; 2905 bool decomp_alloc; 2906 uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]); 2907 2908 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) { 2909 ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size); 2910 Status = STATUS_INTERNAL_ERROR; 2911 goto exit; 2912 } 2913 2914 read = (uint32_t)min(ed->decoded_size - off, length); 2915 2916 if (off > 0) { 2917 decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG); 2918 if (!decomp) { 2919 ERR("out of memory\n"); 2920 Status = STATUS_INSUFFICIENT_RESOURCES; 2921 goto exit; 2922 } 2923 2924 decomp_alloc = true; 2925 } else { 2926 decomp = data + bytes_read; 2927 
decomp_alloc = false; 2928 } 2929 2930 if (ed->compression == BTRFS_COMPRESSION_ZLIB) { 2931 Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off)); 2932 if (!NT_SUCCESS(Status)) { 2933 ERR("zlib_decompress returned %08lx\n", Status); 2934 if (decomp_alloc) ExFreePool(decomp); 2935 goto exit; 2936 } 2937 } else if (ed->compression == BTRFS_COMPRESSION_LZO) { 2938 if (inlen < sizeof(uint32_t)) { 2939 ERR("extent data was truncated\n"); 2940 Status = STATUS_INTERNAL_ERROR; 2941 if (decomp_alloc) ExFreePool(decomp); 2942 goto exit; 2943 } else 2944 inlen -= sizeof(uint32_t); 2945 2946 Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t)); 2947 if (!NT_SUCCESS(Status)) { 2948 ERR("lzo_decompress returned %08lx\n", Status); 2949 if (decomp_alloc) ExFreePool(decomp); 2950 goto exit; 2951 } 2952 } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) { 2953 Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off)); 2954 if (!NT_SUCCESS(Status)) { 2955 ERR("zstd_decompress returned %08lx\n", Status); 2956 if (decomp_alloc) ExFreePool(decomp); 2957 goto exit; 2958 } 2959 } 2960 2961 if (decomp_alloc) { 2962 RtlCopyMemory(data + bytes_read, decomp + off, read); 2963 ExFreePool(decomp); 2964 } 2965 } else { 2966 ERR("unhandled compression type %x\n", ed->compression); 2967 Status = STATUS_NOT_IMPLEMENTED; 2968 goto exit; 2969 } 2970 2971 bytes_read += read; 2972 length -= read; 2973 2974 break; 2975 } 2976 2977 case EXTENT_TYPE_REGULAR: 2978 { 2979 read_part* rp; 2980 2981 rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG); 2982 if (!rp) { 2983 ERR("out of memory\n"); 2984 Status = STATUS_INSUFFICIENT_RESOURCES; 2985 goto exit; 2986 } 2987 2988 rp->mdl = (Irp && Irp->MdlAddress) ? 
true : false; 2989 rp->extents[0].off = start + bytes_read - ext->offset; 2990 rp->bumpoff = 0; 2991 rp->num_extents = 1; 2992 rp->csum_free = false; 2993 2994 rp->read = (uint32_t)(len - rp->extents[0].off); 2995 if (rp->read > length) rp->read = (uint32_t)length; 2996 2997 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2998 rp->addr = ed2->address + ed2->offset + rp->extents[0].off; 2999 rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size); 3000 3001 if (rp->addr % fcb->Vcb->superblock.sector_size > 0) { 3002 rp->bumpoff = rp->addr % fcb->Vcb->superblock.sector_size; 3003 rp->addr -= rp->bumpoff; 3004 rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size); 3005 } 3006 } else { 3007 rp->addr = ed2->address; 3008 rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size); 3009 } 3010 3011 if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 && 3012 length % fcb->Vcb->superblock.sector_size == 0) { 3013 rp->buf = data + bytes_read; 3014 rp->buf_free = false; 3015 } else { 3016 rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG); 3017 rp->buf_free = true; 3018 3019 if (!rp->buf) { 3020 ERR("out of memory\n"); 3021 Status = STATUS_INSUFFICIENT_RESOURCES; 3022 ExFreePool(rp); 3023 goto exit; 3024 } 3025 3026 rp->mdl = false; 3027 } 3028 3029 rp->c = get_chunk_from_address(fcb->Vcb, rp->addr); 3030 3031 if (!rp->c) { 3032 ERR("get_chunk_from_address(%I64x) failed\n", rp->addr); 3033 3034 if (rp->buf_free) 3035 ExFreePool(rp->buf); 3036 3037 ExFreePool(rp); 3038 3039 goto exit; 3040 } 3041 3042 if (ext->csum) { 3043 if (ed->compression == BTRFS_COMPRESSION_NONE) { 3044 rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off / fcb->Vcb->superblock.sector_size)); 3045 } else 3046 rp->csum = ext->csum; 3047 } else 3048 rp->csum = NULL; 3049 3050 rp->data = data + bytes_read; 3051 rp->compression = 
ed->compression; 3052 rp->extents[0].ed_offset = ed2->offset; 3053 rp->extents[0].ed_size = ed2->size; 3054 rp->extents[0].ed_num_bytes = ed2->num_bytes; 3055 3056 InsertTailList(&read_parts, &rp->list_entry); 3057 3058 bytes_read += rp->read; 3059 length -= rp->read; 3060 3061 break; 3062 } 3063 3064 case EXTENT_TYPE_PREALLOC: 3065 { 3066 uint64_t off = start + bytes_read - ext->offset; 3067 uint32_t read = (uint32_t)(len - off); 3068 3069 if (read > length) read = (uint32_t)length; 3070 3071 RtlZeroMemory(data + bytes_read, read); 3072 3073 bytes_read += read; 3074 length -= read; 3075 3076 break; 3077 } 3078 3079 default: 3080 WARN("Unsupported extent data type %u\n", ed->type); 3081 Status = STATUS_NOT_IMPLEMENTED; 3082 goto exit; 3083 } 3084 3085 last_end = ext->offset + len; 3086 3087 if (length == 0) 3088 break; 3089 } 3090 3091 nextitem: 3092 le = le->Flink; 3093 } 3094 3095 if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list 3096 read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry); 3097 3098 le = read_parts.Flink->Flink; 3099 while (le != &read_parts) { 3100 LIST_ENTRY* le2 = le->Flink; 3101 read_part* rp = CONTAINING_RECORD(le, read_part, list_entry); 3102 3103 // merge together runs 3104 if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read && 3105 rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) { 3106 read_part* rp2; 3107 3108 rp2 = ExAllocatePoolWithTag(pool_type, offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG); 3109 3110 rp2->addr = last_rp->addr; 3111 rp2->c = last_rp->c; 3112 rp2->read = last_rp->read + rp->read; 3113 rp2->to_read = last_rp->to_read + rp->to_read; 3114 rp2->csum_free = false; 3115 3116 if (last_rp->csum) { 3117 uint32_t sectors 
= (last_rp->to_read + rp->to_read) / fcb->Vcb->superblock.sector_size; 3118 3119 rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG); 3120 if (!rp2->csum) { 3121 ERR("out of memory\n"); 3122 ExFreePool(rp2); 3123 Status = STATUS_INSUFFICIENT_RESOURCES; 3124 goto exit; 3125 } 3126 3127 RtlCopyMemory(rp2->csum, last_rp->csum, last_rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size); 3128 RtlCopyMemory((uint8_t*)rp2->csum + (last_rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size), rp->csum, 3129 rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size); 3130 3131 rp2->csum_free = true; 3132 } else 3133 rp2->csum = NULL; 3134 3135 rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG); 3136 if (!rp2->buf) { 3137 ERR("out of memory\n"); 3138 3139 if (rp2->csum) 3140 ExFreePool(rp2->csum); 3141 3142 ExFreePool(rp2); 3143 Status = STATUS_INSUFFICIENT_RESOURCES; 3144 goto exit; 3145 } 3146 3147 rp2->buf_free = true; 3148 rp2->bumpoff = 0; 3149 rp2->mdl = false; 3150 rp2->data = last_rp->data; 3151 rp2->compression = last_rp->compression; 3152 rp2->num_extents = last_rp->num_extents + 1; 3153 3154 RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent)); 3155 RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent)); 3156 3157 InsertHeadList(le->Blink, &rp2->list_entry); 3158 3159 if (rp->buf_free) 3160 ExFreePool(rp->buf); 3161 3162 if (rp->csum_free) 3163 ExFreePool(rp->csum); 3164 3165 RemoveEntryList(&rp->list_entry); 3166 3167 ExFreePool(rp); 3168 3169 if (last_rp->buf_free) 3170 ExFreePool(last_rp->buf); 3171 3172 if (last_rp->csum_free) 3173 ExFreePool(last_rp->csum); 3174 3175 RemoveEntryList(&last_rp->list_entry); 3176 3177 ExFreePool(last_rp); 3178 3179 last_rp = rp2; 3180 } else 3181 last_rp = rp; 3182 3183 le = le2; 3184 } 3185 } 3186 3187 le = read_parts.Flink; 3188 while (le != &read_parts) { 
3189 read_part* rp = CONTAINING_RECORD(le, read_part, list_entry); 3190 3191 Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl, 3192 fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 3193 if (!NT_SUCCESS(Status)) { 3194 ERR("read_data returned %08lx\n", Status); 3195 goto exit; 3196 } 3197 3198 if (rp->compression == BTRFS_COMPRESSION_NONE) { 3199 if (rp->buf_free) 3200 RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read); 3201 } else { 3202 uint8_t* buf = rp->buf; 3203 #ifdef __REACTOS__ 3204 unsigned int i; 3205 for (i = 0; i < rp->num_extents; i++) { 3206 #else 3207 for (unsigned int i = 0; i < rp->num_extents; i++) { 3208 #endif // __REACTOS__ 3209 uint8_t *decomp = NULL, *buf2; 3210 ULONG outlen, inlen, off2; 3211 uint32_t inpageoff = 0; 3212 comp_calc_job* ccj; 3213 3214 off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off); 3215 buf2 = buf; 3216 inlen = (ULONG)rp->extents[i].ed_size; 3217 3218 if (rp->compression == BTRFS_COMPRESSION_LZO) { 3219 ULONG inoff = sizeof(uint32_t); 3220 3221 inlen -= sizeof(uint32_t); 3222 3223 // If reading a few sectors in, skip to the interesting bit 3224 while (off2 > LZO_PAGE_SIZE) { 3225 uint32_t partlen; 3226 3227 if (inlen < sizeof(uint32_t)) 3228 break; 3229 3230 partlen = *(uint32_t*)(buf2 + inoff); 3231 3232 if (partlen < inlen) { 3233 off2 -= LZO_PAGE_SIZE; 3234 inoff += partlen + sizeof(uint32_t); 3235 inlen -= partlen + sizeof(uint32_t); 3236 3237 if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t)) 3238 inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE; 3239 } else 3240 break; 3241 } 3242 3243 buf2 = &buf2[inoff]; 3244 inpageoff = inoff % LZO_PAGE_SIZE; 3245 } 3246 3247 if (off2 != 0) { 3248 outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off)); 3249 3250 decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG); 3251 if (!decomp) { 3252 ERR("out 
of memory\n"); 3253 Status = STATUS_INSUFFICIENT_RESOURCES; 3254 goto exit; 3255 } 3256 } else 3257 outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off)); 3258 3259 ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG); 3260 if (!ccj) { 3261 ERR("out of memory\n"); 3262 3263 if (decomp) 3264 ExFreePool(decomp); 3265 3266 Status = STATUS_INSUFFICIENT_RESOURCES; 3267 goto exit; 3268 } 3269 3270 Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp ? decomp : rp->data, outlen, 3271 inpageoff, &ccj->cj); 3272 if (!NT_SUCCESS(Status)) { 3273 ERR("add_calc_job_decomp returned %08lx\n", Status); 3274 3275 if (decomp) 3276 ExFreePool(decomp); 3277 3278 ExFreePool(ccj); 3279 3280 goto exit; 3281 } 3282 3283 ccj->data = rp->data; 3284 ccj->decomp = decomp; 3285 3286 ccj->offset = off2; 3287 ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off); 3288 3289 InsertTailList(&calc_jobs, &ccj->list_entry); 3290 3291 buf += rp->extents[i].ed_size; 3292 rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off; 3293 rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off); 3294 } 3295 } 3296 3297 le = le->Flink; 3298 } 3299 3300 if (length > 0 && start + bytes_read < fcb->inode_item.st_size) { 3301 uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length); 3302 3303 RtlZeroMemory(data + bytes_read, read); 3304 3305 bytes_read += read; 3306 length -= read; 3307 } 3308 3309 Status = STATUS_SUCCESS; 3310 3311 while (!IsListEmpty(&calc_jobs)) { 3312 comp_calc_job* ccj = CONTAINING_RECORD(RemoveTailList(&calc_jobs), comp_calc_job, list_entry); 3313 3314 calc_thread_main(fcb->Vcb, ccj->cj); 3315 3316 KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL); 3317 3318 if (!NT_SUCCESS(ccj->cj->Status)) 3319 Status = ccj->cj->Status; 3320 3321 if (ccj->decomp) { 3322 RtlCopyMemory(ccj->data, 
(uint8_t*)ccj->decomp + ccj->offset, ccj->length); 3323 ExFreePool(ccj->decomp); 3324 } 3325 3326 ExFreePool(ccj); 3327 } 3328 3329 if (pbr) 3330 *pbr = bytes_read; 3331 3332 exit: 3333 while (!IsListEmpty(&read_parts)) { 3334 read_part* rp = CONTAINING_RECORD(RemoveHeadList(&read_parts), read_part, list_entry); 3335 3336 if (rp->buf_free) 3337 ExFreePool(rp->buf); 3338 3339 if (rp->csum_free) 3340 ExFreePool(rp->csum); 3341 3342 ExFreePool(rp); 3343 } 3344 3345 while (!IsListEmpty(&calc_jobs)) { 3346 comp_calc_job* ccj = CONTAINING_RECORD(RemoveHeadList(&calc_jobs), comp_calc_job, list_entry); 3347 3348 KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL); 3349 3350 if (ccj->decomp) 3351 ExFreePool(ccj->decomp); 3352 3353 ExFreePool(ccj->cj); 3354 3355 ExFreePool(ccj); 3356 } 3357 3358 return Status; 3359 } 3360 3361 NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) { 3362 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3363 PFILE_OBJECT FileObject = IrpSp->FileObject; 3364 fcb* fcb = FileObject->FsContext; 3365 uint8_t* data = NULL; 3366 ULONG length = IrpSp->Parameters.Read.Length, addon = 0; 3367 uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart; 3368 3369 *bytes_read = 0; 3370 3371 if (!fcb || !fcb->Vcb || !fcb->subvol) 3372 return STATUS_INTERNAL_ERROR; 3373 3374 TRACE("fcb = %p\n", fcb); 3375 TRACE("offset = %I64x, length = %lx\n", start, length); 3376 TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? 
"true" : "false"); 3377 3378 if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY) 3379 return STATUS_INVALID_DEVICE_REQUEST; 3380 3381 if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) { 3382 WARN("tried to read locked region\n"); 3383 return STATUS_FILE_LOCK_CONFLICT; 3384 } 3385 3386 if (length == 0) { 3387 TRACE("tried to read zero bytes\n"); 3388 return STATUS_SUCCESS; 3389 } 3390 3391 if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) { 3392 TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart); 3393 return STATUS_END_OF_FILE; 3394 } 3395 3396 TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size); 3397 3398 if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) { 3399 data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 3400 3401 if (Irp->MdlAddress && !data) { 3402 ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); 3403 return STATUS_INSUFFICIENT_RESOURCES; 3404 } 3405 3406 if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) { 3407 length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3408 RtlZeroMemory(data, length); 3409 Irp->IoStatus.Information = *bytes_read = length; 3410 return STATUS_SUCCESS; 3411 } 3412 3413 if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) { 3414 addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3415 RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon); 3416 length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start); 3417 } 3418 } 3419 3420 if (!(Irp->Flags & IRP_NOCACHE)) { 3421 NTSTATUS Status = STATUS_SUCCESS; 3422 3423 _SEH2_TRY { 3424 if 
(!FileObject->PrivateCacheMap) { 3425 CC_FILE_SIZES ccfs; 3426 3427 ccfs.AllocationSize = fcb->Header.AllocationSize; 3428 ccfs.FileSize = fcb->Header.FileSize; 3429 ccfs.ValidDataLength = fcb->Header.ValidDataLength; 3430 3431 init_file_cache(FileObject, &ccfs); 3432 } 3433 3434 if (IrpSp->MinorFunction & IRP_MN_MDL) { 3435 CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus); 3436 } else { 3437 if (fCcCopyReadEx) { 3438 TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, 3439 length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread); 3440 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart); 3441 if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) { 3442 TRACE("CcCopyReadEx could not wait\n"); 3443 3444 IoMarkIrpPending(Irp); 3445 return STATUS_PENDING; 3446 } 3447 TRACE("CcCopyReadEx finished\n"); 3448 } else { 3449 TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus); 3450 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart); 3451 if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) { 3452 TRACE("CcCopyRead could not wait\n"); 3453 3454 IoMarkIrpPending(Irp); 3455 return STATUS_PENDING; 3456 } 3457 TRACE("CcCopyRead finished\n"); 3458 } 3459 } 3460 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 3461 Status = _SEH2_GetExceptionCode(); 3462 } _SEH2_END; 3463 3464 if (NT_SUCCESS(Status)) { 3465 Status = Irp->IoStatus.Status; 3466 Irp->IoStatus.Information += addon; 3467 *bytes_read = (ULONG)Irp->IoStatus.Information; 3468 } else 3469 ERR("EXCEPTION - %08lx\n", 
Status); 3470 3471 return Status; 3472 } else { 3473 NTSTATUS Status; 3474 3475 if (!wait) { 3476 IoMarkIrpPending(Irp); 3477 return STATUS_PENDING; 3478 } 3479 3480 if (fcb->ads) { 3481 Status = read_stream(fcb, data, start, length, bytes_read); 3482 3483 if (!NT_SUCCESS(Status)) 3484 ERR("read_stream returned %08lx\n", Status); 3485 } else { 3486 Status = read_file(fcb, data, start, length, bytes_read, Irp); 3487 3488 if (!NT_SUCCESS(Status)) 3489 ERR("read_file returned %08lx\n", Status); 3490 } 3491 3492 *bytes_read += addon; 3493 TRACE("read %lu bytes\n", *bytes_read); 3494 3495 Irp->IoStatus.Information = *bytes_read; 3496 3497 if (diskacc && Status != STATUS_PENDING) { 3498 PETHREAD thread = NULL; 3499 3500 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) 3501 thread = Irp->Tail.Overlay.Thread; 3502 else if (!IoIsSystemThread(PsGetCurrentThread())) 3503 thread = PsGetCurrentThread(); 3504 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) 3505 thread = PsGetCurrentThread(); 3506 3507 if (thread) 3508 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); 3509 } 3510 3511 return Status; 3512 } 3513 } 3514 3515 _Dispatch_type_(IRP_MJ_READ) 3516 _Function_class_(DRIVER_DISPATCH) 3517 NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { 3518 device_extension* Vcb = DeviceObject->DeviceExtension; 3519 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3520 PFILE_OBJECT FileObject = IrpSp->FileObject; 3521 ULONG bytes_read = 0; 3522 NTSTATUS Status; 3523 bool top_level; 3524 fcb* fcb; 3525 ccb* ccb; 3526 bool acquired_fcb_lock = false, wait; 3527 3528 FsRtlEnterFileSystem(); 3529 3530 top_level = is_top_level(Irp); 3531 3532 TRACE("read\n"); 3533 3534 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { 3535 Status = vol_read(DeviceObject, Irp); 3536 goto exit2; 3537 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { 3538 Status = STATUS_INVALID_PARAMETER; 3539 goto end; 3540 
} 3541 3542 Irp->IoStatus.Information = 0; 3543 3544 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) { 3545 CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress); 3546 3547 Irp->MdlAddress = NULL; 3548 Status = STATUS_SUCCESS; 3549 3550 goto exit; 3551 } 3552 3553 fcb = FileObject->FsContext; 3554 3555 if (!fcb) { 3556 ERR("fcb was NULL\n"); 3557 Status = STATUS_INVALID_PARAMETER; 3558 goto exit; 3559 } 3560 3561 ccb = FileObject->FsContext2; 3562 3563 if (!ccb) { 3564 ERR("ccb was NULL\n"); 3565 Status = STATUS_INVALID_PARAMETER; 3566 goto exit; 3567 } 3568 3569 if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) { 3570 WARN("insufficient privileges\n"); 3571 Status = STATUS_ACCESS_DENIED; 3572 goto exit; 3573 } 3574 3575 if (fcb == Vcb->volume_fcb) { 3576 TRACE("reading volume FCB\n"); 3577 3578 IoSkipCurrentIrpStackLocation(Irp); 3579 3580 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); 3581 3582 goto exit2; 3583 } 3584 3585 if (!(Irp->Flags & IRP_PAGING_IO)) 3586 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL); 3587 3588 wait = IoIsOperationSynchronous(Irp); 3589 3590 // Don't offload jobs when doing paging IO - otherwise this can lead to 3591 // deadlocks in CcCopyRead. 
3592 if (Irp->Flags & IRP_PAGING_IO) 3593 wait = true; 3594 3595 if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) { 3596 IO_STATUS_BLOCK iosb; 3597 3598 CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb); 3599 if (!NT_SUCCESS(iosb.Status)) { 3600 ERR("CcFlushCache returned %08lx\n", iosb.Status); 3601 return iosb.Status; 3602 } 3603 } 3604 3605 if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { 3606 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { 3607 Status = STATUS_PENDING; 3608 IoMarkIrpPending(Irp); 3609 goto exit; 3610 } 3611 3612 acquired_fcb_lock = true; 3613 } 3614 3615 Status = do_read(Irp, wait, &bytes_read); 3616 3617 if (acquired_fcb_lock) 3618 ExReleaseResourceLite(fcb->Header.Resource); 3619 3620 exit: 3621 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO)) 3622 FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0); 3623 3624 end: 3625 Irp->IoStatus.Status = Status; 3626 3627 TRACE("Irp->IoStatus.Status = %08lx\n", Irp->IoStatus.Status); 3628 TRACE("Irp->IoStatus.Information = %Iu\n", Irp->IoStatus.Information); 3629 TRACE("returning %08lx\n", Status); 3630 3631 if (Status != STATUS_PENDING) 3632 IoCompleteRequest(Irp, IO_NO_INCREMENT); 3633 else { 3634 if (!add_thread_job(Vcb, Irp)) 3635 Status = do_read_job(Irp); 3636 } 3637 3638 exit2: 3639 if (top_level) 3640 IoSetTopLevelIrp(NULL); 3641 3642 FsRtlExitFileSystem(); 3643 3644 return Status; 3645 } 3646