/* Copyright (c) Mark Harmstone 2016-17
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */

#include "btrfs_drv.h"

// Outcome of the read IRP issued against one stripe of a chunk.
enum read_data_status {
    ReadDataStatus_Pending,       // IRP still in flight
    ReadDataStatus_Success,       // read completed successfully
    ReadDataStatus_Error,         // read completed with a failure status
    ReadDataStatus_MissingDevice, // underlying device is absent
    ReadDataStatus_Skip           // stripe not needed for this read
};

struct read_data_context;

// Per-stripe state for a multi-device read; one of these exists for each
// stripe of the chunk being read.
typedef struct {
    struct read_data_context* context; // back-pointer to the owning read
    UINT16 stripenum;
    BOOL rewrite;
    PIRP Irp;                          // IRP sent to this stripe's device
    IO_STATUS_BLOCK iosb;              // copied from the IRP on completion
    enum read_data_status status;
    PMDL mdl;
    UINT64 stripestart;                // read offset within the stripe (added to the stripe's device offset)
    UINT64 stripeend;
} read_data_stripe;

// State shared by all stripes of one read. Event is signalled by the
// completion routine when the last outstanding stripe finishes.
typedef struct {
    KEVENT Event;
    NTSTATUS Status;
    chunk* c;
    UINT64 address;                    // logical (btrfs) address being read
    UINT32 buflen;
    LONG num_stripes, stripes_left;    // stripes_left is decremented atomically on each completion
    UINT64 type;
    UINT32 sector_size;
    UINT16 firstoff, startoffstripe, sectors_per_stripe;
    UINT32* csum;                      // expected CRC32Cs, one per sector (NULL if unchecked)
    BOOL tree;                         // TRUE when reading a metadata tree block
    read_data_stripe* stripes;
    UINT8* va;
} read_data_context;

extern BOOL diskacc;
extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
extern tCcCopyReadEx fCcCopyReadEx;
extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;

// Page size assumed by btrfs on-disk structures (as used by btrfs on Linux).
#define LINUX_PAGE_SIZE 4096

// I/O completion routine shared by all stripe reads: records the stripe's
// final IO_STATUS_BLOCK and status, and signals the context's event once the
// last outstanding stripe has completed. Returning
// STATUS_MORE_PROCESSING_REQUIRED stops the I/O manager from continuing
// completion processing of the IRP, which the issuing code still owns.
_Function_class_(IO_COMPLETION_ROUTINE)
#ifdef __REACTOS__
static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
#else
static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
#endif
    read_data_stripe* stripe = conptr;
    read_data_context* context = (read_data_context*)stripe->context;

    UNUSED(DeviceObject);

    stripe->iosb = Irp->IoStatus;

    if (NT_SUCCESS(Irp->IoStatus.Status))
        stripe->status = ReadDataStatus_Success;
    else
        stripe->status = ReadDataStatus_Error;

    // Wake the waiter only when every stripe of this read has completed.
    if (InterlockedDecrement(&context->stripes_left) == 0)
        KeSetEvent(&context->Event, 0, FALSE);

    return STATUS_MORE_PROCESSING_REQUIRED;
}

// Verifies the CRC32C checksums of `sectors` sectors of `data` against the
// expected values in `csum` (one UINT32 per sector). Returns STATUS_SUCCESS,
// STATUS_CRC_ERROR on any mismatch, or another NTSTATUS on failure. Small
// requests are checksummed inline; larger ones are offloaded to the
// calc-job worker threads and waited on.
NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) {
    NTSTATUS Status;
    calc_job* cj;
    UINT32* csum2;

    // From experimenting, it seems that 40 sectors is roughly the crossover
    // point where offloading the crc32 calculation becomes worth it.

    if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) {
        ULONG j;

        // Small request (or effectively uniprocessor): compute inline.
        for (j = 0; j < sectors; j++) {
            UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

            if (crc32 != csum[j]) {
                return STATUS_CRC_ERROR;
            }
        }

        return STATUS_SUCCESS;
    }

    csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG);
    if (!csum2) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // Queue the checksum work on the calc-job threads and block until done.
    Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
    if (!NT_SUCCESS(Status)) {
        ERR("add_calc_job returned %08x\n", Status);
        ExFreePool(csum2);
        return Status;
    }

    KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);

    if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) {
        free_calc_job(cj);
        ExFreePool(csum2);
        return STATUS_CRC_ERROR;
    }

    free_calc_job(cj);
    ExFreePool(csum2);

    return STATUS_SUCCESS;
}

// Validates data read from a DUP/RAID1-style chunk, where each stripe holds
// a complete copy; on checksum or generation failure it re-reads from the
// other mirrors and, if a good copy is found, rewrites it over the bad one.
static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr,
read_data_context* context, CHUNK_ITEM* ci, 139 device** devices, UINT64 generation) { 140 ULONG i; 141 BOOL checksum_error = FALSE; 142 UINT16 j, stripe = 0; 143 NTSTATUS Status; 144 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 145 146 for (j = 0; j < ci->num_stripes; j++) { 147 if (context->stripes[j].status == ReadDataStatus_Error) { 148 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 149 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 150 return context->stripes[j].iosb.Status; 151 } else if (context->stripes[j].status == ReadDataStatus_Success) { 152 stripe = j; 153 break; 154 } 155 } 156 157 if (context->stripes[stripe].status != ReadDataStatus_Success) 158 return STATUS_INTERNAL_ERROR; 159 160 if (context->tree) { 161 tree_header* th = (tree_header*)buf; 162 UINT32 crc32; 163 164 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); 165 166 if (th->address != context->address || crc32 != *((UINT32*)th->csum)) { 167 checksum_error = TRUE; 168 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 169 } else if (generation != 0 && th->generation != generation) { 170 checksum_error = TRUE; 171 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 172 } 173 } else if (context->csum) { 174 #ifdef DEBUG_STATS 175 LARGE_INTEGER time1, time2; 176 177 time1 = KeQueryPerformanceCounter(NULL); 178 #endif 179 Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum); 180 181 if (Status == STATUS_CRC_ERROR) { 182 checksum_error = TRUE; 183 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 184 } else if (!NT_SUCCESS(Status)) { 185 ERR("check_csum returned %08x\n", Status); 186 return Status; 187 } 188 #ifdef DEBUG_STATS 189 time2 = KeQueryPerformanceCounter(NULL); 190 191 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 192 #endif 193 } 194 
195 if (!checksum_error) 196 return STATUS_SUCCESS; 197 198 if (ci->num_stripes == 1) 199 return STATUS_CRC_ERROR; 200 201 if (context->tree) { 202 tree_header* t2; 203 BOOL recovered = FALSE; 204 205 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 206 if (!t2) { 207 ERR("out of memory\n"); 208 return STATUS_INSUFFICIENT_RESOURCES; 209 } 210 211 for (j = 0; j < ci->num_stripes; j++) { 212 if (j != stripe && devices[j] && devices[j]->devobj) { 213 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE); 214 if (!NT_SUCCESS(Status)) { 215 WARN("sync_read_phys returned %08x\n", Status); 216 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 217 } else { 218 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 219 220 if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { 221 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 222 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 223 recovered = TRUE; 224 225 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 226 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart, 227 t2, Vcb->superblock.node_size); 228 if (!NT_SUCCESS(Status)) { 229 WARN("write_data_phys returned %08x\n", Status); 230 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 231 } 232 } 233 234 break; 235 } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) 236 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 237 else 238 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS); 239 } 240 } 241 } 242 243 if (!recovered) { 244 ERR("unrecoverable checksum error at %llx\n", addr); 245 ExFreePool(t2); 246 return 
STATUS_CRC_ERROR; 247 } 248 249 ExFreePool(t2); 250 } else { 251 ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size; 252 UINT8* sector; 253 254 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 255 if (!sector) { 256 ERR("out of memory\n"); 257 return STATUS_INSUFFICIENT_RESOURCES; 258 } 259 260 for (i = 0; i < sectors; i++) { 261 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 262 263 if (context->csum[i] != crc32) { 264 BOOL recovered = FALSE; 265 266 for (j = 0; j < ci->num_stripes; j++) { 267 if (j != stripe && devices[j] && devices[j]->devobj) { 268 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 269 Vcb->superblock.sector_size, sector, FALSE); 270 if (!NT_SUCCESS(Status)) { 271 WARN("sync_read_phys returned %08x\n", Status); 272 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 273 } else { 274 UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 275 276 if (crc32b == context->csum[i]) { 277 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 278 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 279 recovered = TRUE; 280 281 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 282 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 283 sector, Vcb->superblock.sector_size); 284 if (!NT_SUCCESS(Status)) { 285 WARN("write_data_phys returned %08x\n", Status); 286 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 287 } 288 } 289 290 break; 291 } else 292 log_device_error(Vcb, devices[j], 
BTRFS_DEV_STAT_CORRUPTION_ERRORS); 293 } 294 } 295 } 296 297 if (!recovered) { 298 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 299 ExFreePool(sector); 300 return STATUS_CRC_ERROR; 301 } 302 } 303 } 304 305 ExFreePool(sector); 306 } 307 308 return STATUS_SUCCESS; 309 } 310 311 static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, 312 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { 313 UINT64 i; 314 315 for (i = 0; i < ci->num_stripes; i++) { 316 if (context->stripes[i].status == ReadDataStatus_Error) { 317 WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); 318 log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS); 319 return context->stripes[i].iosb.Status; 320 } 321 } 322 323 if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries 324 tree_header* th = (tree_header*)buf; 325 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 326 327 if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) { 328 UINT64 off; 329 UINT16 stripe; 330 331 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe); 332 333 ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 334 335 if (crc32 != *((UINT32*)th->csum)) { 336 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); 337 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 338 return STATUS_CRC_ERROR; 339 } else if (addr != th->address) { 340 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); 341 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 342 return STATUS_CRC_ERROR; 343 } else if (generation != 0 && generation != th->generation) { 344 
WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); 345 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 346 return STATUS_CRC_ERROR; 347 } 348 } 349 } else if (context->csum) { 350 NTSTATUS Status; 351 #ifdef DEBUG_STATS 352 LARGE_INTEGER time1, time2; 353 354 time1 = KeQueryPerformanceCounter(NULL); 355 #endif 356 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 357 358 if (Status == STATUS_CRC_ERROR) { 359 for (i = 0; i < length / Vcb->superblock.sector_size; i++) { 360 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 361 362 if (context->csum[i] != crc32) { 363 UINT64 off; 364 UINT16 stripe; 365 366 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe); 367 368 ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 369 370 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 371 372 return Status; 373 } 374 } 375 376 return Status; 377 } else if (!NT_SUCCESS(Status)) { 378 ERR("check_csum returned %08x\n", Status); 379 return Status; 380 } 381 #ifdef DEBUG_STATS 382 time2 = KeQueryPerformanceCounter(NULL); 383 384 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 385 #endif 386 } 387 388 return STATUS_SUCCESS; 389 } 390 391 static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, 392 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { 393 UINT64 i; 394 UINT16 j, stripe; 395 NTSTATUS Status; 396 BOOL checksum_error = FALSE; 397 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 398 399 for (j = 0; j < ci->num_stripes; j++) { 400 if (context->stripes[j].status == ReadDataStatus_Error) { 401 WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status); 
402 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 403 return context->stripes[j].iosb.Status; 404 } else if (context->stripes[j].status == ReadDataStatus_Success) 405 stripe = j; 406 } 407 408 if (context->tree) { 409 tree_header* th = (tree_header*)buf; 410 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 411 412 if (crc32 != *((UINT32*)th->csum)) { 413 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); 414 checksum_error = TRUE; 415 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 416 } else if (addr != th->address) { 417 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); 418 checksum_error = TRUE; 419 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 420 } else if (generation != 0 && generation != th->generation) { 421 WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); 422 checksum_error = TRUE; 423 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 424 } 425 } else if (context->csum) { 426 #ifdef DEBUG_STATS 427 LARGE_INTEGER time1, time2; 428 429 time1 = KeQueryPerformanceCounter(NULL); 430 #endif 431 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 432 433 if (Status == STATUS_CRC_ERROR) 434 checksum_error = TRUE; 435 else if (!NT_SUCCESS(Status)) { 436 ERR("check_csum returned %08x\n", Status); 437 return Status; 438 } 439 #ifdef DEBUG_STATS 440 time2 = KeQueryPerformanceCounter(NULL); 441 442 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 443 #endif 444 } 445 446 if (!checksum_error) 447 return STATUS_SUCCESS; 448 449 if (context->tree) { 450 tree_header* t2; 451 UINT64 off; 452 UINT16 badsubstripe = 0; 453 BOOL recovered = FALSE; 454 455 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 456 if (!t2) { 457 ERR("out of memory\n"); 458 return 
STATUS_INSUFFICIENT_RESOURCES; 459 } 460 461 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe); 462 463 stripe *= ci->sub_stripes; 464 465 for (j = 0; j < ci->sub_stripes; j++) { 466 if (context->stripes[stripe + j].status == ReadDataStatus_Success) { 467 badsubstripe = j; 468 break; 469 } 470 } 471 472 for (j = 0; j < ci->sub_stripes; j++) { 473 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) { 474 Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off, 475 Vcb->superblock.node_size, (UINT8*)t2, FALSE); 476 if (!NT_SUCCESS(Status)) { 477 WARN("sync_read_phys returned %08x\n", Status); 478 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS); 479 } else { 480 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 481 482 if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { 483 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 484 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id); 485 recovered = TRUE; 486 487 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad 488 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off, 489 t2, Vcb->superblock.node_size); 490 if (!NT_SUCCESS(Status)) { 491 WARN("write_data_phys returned %08x\n", Status); 492 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 493 } 494 } 495 496 break; 497 } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) 498 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 499 else 500 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS); 501 } 502 } 
503 } 504 505 if (!recovered) { 506 ERR("unrecoverable checksum error at %llx\n", addr); 507 ExFreePool(t2); 508 return STATUS_CRC_ERROR; 509 } 510 511 ExFreePool(t2); 512 } else { 513 ULONG sectors = length / Vcb->superblock.sector_size; 514 UINT8* sector; 515 516 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 517 if (!sector) { 518 ERR("out of memory\n"); 519 return STATUS_INSUFFICIENT_RESOURCES; 520 } 521 522 for (i = 0; i < sectors; i++) { 523 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 524 525 if (context->csum[i] != crc32) { 526 UINT64 off; 527 UINT16 stripe2, badsubstripe = 0; 528 BOOL recovered = FALSE; 529 530 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 531 ci->num_stripes / ci->sub_stripes, &off, &stripe2); 532 533 stripe2 *= ci->sub_stripes; 534 535 for (j = 0; j < ci->sub_stripes; j++) { 536 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) { 537 badsubstripe = j; 538 break; 539 } 540 } 541 542 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 543 544 for (j = 0; j < ci->sub_stripes; j++) { 545 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) { 546 Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off, 547 Vcb->superblock.sector_size, sector, FALSE); 548 if (!NT_SUCCESS(Status)) { 549 WARN("sync_read_phys returned %08x\n", Status); 550 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS); 551 } else { 552 UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 553 554 if (crc32b == context->csum[i]) { 555 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 556 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, 
Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id); 557 recovered = TRUE; 558 559 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad 560 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off, 561 sector, Vcb->superblock.sector_size); 562 if (!NT_SUCCESS(Status)) { 563 WARN("write_data_phys returned %08x\n", Status); 564 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS); 565 } 566 } 567 568 break; 569 } else 570 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 571 } 572 } 573 } 574 575 if (!recovered) { 576 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 577 ExFreePool(sector); 578 return STATUS_CRC_ERROR; 579 } 580 } 581 } 582 583 ExFreePool(sector); 584 } 585 586 return STATUS_SUCCESS; 587 } 588 589 static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci, 590 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) { 591 ULONG i; 592 NTSTATUS Status; 593 BOOL checksum_error = FALSE; 594 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 595 UINT16 j, stripe; 596 BOOL no_success = TRUE; 597 598 for (j = 0; j < ci->num_stripes; j++) { 599 if (context->stripes[j].status == ReadDataStatus_Error) { 600 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 601 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 602 return context->stripes[j].iosb.Status; 603 } else if (context->stripes[j].status == ReadDataStatus_Success) { 604 stripe = j; 605 no_success = FALSE; 606 } 607 } 608 609 if (c) { // check partial stripes 610 LIST_ENTRY* le; 611 UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length; 612 613 ExAcquireResourceSharedLite(&c->partial_stripes_lock, 
TRUE); 614 615 le = c->partial_stripes.Flink; 616 while (le != &c->partial_stripes) { 617 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 618 619 if (ps->address + ps_length > addr && ps->address < addr + length) { 620 ULONG runlength, index; 621 622 runlength = RtlFindFirstRunClear(&ps->bmp, &index); 623 624 while (runlength != 0) { 625 UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size); 626 UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); 627 UINT64 start = max(runstart, addr); 628 UINT64 end = min(runend, addr + length); 629 630 if (end > start) 631 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start)); 632 633 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); 634 } 635 } else if (ps->address >= addr + length) 636 break; 637 638 le = le->Flink; 639 } 640 641 ExReleaseResourceLite(&c->partial_stripes_lock); 642 } 643 644 if (context->tree) { 645 tree_header* th = (tree_header*)buf; 646 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 647 648 if (addr != th->address || crc32 != *((UINT32*)th->csum)) { 649 checksum_error = TRUE; 650 if (!no_success && !degraded) 651 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 652 } else if (generation != 0 && generation != th->generation) { 653 checksum_error = TRUE; 654 if (!no_success && !degraded) 655 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 656 } 657 } else if (context->csum) { 658 #ifdef DEBUG_STATS 659 LARGE_INTEGER time1, time2; 660 661 time1 = KeQueryPerformanceCounter(NULL); 662 #endif 663 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 664 665 if (Status == STATUS_CRC_ERROR) { 666 if (!degraded) 667 WARN("checksum error\n"); 668 checksum_error = TRUE; 669 } else if (!NT_SUCCESS(Status)) { 670 ERR("check_csum returned %08x\n", Status); 671 
return Status; 672 } 673 674 #ifdef DEBUG_STATS 675 time2 = KeQueryPerformanceCounter(NULL); 676 677 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 678 #endif 679 } else if (degraded) 680 checksum_error = TRUE; 681 682 if (!checksum_error) 683 return STATUS_SUCCESS; 684 685 if (context->tree) { 686 UINT16 parity; 687 UINT64 off; 688 BOOL recovered = FALSE, first = TRUE, failed = FALSE; 689 UINT8* t2; 690 691 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG); 692 if (!t2) { 693 ERR("out of memory\n"); 694 return STATUS_INSUFFICIENT_RESOURCES; 695 } 696 697 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe); 698 699 parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 700 701 stripe = (parity + stripe + 1) % ci->num_stripes; 702 703 for (j = 0; j < ci->num_stripes; j++) { 704 if (j != stripe) { 705 if (devices[j] && devices[j]->devobj) { 706 if (first) { 707 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE); 708 if (!NT_SUCCESS(Status)) { 709 ERR("sync_read_phys returned %08x\n", Status); 710 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 711 failed = TRUE; 712 break; 713 } 714 715 first = FALSE; 716 } else { 717 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE); 718 if (!NT_SUCCESS(Status)) { 719 ERR("sync_read_phys returned %08x\n", Status); 720 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 721 failed = TRUE; 722 break; 723 } 724 725 do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size); 726 } 727 } else { 728 failed = TRUE; 729 break; 730 } 731 } 732 } 733 734 if (!failed) { 735 tree_header* t3 = (tree_header*)t2; 736 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum)); 737 738 if 
(t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) { 739 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 740 741 if (!degraded) 742 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 743 744 recovered = TRUE; 745 746 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 747 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size); 748 if (!NT_SUCCESS(Status)) { 749 WARN("write_data_phys returned %08x\n", Status); 750 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 751 } 752 } 753 } 754 } 755 756 if (!recovered) { 757 ERR("unrecoverable checksum error at %llx\n", addr); 758 ExFreePool(t2); 759 return STATUS_CRC_ERROR; 760 } 761 762 ExFreePool(t2); 763 } else { 764 ULONG sectors = length / Vcb->superblock.sector_size; 765 UINT8* sector; 766 767 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG); 768 if (!sector) { 769 ERR("out of memory\n"); 770 return STATUS_INSUFFICIENT_RESOURCES; 771 } 772 773 for (i = 0; i < sectors; i++) { 774 UINT16 parity; 775 UINT64 off; 776 UINT32 crc32; 777 778 if (context->csum) 779 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 780 781 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 782 ci->num_stripes - 1, &off, &stripe); 783 784 parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 785 786 stripe = (parity + stripe + 1) % ci->num_stripes; 787 788 if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) { 789 BOOL recovered = FALSE, first = TRUE, failed = FALSE; 790 791 if (devices[stripe] && 
devices[stripe]->devobj) 792 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS); 793 794 for (j = 0; j < ci->num_stripes; j++) { 795 if (j != stripe) { 796 if (devices[j] && devices[j]->devobj) { 797 if (first) { 798 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE); 799 if (!NT_SUCCESS(Status)) { 800 ERR("sync_read_phys returned %08x\n", Status); 801 failed = TRUE; 802 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 803 break; 804 } 805 806 first = FALSE; 807 } else { 808 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE); 809 if (!NT_SUCCESS(Status)) { 810 ERR("sync_read_phys returned %08x\n", Status); 811 failed = TRUE; 812 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 813 break; 814 } 815 816 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size); 817 } 818 } else { 819 failed = TRUE; 820 break; 821 } 822 } 823 } 824 825 if (!failed) { 826 if (context->csum) 827 crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 828 829 if (!context->csum || crc32 == context->csum[i]) { 830 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 831 832 if (!degraded) 833 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 834 835 recovered = TRUE; 836 837 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 838 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, 839 sector, Vcb->superblock.sector_size); 840 if (!NT_SUCCESS(Status)) { 841 WARN("write_data_phys returned %08x\n", Status); 842 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 843 } 844 } 845 } 846 } 847 848 if (!recovered) { 849 
ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                ExFreePool(sector);
                return STATUS_CRC_ERROR;
            }
        }
    }

    ExFreePool(sector);
}

return STATUS_SUCCESS;
}

/* Reconstructs the contents of up to two missing RAID6 stripes.
 * `sectors` points to num_stripes consecutive sector_size-byte buffers laid
 * out as the data stripes followed by p (XOR parity) and then q
 * (Reed-Solomon parity). missing1/missing2 are the indices of the two
 * unreadable stripes; the recovered data is written to `out` (one sector
 * when rebuilding from q alone, two when rebuilding from p and q). The
 * GF(2^8) arithmetic follows H. Peter Anvin's "The mathematics of RAID-6";
 * galois_double/galois_divpower/gmul/gdiv/gpow2 are assumed to implement
 * that field — defined elsewhere in the driver. */
void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) {
    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        // One of the missing stripes is p itself, so only one data stripe
        // needs recovering; it can be solved from q and the surviving data.
        UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
        UINT16 stripe;

        stripe = num_stripes - 3;

        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        // Horner's rule: repeatedly multiply the accumulator by the
        // generator and fold in the next surviving data stripe.
        do {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        } while (stripe > 0);

        // Add q, then divide out the generator power of the missing index.
        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        if (missing != 0)
            galois_divpower(out, (UINT8)missing, sector_size);
    } else { // reconstruct from p and q
        // Two data stripes x and y are missing; solve the two-unknown
        // system using both parities (Anvin's two-disk recovery).
        UINT16 x, y, stripe;
        UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
        UINT32 j;

        stripe = num_stripes - 3;

        // qxy/pxy accumulate the q and p syndromes of the surviving stripes.
        pxy = out + sector_size;
        qxy = out;

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);

            if (stripe == missing1)
                x = stripe;
            else
                y = stripe;
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        do {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            } else if (stripe == missing1)
                x = stripe;
            else if (stripe
== missing2) 921 y = stripe; 922 } while (stripe > 0); 923 924 gyx = gpow2(y > x ? (y-x) : (255-x+y)); 925 gx = gpow2(255-x); 926 927 denom = gdiv(1, gyx ^ 1); 928 a = gmul(gyx, denom); 929 b = gmul(gx, denom); 930 931 p = sectors + ((num_stripes - 2) * sector_size); 932 q = sectors + ((num_stripes - 1) * sector_size); 933 934 for (j = 0; j < sector_size; j++) { 935 *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); 936 937 p++; 938 q++; 939 pxy++; 940 qxy++; 941 } 942 943 do_xor(out + sector_size, out, sector_size); 944 do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size); 945 } 946 } 947 948 static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci, 949 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) { 950 NTSTATUS Status; 951 ULONG i; 952 BOOL checksum_error = FALSE; 953 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 954 UINT16 stripe, j; 955 BOOL no_success = TRUE; 956 957 for (j = 0; j < ci->num_stripes; j++) { 958 if (context->stripes[j].status == ReadDataStatus_Error) { 959 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 960 961 if (devices[j]) 962 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 963 return context->stripes[j].iosb.Status; 964 } else if (context->stripes[j].status == ReadDataStatus_Success) { 965 stripe = j; 966 no_success = FALSE; 967 } 968 } 969 970 if (c) { // check partial stripes 971 LIST_ENTRY* le; 972 UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length; 973 974 ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE); 975 976 le = c->partial_stripes.Flink; 977 while (le != &c->partial_stripes) { 978 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 979 980 if (ps->address + ps_length > addr && ps->address < addr + length) { 981 ULONG runlength, index; 982 983 runlength = RtlFindFirstRunClear(&ps->bmp, &index); 984 985 
while (runlength != 0) { 986 UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size); 987 UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); 988 UINT64 start = max(runstart, addr); 989 UINT64 end = min(runend, addr + length); 990 991 if (end > start) 992 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start)); 993 994 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); 995 } 996 } else if (ps->address >= addr + length) 997 break; 998 999 le = le->Flink; 1000 } 1001 1002 ExReleaseResourceLite(&c->partial_stripes_lock); 1003 } 1004 1005 if (context->tree) { 1006 tree_header* th = (tree_header*)buf; 1007 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1008 1009 if (addr != th->address || crc32 != *((UINT32*)th->csum)) { 1010 checksum_error = TRUE; 1011 if (!no_success && !degraded && devices[stripe]) 1012 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1013 } else if (generation != 0 && generation != th->generation) { 1014 checksum_error = TRUE; 1015 if (!no_success && !degraded && devices[stripe]) 1016 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 1017 } 1018 } else if (context->csum) { 1019 #ifdef DEBUG_STATS 1020 LARGE_INTEGER time1, time2; 1021 1022 time1 = KeQueryPerformanceCounter(NULL); 1023 #endif 1024 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 1025 1026 if (Status == STATUS_CRC_ERROR) { 1027 if (!degraded) 1028 WARN("checksum error\n"); 1029 checksum_error = TRUE; 1030 } else if (!NT_SUCCESS(Status)) { 1031 ERR("check_csum returned %08x\n", Status); 1032 return Status; 1033 } 1034 #ifdef DEBUG_STATS 1035 time2 = KeQueryPerformanceCounter(NULL); 1036 1037 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 1038 #endif 1039 } else if (degraded) 1040 checksum_error = TRUE; 1041 1042 if (!checksum_error) 1043 
return STATUS_SUCCESS; 1044 1045 if (context->tree) { 1046 UINT8* sector; 1047 UINT16 k, physstripe, parity1, parity2, error_stripe; 1048 UINT64 off; 1049 BOOL recovered = FALSE, failed = FALSE; 1050 ULONG num_errors = 0; 1051 1052 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG); 1053 if (!sector) { 1054 ERR("out of memory\n"); 1055 return STATUS_INSUFFICIENT_RESOURCES; 1056 } 1057 1058 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe); 1059 1060 parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1061 parity2 = (parity1 + 1) % ci->num_stripes; 1062 1063 physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1064 1065 j = (parity2 + 1) % ci->num_stripes; 1066 1067 for (k = 0; k < ci->num_stripes - 1; k++) { 1068 if (j != physstripe) { 1069 if (devices[j] && devices[j]->devobj) { 1070 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE); 1071 if (!NT_SUCCESS(Status)) { 1072 ERR("sync_read_phys returned %08x\n", Status); 1073 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1074 num_errors++; 1075 error_stripe = k; 1076 1077 if (num_errors > 1) { 1078 failed = TRUE; 1079 break; 1080 } 1081 } 1082 } else { 1083 num_errors++; 1084 error_stripe = k; 1085 1086 if (num_errors > 1) { 1087 failed = TRUE; 1088 break; 1089 } 1090 } 1091 } 1092 1093 j = (j + 1) % ci->num_stripes; 1094 } 1095 1096 if (!failed) { 1097 if (num_errors == 0) { 1098 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size)); 1099 UINT32 crc32; 1100 1101 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), 1102 Vcb->superblock.node_size); 1103 1104 for (j = 0; j < ci->num_stripes - 2; j++) { 1105 if (j != stripe) 1106 do_xor(sector + (stripe * 
Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size); 1107 } 1108 1109 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1110 1111 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) { 1112 RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1113 1114 if (devices[physstripe] && devices[physstripe]->devobj) 1115 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id); 1116 1117 recovered = TRUE; 1118 1119 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1120 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1121 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1122 if (!NT_SUCCESS(Status)) { 1123 WARN("write_data_phys returned %08x\n", Status); 1124 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1125 } 1126 } 1127 } 1128 } 1129 1130 if (!recovered) { 1131 UINT32 crc32; 1132 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size)); 1133 BOOL read_q = FALSE; 1134 1135 if (devices[parity2] && devices[parity2]->devobj) { 1136 Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off, 1137 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE); 1138 if (!NT_SUCCESS(Status)) { 1139 ERR("sync_read_phys returned %08x\n", Status); 1140 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1141 } else 1142 read_q = TRUE; 1143 } 1144 1145 if (read_q) { 1146 if (num_errors == 1) { 1147 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1148 1149 crc32 = 
~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1150 1151 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) 1152 recovered = TRUE; 1153 } else { 1154 for (j = 0; j < ci->num_stripes - 1; j++) { 1155 if (j != stripe) { 1156 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1157 1158 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1159 1160 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) { 1161 recovered = TRUE; 1162 error_stripe = j; 1163 break; 1164 } 1165 } 1166 } 1167 } 1168 } 1169 1170 if (recovered) { 1171 UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1172 1173 if (devices[physstripe] && devices[physstripe]->devobj) 1174 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id); 1175 1176 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1177 1178 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1179 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1180 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1181 if (!NT_SUCCESS(Status)) { 1182 WARN("write_data_phys returned %08x\n", Status); 1183 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1184 } 1185 } 1186 1187 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1188 if (error_stripe == ci->num_stripes - 2) { 1189 ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id); 1190 1191 log_device_error(Vcb, devices[error_stripe_phys], 
BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1192 1193 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size); 1194 1195 for (j = 0; j < ci->num_stripes - 2; j++) { 1196 if (j == stripe) { 1197 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size), 1198 Vcb->superblock.node_size); 1199 } else { 1200 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), 1201 Vcb->superblock.node_size); 1202 } 1203 } 1204 } else { 1205 ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length), 1206 devices[error_stripe_phys]->devitem.dev_id); 1207 1208 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1209 1210 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size), 1211 sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size); 1212 } 1213 } 1214 1215 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1216 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off, 1217 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1218 if (!NT_SUCCESS(Status)) { 1219 WARN("write_data_phys returned %08x\n", Status); 1220 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 1221 } 1222 } 1223 } 1224 } 1225 } 1226 1227 if (!recovered) { 1228 ERR("unrecoverable checksum error at %llx\n", addr); 1229 ExFreePool(sector); 1230 return STATUS_CRC_ERROR; 1231 } 1232 1233 ExFreePool(sector); 1234 } else { 1235 ULONG sectors = length / Vcb->superblock.sector_size; 1236 UINT8* sector; 1237 1238 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG); 1239 if 
(!sector) { 1240 ERR("out of memory\n"); 1241 return STATUS_INSUFFICIENT_RESOURCES; 1242 } 1243 1244 for (i = 0; i < sectors; i++) { 1245 UINT64 off; 1246 UINT16 physstripe, parity1, parity2; 1247 UINT32 crc32; 1248 1249 if (context->csum) 1250 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1251 1252 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 1253 ci->num_stripes - 2, &off, &stripe); 1254 1255 parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1256 parity2 = (parity1 + 1) % ci->num_stripes; 1257 1258 physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1259 1260 if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) { 1261 UINT16 k, error_stripe; 1262 BOOL recovered = FALSE, failed = FALSE; 1263 ULONG num_errors = 0; 1264 1265 if (devices[physstripe] && devices[physstripe]->devobj) 1266 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS); 1267 1268 j = (parity2 + 1) % ci->num_stripes; 1269 1270 for (k = 0; k < ci->num_stripes - 1; k++) { 1271 if (j != physstripe) { 1272 if (devices[j] && devices[j]->devobj) { 1273 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE); 1274 if (!NT_SUCCESS(Status)) { 1275 ERR("sync_read_phys returned %08x\n", Status); 1276 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1277 num_errors++; 1278 error_stripe = k; 1279 1280 if (num_errors > 1) { 1281 failed = TRUE; 1282 break; 1283 } 1284 } 1285 } else { 1286 num_errors++; 1287 error_stripe = k; 1288 1289 if (num_errors > 1) { 1290 failed = TRUE; 1291 break; 1292 } 1293 } 1294 } 1295 1296 j = (j + 1) % ci->num_stripes; 1297 } 1298 1299 if (!failed) { 1300 if (num_errors == 0) { 1301 
RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1302 1303 for (j = 0; j < ci->num_stripes - 2; j++) { 1304 if (j != stripe) 1305 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1306 } 1307 1308 if (context->csum) 1309 crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1310 1311 if (!context->csum || crc32 == context->csum[i]) { 1312 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1313 1314 if (devices[physstripe] && devices[physstripe]->devobj) 1315 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), 1316 devices[physstripe]->devitem.dev_id); 1317 1318 recovered = TRUE; 1319 1320 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1321 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1322 sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1323 if (!NT_SUCCESS(Status)) { 1324 WARN("write_data_phys returned %08x\n", Status); 1325 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1326 } 1327 } 1328 } 1329 } 1330 1331 if (!recovered) { 1332 BOOL read_q = FALSE; 1333 1334 if (devices[parity2] && devices[parity2]->devobj) { 1335 Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off, 1336 Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE); 1337 if (!NT_SUCCESS(Status)) { 1338 ERR("sync_read_phys returned %08x\n", Status); 1339 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS); 1340 } else 1341 read_q = TRUE; 1342 } 1343 
1344 if (read_q) { 1345 if (num_errors == 1) { 1346 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size)); 1347 1348 if (!devices[physstripe] || !devices[physstripe]->devobj) 1349 recovered = TRUE; 1350 else { 1351 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1352 1353 if (crc32 == context->csum[i]) 1354 recovered = TRUE; 1355 } 1356 } else { 1357 for (j = 0; j < ci->num_stripes - 1; j++) { 1358 if (j != stripe) { 1359 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size)); 1360 1361 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1362 1363 if (crc32 == context->csum[i]) { 1364 recovered = TRUE; 1365 error_stripe = j; 1366 break; 1367 } 1368 } 1369 } 1370 } 1371 } 1372 1373 if (recovered) { 1374 UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1375 1376 if (devices[physstripe] && devices[physstripe]->devobj) 1377 ERR("recovering from checksum error at %llx, device %llx\n", 1378 addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id); 1379 1380 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1381 1382 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1383 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1384 sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1385 if (!NT_SUCCESS(Status)) { 1386 WARN("write_data_phys returned %08x\n", Status); 1387 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1388 } 1389 } 
1390 1391 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1392 if (error_stripe == ci->num_stripes - 2) { 1393 ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), 1394 devices[error_stripe_phys]->devitem.dev_id); 1395 1396 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1397 1398 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1399 1400 for (j = 0; j < ci->num_stripes - 2; j++) { 1401 if (j == stripe) { 1402 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), 1403 Vcb->superblock.sector_size); 1404 } else { 1405 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), 1406 Vcb->superblock.sector_size); 1407 } 1408 } 1409 } else { 1410 ERR("recovering from checksum error at %llx, device %llx\n", 1411 addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length), 1412 devices[error_stripe_phys]->devitem.dev_id); 1413 1414 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1415 1416 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size), 1417 sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1418 } 1419 } 1420 1421 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1422 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off, 1423 sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1424 if (!NT_SUCCESS(Status)) { 1425 WARN("write_data_phys returned %08x\n", Status); 1426 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 
1427 } 1428 } 1429 } 1430 } 1431 } 1432 1433 if (!recovered) { 1434 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 1435 ExFreePool(sector); 1436 return STATUS_CRC_ERROR; 1437 } 1438 } 1439 } 1440 1441 ExFreePool(sector); 1442 } 1443 1444 return STATUS_SUCCESS; 1445 } 1446 1447 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum, 1448 _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read, 1449 _In_ ULONG priority) { 1450 CHUNK_ITEM* ci; 1451 CHUNK_ITEM_STRIPE* cis; 1452 read_data_context context; 1453 UINT64 type, offset, total_reading = 0; 1454 NTSTATUS Status; 1455 device** devices = NULL; 1456 UINT16 i, startoffstripe, allowed_missing, missing_devices = 0; 1457 UINT8* dummypage = NULL; 1458 PMDL dummy_mdl = NULL; 1459 BOOL need_to_wait; 1460 UINT64 lockaddr, locklen; 1461 #ifdef DEBUG_STATS 1462 LARGE_INTEGER time1, time2; 1463 #endif 1464 1465 if (Vcb->log_to_phys_loaded) { 1466 if (!c) { 1467 c = get_chunk_from_address(Vcb, addr); 1468 1469 if (!c) { 1470 ERR("get_chunk_from_address failed\n"); 1471 return STATUS_INTERNAL_ERROR; 1472 } 1473 } 1474 1475 ci = c->chunk_item; 1476 offset = c->offset; 1477 devices = c->devices; 1478 1479 if (pc) 1480 *pc = c; 1481 } else { 1482 LIST_ENTRY* le = Vcb->sys_chunks.Flink; 1483 1484 ci = NULL; 1485 1486 c = NULL; 1487 while (le != &Vcb->sys_chunks) { 1488 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); 1489 1490 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { 1491 CHUNK_ITEM* chunk_item = sc->data; 1492 1493 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { 1494 ci = chunk_item; 1495 offset = sc->key.offset; 1496 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; 
1497 1498 devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); 1499 if (!devices) { 1500 ERR("out of memory\n"); 1501 return STATUS_INSUFFICIENT_RESOURCES; 1502 } 1503 1504 for (i = 0; i < ci->num_stripes; i++) { 1505 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); 1506 } 1507 1508 break; 1509 } 1510 } 1511 1512 le = le->Flink; 1513 } 1514 1515 if (!ci) { 1516 ERR("could not find chunk for %llx in bootstrap\n", addr); 1517 return STATUS_INTERNAL_ERROR; 1518 } 1519 1520 if (pc) 1521 *pc = NULL; 1522 } 1523 1524 if (ci->type & BLOCK_FLAG_DUPLICATE) { 1525 type = BLOCK_FLAG_DUPLICATE; 1526 allowed_missing = ci->num_stripes - 1; 1527 } else if (ci->type & BLOCK_FLAG_RAID0) { 1528 type = BLOCK_FLAG_RAID0; 1529 allowed_missing = 0; 1530 } else if (ci->type & BLOCK_FLAG_RAID1) { 1531 type = BLOCK_FLAG_DUPLICATE; 1532 allowed_missing = 1; 1533 } else if (ci->type & BLOCK_FLAG_RAID10) { 1534 type = BLOCK_FLAG_RAID10; 1535 allowed_missing = 1; 1536 } else if (ci->type & BLOCK_FLAG_RAID5) { 1537 type = BLOCK_FLAG_RAID5; 1538 allowed_missing = 1; 1539 } else if (ci->type & BLOCK_FLAG_RAID6) { 1540 type = BLOCK_FLAG_RAID6; 1541 allowed_missing = 2; 1542 } else { // SINGLE 1543 type = BLOCK_FLAG_DUPLICATE; 1544 allowed_missing = 0; 1545 } 1546 1547 cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 1548 1549 RtlZeroMemory(&context, sizeof(read_data_context)); 1550 KeInitializeEvent(&context.Event, NotificationEvent, FALSE); 1551 1552 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); 1553 if (!context.stripes) { 1554 ERR("out of memory\n"); 1555 return STATUS_INSUFFICIENT_RESOURCES; 1556 } 1557 1558 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) { 1559 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen); 1560 chunk_lock_range(Vcb, c, lockaddr, locklen); 1561 } 1562 1563 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes); 1564 1565 
context.buflen = length; 1566 context.num_stripes = ci->num_stripes; 1567 context.stripes_left = context.num_stripes; 1568 context.sector_size = Vcb->superblock.sector_size; 1569 context.csum = csum; 1570 context.tree = is_tree; 1571 context.type = type; 1572 1573 if (type == BLOCK_FLAG_RAID0) { 1574 UINT64 startoff, endoff; 1575 UINT16 endoffstripe, stripe; 1576 UINT32 *stripeoff, pos; 1577 PMDL master_mdl; 1578 PFN_NUMBER* pfns; 1579 1580 // FIXME - test this still works if page size isn't the same as sector size 1581 1582 // This relies on the fact that MDLs are followed in memory by the page file numbers, 1583 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0 1584 // data for you without doing a memcpy yourself. 1585 // MDLs are officially opaque, so this might very well break in future versions of Windows. 1586 1587 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); 1588 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); 1589 1590 if (file_read) { 1591 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL 1592 // with duplicated dummy PFNs, which confuse check_csum. Ah well. 1593 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested. 
1594 1595 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1596 1597 if (!context.va) { 1598 ERR("out of memory\n"); 1599 Status = STATUS_INSUFFICIENT_RESOURCES; 1600 goto exit; 1601 } 1602 } else 1603 context.va = buf; 1604 1605 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1606 if (!master_mdl) { 1607 ERR("out of memory\n"); 1608 Status = STATUS_INSUFFICIENT_RESOURCES; 1609 goto exit; 1610 } 1611 1612 Status = STATUS_SUCCESS; 1613 1614 _SEH2_TRY { 1615 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1616 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1617 Status = _SEH2_GetExceptionCode(); 1618 } _SEH2_END; 1619 1620 if (!NT_SUCCESS(Status)) { 1621 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1622 IoFreeMdl(master_mdl); 1623 goto exit; 1624 } 1625 1626 pfns = (PFN_NUMBER*)(master_mdl + 1); 1627 1628 for (i = 0; i < ci->num_stripes; i++) { 1629 if (startoffstripe > i) 1630 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1631 else if (startoffstripe == i) 1632 context.stripes[i].stripestart = startoff; 1633 else 1634 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length); 1635 1636 if (endoffstripe > i) 1637 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1638 else if (endoffstripe == i) 1639 context.stripes[i].stripeend = endoff + 1; 1640 else 1641 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length); 1642 1643 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 1644 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); 1645 1646 if (!context.stripes[i].mdl) { 1647 ERR("IoAllocateMdl failed\n"); 1648 MmUnlockPages(master_mdl); 1649 IoFreeMdl(master_mdl); 1650 Status = STATUS_INSUFFICIENT_RESOURCES; 1651 goto exit; 1652 } 1653 } 1654 } 1655 1656 stripeoff = 
ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 1657 if (!stripeoff) { 1658 ERR("out of memory\n"); 1659 MmUnlockPages(master_mdl); 1660 IoFreeMdl(master_mdl); 1661 Status = STATUS_INSUFFICIENT_RESOURCES; 1662 goto exit; 1663 } 1664 1665 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 1666 1667 pos = 0; 1668 stripe = startoffstripe; 1669 while (pos < length) { 1670 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 1671 1672 if (pos == 0) { 1673 UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)); 1674 1675 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1676 1677 stripeoff[stripe] += readlen; 1678 pos += readlen; 1679 } else if (length - pos < ci->stripe_length) { 1680 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1681 1682 pos = length; 1683 } else { 1684 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1685 1686 stripeoff[stripe] += (UINT32)ci->stripe_length; 1687 pos += (UINT32)ci->stripe_length; 1688 } 1689 1690 stripe = (stripe + 1) % ci->num_stripes; 1691 } 1692 1693 MmUnlockPages(master_mdl); 1694 IoFreeMdl(master_mdl); 1695 1696 ExFreePool(stripeoff); 1697 } else if (type == BLOCK_FLAG_RAID10) { 1698 UINT64 startoff, endoff; 1699 UINT16 endoffstripe, j, stripe; 1700 ULONG orig_ls; 1701 PMDL master_mdl; 1702 PFN_NUMBER* pfns; 1703 UINT32* stripeoff, pos; 1704 read_data_stripe** stripes; 1705 1706 if (c) 1707 orig_ls = c->last_stripe; 1708 else 1709 orig_ls = 0; 1710 1711 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); 1712 get_raid0_offset(addr + length - offset - 1, 
ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); 1713 1714 if ((ci->num_stripes % ci->sub_stripes) != 0) { 1715 ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); 1716 Status = STATUS_INTERNAL_ERROR; 1717 goto exit; 1718 } 1719 1720 if (file_read) { 1721 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1722 1723 if (!context.va) { 1724 ERR("out of memory\n"); 1725 Status = STATUS_INSUFFICIENT_RESOURCES; 1726 goto exit; 1727 } 1728 } else 1729 context.va = buf; 1730 1731 context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size); 1732 context.startoffstripe = startoffstripe; 1733 context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size); 1734 1735 startoffstripe *= ci->sub_stripes; 1736 endoffstripe *= ci->sub_stripes; 1737 1738 if (c) 1739 c->last_stripe = (orig_ls + 1) % ci->sub_stripes; 1740 1741 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1742 if (!master_mdl) { 1743 ERR("out of memory\n"); 1744 Status = STATUS_INSUFFICIENT_RESOURCES; 1745 goto exit; 1746 } 1747 1748 Status = STATUS_SUCCESS; 1749 1750 _SEH2_TRY { 1751 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1752 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1753 Status = _SEH2_GetExceptionCode(); 1754 } _SEH2_END; 1755 1756 if (!NT_SUCCESS(Status)) { 1757 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1758 IoFreeMdl(master_mdl); 1759 goto exit; 1760 } 1761 1762 pfns = (PFN_NUMBER*)(master_mdl + 1); 1763 1764 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1765 if (!stripes) { 1766 ERR("out of memory\n"); 1767 MmUnlockPages(master_mdl); 1768 IoFreeMdl(master_mdl); 1769 Status = STATUS_INSUFFICIENT_RESOURCES; 1770 goto exit; 1771 } 1772 1773 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / 
ci->sub_stripes); 1774 1775 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { 1776 UINT64 sstart, send; 1777 BOOL stripeset = FALSE; 1778 1779 if (startoffstripe > i) 1780 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1781 else if (startoffstripe == i) 1782 sstart = startoff; 1783 else 1784 sstart = startoff - (startoff % ci->stripe_length); 1785 1786 if (endoffstripe > i) 1787 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1788 else if (endoffstripe == i) 1789 send = endoff + 1; 1790 else 1791 send = endoff - (endoff % ci->stripe_length); 1792 1793 for (j = 0; j < ci->sub_stripes; j++) { 1794 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) { 1795 context.stripes[i+j].stripestart = sstart; 1796 context.stripes[i+j].stripeend = send; 1797 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1798 1799 if (sstart != send) { 1800 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); 1801 1802 if (!context.stripes[i+j].mdl) { 1803 ERR("IoAllocateMdl failed\n"); 1804 MmUnlockPages(master_mdl); 1805 IoFreeMdl(master_mdl); 1806 Status = STATUS_INSUFFICIENT_RESOURCES; 1807 goto exit; 1808 } 1809 } 1810 1811 stripeset = TRUE; 1812 } else 1813 context.stripes[i+j].status = ReadDataStatus_Skip; 1814 } 1815 1816 if (!stripeset) { 1817 for (j = 0; j < ci->sub_stripes; j++) { 1818 if (devices[i+j] && devices[i+j]->devobj) { 1819 context.stripes[i+j].stripestart = sstart; 1820 context.stripes[i+j].stripeend = send; 1821 context.stripes[i+j].status = ReadDataStatus_Pending; 1822 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1823 1824 if (sstart != send) { 1825 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); 1826 1827 if (!context.stripes[i+j].mdl) { 1828 ERR("IoAllocateMdl failed\n"); 1829 MmUnlockPages(master_mdl); 1830 IoFreeMdl(master_mdl); 1831 Status = STATUS_INSUFFICIENT_RESOURCES; 1832 goto exit; 1833 } 1834 } 
1835 1836 stripeset = TRUE; 1837 break; 1838 } 1839 } 1840 1841 if (!stripeset) { 1842 ERR("could not find stripe to read\n"); 1843 Status = STATUS_DEVICE_NOT_READY; 1844 goto exit; 1845 } 1846 } 1847 } 1848 1849 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1850 if (!stripeoff) { 1851 ERR("out of memory\n"); 1852 MmUnlockPages(master_mdl); 1853 IoFreeMdl(master_mdl); 1854 Status = STATUS_INSUFFICIENT_RESOURCES; 1855 goto exit; 1856 } 1857 1858 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); 1859 1860 pos = 0; 1861 stripe = startoffstripe / ci->sub_stripes; 1862 while (pos < length) { 1863 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1); 1864 1865 if (pos == 0) { 1866 UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart, 1867 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length)); 1868 1869 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1870 1871 stripeoff[stripe] += readlen; 1872 pos += readlen; 1873 } else if (length - pos < ci->stripe_length) { 1874 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1875 1876 pos = length; 1877 } else { 1878 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1879 1880 stripeoff[stripe] += (ULONG)ci->stripe_length; 1881 pos += (ULONG)ci->stripe_length; 1882 } 1883 1884 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); 1885 } 1886 1887 MmUnlockPages(master_mdl); 1888 IoFreeMdl(master_mdl); 1889 1890 ExFreePool(stripeoff); 1891 ExFreePool(stripes); 1892 } else if (type == BLOCK_FLAG_DUPLICATE) { 1893 UINT64 orig_ls; 1894 1895 if (c) 1896 orig_ls = i = c->last_stripe; 1897 else 1898 orig_ls = i = 0; 1899 1900 while (!devices[i] || 
!devices[i]->devobj) { 1901 i = (i + 1) % ci->num_stripes; 1902 1903 if (i == orig_ls) { 1904 ERR("no devices available to service request\n"); 1905 Status = STATUS_DEVICE_NOT_READY; 1906 goto exit; 1907 } 1908 } 1909 1910 if (c) 1911 c->last_stripe = (i + 1) % ci->num_stripes; 1912 1913 context.stripes[i].stripestart = addr - offset; 1914 context.stripes[i].stripeend = context.stripes[i].stripestart + length; 1915 1916 if (file_read) { 1917 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1918 1919 if (!context.va) { 1920 ERR("out of memory\n"); 1921 Status = STATUS_INSUFFICIENT_RESOURCES; 1922 goto exit; 1923 } 1924 1925 context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1926 if (!context.stripes[i].mdl) { 1927 ERR("IoAllocateMdl failed\n"); 1928 Status = STATUS_INSUFFICIENT_RESOURCES; 1929 goto exit; 1930 } 1931 1932 MmBuildMdlForNonPagedPool(context.stripes[i].mdl); 1933 } else { 1934 context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL); 1935 1936 if (!context.stripes[i].mdl) { 1937 ERR("IoAllocateMdl failed\n"); 1938 Status = STATUS_INSUFFICIENT_RESOURCES; 1939 goto exit; 1940 } 1941 1942 Status = STATUS_SUCCESS; 1943 1944 _SEH2_TRY { 1945 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess); 1946 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1947 Status = _SEH2_GetExceptionCode(); 1948 } _SEH2_END; 1949 1950 if (!NT_SUCCESS(Status)) { 1951 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1952 goto exit; 1953 } 1954 } 1955 } else if (type == BLOCK_FLAG_RAID5) { 1956 UINT64 startoff, endoff; 1957 UINT16 endoffstripe, parity; 1958 UINT32 *stripeoff, pos; 1959 PMDL master_mdl; 1960 PFN_NUMBER *pfns, dummy; 1961 BOOL need_dummy = FALSE; 1962 1963 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); 1964 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); 1965 1966 if 
(file_read) { 1967 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1968 1969 if (!context.va) { 1970 ERR("out of memory\n"); 1971 Status = STATUS_INSUFFICIENT_RESOURCES; 1972 goto exit; 1973 } 1974 } else 1975 context.va = buf; 1976 1977 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1978 if (!master_mdl) { 1979 ERR("out of memory\n"); 1980 Status = STATUS_INSUFFICIENT_RESOURCES; 1981 goto exit; 1982 } 1983 1984 Status = STATUS_SUCCESS; 1985 1986 _SEH2_TRY { 1987 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1988 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1989 Status = _SEH2_GetExceptionCode(); 1990 } _SEH2_END; 1991 1992 if (!NT_SUCCESS(Status)) { 1993 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1994 IoFreeMdl(master_mdl); 1995 goto exit; 1996 } 1997 1998 pfns = (PFN_NUMBER*)(master_mdl + 1); 1999 2000 pos = 0; 2001 while (pos < length) { 2002 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2003 2004 if (pos == 0) { 2005 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2006 ULONG skip, readlen; 2007 2008 i = startoffstripe; 2009 while (stripe != parity) { 2010 if (i == startoffstripe) { 2011 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length))); 2012 2013 context.stripes[stripe].stripestart = startoff; 2014 context.stripes[stripe].stripeend = startoff + readlen; 2015 2016 pos += readlen; 2017 2018 if (pos == length) 2019 break; 2020 } else { 2021 readlen = min(length - pos, (ULONG)ci->stripe_length); 2022 2023 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2024 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2025 2026 pos += readlen; 2027 2028 if (pos == length) 2029 break; 2030 } 2031 2032 i++; 2033 stripe = (stripe + 1) % ci->num_stripes; 2034 } 2035 2036 if (pos == length) 2037 break; 2038 2039 for (i = 
0; i < startoffstripe; i++) { 2040 UINT16 stripe2 = (parity + i + 1) % ci->num_stripes; 2041 2042 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2043 } 2044 2045 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2046 2047 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) { 2048 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1); 2049 2050 for (i = 0; i < ci->num_stripes; i++) { 2051 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2052 } 2053 2054 pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length); 2055 need_dummy = TRUE; 2056 } 2057 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2058 for (i = 0; i < ci->num_stripes; i++) { 2059 context.stripes[i].stripeend += ci->stripe_length; 2060 } 2061 2062 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1)); 2063 need_dummy = TRUE; 2064 } else { 2065 UINT16 stripe = (parity + 1) % ci->num_stripes; 2066 2067 i = 0; 2068 while (stripe != parity) { 2069 if (endoffstripe == i) { 2070 context.stripes[stripe].stripeend = endoff + 1; 2071 break; 2072 } else if (endoffstripe > i) 2073 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2074 2075 i++; 2076 stripe = (stripe + 1) % ci->num_stripes; 2077 } 2078 2079 break; 2080 } 2081 } 2082 2083 for (i = 0; i < ci->num_stripes; i++) { 2084 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2085 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), 2086 FALSE, FALSE, NULL); 2087 2088 if (!context.stripes[i].mdl) { 2089 ERR("IoAllocateMdl failed\n"); 2090 MmUnlockPages(master_mdl); 2091 IoFreeMdl(master_mdl); 2092 
Status = STATUS_INSUFFICIENT_RESOURCES; 2093 goto exit; 2094 } 2095 } 2096 } 2097 2098 if (need_dummy) { 2099 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2100 if (!dummypage) { 2101 ERR("out of memory\n"); 2102 MmUnlockPages(master_mdl); 2103 IoFreeMdl(master_mdl); 2104 Status = STATUS_INSUFFICIENT_RESOURCES; 2105 goto exit; 2106 } 2107 2108 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); 2109 if (!dummy_mdl) { 2110 ERR("IoAllocateMdl failed\n"); 2111 MmUnlockPages(master_mdl); 2112 IoFreeMdl(master_mdl); 2113 Status = STATUS_INSUFFICIENT_RESOURCES; 2114 goto exit; 2115 } 2116 2117 MmBuildMdlForNonPagedPool(dummy_mdl); 2118 2119 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2120 } 2121 2122 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 2123 if (!stripeoff) { 2124 ERR("out of memory\n"); 2125 MmUnlockPages(master_mdl); 2126 IoFreeMdl(master_mdl); 2127 Status = STATUS_INSUFFICIENT_RESOURCES; 2128 goto exit; 2129 } 2130 2131 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 2132 2133 pos = 0; 2134 2135 while (pos < length) { 2136 PFN_NUMBER* stripe_pfns; 2137 2138 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2139 2140 if (pos == 0) { 2141 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2142 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2143 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2144 2145 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2146 2147 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2148 2149 stripeoff[stripe] = readlen; 2150 pos += readlen; 2151 2152 stripe = (stripe + 1) % ci->num_stripes; 2153 2154 while (stripe != parity) { 2155 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2156 readlen = 
min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2157 2158 if (readlen == 0) 2159 break; 2160 2161 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2162 2163 stripeoff[stripe] = readlen; 2164 pos += readlen; 2165 2166 stripe = (stripe + 1) % ci->num_stripes; 2167 } 2168 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2169 UINT16 stripe = (parity + 1) % ci->num_stripes; 2170 ULONG k; 2171 2172 while (stripe != parity) { 2173 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2174 2175 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2176 2177 stripeoff[stripe] += (UINT32)ci->stripe_length; 2178 pos += (UINT32)ci->stripe_length; 2179 2180 stripe = (stripe + 1) % ci->num_stripes; 2181 } 2182 2183 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1); 2184 2185 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2186 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy; 2187 stripeoff[parity] += PAGE_SIZE; 2188 } 2189 } else { 2190 UINT16 stripe = (parity + 1) % ci->num_stripes; 2191 UINT32 readlen; 2192 2193 while (pos < length) { 2194 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2195 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2196 2197 if (readlen == 0) 2198 break; 2199 2200 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2201 2202 stripeoff[stripe] += readlen; 2203 pos += readlen; 2204 2205 stripe = (stripe + 1) % ci->num_stripes; 2206 } 2207 } 2208 } 2209 2210 MmUnlockPages(master_mdl); 2211 IoFreeMdl(master_mdl); 2212 2213 ExFreePool(stripeoff); 2214 } else if (type == BLOCK_FLAG_RAID6) { 2215 UINT64 startoff, 
endoff; 2216 UINT16 endoffstripe, parity1; 2217 UINT32 *stripeoff, pos; 2218 PMDL master_mdl; 2219 PFN_NUMBER *pfns, dummy; 2220 BOOL need_dummy = FALSE; 2221 2222 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); 2223 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); 2224 2225 if (file_read) { 2226 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2227 2228 if (!context.va) { 2229 ERR("out of memory\n"); 2230 Status = STATUS_INSUFFICIENT_RESOURCES; 2231 goto exit; 2232 } 2233 } else 2234 context.va = buf; 2235 2236 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 2237 if (!master_mdl) { 2238 ERR("out of memory\n"); 2239 Status = STATUS_INSUFFICIENT_RESOURCES; 2240 goto exit; 2241 } 2242 2243 Status = STATUS_SUCCESS; 2244 2245 _SEH2_TRY { 2246 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2247 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2248 Status = _SEH2_GetExceptionCode(); 2249 } _SEH2_END; 2250 2251 if (!NT_SUCCESS(Status)) { 2252 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 2253 IoFreeMdl(master_mdl); 2254 goto exit; 2255 } 2256 2257 pfns = (PFN_NUMBER*)(master_mdl + 1); 2258 2259 pos = 0; 2260 while (pos < length) { 2261 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2262 2263 if (pos == 0) { 2264 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2; 2265 ULONG skip, readlen; 2266 2267 i = startoffstripe; 2268 while (stripe != parity1) { 2269 if (i == startoffstripe) { 2270 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length)); 2271 2272 context.stripes[stripe].stripestart = startoff; 2273 context.stripes[stripe].stripeend = startoff + readlen; 2274 2275 pos += readlen; 2276 2277 if (pos == length) 2278 break; 2279 } else { 2280 readlen = min(length - pos, 
(ULONG)ci->stripe_length); 2281 2282 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2283 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2284 2285 pos += readlen; 2286 2287 if (pos == length) 2288 break; 2289 } 2290 2291 i++; 2292 stripe = (stripe + 1) % ci->num_stripes; 2293 } 2294 2295 if (pos == length) 2296 break; 2297 2298 for (i = 0; i < startoffstripe; i++) { 2299 UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes; 2300 2301 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2302 } 2303 2304 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2305 2306 parity2 = (parity1 + 1) % ci->num_stripes; 2307 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2308 2309 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) { 2310 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1); 2311 2312 for (i = 0; i < ci->num_stripes; i++) { 2313 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2314 } 2315 2316 pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length); 2317 need_dummy = TRUE; 2318 } 2319 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2320 for (i = 0; i < ci->num_stripes; i++) { 2321 context.stripes[i].stripeend += ci->stripe_length; 2322 } 2323 2324 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2)); 2325 need_dummy = TRUE; 2326 } else { 2327 UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2328 2329 i = 0; 2330 while (stripe != parity1) { 2331 if (endoffstripe == i) { 2332 context.stripes[stripe].stripeend = endoff + 1; 2333 break; 2334 } else if (endoffstripe > i) 2335 
context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2336 2337 i++; 2338 stripe = (stripe + 1) % ci->num_stripes; 2339 } 2340 2341 break; 2342 } 2343 } 2344 2345 for (i = 0; i < ci->num_stripes; i++) { 2346 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2347 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); 2348 2349 if (!context.stripes[i].mdl) { 2350 ERR("IoAllocateMdl failed\n"); 2351 MmUnlockPages(master_mdl); 2352 IoFreeMdl(master_mdl); 2353 Status = STATUS_INSUFFICIENT_RESOURCES; 2354 goto exit; 2355 } 2356 } 2357 } 2358 2359 if (need_dummy) { 2360 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2361 if (!dummypage) { 2362 ERR("out of memory\n"); 2363 MmUnlockPages(master_mdl); 2364 IoFreeMdl(master_mdl); 2365 Status = STATUS_INSUFFICIENT_RESOURCES; 2366 goto exit; 2367 } 2368 2369 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); 2370 if (!dummy_mdl) { 2371 ERR("IoAllocateMdl failed\n"); 2372 MmUnlockPages(master_mdl); 2373 IoFreeMdl(master_mdl); 2374 Status = STATUS_INSUFFICIENT_RESOURCES; 2375 goto exit; 2376 } 2377 2378 MmBuildMdlForNonPagedPool(dummy_mdl); 2379 2380 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2381 } 2382 2383 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 2384 if (!stripeoff) { 2385 ERR("out of memory\n"); 2386 MmUnlockPages(master_mdl); 2387 IoFreeMdl(master_mdl); 2388 Status = STATUS_INSUFFICIENT_RESOURCES; 2389 goto exit; 2390 } 2391 2392 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 2393 2394 pos = 0; 2395 2396 while (pos < length) { 2397 PFN_NUMBER* stripe_pfns; 2398 2399 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2400 2401 if (pos == 0) { 2402 UINT16 stripe = (parity1 + startoffstripe + 2) % 
ci->num_stripes; 2403 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2404 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2405 2406 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2407 2408 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2409 2410 stripeoff[stripe] = readlen; 2411 pos += readlen; 2412 2413 stripe = (stripe + 1) % ci->num_stripes; 2414 2415 while (stripe != parity1) { 2416 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2417 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2418 2419 if (readlen == 0) 2420 break; 2421 2422 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2423 2424 stripeoff[stripe] = readlen; 2425 pos += readlen; 2426 2427 stripe = (stripe + 1) % ci->num_stripes; 2428 } 2429 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2430 UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2431 UINT16 parity2 = (parity1 + 1) % ci->num_stripes; 2432 ULONG k; 2433 2434 while (stripe != parity1) { 2435 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2436 2437 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2438 2439 stripeoff[stripe] += (UINT32)ci->stripe_length; 2440 pos += (UINT32)ci->stripe_length; 2441 2442 stripe = (stripe + 1) % ci->num_stripes; 2443 } 2444 2445 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1); 2446 2447 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2448 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy; 2449 stripeoff[parity1] += PAGE_SIZE; 2450 } 2451 2452 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1); 2453 2454 for (k = 0; k < ci->stripe_length >> 
PAGE_SHIFT; k++) { 2455 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy; 2456 stripeoff[parity2] += PAGE_SIZE; 2457 } 2458 } else { 2459 UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2460 UINT32 readlen; 2461 2462 while (pos < length) { 2463 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2464 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2465 2466 if (readlen == 0) 2467 break; 2468 2469 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2470 2471 stripeoff[stripe] += readlen; 2472 pos += readlen; 2473 2474 stripe = (stripe + 1) % ci->num_stripes; 2475 } 2476 } 2477 } 2478 2479 MmUnlockPages(master_mdl); 2480 IoFreeMdl(master_mdl); 2481 2482 ExFreePool(stripeoff); 2483 } 2484 2485 context.address = addr; 2486 2487 for (i = 0; i < ci->num_stripes; i++) { 2488 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) { 2489 context.stripes[i].status = ReadDataStatus_MissingDevice; 2490 context.stripes_left--; 2491 2492 if (!devices[i] || !devices[i]->devobj) 2493 missing_devices++; 2494 } 2495 } 2496 2497 if (missing_devices > allowed_missing) { 2498 ERR("not enough devices to service request (%u missing)\n", missing_devices); 2499 Status = STATUS_UNEXPECTED_IO_ERROR; 2500 goto exit; 2501 } 2502 2503 for (i = 0; i < ci->num_stripes; i++) { 2504 PIO_STACK_LOCATION IrpSp; 2505 2506 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) { 2507 context.stripes[i].context = (struct read_data_context*)&context; 2508 2509 if (type == BLOCK_FLAG_RAID10) { 2510 context.stripes[i].stripenum = i / ci->sub_stripes; 2511 } 2512 2513 if (!Irp) { 2514 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); 2515 2516 if 
(!context.stripes[i].Irp) { 2517 ERR("IoAllocateIrp failed\n"); 2518 Status = STATUS_INSUFFICIENT_RESOURCES; 2519 goto exit; 2520 } 2521 } else { 2522 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); 2523 2524 if (!context.stripes[i].Irp) { 2525 ERR("IoMakeAssociatedIrp failed\n"); 2526 Status = STATUS_INSUFFICIENT_RESOURCES; 2527 goto exit; 2528 } 2529 } 2530 2531 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2532 IrpSp->MajorFunction = IRP_MJ_READ; 2533 2534 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2535 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG); 2536 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2537 ERR("out of memory\n"); 2538 Status = STATUS_INSUFFICIENT_RESOURCES; 2539 goto exit; 2540 } 2541 2542 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2543 2544 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2545 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) 2546 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl; 2547 else 2548 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2549 2550 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart); 2551 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset; 2552 2553 total_reading += IrpSp->Parameters.Read.Length; 2554 2555 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2556 2557 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE); 2558 2559 context.stripes[i].status = ReadDataStatus_Pending; 2560 } 2561 } 2562 2563 #ifdef DEBUG_STATS 2564 if (!is_tree) 2565 time1 = KeQueryPerformanceCounter(NULL); 2566 #endif 
2567 2568 need_to_wait = FALSE; 2569 for (i = 0; i < ci->num_stripes; i++) { 2570 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) { 2571 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp); 2572 need_to_wait = TRUE; 2573 } 2574 } 2575 2576 if (need_to_wait) 2577 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); 2578 2579 #ifdef DEBUG_STATS 2580 if (!is_tree) { 2581 time2 = KeQueryPerformanceCounter(NULL); 2582 2583 Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; 2584 } 2585 #endif 2586 2587 if (diskacc) 2588 fFsRtlUpdateDiskCounters(total_reading, 0); 2589 2590 // check if any of the devices return a "user-induced" error 2591 2592 for (i = 0; i < ci->num_stripes; i++) { 2593 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) { 2594 Status = context.stripes[i].iosb.Status; 2595 goto exit; 2596 } 2597 } 2598 2599 if (type == BLOCK_FLAG_RAID0) { 2600 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2601 if (!NT_SUCCESS(Status)) { 2602 ERR("read_data_raid0 returned %08x\n", Status); 2603 2604 if (file_read) 2605 ExFreePool(context.va); 2606 2607 goto exit; 2608 } 2609 2610 if (file_read) { 2611 RtlCopyMemory(buf, context.va, length); 2612 ExFreePool(context.va); 2613 } 2614 } else if (type == BLOCK_FLAG_RAID10) { 2615 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2616 2617 if (!NT_SUCCESS(Status)) { 2618 ERR("read_data_raid10 returned %08x\n", Status); 2619 2620 if (file_read) 2621 ExFreePool(context.va); 2622 2623 goto exit; 2624 } 2625 2626 if (file_read) { 2627 RtlCopyMemory(buf, context.va, length); 2628 ExFreePool(context.va); 2629 } 2630 } else if (type == BLOCK_FLAG_DUPLICATE) { 2631 Status = read_data_dup(Vcb, file_read ? 
context.va : buf, addr, &context, ci, devices, generation); 2632 if (!NT_SUCCESS(Status)) { 2633 ERR("read_data_dup returned %08x\n", Status); 2634 2635 if (file_read) 2636 ExFreePool(context.va); 2637 2638 goto exit; 2639 } 2640 2641 if (file_read) { 2642 RtlCopyMemory(buf, context.va, length); 2643 ExFreePool(context.va); 2644 } 2645 } else if (type == BLOCK_FLAG_RAID5) { 2646 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE); 2647 if (!NT_SUCCESS(Status)) { 2648 ERR("read_data_raid5 returned %08x\n", Status); 2649 2650 if (file_read) 2651 ExFreePool(context.va); 2652 2653 goto exit; 2654 } 2655 2656 if (file_read) { 2657 RtlCopyMemory(buf, context.va, length); 2658 ExFreePool(context.va); 2659 } 2660 } else if (type == BLOCK_FLAG_RAID6) { 2661 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? 
TRUE : FALSE); 2662 if (!NT_SUCCESS(Status)) { 2663 ERR("read_data_raid6 returned %08x\n", Status); 2664 2665 if (file_read) 2666 ExFreePool(context.va); 2667 2668 goto exit; 2669 } 2670 2671 if (file_read) { 2672 RtlCopyMemory(buf, context.va, length); 2673 ExFreePool(context.va); 2674 } 2675 } 2676 2677 exit: 2678 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) 2679 chunk_unlock_range(Vcb, c, lockaddr, locklen); 2680 2681 if (dummy_mdl) 2682 IoFreeMdl(dummy_mdl); 2683 2684 if (dummypage) 2685 ExFreePool(dummypage); 2686 2687 for (i = 0; i < ci->num_stripes; i++) { 2688 if (context.stripes[i].mdl) { 2689 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED) 2690 MmUnlockPages(context.stripes[i].mdl); 2691 2692 IoFreeMdl(context.stripes[i].mdl); 2693 } 2694 2695 if (context.stripes[i].Irp) 2696 IoFreeIrp(context.stripes[i].Irp); 2697 } 2698 2699 ExFreePool(context.stripes); 2700 2701 if (!Vcb->log_to_phys_loaded) 2702 ExFreePool(devices); 2703 2704 return Status; 2705 } 2706 2707 NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) { 2708 ULONG readlen; 2709 2710 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr); 2711 2712 if (pbr) *pbr = 0; 2713 2714 if (start >= fcb->adsdata.Length) { 2715 TRACE("tried to read beyond end of stream\n"); 2716 return STATUS_END_OF_FILE; 2717 } 2718 2719 if (length == 0) { 2720 WARN("tried to read zero bytes\n"); 2721 return STATUS_SUCCESS; 2722 } 2723 2724 if (start + length < fcb->adsdata.Length) 2725 readlen = length; 2726 else 2727 readlen = fcb->adsdata.Length - (ULONG)start; 2728 2729 if (readlen > 0) 2730 RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen); 2731 2732 if (pbr) *pbr = readlen; 2733 2734 return STATUS_SUCCESS; 2735 } 2736 2737 NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) { 2738 NTSTATUS Status; 2739 EXTENT_DATA* ed; 2740 UINT32 bytes_read = 0; 2741 UINT64 last_end; 2742 LIST_ENTRY* le; 2743 
#ifdef DEBUG_STATS
    LARGE_INTEGER time1, time2;
#endif

    TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);

    if (pbr)
        *pbr = 0;

    if (start >= fcb->inode_item.st_size) {
        WARN("Tried to read beyond end of file\n");
        Status = STATUS_END_OF_FILE;
        goto exit;
    }

#ifdef DEBUG_STATS
    time1 = KeQueryPerformanceCounter(NULL);
#endif

    le = fcb->extents.Flink;

    last_end = start;

    // Walk the extent list in order, consuming the requested range.
    while (le != &fcb->extents) {
        UINT64 len;
        extent* ext = CONTAINING_RECORD(le, extent, list_entry);
        EXTENT_DATA2* ed2;

        if (!ext->ignore) {
            ed = &ext->extent_data;

            // Regular and prealloc extents carry an EXTENT_DATA2 payload;
            // inline extents hold the data directly.
            ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;

            len = ed2 ? ed2->num_bytes : ed->decoded_size;

            // Extent ends before the requested range - skip it.
            if (ext->offset + len <= start) {
                last_end = ext->offset + len;
                goto nextitem;
            }

            // Sparse hole between the previous extent and this one: zero-fill.
            if (ext->offset > last_end && ext->offset > start + bytes_read) {
                UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end));

                RtlZeroMemory(data + bytes_read, read);
                bytes_read += read;
                length -= read;
            }

            // Done, or this extent starts beyond the requested range.
            if (length == 0 || ext->offset > start + bytes_read + length)
                break;

            if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
                WARN("Encryption not supported\n");
                Status = STATUS_NOT_IMPLEMENTED;
                goto exit;
            }

            if (ed->encoding != BTRFS_ENCODING_NONE) {
                WARN("Other encodings not supported\n");
                Status = STATUS_NOT_IMPLEMENTED;
                goto exit;
            }

            switch (ed->type) {
                case EXTENT_TYPE_INLINE:
                {
                    // off is how far into this extent the current read position is.
                    UINT64 off = start + bytes_read - ext->offset;
                    UINT32 read;

                    if (ed->compression == BTRFS_COMPRESSION_NONE) {
                        read = (UINT32)min(min(len, ext->datalen) - off, length);

                        RtlCopyMemory(data + bytes_read, &ed->data[off], read);
                    } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
                        UINT8* decomp;
                        BOOL decomp_alloc;
                        UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]);

                        if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
                            ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size);
                            Status = STATUS_INTERNAL_ERROR;
                            goto exit;
                        }

                        read = (UINT32)min(ed->decoded_size - off, length);

                        // Reading from partway into the extent: decompress the
                        // whole extent into a temporary buffer first. Otherwise
                        // decompress straight into the caller's buffer.
                        if (off > 0) {
                            decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG);
                            if (!decomp) {
                                ERR("out of memory\n");
                                Status = STATUS_INSUFFICIENT_RESOURCES;
                                goto exit;
                            }

                            decomp_alloc = TRUE;
                        } else {
                            decomp = data + bytes_read;
                            decomp_alloc = FALSE;
                        }

                        if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
                            Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
                            if (!NT_SUCCESS(Status)) {
                                ERR("zlib_decompress returned %08x\n", Status);
                                if (decomp_alloc) ExFreePool(decomp);
                                goto exit;
                            }
                        } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
                            // LZO inline data is prefixed by a 4-byte length word.
                            if (inlen < sizeof(UINT32)) {
                                ERR("extent data was truncated\n");
                                Status = STATUS_INTERNAL_ERROR;
                                if (decomp_alloc) ExFreePool(decomp);
                                goto exit;
                            } else
                                inlen -= sizeof(UINT32);

                            Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32));
                            if (!NT_SUCCESS(Status)) {
                                ERR("lzo_decompress returned %08x\n", Status);
                                if (decomp_alloc) ExFreePool(decomp);
                                goto exit;
                            }
                        } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
                            Status = zstd_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
                            if (!NT_SUCCESS(Status)) {
                                ERR("zstd_decompress returned %08x\n", Status);
                                if (decomp_alloc) ExFreePool(decomp);
                                goto exit;
                            }
                        }

                        if (decomp_alloc) {
RtlCopyMemory(data + bytes_read, decomp + off, read); 2876 ExFreePool(decomp); 2877 } 2878 } else { 2879 ERR("unhandled compression type %x\n", ed->compression); 2880 Status = STATUS_NOT_IMPLEMENTED; 2881 goto exit; 2882 } 2883 2884 bytes_read += read; 2885 length -= read; 2886 2887 break; 2888 } 2889 2890 case EXTENT_TYPE_REGULAR: 2891 { 2892 UINT64 off = start + bytes_read - ext->offset; 2893 UINT32 to_read, read; 2894 UINT8* buf; 2895 BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE; 2896 BOOL buf_free; 2897 UINT32 bumpoff = 0, *csum; 2898 UINT64 addr; 2899 chunk* c; 2900 2901 read = (UINT32)(len - off); 2902 if (read > length) read = (UINT32)length; 2903 2904 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2905 addr = ed2->address + ed2->offset + off; 2906 to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size); 2907 2908 if (addr % fcb->Vcb->superblock.sector_size > 0) { 2909 bumpoff = addr % fcb->Vcb->superblock.sector_size; 2910 addr -= bumpoff; 2911 to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size); 2912 } 2913 } else { 2914 addr = ed2->address; 2915 to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size); 2916 } 2917 2918 if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 && 2919 length % fcb->Vcb->superblock.sector_size == 0) { 2920 buf = data + bytes_read; 2921 buf_free = FALSE; 2922 } else { 2923 buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG); 2924 buf_free = TRUE; 2925 2926 if (!buf) { 2927 ERR("out of memory\n"); 2928 Status = STATUS_INSUFFICIENT_RESOURCES; 2929 goto exit; 2930 } 2931 2932 mdl = FALSE; 2933 } 2934 2935 c = get_chunk_from_address(fcb->Vcb, addr); 2936 2937 if (!c) { 2938 ERR("get_chunk_from_address(%llx) failed\n", addr); 2939 2940 if (buf_free) 2941 ExFreePool(buf); 2942 2943 goto exit; 2944 } 2945 2946 if (ext->csum) { 2947 if (ed->compression == BTRFS_COMPRESSION_NONE) 2948 csum = &ext->csum[off / 
fcb->Vcb->superblock.sector_size]; 2949 else 2950 csum = ext->csum; 2951 } else 2952 csum = NULL; 2953 2954 Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl, 2955 fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 2956 if (!NT_SUCCESS(Status)) { 2957 ERR("read_data returned %08x\n", Status); 2958 2959 if (buf_free) 2960 ExFreePool(buf); 2961 2962 goto exit; 2963 } 2964 2965 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2966 if (buf_free) 2967 RtlCopyMemory(data + bytes_read, buf + bumpoff, read); 2968 } else { 2969 UINT8 *decomp = NULL, *buf2; 2970 ULONG outlen, inlen, off2; 2971 UINT32 inpageoff = 0; 2972 2973 off2 = (ULONG)(ed2->offset + off); 2974 buf2 = buf; 2975 inlen = (ULONG)ed2->size; 2976 2977 if (ed->compression == BTRFS_COMPRESSION_LZO) { 2978 ULONG inoff = sizeof(UINT32); 2979 2980 inlen -= sizeof(UINT32); 2981 2982 // If reading a few sectors in, skip to the interesting bit 2983 while (off2 > LINUX_PAGE_SIZE) { 2984 UINT32 partlen; 2985 2986 if (inlen < sizeof(UINT32)) 2987 break; 2988 2989 partlen = *(UINT32*)(buf2 + inoff); 2990 2991 if (partlen < inlen) { 2992 off2 -= LINUX_PAGE_SIZE; 2993 inoff += partlen + sizeof(UINT32); 2994 inlen -= partlen + sizeof(UINT32); 2995 2996 if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32)) 2997 inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE; 2998 } else 2999 break; 3000 } 3001 3002 buf2 = &buf2[inoff]; 3003 inpageoff = inoff % LINUX_PAGE_SIZE; 3004 } 3005 3006 if (off2 != 0) { 3007 outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off)); 3008 3009 decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG); 3010 if (!decomp) { 3011 ERR("out of memory\n"); 3012 ExFreePool(buf); 3013 Status = STATUS_INSUFFICIENT_RESOURCES; 3014 goto exit; 3015 } 3016 } else 3017 outlen = min(read, (UINT32)(ed2->num_bytes - off)); 3018 3019 if (ed->compression == BTRFS_COMPRESSION_ZLIB) { 3020 Status = zlib_decompress(buf2, 
inlen, decomp ? decomp : (data + bytes_read), outlen);

                            if (!NT_SUCCESS(Status)) {
                                ERR("zlib_decompress returned %08x\n", Status);
                                ExFreePool(buf);

                                if (decomp)
                                    ExFreePool(decomp);

                                goto exit;
                            }
                        } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
                            Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);

                            if (!NT_SUCCESS(Status)) {
                                ERR("lzo_decompress returned %08x\n", Status);
                                ExFreePool(buf);

                                if (decomp)
                                    ExFreePool(decomp);

                                goto exit;
                            }
                        } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
                            Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);

                            if (!NT_SUCCESS(Status)) {
                                ERR("zstd_decompress returned %08x\n", Status);
                                ExFreePool(buf);

                                if (decomp)
                                    ExFreePool(decomp);

                                goto exit;
                            }
                        } else {
                            ERR("unsupported compression type %x\n", ed->compression);
                            Status = STATUS_NOT_SUPPORTED;

                            ExFreePool(buf);

                            if (decomp)
                                ExFreePool(decomp);

                            goto exit;
                        }

                        // Temporary buffer was used - copy the wanted slice out.
                        if (decomp) {
                            RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
                            ExFreePool(decomp);
                        }
                    }

                    if (buf_free)
                        ExFreePool(buf);

                    bytes_read += read;
                    length -= read;

                    break;
                }

                case EXTENT_TYPE_PREALLOC:
                {
                    // Preallocated extents contain no data; they read as zeroes.
                    UINT64 off = start + bytes_read - ext->offset;
                    UINT32 read = (UINT32)(len - off);

                    if (read > length) read = (UINT32)length;

                    RtlZeroMemory(data + bytes_read, read);

                    bytes_read += read;
                    length -= read;

                    break;
                }

                default:
                    WARN("Unsupported extent data type %u\n", ed->type);
                    Status = STATUS_NOT_IMPLEMENTED;
                    goto exit;
            }

            last_end = ext->offset + len;

            if (length == 0)
                break;
        }

nextitem:
        le = le->Flink;
    }

    // Zero-fill any hole between the last extent and the end of the file.
    if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
        UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length);

        RtlZeroMemory(data + bytes_read, read);

        bytes_read += read;
        length -= read;
    }

    Status = STATUS_SUCCESS;
    if (pbr)
        *pbr = bytes_read;

#ifdef DEBUG_STATS
    time2 = KeQueryPerformanceCounter(NULL);

    fcb->Vcb->stats.num_reads++;
    fcb->Vcb->stats.data_read += bytes_read;
    fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
#endif

exit:
    return Status;
}

// Common read worker, called by drv_read and the thread-job path. Handles
// the ValidDataLength zeroing, then either goes through the cache manager
// (cached reads) or calls read_stream/read_file directly (non-cached).
// The number of bytes read is returned via *bytes_read.
NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) {
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    fcb* fcb = FileObject->FsContext;
    UINT8* data = NULL;
    ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
    UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart;

    *bytes_read = 0;

    if (!fcb || !fcb->Vcb || !fcb->subvol)
        return STATUS_INTERNAL_ERROR;

    TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
    TRACE("offset = %llx, length = %x\n", start, length);
    TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ?
"TRUE" : "FALSE");

    if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
        return STATUS_INVALID_DEVICE_REQUEST;

    if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
        WARN("tried to read locked region\n");
        return STATUS_FILE_LOCK_CONFLICT;
    }

    if (length == 0) {
        TRACE("tried to read zero bytes\n");
        return STATUS_SUCCESS;
    }

    if (start >= (UINT64)fcb->Header.FileSize.QuadPart) {
        TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart);
        return STATUS_END_OF_FILE;
    }

    TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);

    if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
        data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);

        if (Irp->MdlAddress && !data) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // Read starts entirely beyond ValidDataLength: return zeroes.
        if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) {
            length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data, length);
            Irp->IoStatus.Information = *bytes_read = length;
            return STATUS_SUCCESS;
        }

        // Read straddles ValidDataLength: zero the tail now (addon bytes) and
        // shrink length to the part we actually have to fetch.
        if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) {
            addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
            length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
        }
    }

    if (!(Irp->Flags & IRP_NOCACHE)) {
        // Cached read: hand the request over to the cache manager.
        NTSTATUS Status = STATUS_SUCCESS;

        _SEH2_TRY {
            // Initialize caching on this file object on first use.
            if (!FileObject->PrivateCacheMap) {
                CC_FILE_SIZES ccfs;

                ccfs.AllocationSize = fcb->Header.AllocationSize;
                ccfs.FileSize = fcb->Header.FileSize;
                ccfs.ValidDataLength = fcb->Header.ValidDataLength;

                init_file_cache(FileObject, &ccfs);
            }

            if (IrpSp->MinorFunction & IRP_MN_MDL) {
                CcMdlRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
            } else {
                // Prefer CcCopyReadEx where available (accounts I/O to the
                // originating thread).
                if (fCcCopyReadEx) {
                    TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
                          length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
                    TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
                    if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
                        TRACE("CcCopyReadEx could not wait\n");

                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyReadEx finished\n");
                } else {
                    TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
                    TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
                    if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
                        TRACE("CcCopyRead could not wait\n");

                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyRead finished\n");
                }
            }
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (NT_SUCCESS(Status)) {
            Status = Irp->IoStatus.Status;
            // Account for the zeroed tail past ValidDataLength.
            Irp->IoStatus.Information += addon;
            *bytes_read = (ULONG)Irp->IoStatus.Information;
        } else
            ERR("EXCEPTION - %08x\n", Status);

        return Status;
    } else {
        // Non-cached read: fetch the data from disk directly.
        NTSTATUS
Status; 3251 3252 if (!wait) { 3253 IoMarkIrpPending(Irp); 3254 return STATUS_PENDING; 3255 } 3256 3257 if (fcb->ads) 3258 Status = read_stream(fcb, data, start, length, bytes_read); 3259 else 3260 Status = read_file(fcb, data, start, length, bytes_read, Irp); 3261 3262 *bytes_read += addon; 3263 TRACE("read %u bytes\n", *bytes_read); 3264 3265 Irp->IoStatus.Information = *bytes_read; 3266 3267 if (diskacc && Status != STATUS_PENDING) { 3268 PETHREAD thread = NULL; 3269 3270 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) 3271 thread = Irp->Tail.Overlay.Thread; 3272 else if (!IoIsSystemThread(PsGetCurrentThread())) 3273 thread = PsGetCurrentThread(); 3274 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) 3275 thread = PsGetCurrentThread(); 3276 3277 if (thread) 3278 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); 3279 } 3280 3281 return Status; 3282 } 3283 } 3284 3285 _Dispatch_type_(IRP_MJ_READ) 3286 _Function_class_(DRIVER_DISPATCH) 3287 NTSTATUS NTAPI drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { 3288 device_extension* Vcb = DeviceObject->DeviceExtension; 3289 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3290 PFILE_OBJECT FileObject = IrpSp->FileObject; 3291 ULONG bytes_read = 0; 3292 NTSTATUS Status; 3293 BOOL top_level; 3294 fcb* fcb; 3295 ccb* ccb; 3296 BOOLEAN fcb_lock = FALSE, wait; 3297 3298 FsRtlEnterFileSystem(); 3299 3300 top_level = is_top_level(Irp); 3301 3302 TRACE("read\n"); 3303 3304 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { 3305 Status = vol_read(DeviceObject, Irp); 3306 goto exit2; 3307 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { 3308 Status = STATUS_INVALID_PARAMETER; 3309 goto end; 3310 } 3311 3312 Irp->IoStatus.Information = 0; 3313 3314 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) { 3315 CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress); 3316 3317 Irp->MdlAddress = NULL; 3318 Status = STATUS_SUCCESS; 3319 3320 goto exit; 3321 } 
3322 3323 fcb = FileObject->FsContext; 3324 3325 if (!fcb) { 3326 ERR("fcb was NULL\n"); 3327 Status = STATUS_INVALID_PARAMETER; 3328 goto exit; 3329 } 3330 3331 ccb = FileObject->FsContext2; 3332 3333 if (!ccb) { 3334 ERR("ccb was NULL\n"); 3335 Status = STATUS_INVALID_PARAMETER; 3336 goto exit; 3337 } 3338 3339 if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) { 3340 WARN("insufficient privileges\n"); 3341 Status = STATUS_ACCESS_DENIED; 3342 goto exit; 3343 } 3344 3345 if (fcb == Vcb->volume_fcb) { 3346 TRACE("reading volume FCB\n"); 3347 3348 IoSkipCurrentIrpStackLocation(Irp); 3349 3350 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); 3351 3352 goto exit2; 3353 } 3354 3355 wait = IoIsOperationSynchronous(Irp); 3356 3357 // Don't offload jobs when doing paging IO - otherwise this can lead to 3358 // deadlocks in CcCopyRead. 3359 if (Irp->Flags & IRP_PAGING_IO) 3360 wait = TRUE; 3361 3362 if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) { 3363 IO_STATUS_BLOCK iosb; 3364 3365 CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb); 3366 if (!NT_SUCCESS(iosb.Status)) { 3367 ERR("CcFlushCache returned %08x\n", iosb.Status); 3368 return iosb.Status; 3369 } 3370 } 3371 3372 if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { 3373 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { 3374 Status = STATUS_PENDING; 3375 IoMarkIrpPending(Irp); 3376 goto exit; 3377 } 3378 3379 fcb_lock = TRUE; 3380 } 3381 3382 Status = do_read(Irp, wait, &bytes_read); 3383 3384 if (fcb_lock) 3385 ExReleaseResourceLite(fcb->Header.Resource); 3386 3387 exit: 3388 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO)) 3389 FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? 
bytes_read : 0); 3390 3391 end: 3392 Irp->IoStatus.Status = Status; 3393 3394 TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status); 3395 TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information); 3396 TRACE("returning %08x\n", Status); 3397 3398 if (Status != STATUS_PENDING) 3399 IoCompleteRequest(Irp, IO_NO_INCREMENT); 3400 else { 3401 if (!add_thread_job(Vcb, Irp)) 3402 do_read_job(Irp); 3403 } 3404 3405 exit2: 3406 if (top_level) 3407 IoSetTopLevelIrp(NULL); 3408 3409 FsRtlExitFileSystem(); 3410 3411 return Status; 3412 } 3413