1 /* Copyright (c) Mark Harmstone 2016-17 2 * 3 * This file is part of WinBtrfs. 4 * 5 * WinBtrfs is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU Lesser General Public Licence as published by 7 * the Free Software Foundation, either version 3 of the Licence, or 8 * (at your option) any later version. 9 * 10 * WinBtrfs is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public Licence for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public Licence 16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "btrfs_drv.h" 19 20 enum read_data_status { 21 ReadDataStatus_Pending, 22 ReadDataStatus_Success, 23 ReadDataStatus_Error, 24 ReadDataStatus_MissingDevice, 25 ReadDataStatus_Skip 26 }; 27 28 struct read_data_context; 29 30 typedef struct { 31 struct read_data_context* context; 32 UINT16 stripenum; 33 BOOL rewrite; 34 PIRP Irp; 35 IO_STATUS_BLOCK iosb; 36 enum read_data_status status; 37 PMDL mdl; 38 UINT64 stripestart; 39 UINT64 stripeend; 40 } read_data_stripe; 41 42 typedef struct { 43 KEVENT Event; 44 NTSTATUS Status; 45 chunk* c; 46 UINT64 address; 47 UINT32 buflen; 48 LONG num_stripes, stripes_left; 49 UINT64 type; 50 UINT32 sector_size; 51 UINT16 firstoff, startoffstripe, sectors_per_stripe; 52 UINT32* csum; 53 BOOL tree; 54 read_data_stripe* stripes; 55 UINT8* va; 56 } read_data_context; 57 58 extern BOOL diskacc; 59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters; 60 extern tCcCopyReadEx fCcCopyReadEx; 61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters; 62 63 #define LINUX_PAGE_SIZE 4096 64 65 _Function_class_(IO_COMPLETION_ROUTINE) 66 #ifdef __REACTOS__ 67 static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 68 #else 69 static NTSTATUS 
read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 70 #endif 71 read_data_stripe* stripe = conptr; 72 read_data_context* context = (read_data_context*)stripe->context; 73 74 UNUSED(DeviceObject); 75 76 stripe->iosb = Irp->IoStatus; 77 78 if (NT_SUCCESS(Irp->IoStatus.Status)) 79 stripe->status = ReadDataStatus_Success; 80 else 81 stripe->status = ReadDataStatus_Error; 82 83 if (InterlockedDecrement(&context->stripes_left) == 0) 84 KeSetEvent(&context->Event, 0, FALSE); 85 86 return STATUS_MORE_PROCESSING_REQUIRED; 87 } 88 89 NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) { 90 NTSTATUS Status; 91 calc_job* cj; 92 UINT32* csum2; 93 94 // From experimenting, it seems that 40 sectors is roughly the crossover 95 // point where offloading the crc32 calculation becomes worth it. 96 97 if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) { 98 ULONG j; 99 100 for (j = 0; j < sectors; j++) { 101 UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 102 103 if (crc32 != csum[j]) { 104 return STATUS_CRC_ERROR; 105 } 106 } 107 108 return STATUS_SUCCESS; 109 } 110 111 csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG); 112 if (!csum2) { 113 ERR("out of memory\n"); 114 return STATUS_INSUFFICIENT_RESOURCES; 115 } 116 117 Status = add_calc_job(Vcb, data, sectors, csum2, &cj); 118 if (!NT_SUCCESS(Status)) { 119 ERR("add_calc_job returned %08x\n", Status); 120 ExFreePool(csum2); 121 return Status; 122 } 123 124 KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); 125 126 if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) { 127 free_calc_job(cj); 128 ExFreePool(csum2); 129 return STATUS_CRC_ERROR; 130 } 131 132 free_calc_job(cj); 133 ExFreePool(csum2); 134 135 return STATUS_SUCCESS; 136 } 137 138 static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, 
read_data_context* context, CHUNK_ITEM* ci, 139 device** devices, UINT64 generation) { 140 ULONG i; 141 BOOL checksum_error = FALSE; 142 UINT16 j, stripe = 0; 143 NTSTATUS Status; 144 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 145 146 for (j = 0; j < ci->num_stripes; j++) { 147 if (context->stripes[j].status == ReadDataStatus_Error) { 148 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 149 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 150 return context->stripes[j].iosb.Status; 151 } else if (context->stripes[j].status == ReadDataStatus_Success) { 152 stripe = j; 153 break; 154 } 155 } 156 157 if (context->stripes[stripe].status != ReadDataStatus_Success) 158 return STATUS_INTERNAL_ERROR; 159 160 if (context->tree) { 161 tree_header* th = (tree_header*)buf; 162 UINT32 crc32; 163 164 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); 165 166 if (th->address != context->address || crc32 != *((UINT32*)th->csum)) { 167 checksum_error = TRUE; 168 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 169 } else if (generation != 0 && th->generation != generation) { 170 checksum_error = TRUE; 171 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 172 } 173 } else if (context->csum) { 174 #ifdef DEBUG_STATS 175 LARGE_INTEGER time1, time2; 176 177 time1 = KeQueryPerformanceCounter(NULL); 178 #endif 179 Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum); 180 181 if (Status == STATUS_CRC_ERROR) { 182 checksum_error = TRUE; 183 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 184 } else if (!NT_SUCCESS(Status)) { 185 ERR("check_csum returned %08x\n", Status); 186 return Status; 187 } 188 #ifdef DEBUG_STATS 189 time2 = KeQueryPerformanceCounter(NULL); 190 191 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 192 #endif 193 } 194 
195 if (!checksum_error) 196 return STATUS_SUCCESS; 197 198 if (ci->num_stripes == 1) 199 return STATUS_CRC_ERROR; 200 201 if (context->tree) { 202 tree_header* t2; 203 BOOL recovered = FALSE; 204 205 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 206 if (!t2) { 207 ERR("out of memory\n"); 208 return STATUS_INSUFFICIENT_RESOURCES; 209 } 210 211 for (j = 0; j < ci->num_stripes; j++) { 212 if (j != stripe && devices[j] && devices[j]->devobj) { 213 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE); 214 if (!NT_SUCCESS(Status)) { 215 WARN("sync_read_phys returned %08x\n", Status); 216 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 217 } else { 218 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 219 220 if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { 221 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 222 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 223 recovered = TRUE; 224 225 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 226 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart, 227 t2, Vcb->superblock.node_size); 228 if (!NT_SUCCESS(Status)) { 229 WARN("write_data_phys returned %08x\n", Status); 230 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 231 } 232 } 233 234 break; 235 } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) 236 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 237 else 238 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS); 239 } 240 } 241 } 242 243 if (!recovered) { 244 ERR("unrecoverable checksum error at %llx\n", addr); 245 ExFreePool(t2); 246 return 
STATUS_CRC_ERROR; 247 } 248 249 ExFreePool(t2); 250 } else { 251 ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size; 252 UINT8* sector; 253 254 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 255 if (!sector) { 256 ERR("out of memory\n"); 257 return STATUS_INSUFFICIENT_RESOURCES; 258 } 259 260 for (i = 0; i < sectors; i++) { 261 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 262 263 if (context->csum[i] != crc32) { 264 BOOL recovered = FALSE; 265 266 for (j = 0; j < ci->num_stripes; j++) { 267 if (j != stripe && devices[j] && devices[j]->devobj) { 268 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 269 Vcb->superblock.sector_size, sector, FALSE); 270 if (!NT_SUCCESS(Status)) { 271 WARN("sync_read_phys returned %08x\n", Status); 272 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 273 } else { 274 UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 275 276 if (crc32b == context->csum[i]) { 277 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 278 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 279 recovered = TRUE; 280 281 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 282 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 283 sector, Vcb->superblock.sector_size); 284 if (!NT_SUCCESS(Status)) { 285 WARN("write_data_phys returned %08x\n", Status); 286 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 287 } 288 } 289 290 break; 291 } else 292 log_device_error(Vcb, devices[j], 
BTRFS_DEV_STAT_CORRUPTION_ERRORS); 293 } 294 } 295 } 296 297 if (!recovered) { 298 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 299 ExFreePool(sector); 300 return STATUS_CRC_ERROR; 301 } 302 } 303 } 304 305 ExFreePool(sector); 306 } 307 308 return STATUS_SUCCESS; 309 } 310 311 static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, 312 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { 313 UINT64 i; 314 315 for (i = 0; i < ci->num_stripes; i++) { 316 if (context->stripes[i].status == ReadDataStatus_Error) { 317 WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); 318 log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS); 319 return context->stripes[i].iosb.Status; 320 } 321 } 322 323 if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries 324 tree_header* th = (tree_header*)buf; 325 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 326 327 if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) { 328 UINT64 off; 329 UINT16 stripe; 330 331 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe); 332 333 ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 334 335 if (crc32 != *((UINT32*)th->csum)) { 336 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); 337 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 338 return STATUS_CRC_ERROR; 339 } else if (addr != th->address) { 340 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); 341 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 342 return STATUS_CRC_ERROR; 343 } else if (generation != 0 && generation != th->generation) { 344 
WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); 345 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 346 return STATUS_CRC_ERROR; 347 } 348 } 349 } else if (context->csum) { 350 NTSTATUS Status; 351 #ifdef DEBUG_STATS 352 LARGE_INTEGER time1, time2; 353 354 time1 = KeQueryPerformanceCounter(NULL); 355 #endif 356 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 357 358 if (Status == STATUS_CRC_ERROR) { 359 for (i = 0; i < length / Vcb->superblock.sector_size; i++) { 360 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 361 362 if (context->csum[i] != crc32) { 363 UINT64 off; 364 UINT16 stripe; 365 366 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe); 367 368 ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 369 370 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 371 372 return Status; 373 } 374 } 375 376 return Status; 377 } else if (!NT_SUCCESS(Status)) { 378 ERR("check_csum returned %08x\n", Status); 379 return Status; 380 } 381 #ifdef DEBUG_STATS 382 time2 = KeQueryPerformanceCounter(NULL); 383 384 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 385 #endif 386 } 387 388 return STATUS_SUCCESS; 389 } 390 391 static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, 392 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { 393 UINT64 i; 394 UINT16 j, stripe; 395 NTSTATUS Status; 396 BOOL checksum_error = FALSE; 397 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 398 399 for (j = 0; j < ci->num_stripes; j++) { 400 if (context->stripes[j].status == ReadDataStatus_Error) { 401 WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status); 
402 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 403 return context->stripes[j].iosb.Status; 404 } else if (context->stripes[j].status == ReadDataStatus_Success) 405 stripe = j; 406 } 407 408 if (context->tree) { 409 tree_header* th = (tree_header*)buf; 410 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 411 412 if (crc32 != *((UINT32*)th->csum)) { 413 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); 414 checksum_error = TRUE; 415 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 416 } else if (addr != th->address) { 417 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); 418 checksum_error = TRUE; 419 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 420 } else if (generation != 0 && generation != th->generation) { 421 WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); 422 checksum_error = TRUE; 423 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 424 } 425 } else if (context->csum) { 426 #ifdef DEBUG_STATS 427 LARGE_INTEGER time1, time2; 428 429 time1 = KeQueryPerformanceCounter(NULL); 430 #endif 431 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 432 433 if (Status == STATUS_CRC_ERROR) 434 checksum_error = TRUE; 435 else if (!NT_SUCCESS(Status)) { 436 ERR("check_csum returned %08x\n", Status); 437 return Status; 438 } 439 #ifdef DEBUG_STATS 440 time2 = KeQueryPerformanceCounter(NULL); 441 442 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 443 #endif 444 } 445 446 if (!checksum_error) 447 return STATUS_SUCCESS; 448 449 if (context->tree) { 450 tree_header* t2; 451 UINT64 off; 452 UINT16 badsubstripe = 0; 453 BOOL recovered = FALSE; 454 455 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 456 if (!t2) { 457 ERR("out of memory\n"); 458 return 
STATUS_INSUFFICIENT_RESOURCES; 459 } 460 461 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe); 462 463 stripe *= ci->sub_stripes; 464 465 for (j = 0; j < ci->sub_stripes; j++) { 466 if (context->stripes[stripe + j].status == ReadDataStatus_Success) { 467 badsubstripe = j; 468 break; 469 } 470 } 471 472 for (j = 0; j < ci->sub_stripes; j++) { 473 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) { 474 Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off, 475 Vcb->superblock.node_size, (UINT8*)t2, FALSE); 476 if (!NT_SUCCESS(Status)) { 477 WARN("sync_read_phys returned %08x\n", Status); 478 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS); 479 } else { 480 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 481 482 if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { 483 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 484 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id); 485 recovered = TRUE; 486 487 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad 488 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off, 489 t2, Vcb->superblock.node_size); 490 if (!NT_SUCCESS(Status)) { 491 WARN("write_data_phys returned %08x\n", Status); 492 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 493 } 494 } 495 496 break; 497 } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) 498 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 499 else 500 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS); 501 } 502 } 
503 } 504 505 if (!recovered) { 506 ERR("unrecoverable checksum error at %llx\n", addr); 507 ExFreePool(t2); 508 return STATUS_CRC_ERROR; 509 } 510 511 ExFreePool(t2); 512 } else { 513 ULONG sectors = length / Vcb->superblock.sector_size; 514 UINT8* sector; 515 516 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 517 if (!sector) { 518 ERR("out of memory\n"); 519 return STATUS_INSUFFICIENT_RESOURCES; 520 } 521 522 for (i = 0; i < sectors; i++) { 523 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 524 525 if (context->csum[i] != crc32) { 526 UINT64 off; 527 UINT16 stripe2, badsubstripe = 0; 528 BOOL recovered = FALSE; 529 530 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 531 ci->num_stripes / ci->sub_stripes, &off, &stripe2); 532 533 stripe2 *= ci->sub_stripes; 534 535 for (j = 0; j < ci->sub_stripes; j++) { 536 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) { 537 badsubstripe = j; 538 break; 539 } 540 } 541 542 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 543 544 for (j = 0; j < ci->sub_stripes; j++) { 545 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) { 546 Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off, 547 Vcb->superblock.sector_size, sector, FALSE); 548 if (!NT_SUCCESS(Status)) { 549 WARN("sync_read_phys returned %08x\n", Status); 550 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS); 551 } else { 552 UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 553 554 if (crc32b == context->csum[i]) { 555 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 556 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, 
Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id); 557 recovered = TRUE; 558 559 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad 560 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off, 561 sector, Vcb->superblock.sector_size); 562 if (!NT_SUCCESS(Status)) { 563 WARN("write_data_phys returned %08x\n", Status); 564 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS); 565 } 566 } 567 568 break; 569 } else 570 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 571 } 572 } 573 } 574 575 if (!recovered) { 576 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 577 ExFreePool(sector); 578 return STATUS_CRC_ERROR; 579 } 580 } 581 } 582 583 ExFreePool(sector); 584 } 585 586 return STATUS_SUCCESS; 587 } 588 589 static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci, 590 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) { 591 ULONG i; 592 NTSTATUS Status; 593 BOOL checksum_error = FALSE; 594 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 595 UINT16 j, stripe; 596 BOOL no_success = TRUE; 597 598 for (j = 0; j < ci->num_stripes; j++) { 599 if (context->stripes[j].status == ReadDataStatus_Error) { 600 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 601 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 602 return context->stripes[j].iosb.Status; 603 } else if (context->stripes[j].status == ReadDataStatus_Success) { 604 stripe = j; 605 no_success = FALSE; 606 } 607 } 608 609 if (c) { // check partial stripes 610 LIST_ENTRY* le; 611 UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length; 612 613 ExAcquireResourceSharedLite(&c->partial_stripes_lock, 
TRUE); 614 615 le = c->partial_stripes.Flink; 616 while (le != &c->partial_stripes) { 617 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 618 619 if (ps->address + ps_length > addr && ps->address < addr + length) { 620 ULONG runlength, index; 621 622 runlength = RtlFindFirstRunClear(&ps->bmp, &index); 623 624 while (runlength != 0) { 625 UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size); 626 UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); 627 UINT64 start = max(runstart, addr); 628 UINT64 end = min(runend, addr + length); 629 630 if (end > start) 631 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start)); 632 633 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); 634 } 635 } else if (ps->address >= addr + length) 636 break; 637 638 le = le->Flink; 639 } 640 641 ExReleaseResourceLite(&c->partial_stripes_lock); 642 } 643 644 if (context->tree) { 645 tree_header* th = (tree_header*)buf; 646 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 647 648 if (addr != th->address || crc32 != *((UINT32*)th->csum)) { 649 checksum_error = TRUE; 650 if (!no_success && !degraded) 651 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 652 } else if (generation != 0 && generation != th->generation) { 653 checksum_error = TRUE; 654 if (!no_success && !degraded) 655 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 656 } 657 } else if (context->csum) { 658 #ifdef DEBUG_STATS 659 LARGE_INTEGER time1, time2; 660 661 time1 = KeQueryPerformanceCounter(NULL); 662 #endif 663 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 664 665 if (Status == STATUS_CRC_ERROR) { 666 if (!degraded) 667 WARN("checksum error\n"); 668 checksum_error = TRUE; 669 } else if (!NT_SUCCESS(Status)) { 670 ERR("check_csum returned %08x\n", Status); 671 
return Status; 672 } 673 674 #ifdef DEBUG_STATS 675 time2 = KeQueryPerformanceCounter(NULL); 676 677 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 678 #endif 679 } else if (degraded) 680 checksum_error = TRUE; 681 682 if (!checksum_error) 683 return STATUS_SUCCESS; 684 685 if (context->tree) { 686 UINT16 parity; 687 UINT64 off; 688 BOOL recovered = FALSE, first = TRUE, failed = FALSE; 689 UINT8* t2; 690 691 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG); 692 if (!t2) { 693 ERR("out of memory\n"); 694 return STATUS_INSUFFICIENT_RESOURCES; 695 } 696 697 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe); 698 699 parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 700 701 stripe = (parity + stripe + 1) % ci->num_stripes; 702 703 for (j = 0; j < ci->num_stripes; j++) { 704 if (j != stripe) { 705 if (devices[j] && devices[j]->devobj) { 706 if (first) { 707 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE); 708 if (!NT_SUCCESS(Status)) { 709 ERR("sync_read_phys returned %08x\n", Status); 710 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 711 failed = TRUE; 712 break; 713 } 714 715 first = FALSE; 716 } else { 717 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE); 718 if (!NT_SUCCESS(Status)) { 719 ERR("sync_read_phys returned %08x\n", Status); 720 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 721 failed = TRUE; 722 break; 723 } 724 725 do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size); 726 } 727 } else { 728 failed = TRUE; 729 break; 730 } 731 } 732 } 733 734 if (!failed) { 735 tree_header* t3 = (tree_header*)t2; 736 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum)); 737 738 if 
(t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) { 739 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 740 741 if (!degraded) 742 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 743 744 recovered = TRUE; 745 746 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 747 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size); 748 if (!NT_SUCCESS(Status)) { 749 WARN("write_data_phys returned %08x\n", Status); 750 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 751 } 752 } 753 } 754 } 755 756 if (!recovered) { 757 ERR("unrecoverable checksum error at %llx\n", addr); 758 ExFreePool(t2); 759 return STATUS_CRC_ERROR; 760 } 761 762 ExFreePool(t2); 763 } else { 764 ULONG sectors = length / Vcb->superblock.sector_size; 765 UINT8* sector; 766 767 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG); 768 if (!sector) { 769 ERR("out of memory\n"); 770 return STATUS_INSUFFICIENT_RESOURCES; 771 } 772 773 for (i = 0; i < sectors; i++) { 774 UINT16 parity; 775 UINT64 off; 776 UINT32 crc32; 777 778 if (context->csum) 779 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 780 781 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 782 ci->num_stripes - 1, &off, &stripe); 783 784 parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 785 786 stripe = (parity + stripe + 1) % ci->num_stripes; 787 788 if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) { 789 BOOL recovered = FALSE, first = TRUE, failed = FALSE; 790 791 if (devices[stripe] && 
devices[stripe]->devobj) 792 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS); 793 794 for (j = 0; j < ci->num_stripes; j++) { 795 if (j != stripe) { 796 if (devices[j] && devices[j]->devobj) { 797 if (first) { 798 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE); 799 if (!NT_SUCCESS(Status)) { 800 ERR("sync_read_phys returned %08x\n", Status); 801 failed = TRUE; 802 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 803 break; 804 } 805 806 first = FALSE; 807 } else { 808 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE); 809 if (!NT_SUCCESS(Status)) { 810 ERR("sync_read_phys returned %08x\n", Status); 811 failed = TRUE; 812 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 813 break; 814 } 815 816 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size); 817 } 818 } else { 819 failed = TRUE; 820 break; 821 } 822 } 823 } 824 825 if (!failed) { 826 if (context->csum) 827 crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 828 829 if (!context->csum || crc32 == context->csum[i]) { 830 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 831 832 if (!degraded) 833 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 834 835 recovered = TRUE; 836 837 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 838 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, 839 sector, Vcb->superblock.sector_size); 840 if (!NT_SUCCESS(Status)) { 841 WARN("write_data_phys returned %08x\n", Status); 842 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 843 } 844 } 845 } 846 } 847 848 if (!recovered) { 849 
ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 850 ExFreePool(sector); 851 return STATUS_CRC_ERROR; 852 } 853 } 854 } 855 856 ExFreePool(sector); 857 } 858 859 return STATUS_SUCCESS; 860 } 861 862 void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) { 863 if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data 864 UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1; 865 UINT16 stripe; 866 867 stripe = num_stripes - 3; 868 869 if (stripe == missing) 870 RtlZeroMemory(out, sector_size); 871 else 872 RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size); 873 874 do { 875 stripe--; 876 877 galois_double(out, sector_size); 878 879 if (stripe != missing) 880 do_xor(out, sectors + (stripe * sector_size), sector_size); 881 } while (stripe > 0); 882 883 do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size); 884 885 if (missing != 0) 886 galois_divpower(out, (UINT8)missing, sector_size); 887 } else { // reconstruct from p and q 888 UINT16 x, y, stripe; 889 UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; 890 UINT32 j; 891 892 stripe = num_stripes - 3; 893 894 pxy = out + sector_size; 895 qxy = out; 896 897 if (stripe == missing1 || stripe == missing2) { 898 RtlZeroMemory(qxy, sector_size); 899 RtlZeroMemory(pxy, sector_size); 900 901 if (stripe == missing1) 902 x = stripe; 903 else 904 y = stripe; 905 } else { 906 RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size); 907 RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size); 908 } 909 910 do { 911 stripe--; 912 913 galois_double(qxy, sector_size); 914 915 if (stripe != missing1 && stripe != missing2) { 916 do_xor(qxy, sectors + (stripe * sector_size), sector_size); 917 do_xor(pxy, sectors + (stripe * sector_size), sector_size); 918 } else if (stripe == missing1) 919 x = stripe; 920 else if (stripe 
== missing2)
                y = stripe;
        } while (stripe > 0);

        /* Tail of raid6_recover2: rebuild two missing stripes with GF(2^8)
         * arithmetic — gpow2/gmul/gdiv are Galois-field helpers. */
        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        p = sectors + ((num_stripes - 2) * sector_size);
        q = sectors + ((num_stripes - 1) * sector_size);

        for (j = 0; j < sector_size; j++) {
            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

            p++;
            q++;
            pxy++;
            qxy++;
        }

        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}

/* Verify data already read from a RAID6 chunk, and attempt recovery on failure.
 *
 * On entry buf holds the assembled data for [addr, addr+length). The function:
 *   1. overlays any cached partial-stripe data held on the chunk over buf;
 *   2. validates buf — tree-header address/CRC/generation for metadata
 *      (context->tree), per-sector CRC32C against context->csum for data, or
 *      (if degraded and no csum) assumes failure;
 *   3. on checksum error, re-reads the surviving stripes of the affected row
 *      and reconstructs the bad sectors, first RAID5-style from P (xor) alone,
 *      then via full two-parity recovery with raid6_recover2 using Q; repaired
 *      data is written back over the bad device when the volume is writable.
 *
 * Returns STATUS_SUCCESS if buf is valid or was repaired, STATUS_CRC_ERROR if
 * unrecoverable, or a propagated read/allocation error. Device error counters
 * are updated via log_device_error along the way. */
static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
                                device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
    NTSTATUS Status;
    ULONG i;
    BOOL checksum_error = FALSE;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
    UINT16 stripe, j;
    BOOL no_success = TRUE;

    /* Fail fast on any stripe that returned a hard I/O error; remember one
     * stripe that did succeed so corruption can be attributed to a device. */
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);

            if (devices[j])
                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            no_success = FALSE;
        }
    }

    if (c) { // check partial stripes
        LIST_ENTRY* le;
        UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length;

        ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);

        /* Overlay cached partial-stripe data over buf wherever it overlaps the
         * requested range. Clear runs in ps->bmp mark the sectors taken from
         * ps->data — NOTE(review): bitmap polarity inferred from the
         * RtlFindFirstRunClear usage; confirm against the writer side. */
        le = c->partial_stripes.Flink;
        while (le != &c->partial_stripes) {
            partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

            if (ps->address + ps_length > addr && ps->address < addr + length) {
                ULONG runlength, index;

                runlength = RtlFindFirstRunClear(&ps->bmp, &index);

                while (runlength != 0) {
                    UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
                    UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
                    UINT64 start = max(runstart, addr);
                    UINT64 end = min(runend, addr + length);

                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break; // list is address-ordered, so nothing further can overlap

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    /* Validate what we read. Metadata carries its own header (address, csum,
     * generation); data is checked per-sector against context->csum if given. */
    if (context->tree) {
        tree_header* th = (tree_header*)buf;
        UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

        if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
            checksum_error = TRUE;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = TRUE;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
#ifdef DEBUG_STATS
        LARGE_INTEGER time1, time2;

        time1 = KeQueryPerformanceCounter(NULL);
#endif
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = TRUE;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }
#ifdef DEBUG_STATS
        time2 = KeQueryPerformanceCounter(NULL);

        Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
#endif
    } else if (degraded)
        checksum_error = TRUE; // no csum to prove the degraded read good — force reconstruction

    if (!checksum_error)
        return STATUS_SUCCESS;

    /* Recovery. Scratch buffer layout (node- or sector-sized slots):
     * slots 0..num_stripes-3 = data stripes of the row, slot num_stripes-2 = P,
     * slot num_stripes-1 = Q, slots num_stripes and num_stripes+1 = outputs of
     * raid6_recover2 — NOTE(review): last two slots inferred from the
     * raid6_recover2 call sites; confirm against its definition. */
    if (context->tree) {
        UINT8* sector;
        UINT16 k, physstripe, parity1, parity2, error_stripe;
        UINT64 off;
        BOOL recovered = FALSE, failed = FALSE;
        ULONG num_errors = 0;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);

        /* P/Q positions rotate with the row; physstripe is the physical stripe
         * holding the failing logical data stripe. */
        parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
        parity2 = (parity1 + 1) % ci->num_stripes;

        physstripe = (parity2 + stripe + 1) % ci->num_stripes;

        j = (parity2 + 1) % ci->num_stripes;

        /* Read every stripe of the row except Q into its logical slot, skipping
         * the bad stripe (its slot is what we reconstruct). Tolerate at most one
         * additional failure, remembered in error_stripe. */
        for (k = 0; k < ci->num_stripes - 1; k++) {
            if (j != physstripe) {
                if (devices[j] && devices[j]->devobj) {
                    Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE);
                    if (!NT_SUCCESS(Status)) {
                        ERR("sync_read_phys returned %08x\n", Status);
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                        num_errors++;
                        error_stripe = k;

                        if (num_errors > 1) {
                            failed = TRUE;
                            break;
                        }
                    }
                } else {
                    num_errors++;
                    error_stripe = k;

                    if (num_errors > 1) {
                        failed = TRUE;
                        break;
                    }
                }
            }

            j = (j + 1) % ci->num_stripes;
        }

        if (!failed) {
            if (num_errors == 0) {
                /* Only the target stripe is bad: rebuild it RAID5-style by
                 * XORing P with the surviving data stripes. */
                tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
                UINT32 crc32;

                RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
                              Vcb->superblock.node_size);

                for (j = 0; j < ci->num_stripes - 2; j++) {
                    if (j != stripe)
                        do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
                }

                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
                    RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);

                    if (devices[physstripe] && devices[physstripe]->devobj)
                        ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);

                    recovered = TRUE;

                    if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
                        Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
                                                 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
                        if (!NT_SUCCESS(Status)) {
                            WARN("write_data_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                        }
                    }
                }
            }

            if (!recovered) {
                /* P alone was not enough (second failure, or the XOR result
                 * still failed validation): bring in Q and use full RAID6
                 * recovery. */
                UINT32 crc32;
                tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
                BOOL read_q = FALSE;

                if (devices[parity2] && devices[parity2]->devobj) {
                    Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
                                            Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE);
                    if (!NT_SUCCESS(Status)) {
                        ERR("sync_read_phys returned %08x\n", Status);
                        /* NOTE(review): logs devices[j], not devices[parity2].
                         * When the read loop above completes without break,
                         * j has wrapped to parity2, so this is equivalent —
                         * but the data-sector branch below uses
                         * devices[parity2] explicitly; consider matching it. */
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                    } else
                        read_q = TRUE;
                }

                if (read_q) {
                    if (num_errors == 1) {
                        /* Both failing slots known: one recovery attempt. */
                        raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));

                        crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                        if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation))
                            recovered = TRUE;
                    } else {
                        /* Second bad stripe unknown: try each candidate as the
                         * other missing stripe until the result validates. */
                        for (j = 0; j < ci->num_stripes - 1; j++) {
                            if (j != stripe) {
                                raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));

                                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                                if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
                                    recovered = TRUE;
                                    error_stripe = j;
                                    break;
                                }
                            }
                        }
                    }
                }

                if (recovered) {
                    UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;

                    if (devices[physstripe] && devices[physstripe]->devobj)
                        ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);

                    RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);

                    if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
                        Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
                                                 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
                        if (!NT_SUCCESS(Status)) {
                            WARN("write_data_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                        }
                    }

                    /* Also repair the secondary bad stripe (data or P). */
                    if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
                        if (error_stripe == ci->num_stripes - 2) {
                            /* Secondary failure was P itself: recompute it by
                             * XORing all data stripes (using the recovered one
                             * for slot `stripe`). */
                            ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id);

                            log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                            RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);

                            for (j = 0; j < ci->num_stripes - 2; j++) {
                                if (j == stripe) {
                                    do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
                                           Vcb->superblock.node_size);
                                } else {
                                    do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
                                           Vcb->superblock.node_size);
                                }
                            }
                        } else {
                            ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length),
                                devices[error_stripe_phys]->devitem.dev_id);

                            log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                            RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
                                          sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
                        }
                    }

                    if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
                        Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
                                                 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
                        if (!NT_SUCCESS(Status)) {
                            WARN("write_data_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
                        }
                    }
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %llx\n", addr);
            ExFreePool(sector);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(sector);
    } else {
        /* Data read: locate and repair each failing sector independently, since
         * different sectors of the request can live on different rows/devices. */
        ULONG sectors = length / Vcb->superblock.sector_size;
        UINT8* sector;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            UINT64 off;
            UINT16 physstripe, parity1, parity2;
            UINT32 crc32;

            if (context->csum)
                crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

            get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
                             ci->num_stripes - 2, &off, &stripe);

            parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
            parity2 = (parity1 + 1) % ci->num_stripes;

            physstripe = (parity2 + stripe + 1) % ci->num_stripes;

            /* Repair needed if the sector's device is missing or its CRC fails. */
            if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
                UINT16 k, error_stripe;
                BOOL recovered = FALSE, failed = FALSE;
                ULONG num_errors = 0;

                if (devices[physstripe] && devices[physstripe]->devobj)
                    log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);

                j = (parity2 + 1) % ci->num_stripes;

                /* Read the row's other stripes (data + P) into their slots,
                 * tolerating at most one further failure. */
                for (k = 0; k < ci->num_stripes - 1; k++) {
                    if (j != physstripe) {
                        if (devices[j] && devices[j]->devobj) {
                            Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE);
                            if (!NT_SUCCESS(Status)) {
                                ERR("sync_read_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                num_errors++;
                                error_stripe = k;

                                if (num_errors > 1) {
                                    failed = TRUE;
                                    break;
                                }
                            }
                        } else {
                            num_errors++;
                            error_stripe = k;

                            if (num_errors > 1) {
                                failed = TRUE;
                                break;
                            }
                        }
                    }

                    j = (j + 1) % ci->num_stripes;
                }

                if (!failed) {
                    if (num_errors == 0) {
                        /* Single bad sector: rebuild it from P by XOR. */
                        RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                        for (j = 0; j < ci->num_stripes - 2; j++) {
                            if (j != stripe)
                                do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                        }

                        if (context->csum)
                            crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                        if (!context->csum || crc32 == context->csum[i]) {
                            RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                            if (devices[physstripe] && devices[physstripe]->devobj)
                                ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
                                    devices[physstripe]->devitem.dev_id);

                            recovered = TRUE;

                            if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
                                Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
                                                         sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                if (!NT_SUCCESS(Status)) {
                                    WARN("write_data_phys returned %08x\n", Status);
                                    log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                }
                            }
                        }
                    }

                    if (!recovered) {
                        /* P was insufficient: read Q and do full RAID6 recovery. */
                        BOOL read_q = FALSE;

                        if (devices[parity2] && devices[parity2]->devobj) {
                            Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
                                                    Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE);
                            if (!NT_SUCCESS(Status)) {
                                ERR("sync_read_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
                            } else
                                read_q = TRUE;
                        }

                        if (read_q) {
                            if (num_errors == 1) {
                                raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));

                                /* With the device missing there is no csum to
                                 * check against a degraded read; accept the
                                 * reconstruction as-is. */
                                if (!devices[physstripe] || !devices[physstripe]->devobj)
                                    recovered = TRUE;
                                else {
                                    crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                                    if (crc32 == context->csum[i])
                                        recovered = TRUE;
                                }
                            } else {
                                /* Unknown second bad stripe: try candidates
                                 * until the csum matches. */
                                for (j = 0; j < ci->num_stripes - 1; j++) {
                                    if (j != stripe) {
                                        raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));

                                        crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                                        if (crc32 == context->csum[i]) {
                                            recovered = TRUE;
                                            error_stripe = j;
                                            break;
                                        }
                                    }
                                }
                            }
                        }

                        if (recovered) {
                            UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;

                            if (devices[physstripe] && devices[physstripe]->devobj)
                                ERR("recovering from checksum error at %llx, device %llx\n",
                                    addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);

                            RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                            if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
                                Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
                                                         sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                if (!NT_SUCCESS(Status)) {
                                    WARN("write_data_phys returned %08x\n", Status);
                                    log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                }
                            }

                            /* Also repair the secondary bad stripe (data or P). */
                            if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
                                if (error_stripe == ci->num_stripes - 2) {
                                    /* Secondary failure was P: recompute it by
                                     * XOR over all data slots, substituting the
                                     * recovered sector for slot `stripe`. */
                                    ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
                                        devices[error_stripe_phys]->devitem.dev_id);

                                    log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                                    RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                                    for (j = 0; j < ci->num_stripes - 2; j++) {
                                        if (j == stripe) {
                                            do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
                                                   Vcb->superblock.sector_size);
                                        } else {
                                            do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
                                                   Vcb->superblock.sector_size);
                                        }
                                    }
                                } else {
                                    ERR("recovering from checksum error at %llx, device %llx\n",
                                        addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
                                        devices[error_stripe_phys]->devitem.dev_id);

                                    log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                                    RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
                                                  sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                }
                            }

                            if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
                                Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
                                                         sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                if (!NT_SUCCESS(Status)) {
                                    WARN("write_data_phys returned %08x\n", Status);
                                    log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1427 } 1428 } 1429 } 1430 } 1431 } 1432 1433 if (!recovered) { 1434 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 1435 ExFreePool(sector); 1436 return STATUS_CRC_ERROR; 1437 } 1438 } 1439 } 1440 1441 ExFreePool(sector); 1442 } 1443 1444 return STATUS_SUCCESS; 1445 } 1446 1447 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum, 1448 _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read, 1449 _In_ ULONG priority) { 1450 CHUNK_ITEM* ci; 1451 CHUNK_ITEM_STRIPE* cis; 1452 read_data_context context; 1453 UINT64 type, offset, total_reading = 0; 1454 NTSTATUS Status; 1455 device** devices = NULL; 1456 UINT16 i, startoffstripe, allowed_missing, missing_devices = 0; 1457 UINT8* dummypage = NULL; 1458 PMDL dummy_mdl = NULL; 1459 BOOL need_to_wait; 1460 UINT64 lockaddr, locklen; 1461 #ifdef DEBUG_STATS 1462 LARGE_INTEGER time1, time2; 1463 #endif 1464 1465 if (Vcb->log_to_phys_loaded) { 1466 if (!c) { 1467 c = get_chunk_from_address(Vcb, addr); 1468 1469 if (!c) { 1470 ERR("get_chunk_from_address failed\n"); 1471 return STATUS_INTERNAL_ERROR; 1472 } 1473 } 1474 1475 ci = c->chunk_item; 1476 offset = c->offset; 1477 devices = c->devices; 1478 1479 if (pc) 1480 *pc = c; 1481 } else { 1482 LIST_ENTRY* le = Vcb->sys_chunks.Flink; 1483 1484 ci = NULL; 1485 1486 c = NULL; 1487 while (le != &Vcb->sys_chunks) { 1488 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); 1489 1490 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { 1491 CHUNK_ITEM* chunk_item = sc->data; 1492 1493 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { 1494 ci = chunk_item; 1495 offset = sc->key.offset; 1496 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; 
1497 1498 devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); 1499 if (!devices) { 1500 ERR("out of memory\n"); 1501 return STATUS_INSUFFICIENT_RESOURCES; 1502 } 1503 1504 for (i = 0; i < ci->num_stripes; i++) { 1505 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); 1506 } 1507 1508 break; 1509 } 1510 } 1511 1512 le = le->Flink; 1513 } 1514 1515 if (!ci) { 1516 ERR("could not find chunk for %llx in bootstrap\n", addr); 1517 return STATUS_INTERNAL_ERROR; 1518 } 1519 1520 if (pc) 1521 *pc = NULL; 1522 } 1523 1524 if (ci->type & BLOCK_FLAG_DUPLICATE) { 1525 type = BLOCK_FLAG_DUPLICATE; 1526 allowed_missing = ci->num_stripes - 1; 1527 } else if (ci->type & BLOCK_FLAG_RAID0) { 1528 type = BLOCK_FLAG_RAID0; 1529 allowed_missing = 0; 1530 } else if (ci->type & BLOCK_FLAG_RAID1) { 1531 type = BLOCK_FLAG_DUPLICATE; 1532 allowed_missing = 1; 1533 } else if (ci->type & BLOCK_FLAG_RAID10) { 1534 type = BLOCK_FLAG_RAID10; 1535 allowed_missing = 1; 1536 } else if (ci->type & BLOCK_FLAG_RAID5) { 1537 type = BLOCK_FLAG_RAID5; 1538 allowed_missing = 1; 1539 } else if (ci->type & BLOCK_FLAG_RAID6) { 1540 type = BLOCK_FLAG_RAID6; 1541 allowed_missing = 2; 1542 } else { // SINGLE 1543 type = BLOCK_FLAG_DUPLICATE; 1544 allowed_missing = 0; 1545 } 1546 1547 cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 1548 1549 RtlZeroMemory(&context, sizeof(read_data_context)); 1550 KeInitializeEvent(&context.Event, NotificationEvent, FALSE); 1551 1552 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); 1553 if (!context.stripes) { 1554 ERR("out of memory\n"); 1555 return STATUS_INSUFFICIENT_RESOURCES; 1556 } 1557 1558 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) { 1559 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen); 1560 chunk_lock_range(Vcb, c, lockaddr, locklen); 1561 } 1562 1563 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes); 1564 1565 
context.buflen = length; 1566 context.num_stripes = ci->num_stripes; 1567 context.stripes_left = context.num_stripes; 1568 context.sector_size = Vcb->superblock.sector_size; 1569 context.csum = csum; 1570 context.tree = is_tree; 1571 context.type = type; 1572 1573 if (type == BLOCK_FLAG_RAID0) { 1574 UINT64 startoff, endoff; 1575 UINT16 endoffstripe, stripe; 1576 UINT32 *stripeoff, pos; 1577 PMDL master_mdl; 1578 PFN_NUMBER* pfns; 1579 1580 // FIXME - test this still works if page size isn't the same as sector size 1581 1582 // This relies on the fact that MDLs are followed in memory by the page file numbers, 1583 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0 1584 // data for you without doing a memcpy yourself. 1585 // MDLs are officially opaque, so this might very well break in future versions of Windows. 1586 1587 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); 1588 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); 1589 1590 if (file_read) { 1591 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL 1592 // with duplicated dummy PFNs, which confuse check_csum. Ah well. 1593 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested. 
1594 1595 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1596 1597 if (!context.va) { 1598 ERR("out of memory\n"); 1599 Status = STATUS_INSUFFICIENT_RESOURCES; 1600 goto exit; 1601 } 1602 } else 1603 context.va = buf; 1604 1605 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1606 if (!master_mdl) { 1607 ERR("out of memory\n"); 1608 Status = STATUS_INSUFFICIENT_RESOURCES; 1609 goto exit; 1610 } 1611 1612 Status = STATUS_SUCCESS; 1613 1614 _SEH2_TRY { 1615 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1616 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1617 Status = _SEH2_GetExceptionCode(); 1618 } _SEH2_END; 1619 1620 if (!NT_SUCCESS(Status)) { 1621 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1622 IoFreeMdl(master_mdl); 1623 goto exit; 1624 } 1625 1626 pfns = (PFN_NUMBER*)(master_mdl + 1); 1627 1628 for (i = 0; i < ci->num_stripes; i++) { 1629 if (startoffstripe > i) 1630 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1631 else if (startoffstripe == i) 1632 context.stripes[i].stripestart = startoff; 1633 else 1634 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length); 1635 1636 if (endoffstripe > i) 1637 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1638 else if (endoffstripe == i) 1639 context.stripes[i].stripeend = endoff + 1; 1640 else 1641 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length); 1642 1643 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 1644 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); 1645 1646 if (!context.stripes[i].mdl) { 1647 ERR("IoAllocateMdl failed\n"); 1648 Status = STATUS_INSUFFICIENT_RESOURCES; 1649 goto exit; 1650 } 1651 } 1652 } 1653 1654 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, 
ALLOC_TAG); 1655 if (!stripeoff) { 1656 ERR("out of memory\n"); 1657 Status = STATUS_INSUFFICIENT_RESOURCES; 1658 goto exit; 1659 } 1660 1661 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 1662 1663 pos = 0; 1664 stripe = startoffstripe; 1665 while (pos < length) { 1666 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 1667 1668 if (pos == 0) { 1669 UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)); 1670 1671 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1672 1673 stripeoff[stripe] += readlen; 1674 pos += readlen; 1675 } else if (length - pos < ci->stripe_length) { 1676 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1677 1678 pos = length; 1679 } else { 1680 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1681 1682 stripeoff[stripe] += (UINT32)ci->stripe_length; 1683 pos += (UINT32)ci->stripe_length; 1684 } 1685 1686 stripe = (stripe + 1) % ci->num_stripes; 1687 } 1688 1689 MmUnlockPages(master_mdl); 1690 IoFreeMdl(master_mdl); 1691 1692 ExFreePool(stripeoff); 1693 } else if (type == BLOCK_FLAG_RAID10) { 1694 UINT64 startoff, endoff; 1695 UINT16 endoffstripe, j, stripe; 1696 ULONG orig_ls; 1697 PMDL master_mdl; 1698 PFN_NUMBER* pfns; 1699 UINT32* stripeoff, pos; 1700 read_data_stripe** stripes; 1701 1702 if (c) 1703 orig_ls = c->last_stripe; 1704 else 1705 orig_ls = 0; 1706 1707 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); 1708 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); 1709 1710 if ((ci->num_stripes % ci->sub_stripes) != 0) { 
1711 ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); 1712 Status = STATUS_INTERNAL_ERROR; 1713 goto exit; 1714 } 1715 1716 if (file_read) { 1717 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1718 1719 if (!context.va) { 1720 ERR("out of memory\n"); 1721 Status = STATUS_INSUFFICIENT_RESOURCES; 1722 goto exit; 1723 } 1724 } else 1725 context.va = buf; 1726 1727 context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size); 1728 context.startoffstripe = startoffstripe; 1729 context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size); 1730 1731 startoffstripe *= ci->sub_stripes; 1732 endoffstripe *= ci->sub_stripes; 1733 1734 if (c) 1735 c->last_stripe = (orig_ls + 1) % ci->sub_stripes; 1736 1737 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1738 if (!master_mdl) { 1739 ERR("out of memory\n"); 1740 Status = STATUS_INSUFFICIENT_RESOURCES; 1741 goto exit; 1742 } 1743 1744 Status = STATUS_SUCCESS; 1745 1746 _SEH2_TRY { 1747 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1748 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1749 Status = _SEH2_GetExceptionCode(); 1750 } _SEH2_END; 1751 1752 if (!NT_SUCCESS(Status)) { 1753 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1754 IoFreeMdl(master_mdl); 1755 goto exit; 1756 } 1757 1758 pfns = (PFN_NUMBER*)(master_mdl + 1); 1759 1760 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1761 if (!stripes) { 1762 ERR("out of memory\n"); 1763 Status = STATUS_INSUFFICIENT_RESOURCES; 1764 goto exit; 1765 } 1766 1767 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes); 1768 1769 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { 1770 UINT64 sstart, send; 1771 BOOL stripeset = FALSE; 1772 1773 if (startoffstripe > i) 1774 sstart = startoff - 
(startoff % ci->stripe_length) + ci->stripe_length; 1775 else if (startoffstripe == i) 1776 sstart = startoff; 1777 else 1778 sstart = startoff - (startoff % ci->stripe_length); 1779 1780 if (endoffstripe > i) 1781 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1782 else if (endoffstripe == i) 1783 send = endoff + 1; 1784 else 1785 send = endoff - (endoff % ci->stripe_length); 1786 1787 for (j = 0; j < ci->sub_stripes; j++) { 1788 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) { 1789 context.stripes[i+j].stripestart = sstart; 1790 context.stripes[i+j].stripeend = send; 1791 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1792 1793 if (sstart != send) { 1794 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); 1795 1796 if (!context.stripes[i+j].mdl) { 1797 ERR("IoAllocateMdl failed\n"); 1798 Status = STATUS_INSUFFICIENT_RESOURCES; 1799 goto exit; 1800 } 1801 } 1802 1803 stripeset = TRUE; 1804 } else 1805 context.stripes[i+j].status = ReadDataStatus_Skip; 1806 } 1807 1808 if (!stripeset) { 1809 for (j = 0; j < ci->sub_stripes; j++) { 1810 if (devices[i+j] && devices[i+j]->devobj) { 1811 context.stripes[i+j].stripestart = sstart; 1812 context.stripes[i+j].stripeend = send; 1813 context.stripes[i+j].status = ReadDataStatus_Pending; 1814 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1815 1816 if (sstart != send) { 1817 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); 1818 1819 if (!context.stripes[i+j].mdl) { 1820 ERR("IoAllocateMdl failed\n"); 1821 Status = STATUS_INSUFFICIENT_RESOURCES; 1822 goto exit; 1823 } 1824 } 1825 1826 stripeset = TRUE; 1827 break; 1828 } 1829 } 1830 1831 if (!stripeset) { 1832 ERR("could not find stripe to read\n"); 1833 Status = STATUS_DEVICE_NOT_READY; 1834 goto exit; 1835 } 1836 } 1837 } 1838 1839 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, 
ALLOC_TAG); 1840 if (!stripeoff) { 1841 ERR("out of memory\n"); 1842 Status = STATUS_INSUFFICIENT_RESOURCES; 1843 goto exit; 1844 } 1845 1846 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); 1847 1848 pos = 0; 1849 stripe = startoffstripe / ci->sub_stripes; 1850 while (pos < length) { 1851 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1); 1852 1853 if (pos == 0) { 1854 UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart, 1855 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length)); 1856 1857 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1858 1859 stripeoff[stripe] += readlen; 1860 pos += readlen; 1861 } else if (length - pos < ci->stripe_length) { 1862 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1863 1864 pos = length; 1865 } else { 1866 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1867 1868 stripeoff[stripe] += (ULONG)ci->stripe_length; 1869 pos += (ULONG)ci->stripe_length; 1870 } 1871 1872 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); 1873 } 1874 1875 MmUnlockPages(master_mdl); 1876 IoFreeMdl(master_mdl); 1877 1878 ExFreePool(stripeoff); 1879 ExFreePool(stripes); 1880 } else if (type == BLOCK_FLAG_DUPLICATE) { 1881 UINT64 orig_ls; 1882 1883 if (c) 1884 orig_ls = i = c->last_stripe; 1885 else 1886 orig_ls = i = 0; 1887 1888 while (!devices[i] || !devices[i]->devobj) { 1889 i = (i + 1) % ci->num_stripes; 1890 1891 if (i == orig_ls) { 1892 ERR("no devices available to service request\n"); 1893 Status = STATUS_DEVICE_NOT_READY; 1894 goto exit; 1895 } 1896 } 1897 1898 if (c) 1899 c->last_stripe = (i + 1) % ci->num_stripes; 1900 1901 context.stripes[i].stripestart = addr - offset; 1902 context.stripes[i].stripeend = 
context.stripes[i].stripestart + length; 1903 1904 if (file_read) { 1905 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1906 1907 if (!context.va) { 1908 ERR("out of memory\n"); 1909 Status = STATUS_INSUFFICIENT_RESOURCES; 1910 goto exit; 1911 } 1912 1913 context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1914 if (!context.stripes[i].mdl) { 1915 ERR("IoAllocateMdl failed\n"); 1916 Status = STATUS_INSUFFICIENT_RESOURCES; 1917 goto exit; 1918 } 1919 1920 MmBuildMdlForNonPagedPool(context.stripes[i].mdl); 1921 } else { 1922 context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL); 1923 1924 if (!context.stripes[i].mdl) { 1925 ERR("IoAllocateMdl failed\n"); 1926 Status = STATUS_INSUFFICIENT_RESOURCES; 1927 goto exit; 1928 } 1929 1930 Status = STATUS_SUCCESS; 1931 1932 _SEH2_TRY { 1933 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess); 1934 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1935 Status = _SEH2_GetExceptionCode(); 1936 } _SEH2_END; 1937 1938 if (!NT_SUCCESS(Status)) { 1939 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1940 goto exit; 1941 } 1942 } 1943 } else if (type == BLOCK_FLAG_RAID5) { 1944 UINT64 startoff, endoff; 1945 UINT16 endoffstripe, parity; 1946 UINT32 *stripeoff, pos; 1947 PMDL master_mdl; 1948 PFN_NUMBER *pfns, dummy; 1949 BOOL need_dummy = FALSE; 1950 1951 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); 1952 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); 1953 1954 if (file_read) { 1955 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1956 1957 if (!context.va) { 1958 ERR("out of memory\n"); 1959 Status = STATUS_INSUFFICIENT_RESOURCES; 1960 goto exit; 1961 } 1962 } else 1963 context.va = buf; 1964 1965 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1966 if (!master_mdl) { 1967 ERR("out of 
memory\n"); 1968 Status = STATUS_INSUFFICIENT_RESOURCES; 1969 goto exit; 1970 } 1971 1972 Status = STATUS_SUCCESS; 1973 1974 _SEH2_TRY { 1975 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1976 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1977 Status = _SEH2_GetExceptionCode(); 1978 } _SEH2_END; 1979 1980 if (!NT_SUCCESS(Status)) { 1981 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1982 IoFreeMdl(master_mdl); 1983 goto exit; 1984 } 1985 1986 pfns = (PFN_NUMBER*)(master_mdl + 1); 1987 1988 pos = 0; 1989 while (pos < length) { 1990 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 1991 1992 if (pos == 0) { 1993 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; 1994 ULONG skip, readlen; 1995 1996 i = startoffstripe; 1997 while (stripe != parity) { 1998 if (i == startoffstripe) { 1999 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length))); 2000 2001 context.stripes[stripe].stripestart = startoff; 2002 context.stripes[stripe].stripeend = startoff + readlen; 2003 2004 pos += readlen; 2005 2006 if (pos == length) 2007 break; 2008 } else { 2009 readlen = min(length - pos, (ULONG)ci->stripe_length); 2010 2011 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2012 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2013 2014 pos += readlen; 2015 2016 if (pos == length) 2017 break; 2018 } 2019 2020 i++; 2021 stripe = (stripe + 1) % ci->num_stripes; 2022 } 2023 2024 if (pos == length) 2025 break; 2026 2027 for (i = 0; i < startoffstripe; i++) { 2028 UINT16 stripe2 = (parity + i + 1) % ci->num_stripes; 2029 2030 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2031 } 2032 2033 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) 
+ ci->stripe_length; 2034 2035 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) { 2036 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1); 2037 2038 for (i = 0; i < ci->num_stripes; i++) { 2039 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2040 } 2041 2042 pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length); 2043 need_dummy = TRUE; 2044 } 2045 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2046 for (i = 0; i < ci->num_stripes; i++) { 2047 context.stripes[i].stripeend += ci->stripe_length; 2048 } 2049 2050 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1)); 2051 need_dummy = TRUE; 2052 } else { 2053 UINT16 stripe = (parity + 1) % ci->num_stripes; 2054 2055 i = 0; 2056 while (stripe != parity) { 2057 if (endoffstripe == i) { 2058 context.stripes[stripe].stripeend = endoff + 1; 2059 break; 2060 } else if (endoffstripe > i) 2061 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2062 2063 i++; 2064 stripe = (stripe + 1) % ci->num_stripes; 2065 } 2066 2067 break; 2068 } 2069 } 2070 2071 for (i = 0; i < ci->num_stripes; i++) { 2072 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2073 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), 2074 FALSE, FALSE, NULL); 2075 2076 if (!context.stripes[i].mdl) { 2077 ERR("IoAllocateMdl failed\n"); 2078 Status = STATUS_INSUFFICIENT_RESOURCES; 2079 goto exit; 2080 } 2081 } 2082 } 2083 2084 if (need_dummy) { 2085 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2086 if (!dummypage) { 2087 ERR("out of memory\n"); 2088 Status = STATUS_INSUFFICIENT_RESOURCES; 2089 goto exit; 2090 } 2091 2092 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); 2093 if (!dummy_mdl) { 2094 ERR("IoAllocateMdl 
failed\n"); 2095 Status = STATUS_INSUFFICIENT_RESOURCES; 2096 ExFreePool(dummypage); 2097 goto exit; 2098 } 2099 2100 MmBuildMdlForNonPagedPool(dummy_mdl); 2101 2102 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2103 } 2104 2105 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 2106 if (!stripeoff) { 2107 ERR("out of memory\n"); 2108 Status = STATUS_INSUFFICIENT_RESOURCES; 2109 goto exit; 2110 } 2111 2112 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 2113 2114 pos = 0; 2115 2116 while (pos < length) { 2117 PFN_NUMBER* stripe_pfns; 2118 2119 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2120 2121 if (pos == 0) { 2122 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2123 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2124 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2125 2126 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2127 2128 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2129 2130 stripeoff[stripe] = readlen; 2131 pos += readlen; 2132 2133 stripe = (stripe + 1) % ci->num_stripes; 2134 2135 while (stripe != parity) { 2136 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2137 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2138 2139 if (readlen == 0) 2140 break; 2141 2142 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2143 2144 stripeoff[stripe] = readlen; 2145 pos += readlen; 2146 2147 stripe = (stripe + 1) % ci->num_stripes; 2148 } 2149 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2150 UINT16 stripe = (parity + 1) % ci->num_stripes; 2151 ULONG k; 2152 2153 while (stripe != parity) { 2154 
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2155 2156 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2157 2158 stripeoff[stripe] += (UINT32)ci->stripe_length; 2159 pos += (UINT32)ci->stripe_length; 2160 2161 stripe = (stripe + 1) % ci->num_stripes; 2162 } 2163 2164 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1); 2165 2166 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2167 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy; 2168 stripeoff[parity] += PAGE_SIZE; 2169 } 2170 } else { 2171 UINT16 stripe = (parity + 1) % ci->num_stripes; 2172 UINT32 readlen; 2173 2174 while (pos < length) { 2175 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2176 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2177 2178 if (readlen == 0) 2179 break; 2180 2181 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2182 2183 stripeoff[stripe] += readlen; 2184 pos += readlen; 2185 2186 stripe = (stripe + 1) % ci->num_stripes; 2187 } 2188 } 2189 } 2190 2191 MmUnlockPages(master_mdl); 2192 IoFreeMdl(master_mdl); 2193 2194 ExFreePool(stripeoff); 2195 } else if (type == BLOCK_FLAG_RAID6) { 2196 UINT64 startoff, endoff; 2197 UINT16 endoffstripe, parity1; 2198 UINT32 *stripeoff, pos; 2199 PMDL master_mdl; 2200 PFN_NUMBER *pfns, dummy; 2201 BOOL need_dummy = FALSE; 2202 2203 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); 2204 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); 2205 2206 if (file_read) { 2207 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2208 2209 if (!context.va) { 2210 ERR("out of memory\n"); 2211 Status = 
STATUS_INSUFFICIENT_RESOURCES; 2212 goto exit; 2213 } 2214 } else 2215 context.va = buf; 2216 2217 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 2218 if (!master_mdl) { 2219 ERR("out of memory\n"); 2220 Status = STATUS_INSUFFICIENT_RESOURCES; 2221 goto exit; 2222 } 2223 2224 Status = STATUS_SUCCESS; 2225 2226 _SEH2_TRY { 2227 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2228 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2229 Status = _SEH2_GetExceptionCode(); 2230 } _SEH2_END; 2231 2232 if (!NT_SUCCESS(Status)) { 2233 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 2234 IoFreeMdl(master_mdl); 2235 goto exit; 2236 } 2237 2238 pfns = (PFN_NUMBER*)(master_mdl + 1); 2239 2240 pos = 0; 2241 while (pos < length) { 2242 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2243 2244 if (pos == 0) { 2245 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2; 2246 ULONG skip, readlen; 2247 2248 i = startoffstripe; 2249 while (stripe != parity1) { 2250 if (i == startoffstripe) { 2251 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length)); 2252 2253 context.stripes[stripe].stripestart = startoff; 2254 context.stripes[stripe].stripeend = startoff + readlen; 2255 2256 pos += readlen; 2257 2258 if (pos == length) 2259 break; 2260 } else { 2261 readlen = min(length - pos, (ULONG)ci->stripe_length); 2262 2263 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2264 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2265 2266 pos += readlen; 2267 2268 if (pos == length) 2269 break; 2270 } 2271 2272 i++; 2273 stripe = (stripe + 1) % ci->num_stripes; 2274 } 2275 2276 if (pos == length) 2277 break; 2278 2279 for (i = 0; i < startoffstripe; i++) { 2280 UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes; 2281 2282 context.stripes[stripe2].stripestart = 
context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2283 } 2284 2285 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2286 2287 parity2 = (parity1 + 1) % ci->num_stripes; 2288 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2289 2290 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) { 2291 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1); 2292 2293 for (i = 0; i < ci->num_stripes; i++) { 2294 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2295 } 2296 2297 pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length); 2298 need_dummy = TRUE; 2299 } 2300 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2301 for (i = 0; i < ci->num_stripes; i++) { 2302 context.stripes[i].stripeend += ci->stripe_length; 2303 } 2304 2305 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2)); 2306 need_dummy = TRUE; 2307 } else { 2308 UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2309 2310 i = 0; 2311 while (stripe != parity1) { 2312 if (endoffstripe == i) { 2313 context.stripes[stripe].stripeend = endoff + 1; 2314 break; 2315 } else if (endoffstripe > i) 2316 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2317 2318 i++; 2319 stripe = (stripe + 1) % ci->num_stripes; 2320 } 2321 2322 break; 2323 } 2324 } 2325 2326 for (i = 0; i < ci->num_stripes; i++) { 2327 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2328 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); 2329 2330 if (!context.stripes[i].mdl) { 2331 ERR("IoAllocateMdl failed\n"); 2332 
Status = STATUS_INSUFFICIENT_RESOURCES; 2333 goto exit; 2334 } 2335 } 2336 } 2337 2338 if (need_dummy) { 2339 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2340 if (!dummypage) { 2341 ERR("out of memory\n"); 2342 Status = STATUS_INSUFFICIENT_RESOURCES; 2343 goto exit; 2344 } 2345 2346 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); 2347 if (!dummy_mdl) { 2348 ERR("IoAllocateMdl failed\n"); 2349 Status = STATUS_INSUFFICIENT_RESOURCES; 2350 ExFreePool(dummypage); 2351 goto exit; 2352 } 2353 2354 MmBuildMdlForNonPagedPool(dummy_mdl); 2355 2356 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2357 } 2358 2359 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 2360 if (!stripeoff) { 2361 ERR("out of memory\n"); 2362 Status = STATUS_INSUFFICIENT_RESOURCES; 2363 goto exit; 2364 } 2365 2366 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 2367 2368 pos = 0; 2369 2370 while (pos < length) { 2371 PFN_NUMBER* stripe_pfns; 2372 2373 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2374 2375 if (pos == 0) { 2376 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes; 2377 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2378 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2379 2380 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2381 2382 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2383 2384 stripeoff[stripe] = readlen; 2385 pos += readlen; 2386 2387 stripe = (stripe + 1) % ci->num_stripes; 2388 2389 while (stripe != parity1) { 2390 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2391 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2392 2393 if (readlen == 0) 
2394 break; 2395 2396 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2397 2398 stripeoff[stripe] = readlen; 2399 pos += readlen; 2400 2401 stripe = (stripe + 1) % ci->num_stripes; 2402 } 2403 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2404 UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2405 UINT16 parity2 = (parity1 + 1) % ci->num_stripes; 2406 ULONG k; 2407 2408 while (stripe != parity1) { 2409 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2410 2411 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2412 2413 stripeoff[stripe] += (UINT32)ci->stripe_length; 2414 pos += (UINT32)ci->stripe_length; 2415 2416 stripe = (stripe + 1) % ci->num_stripes; 2417 } 2418 2419 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1); 2420 2421 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2422 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy; 2423 stripeoff[parity1] += PAGE_SIZE; 2424 } 2425 2426 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1); 2427 2428 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2429 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy; 2430 stripeoff[parity2] += PAGE_SIZE; 2431 } 2432 } else { 2433 UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2434 UINT32 readlen; 2435 2436 while (pos < length) { 2437 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2438 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2439 2440 if (readlen == 0) 2441 break; 2442 2443 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2444 2445 stripeoff[stripe] += readlen; 2446 pos += readlen; 2447 2448 stripe = (stripe + 1) % ci->num_stripes; 2449 } 2450 } 2451 } 2452 2453 
MmUnlockPages(master_mdl); 2454 IoFreeMdl(master_mdl); 2455 2456 ExFreePool(stripeoff); 2457 } 2458 2459 context.address = addr; 2460 2461 for (i = 0; i < ci->num_stripes; i++) { 2462 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) { 2463 context.stripes[i].status = ReadDataStatus_MissingDevice; 2464 context.stripes_left--; 2465 2466 if (!devices[i] || !devices[i]->devobj) 2467 missing_devices++; 2468 } 2469 } 2470 2471 if (missing_devices > allowed_missing) { 2472 ERR("not enough devices to service request (%u missing)\n", missing_devices); 2473 Status = STATUS_UNEXPECTED_IO_ERROR; 2474 goto exit; 2475 } 2476 2477 for (i = 0; i < ci->num_stripes; i++) { 2478 PIO_STACK_LOCATION IrpSp; 2479 2480 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) { 2481 context.stripes[i].context = (struct read_data_context*)&context; 2482 2483 if (type == BLOCK_FLAG_RAID10) { 2484 context.stripes[i].stripenum = i / ci->sub_stripes; 2485 } 2486 2487 if (!Irp) { 2488 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); 2489 2490 if (!context.stripes[i].Irp) { 2491 ERR("IoAllocateIrp failed\n"); 2492 Status = STATUS_INSUFFICIENT_RESOURCES; 2493 goto exit; 2494 } 2495 } else { 2496 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); 2497 2498 if (!context.stripes[i].Irp) { 2499 ERR("IoMakeAssociatedIrp failed\n"); 2500 Status = STATUS_INSUFFICIENT_RESOURCES; 2501 goto exit; 2502 } 2503 } 2504 2505 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2506 IrpSp->MajorFunction = IRP_MJ_READ; 2507 2508 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2509 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG); 2510 if 
(!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2511 ERR("out of memory\n"); 2512 Status = STATUS_INSUFFICIENT_RESOURCES; 2513 goto exit; 2514 } 2515 2516 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2517 2518 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2519 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) 2520 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl; 2521 else 2522 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2523 2524 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart); 2525 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset; 2526 2527 total_reading += IrpSp->Parameters.Read.Length; 2528 2529 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2530 2531 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE); 2532 2533 context.stripes[i].status = ReadDataStatus_Pending; 2534 } 2535 } 2536 2537 #ifdef DEBUG_STATS 2538 if (!is_tree) 2539 time1 = KeQueryPerformanceCounter(NULL); 2540 #endif 2541 2542 need_to_wait = FALSE; 2543 for (i = 0; i < ci->num_stripes; i++) { 2544 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) { 2545 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp); 2546 need_to_wait = TRUE; 2547 } 2548 } 2549 2550 if (need_to_wait) 2551 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); 2552 2553 #ifdef DEBUG_STATS 2554 if (!is_tree) { 2555 time2 = KeQueryPerformanceCounter(NULL); 2556 2557 Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; 2558 } 2559 #endif 2560 2561 if (diskacc) 2562 fFsRtlUpdateDiskCounters(total_reading, 0); 2563 2564 // check if any of the devices return a "user-induced" error 2565 
2566 for (i = 0; i < ci->num_stripes; i++) { 2567 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) { 2568 Status = context.stripes[i].iosb.Status; 2569 goto exit; 2570 } 2571 } 2572 2573 if (type == BLOCK_FLAG_RAID0) { 2574 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2575 if (!NT_SUCCESS(Status)) { 2576 ERR("read_data_raid0 returned %08x\n", Status); 2577 2578 if (file_read) 2579 ExFreePool(context.va); 2580 2581 goto exit; 2582 } 2583 2584 if (file_read) { 2585 RtlCopyMemory(buf, context.va, length); 2586 ExFreePool(context.va); 2587 } 2588 } else if (type == BLOCK_FLAG_RAID10) { 2589 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2590 2591 if (!NT_SUCCESS(Status)) { 2592 ERR("read_data_raid10 returned %08x\n", Status); 2593 2594 if (file_read) 2595 ExFreePool(context.va); 2596 2597 goto exit; 2598 } 2599 2600 if (file_read) { 2601 RtlCopyMemory(buf, context.va, length); 2602 ExFreePool(context.va); 2603 } 2604 } else if (type == BLOCK_FLAG_DUPLICATE) { 2605 Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation); 2606 if (!NT_SUCCESS(Status)) { 2607 ERR("read_data_dup returned %08x\n", Status); 2608 2609 if (file_read) 2610 ExFreePool(context.va); 2611 2612 goto exit; 2613 } 2614 2615 if (file_read) { 2616 RtlCopyMemory(buf, context.va, length); 2617 ExFreePool(context.va); 2618 } 2619 } else if (type == BLOCK_FLAG_RAID5) { 2620 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? 
TRUE : FALSE); 2621 if (!NT_SUCCESS(Status)) { 2622 ERR("read_data_raid5 returned %08x\n", Status); 2623 2624 if (file_read) 2625 ExFreePool(context.va); 2626 2627 goto exit; 2628 } 2629 2630 if (file_read) { 2631 RtlCopyMemory(buf, context.va, length); 2632 ExFreePool(context.va); 2633 } 2634 } else if (type == BLOCK_FLAG_RAID6) { 2635 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE); 2636 if (!NT_SUCCESS(Status)) { 2637 ERR("read_data_raid6 returned %08x\n", Status); 2638 2639 if (file_read) 2640 ExFreePool(context.va); 2641 2642 goto exit; 2643 } 2644 2645 if (file_read) { 2646 RtlCopyMemory(buf, context.va, length); 2647 ExFreePool(context.va); 2648 } 2649 } 2650 2651 exit: 2652 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) 2653 chunk_unlock_range(Vcb, c, lockaddr, locklen); 2654 2655 if (dummy_mdl) 2656 IoFreeMdl(dummy_mdl); 2657 2658 if (dummypage) 2659 ExFreePool(dummypage); 2660 2661 for (i = 0; i < ci->num_stripes; i++) { 2662 if (context.stripes[i].mdl) { 2663 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED) 2664 MmUnlockPages(context.stripes[i].mdl); 2665 2666 IoFreeMdl(context.stripes[i].mdl); 2667 } 2668 2669 if (context.stripes[i].Irp) 2670 IoFreeIrp(context.stripes[i].Irp); 2671 } 2672 2673 ExFreePool(context.stripes); 2674 2675 if (!Vcb->log_to_phys_loaded) 2676 ExFreePool(devices); 2677 2678 return Status; 2679 } 2680 2681 NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) { 2682 ULONG readlen; 2683 2684 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr); 2685 2686 if (pbr) *pbr = 0; 2687 2688 if (start >= fcb->adsdata.Length) { 2689 TRACE("tried to read beyond end of stream\n"); 2690 return STATUS_END_OF_FILE; 2691 } 2692 2693 if (length == 0) { 2694 WARN("tried to read zero bytes\n"); 2695 return STATUS_SUCCESS; 2696 } 2697 2698 if (start + length < 
fcb->adsdata.Length) 2699 readlen = length; 2700 else 2701 readlen = fcb->adsdata.Length - (ULONG)start; 2702 2703 if (readlen > 0) 2704 RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen); 2705 2706 if (pbr) *pbr = readlen; 2707 2708 return STATUS_SUCCESS; 2709 } 2710 2711 NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) { 2712 NTSTATUS Status; 2713 EXTENT_DATA* ed; 2714 UINT32 bytes_read = 0; 2715 UINT64 last_end; 2716 LIST_ENTRY* le; 2717 #ifdef DEBUG_STATS 2718 LARGE_INTEGER time1, time2; 2719 #endif 2720 2721 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr); 2722 2723 if (pbr) 2724 *pbr = 0; 2725 2726 if (start >= fcb->inode_item.st_size) { 2727 WARN("Tried to read beyond end of file\n"); 2728 Status = STATUS_END_OF_FILE; 2729 goto exit; 2730 } 2731 2732 #ifdef DEBUG_STATS 2733 time1 = KeQueryPerformanceCounter(NULL); 2734 #endif 2735 2736 le = fcb->extents.Flink; 2737 2738 last_end = start; 2739 2740 while (le != &fcb->extents) { 2741 UINT64 len; 2742 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 2743 EXTENT_DATA2* ed2; 2744 2745 if (!ext->ignore) { 2746 ed = &ext->extent_data; 2747 2748 ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL; 2749 2750 len = ed2 ? 
ed2->num_bytes : ed->decoded_size; 2751 2752 if (ext->offset + len <= start) { 2753 last_end = ext->offset + len; 2754 goto nextitem; 2755 } 2756 2757 if (ext->offset > last_end && ext->offset > start + bytes_read) { 2758 UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end)); 2759 2760 RtlZeroMemory(data + bytes_read, read); 2761 bytes_read += read; 2762 length -= read; 2763 } 2764 2765 if (length == 0 || ext->offset > start + bytes_read + length) 2766 break; 2767 2768 if (ed->encryption != BTRFS_ENCRYPTION_NONE) { 2769 WARN("Encryption not supported\n"); 2770 Status = STATUS_NOT_IMPLEMENTED; 2771 goto exit; 2772 } 2773 2774 if (ed->encoding != BTRFS_ENCODING_NONE) { 2775 WARN("Other encodings not supported\n"); 2776 Status = STATUS_NOT_IMPLEMENTED; 2777 goto exit; 2778 } 2779 2780 switch (ed->type) { 2781 case EXTENT_TYPE_INLINE: 2782 { 2783 UINT64 off = start + bytes_read - ext->offset; 2784 UINT32 read; 2785 2786 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2787 read = (UINT32)min(min(len, ext->datalen) - off, length); 2788 2789 RtlCopyMemory(data + bytes_read, &ed->data[off], read); 2790 } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO) { 2791 UINT8* decomp; 2792 BOOL decomp_alloc; 2793 UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]); 2794 2795 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) { 2796 ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size); 2797 Status = STATUS_INTERNAL_ERROR; 2798 goto exit; 2799 } 2800 2801 read = (UINT32)min(ed->decoded_size - off, length); 2802 2803 if (off > 0) { 2804 decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG); 2805 if (!decomp) { 2806 ERR("out of memory\n"); 2807 Status = STATUS_INSUFFICIENT_RESOURCES; 2808 goto exit; 2809 } 2810 2811 decomp_alloc = TRUE; 2812 } else { 2813 decomp = data + bytes_read; 2814 decomp_alloc = FALSE; 2815 } 2816 2817 if (ed->compression == 
// NOTE: this chunk begins mid-way through read_file()'s extent-walk loop;
// the line below completes "if (ed->compression == ..." started above.
                    BTRFS_COMPRESSION_ZLIB) {
                        Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
                        if (!NT_SUCCESS(Status)) {
                            ERR("zlib_decompress returned %08x\n", Status);
                            if (decomp_alloc) ExFreePool(decomp);
                            goto exit;
                        }
                    } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
                        // Inline LZO data carries a UINT32 length header before the
                        // compressed payload; skip it (and guard against truncation).
                        if (inlen < sizeof(UINT32)) {
                            ERR("extent data was truncated\n");
                            Status = STATUS_INTERNAL_ERROR;
                            if (decomp_alloc) ExFreePool(decomp);
                            goto exit;
                        } else
                            inlen -= sizeof(UINT32);

                        Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32));
                        if (!NT_SUCCESS(Status)) {
                            ERR("lzo_decompress returned %08x\n", Status);
                            if (decomp_alloc) ExFreePool(decomp);
                            goto exit;
                        }
                    }

                    // decomp_alloc means we decompressed into a temporary buffer
                    // (rather than straight into the caller's buffer): copy the
                    // requested window out and release it.
                    if (decomp_alloc) {
                        RtlCopyMemory(data + bytes_read, decomp + off, read);
                        ExFreePool(decomp);
                    }
                } else {
                    ERR("unhandled compression type %x\n", ed->compression);
                    Status = STATUS_NOT_IMPLEMENTED;
                    goto exit;
                }

                bytes_read += read;
                length -= read;

                break;
            }

            // Regular extent: data lives on disk and is fetched via read_data(),
            // optionally decompressed afterwards.
            case EXTENT_TYPE_REGULAR:
            {
                UINT64 off = start + bytes_read - ext->offset;   // offset of this read within the extent
                UINT32 to_read, read;
                UINT8* buf;
                BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE;
                BOOL buf_free;
                UINT32 bumpoff = 0, *csum;
                UINT64 addr;
                chunk* c;

                read = (UINT32)(len - off);
                if (read > length) read = (UINT32)length;

                if (ed->compression == BTRFS_COMPRESSION_NONE) {
                    addr = ed2->address + ed2->offset + off;
                    to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size);

                    // Disk reads must start on a sector boundary: pull addr back
                    // to the previous boundary and remember the slack (bumpoff)
                    // so we can copy from the right place afterwards.
                    if (addr % fcb->Vcb->superblock.sector_size > 0) {
                        bumpoff = addr % fcb->Vcb->superblock.sector_size;
                        addr -= bumpoff;
                        to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
                    }
                } else {
                    // Compressed extents are always read in full, from the start.
                    addr = ed2->address;
                    to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
                }

                // Fast path: uncompressed, sector-aligned request — read straight
                // into the caller's buffer. Otherwise stage through a pool buffer.
                if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
                    length % fcb->Vcb->superblock.sector_size == 0) {
                    buf = data + bytes_read;
                    buf_free = FALSE;
                } else {
                    buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
                    buf_free = TRUE;

                    if (!buf) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto exit;
                    }

                    // Staging buffer is not described by the IRP's MDL.
                    mdl = FALSE;
                }

                c = get_chunk_from_address(fcb->Vcb, addr);

                if (!c) {
                    ERR("get_chunk_from_address(%llx) failed\n", addr);

                    if (buf_free)
                        ExFreePool(buf);

                    // NOTE(review): Status is not assigned on this failure path,
                    // so the function may return a stale (possibly success)
                    // value here — verify against upstream, which sets an error.
                    goto exit;
                }

                if (ext->csum) {
                    // For uncompressed data only the sectors we touch are
                    // verified, so index into the csum array; compressed extents
                    // are read whole, so pass the full array.
                    if (ed->compression == BTRFS_COMPRESSION_NONE)
                        csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
                    else
                        csum = ext->csum;
                } else
                    csum = NULL;

                Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl,
                                   fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
                if (!NT_SUCCESS(Status)) {
                    ERR("read_data returned %08x\n", Status);

                    if (buf_free)
                        ExFreePool(buf);

                    goto exit;
                }

                if (ed->compression == BTRFS_COMPRESSION_NONE) {
                    // Only copy if we staged; aligned reads already landed in data.
                    if (buf_free)
                        RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
                } else {
                    UINT8 *decomp = NULL, *buf2;
                    ULONG outlen, inlen, off2;
                    UINT32 inpageoff = 0;

                    off2 = (ULONG)(ed2->offset + off);   // offset of wanted data within decompressed stream
                    buf2 = buf;
                    inlen = (ULONG)ed2->size;

                    if (ed->compression == BTRFS_COMPRESSION_LZO) {
                        // LZO streams appear to be laid out in LINUX_PAGE_SIZE
                        // segments, each prefixed by a UINT32 compressed length,
                        // with headers never straddling a page boundary —
                        // presumably matching the Linux btrfs format (verify).
                        ULONG inoff = sizeof(UINT32);

                        inlen -= sizeof(UINT32);

                        // If reading a few sectors in, skip to the interesting bit
                        while (off2 > LINUX_PAGE_SIZE) {
                            UINT32 partlen;

                            if (inlen < sizeof(UINT32))
                                break;

                            partlen = *(UINT32*)(buf2 + inoff);

                            if (partlen < inlen) {
                                off2 -= LINUX_PAGE_SIZE;
                                inoff += partlen + sizeof(UINT32);
                                inlen -= partlen + sizeof(UINT32);

                                // Length headers don't straddle page boundaries:
                                // round up to the next page if one wouldn't fit.
                                if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32))
                                    inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE;
                            } else
                                break;
                        }

                        buf2 = &buf2[inoff];
                        inpageoff = inoff % LINUX_PAGE_SIZE;
                    }

                    // If the wanted data doesn't start at the beginning of the
                    // decompressed stream, decompress into a temporary buffer
                    // and copy the window out afterwards.
                    if (off2 != 0) {
                        outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off));

                        decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG);
                        if (!decomp) {
                            ERR("out of memory\n");
                            ExFreePool(buf);
                            Status = STATUS_INSUFFICIENT_RESOURCES;
                            goto exit;
                        }
                    } else
                        outlen = min(read, (UINT32)(ed2->num_bytes - off));

                    if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
                        Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);

                        if (!NT_SUCCESS(Status)) {
                            ERR("zlib_decompress returned %08x\n", Status);
                            ExFreePool(buf);

                            if (decomp)
                                ExFreePool(decomp);

                            goto exit;
                        }
                    } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
                        Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);

                        if (!NT_SUCCESS(Status)) {
                            ERR("lzo_decompress returned %08x\n", Status);
                            ExFreePool(buf);

                            if (decomp)
                                ExFreePool(decomp);

                            goto exit;
                        }
                    } else {
                        ERR("unsupported compression type %x\n", ed->compression);
                        Status = STATUS_NOT_SUPPORTED;

                        ExFreePool(buf);

                        if (decomp)
                            ExFreePool(decomp);

                        goto exit;
                    }

                    if (decomp) {
                        RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
                        ExFreePool(decomp);
                    }
                }

                if (buf_free)
                    ExFreePool(buf);

                bytes_read += read;
                length -= read;

                break;
            }

            // Preallocated (unwritten) extent: reads as zeroes.
            case EXTENT_TYPE_PREALLOC:
            {
                UINT64 off = start + bytes_read - ext->offset;
                UINT32 read = (UINT32)(len - off);

                if (read > length) read = (UINT32)length;

                RtlZeroMemory(data + bytes_read, read);

                bytes_read += read;
                length -= read;

                break;
            }

            default:
                WARN("Unsupported extent data type %u\n", ed->type);
                Status = STATUS_NOT_IMPLEMENTED;
                goto exit;
            }

            last_end = ext->offset + len;

            if (length == 0)
                break;
        }

nextitem:
        le = le->Flink;
    }

    // Anything between the last extent and st_size is a hole: zero-fill it.
    if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
        UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length);

        RtlZeroMemory(data + bytes_read, read);

        bytes_read += read;
        length -= read;
    }

    Status = STATUS_SUCCESS;
    if (pbr)
        *pbr = bytes_read;
// End of read_file(): optional per-volume read statistics, then the single exit point.
#ifdef DEBUG_STATS
    time2 = KeQueryPerformanceCounter(NULL);

    fcb->Vcb->stats.num_reads++;
    fcb->Vcb->stats.data_read += bytes_read;
    fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
#endif

exit:
    return Status;
}

// Common worker for IRP_MJ_READ: validates the request, zeroes anything beyond
// ValidDataLength, then satisfies the read either through the cache manager
// (CcCopyRead / CcCopyReadEx / CcMdlRead) or, for non-cached I/O, directly via
// read_stream()/read_file().
//
// Irp        - the read IRP (length/offset come from its stack location)
// wait       - whether we are allowed to block; if not, the IRP is marked
//              pending and STATUS_PENDING is returned
// bytes_read - out: bytes transferred (including any zero-fill "addon")
NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) {
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    fcb* fcb = FileObject->FsContext;
    UINT8* data = NULL;
    ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
    UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart;

    *bytes_read = 0;

    if (!fcb || !fcb->Vcb || !fcb->subvol)
        return STATUS_INTERNAL_ERROR;

    TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
    TRACE("offset = %llx, length = %x\n", start, length);
    TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");

    // Directories can't be read directly — but their named streams (ads) can.
    if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
        return STATUS_INVALID_DEVICE_REQUEST;

    // Byte-range locks only apply to non-paging I/O.
    if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
        WARN("tried to read locked region\n");
        return STATUS_FILE_LOCK_CONFLICT;
    }

    if (length == 0) {
        TRACE("tried to read zero bytes\n");
        return STATUS_SUCCESS;
    }

    if (start >= (UINT64)fcb->Header.FileSize.QuadPart) {
        TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart);
        return STATUS_END_OF_FILE;
    }

    TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);

    // For anything except a cached MDL read we need a mapped system buffer, so
    // we can zero the portion beyond ValidDataLength ourselves.
    if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
        data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);

        if (Irp->MdlAddress && !data) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // Entire read lies beyond VDL: it's all zeroes, no disk access needed.
        if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) {
            length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data, length);
            Irp->IoStatus.Information = *bytes_read = length;
            return STATUS_SUCCESS;
        }

        // Read straddles VDL: pre-zero the tail ("addon" bytes) and shrink the
        // real read to end at VDL; addon is added back to the byte count later.
        if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) {
            addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
            length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
        }
    }

    if (!(Irp->Flags & IRP_NOCACHE)) {
        // Cached path: hand the request to the cache manager.
        NTSTATUS Status = STATUS_SUCCESS;

        _SEH2_TRY {
            // Lazily initialise caching on first cached read of this file object.
            if (!FileObject->PrivateCacheMap) {
                CC_FILE_SIZES ccfs;

                ccfs.AllocationSize = fcb->Header.AllocationSize;
                ccfs.FileSize = fcb->Header.FileSize;
                ccfs.ValidDataLength = fcb->Header.ValidDataLength;

                init_file_cache(FileObject, &ccfs);
            }

            if (IrpSp->MinorFunction & IRP_MN_MDL) {
                CcMdlRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
            } else {
                // Prefer CcCopyReadEx (charges the I/O to the issuing thread)
                // when the kernel exports it.
                if (fCcCopyReadEx) {
                    // NOTE(review): this format string has 8 conversion
                    // specifiers but only 7 arguments follow — harmless only if
                    // TRACE compiles out; worth fixing.
                    TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
                          length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
                    TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
                    if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
                        TRACE("CcCopyReadEx could not wait\n");

                        // NOTE(review): plain `return` inside _SEH2_TRY — under
                        // ReactOS PSEH this normally requires _SEH2_YIELD to
                        // unwind correctly; confirm against the PSEH in use.
                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyReadEx finished\n");
                } else {
                    TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
                    TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
                    if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
                        TRACE("CcCopyRead could not wait\n");

                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyRead finished\n");
                }
            }
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (NT_SUCCESS(Status)) {
            Status = Irp->IoStatus.Status;
            // Add back the zeroed tail so the caller sees the full byte count.
            Irp->IoStatus.Information += addon;
            *bytes_read = (ULONG)Irp->IoStatus.Information;
        } else
            ERR("EXCEPTION - %08x\n", Status);

        return Status;
    } else {
        // Non-cached path: read directly from the volume.
        NTSTATUS Status;

        if (!wait) {
            IoMarkIrpPending(Irp);
            return STATUS_PENDING;
        }

        // Flush cached dirty data for this range first so the direct read sees
        // current contents (not needed for paging I/O).
        if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) {
            IO_STATUS_BLOCK iosb;

            CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, length, &iosb);

            if (!NT_SUCCESS(iosb.Status)) {
                ERR("CcFlushCache returned %08x\n", iosb.Status);
                return iosb.Status;
            }
        }

        if (fcb->ads)
            Status = read_stream(fcb, data, start, length, bytes_read);
        else
            Status = read_file(fcb, data, start, length, bytes_read, Irp);

        *bytes_read += addon;
        TRACE("read %u bytes\n", *bytes_read);

        Irp->IoStatus.Information = *bytes_read;

        // Attribute the disk I/O to the most plausible thread for the process
        // disk counters (requester thread, else current non-system thread, else
        // the system thread if it's the top-level issuer).
        if (diskacc && Status != STATUS_PENDING) {
            PETHREAD thread = NULL;

            if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
                thread = Irp->Tail.Overlay.Thread;
            else if (!IoIsSystemThread(PsGetCurrentThread()))
                thread = PsGetCurrentThread();
            else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
                thread = PsGetCurrentThread();

            if (thread)
                fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
        }

        return Status;
    }
}

// IRP_MJ_READ dispatch routine: routes volume-object reads to vol_read(),
// handles MDL-read completion, enforces FILE_READ_DATA access, takes the fcb
// resource shared, and delegates the actual work to do_read(). Pending IRPs
// are queued to a worker thread.
_Dispatch_type_(IRP_MJ_READ)
_Function_class_(DRIVER_DISPATCH)
NTSTATUS drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
    device_extension* Vcb = DeviceObject->DeviceExtension;
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    ULONG bytes_read = 0;
    NTSTATUS Status;
    BOOL top_level;
    fcb* fcb;
    ccb* ccb;
    BOOLEAN fcb_lock = FALSE, wait;

    FsRtlEnterFileSystem();

    top_level = is_top_level(Irp);

    TRACE("read\n");

    // Reads on the volume device object bypass the filesystem proper.
    if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
        Status = vol_read(DeviceObject, Irp);
        goto exit2;
    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
        Status = STATUS_INVALID_PARAMETER;
        goto end;
    }

    Irp->IoStatus.Information = 0;

    // Second half of a cached MDL read: release the MDL chain Cc gave us.
    if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
        CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);

        Irp->MdlAddress = NULL;
        Status = STATUS_SUCCESS;

        goto exit;
    }

    fcb = FileObject->FsContext;

    if (!fcb) {
        ERR("fcb was NULL\n");
        Status = STATUS_INVALID_PARAMETER;
        goto exit;
    }

    ccb = FileObject->FsContext2;

    if (!ccb) {
        ERR("ccb was NULL\n");
        Status = STATUS_INVALID_PARAMETER;
        goto exit;
    }

    // Kernel-mode callers are trusted; user-mode handles need FILE_READ_DATA.
    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
        WARN("insufficient privileges\n");
        Status = STATUS_ACCESS_DENIED;
        goto exit;
    }

    // Reads of the volume FCB go straight to the underlying device.
    if (fcb == Vcb->volume_fcb) {
        TRACE("reading volume FCB\n");

        IoSkipCurrentIrpStackLocation(Irp);

        Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);

        goto exit2;
    }

    wait = IoIsOperationSynchronous(Irp);

    // Don't offload jobs when doing paging IO - otherwise this can lead to
    // deadlocks in CcCopyRead.
    if (Irp->Flags & IRP_PAGING_IO)
        wait = TRUE;

    // Take the fcb resource shared unless this thread already holds it.
    if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
        if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
            Status = STATUS_PENDING;
            IoMarkIrpPending(Irp);
            goto exit;
        }

        fcb_lock = TRUE;
    }

    Status = do_read(Irp, wait, &bytes_read);

    if (fcb_lock)
        ExReleaseResourceLite(fcb->Header.Resource);

exit:
    // Synchronous (non-paging) handles track the file pointer here.
    if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
        FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);

end:
    Irp->IoStatus.Status = Status;

    TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
    TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
    TRACE("returning %08x\n", Status);

    // Pending reads are retried on a worker thread; do_read_job is the
    // fallback if no thread-job slot is available.
    if (Status != STATUS_PENDING)
        IoCompleteRequest(Irp, IO_NO_INCREMENT);
    else {
        if (!add_thread_job(Vcb, Irp))
            do_read_job(Irp);
    }

exit2:
    if (top_level)
        IoSetTopLevelIrp(NULL);

    FsRtlExitFileSystem();

    return Status;
}