1 /* Copyright (c) Mark Harmstone 2016-17 2 * 3 * This file is part of WinBtrfs. 4 * 5 * WinBtrfs is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU Lesser General Public Licence as published by 7 * the Free Software Foundation, either version 3 of the Licence, or 8 * (at your option) any later version. 9 * 10 * WinBtrfs is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public Licence for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public Licence 16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17 18 #include "btrfs_drv.h" 19 20 enum read_data_status { 21 ReadDataStatus_Pending, 22 ReadDataStatus_Success, 23 ReadDataStatus_Error, 24 ReadDataStatus_MissingDevice, 25 ReadDataStatus_Skip 26 }; 27 28 struct read_data_context; 29 30 typedef struct { 31 struct read_data_context* context; 32 uint16_t stripenum; 33 bool rewrite; 34 PIRP Irp; 35 IO_STATUS_BLOCK iosb; 36 enum read_data_status status; 37 PMDL mdl; 38 uint64_t stripestart; 39 uint64_t stripeend; 40 } read_data_stripe; 41 42 typedef struct { 43 KEVENT Event; 44 NTSTATUS Status; 45 chunk* c; 46 uint64_t address; 47 uint32_t buflen; 48 LONG num_stripes, stripes_left; 49 uint64_t type; 50 uint32_t sector_size; 51 uint16_t firstoff, startoffstripe, sectors_per_stripe; 52 uint32_t* csum; 53 bool tree; 54 read_data_stripe* stripes; 55 uint8_t* va; 56 } read_data_context; 57 58 extern bool diskacc; 59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters; 60 extern tCcCopyReadEx fCcCopyReadEx; 61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters; 62 63 #define LZO_PAGE_SIZE 4096 64 65 _Function_class_(IO_COMPLETION_ROUTINE) 66 static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 67 read_data_stripe* stripe = 
conptr; 68 read_data_context* context = (read_data_context*)stripe->context; 69 70 UNUSED(DeviceObject); 71 72 stripe->iosb = Irp->IoStatus; 73 74 if (NT_SUCCESS(Irp->IoStatus.Status)) 75 stripe->status = ReadDataStatus_Success; 76 else 77 stripe->status = ReadDataStatus_Error; 78 79 if (InterlockedDecrement(&context->stripes_left) == 0) 80 KeSetEvent(&context->Event, 0, false); 81 82 return STATUS_MORE_PROCESSING_REQUIRED; 83 } 84 85 NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, uint32_t* csum) { 86 NTSTATUS Status; 87 calc_job* cj; 88 uint32_t* csum2; 89 90 // From experimenting, it seems that 40 sectors is roughly the crossover 91 // point where offloading the crc32 calculation becomes worth it. 92 93 if (sectors < 40 || get_num_of_processors() < 2) { 94 ULONG j; 95 96 for (j = 0; j < sectors; j++) { 97 uint32_t crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 98 99 if (crc32 != csum[j]) { 100 return STATUS_CRC_ERROR; 101 } 102 } 103 104 return STATUS_SUCCESS; 105 } 106 107 csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * sectors, ALLOC_TAG); 108 if (!csum2) { 109 ERR("out of memory\n"); 110 return STATUS_INSUFFICIENT_RESOURCES; 111 } 112 113 Status = add_calc_job(Vcb, data, sectors, csum2, &cj); 114 if (!NT_SUCCESS(Status)) { 115 ERR("add_calc_job returned %08x\n", Status); 116 ExFreePool(csum2); 117 return Status; 118 } 119 120 KeWaitForSingleObject(&cj->event, Executive, KernelMode, false, NULL); 121 122 if (RtlCompareMemory(csum2, csum, sectors * sizeof(uint32_t)) != sectors * sizeof(uint32_t)) { 123 free_calc_job(cj); 124 ExFreePool(csum2); 125 return STATUS_CRC_ERROR; 126 } 127 128 free_calc_job(cj); 129 ExFreePool(csum2); 130 131 return STATUS_SUCCESS; 132 } 133 134 static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci, 135 device** devices, uint64_t generation) { 136 ULONG i; 137 bool 
checksum_error = false; 138 uint16_t j, stripe = 0; 139 NTSTATUS Status; 140 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 141 142 for (j = 0; j < ci->num_stripes; j++) { 143 if (context->stripes[j].status == ReadDataStatus_Error) { 144 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 145 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 146 return context->stripes[j].iosb.Status; 147 } else if (context->stripes[j].status == ReadDataStatus_Success) { 148 stripe = j; 149 break; 150 } 151 } 152 153 if (context->stripes[stripe].status != ReadDataStatus_Success) 154 return STATUS_INTERNAL_ERROR; 155 156 if (context->tree) { 157 tree_header* th = (tree_header*)buf; 158 uint32_t crc32; 159 160 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, context->buflen - sizeof(th->csum)); 161 162 if (th->address != context->address || crc32 != *((uint32_t*)th->csum)) { 163 checksum_error = true; 164 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 165 } else if (generation != 0 && th->generation != generation) { 166 checksum_error = true; 167 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 168 } 169 } else if (context->csum) { 170 Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum); 171 172 if (Status == STATUS_CRC_ERROR) { 173 checksum_error = true; 174 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 175 } else if (!NT_SUCCESS(Status)) { 176 ERR("check_csum returned %08x\n", Status); 177 return Status; 178 } 179 } 180 181 if (!checksum_error) 182 return STATUS_SUCCESS; 183 184 if (ci->num_stripes == 1) 185 return STATUS_CRC_ERROR; 186 187 if (context->tree) { 188 tree_header* t2; 189 bool recovered = false; 190 191 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 192 if (!t2) { 193 ERR("out of memory\n"); 194 return STATUS_INSUFFICIENT_RESOURCES; 
195 } 196 197 for (j = 0; j < ci->num_stripes; j++) { 198 if (j != stripe && devices[j] && devices[j]->devobj) { 199 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart, 200 Vcb->superblock.node_size, (uint8_t*)t2, false); 201 if (!NT_SUCCESS(Status)) { 202 WARN("sync_read_phys returned %08x\n", Status); 203 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 204 } else { 205 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 206 207 if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) { 208 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 209 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id); 210 recovered = true; 211 212 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 213 Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart, 214 t2, Vcb->superblock.node_size); 215 if (!NT_SUCCESS(Status)) { 216 WARN("write_data_phys returned %08x\n", Status); 217 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 218 } 219 } 220 221 break; 222 } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum)) 223 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 224 else 225 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS); 226 } 227 } 228 } 229 230 if (!recovered) { 231 ERR("unrecoverable checksum error at %I64x\n", addr); 232 ExFreePool(t2); 233 return STATUS_CRC_ERROR; 234 } 235 236 ExFreePool(t2); 237 } else { 238 ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size; 239 uint8_t* sector; 240 241 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 242 if (!sector) { 243 ERR("out 
of memory\n"); 244 return STATUS_INSUFFICIENT_RESOURCES; 245 } 246 247 for (i = 0; i < sectors; i++) { 248 uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 249 250 if (context->csum[i] != crc32) { 251 bool recovered = false; 252 253 for (j = 0; j < ci->num_stripes; j++) { 254 if (j != stripe && devices[j] && devices[j]->devobj) { 255 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, 256 cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 257 Vcb->superblock.sector_size, sector, false); 258 if (!NT_SUCCESS(Status)) { 259 WARN("sync_read_phys returned %08x\n", Status); 260 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 261 } else { 262 uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 263 264 if (crc32b == context->csum[i]) { 265 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 266 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 267 recovered = true; 268 269 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 270 Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, 271 cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 272 sector, Vcb->superblock.sector_size); 273 if (!NT_SUCCESS(Status)) { 274 WARN("write_data_phys returned %08x\n", Status); 275 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 276 } 277 } 278 279 break; 280 } else 281 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 282 } 283 } 284 } 285 286 if (!recovered) { 287 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 288 ExFreePool(sector); 289 return STATUS_CRC_ERROR; 290 } 291 } 292 } 293 
294 ExFreePool(sector); 295 } 296 297 return STATUS_SUCCESS; 298 } 299 300 static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, 301 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) { 302 uint64_t i; 303 304 for (i = 0; i < ci->num_stripes; i++) { 305 if (context->stripes[i].status == ReadDataStatus_Error) { 306 WARN("stripe %I64u returned error %08x\n", i, context->stripes[i].iosb.Status); 307 log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS); 308 return context->stripes[i].iosb.Status; 309 } 310 } 311 312 if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries 313 tree_header* th = (tree_header*)buf; 314 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 315 316 if (crc32 != *((uint32_t*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) { 317 uint64_t off; 318 uint16_t stripe; 319 320 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe); 321 322 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id); 323 324 if (crc32 != *((uint32_t*)th->csum)) { 325 WARN("crc32 was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum)); 326 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 327 return STATUS_CRC_ERROR; 328 } else if (addr != th->address) { 329 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr); 330 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 331 return STATUS_CRC_ERROR; 332 } else if (generation != 0 && generation != th->generation) { 333 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation); 334 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 335 return STATUS_CRC_ERROR; 336 } 337 } 338 } else if (context->csum) { 
339 NTSTATUS Status; 340 341 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 342 343 if (Status == STATUS_CRC_ERROR) { 344 for (i = 0; i < length / Vcb->superblock.sector_size; i++) { 345 uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 346 347 if (context->csum[i] != crc32) { 348 uint64_t off; 349 uint16_t stripe; 350 351 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe); 352 353 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id); 354 355 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 356 357 return Status; 358 } 359 } 360 361 return Status; 362 } else if (!NT_SUCCESS(Status)) { 363 ERR("check_csum returned %08x\n", Status); 364 return Status; 365 } 366 } 367 368 return STATUS_SUCCESS; 369 } 370 371 static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, 372 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) { 373 uint64_t i; 374 uint16_t j, stripe; 375 NTSTATUS Status; 376 bool checksum_error = false; 377 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 378 379 for (j = 0; j < ci->num_stripes; j++) { 380 if (context->stripes[j].status == ReadDataStatus_Error) { 381 WARN("stripe %I64u returned error %08x\n", j, context->stripes[j].iosb.Status); 382 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 383 return context->stripes[j].iosb.Status; 384 } else if (context->stripes[j].status == ReadDataStatus_Success) 385 stripe = j; 386 } 387 388 if (context->tree) { 389 tree_header* th = (tree_header*)buf; 390 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 391 392 if (crc32 != *((uint32_t*)th->csum)) { 393 WARN("crc32 was %08x, expected %08x\n", 
crc32, *((uint32_t*)th->csum)); 394 checksum_error = true; 395 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 396 } else if (addr != th->address) { 397 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr); 398 checksum_error = true; 399 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 400 } else if (generation != 0 && generation != th->generation) { 401 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation); 402 checksum_error = true; 403 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 404 } 405 } else if (context->csum) { 406 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 407 408 if (Status == STATUS_CRC_ERROR) 409 checksum_error = true; 410 else if (!NT_SUCCESS(Status)) { 411 ERR("check_csum returned %08x\n", Status); 412 return Status; 413 } 414 } 415 416 if (!checksum_error) 417 return STATUS_SUCCESS; 418 419 if (context->tree) { 420 tree_header* t2; 421 uint64_t off; 422 uint16_t badsubstripe = 0; 423 bool recovered = false; 424 425 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 426 if (!t2) { 427 ERR("out of memory\n"); 428 return STATUS_INSUFFICIENT_RESOURCES; 429 } 430 431 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe); 432 433 stripe *= ci->sub_stripes; 434 435 for (j = 0; j < ci->sub_stripes; j++) { 436 if (context->stripes[stripe + j].status == ReadDataStatus_Success) { 437 badsubstripe = j; 438 break; 439 } 440 } 441 442 for (j = 0; j < ci->sub_stripes; j++) { 443 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) { 444 Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off, 445 Vcb->superblock.node_size, (uint8_t*)t2, false); 446 if (!NT_SUCCESS(Status)) { 447 
WARN("sync_read_phys returned %08x\n", Status); 448 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS); 449 } else { 450 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 451 452 if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) { 453 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 454 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id); 455 recovered = true; 456 457 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad 458 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj, 459 cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size); 460 if (!NT_SUCCESS(Status)) { 461 WARN("write_data_phys returned %08x\n", Status); 462 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 463 } 464 } 465 466 break; 467 } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum)) 468 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 469 else 470 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS); 471 } 472 } 473 } 474 475 if (!recovered) { 476 ERR("unrecoverable checksum error at %I64x\n", addr); 477 ExFreePool(t2); 478 return STATUS_CRC_ERROR; 479 } 480 481 ExFreePool(t2); 482 } else { 483 ULONG sectors = length / Vcb->superblock.sector_size; 484 uint8_t* sector; 485 486 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 487 if (!sector) { 488 ERR("out of memory\n"); 489 return STATUS_INSUFFICIENT_RESOURCES; 490 } 491 492 for (i = 0; i < sectors; i++) { 493 uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 494 495 if (context->csum[i] != crc32) { 
496 uint64_t off; 497 uint16_t stripe2, badsubstripe = 0; 498 bool recovered = false; 499 500 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 501 ci->num_stripes / ci->sub_stripes, &off, &stripe2); 502 503 stripe2 *= ci->sub_stripes; 504 505 for (j = 0; j < ci->sub_stripes; j++) { 506 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) { 507 badsubstripe = j; 508 break; 509 } 510 } 511 512 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 513 514 for (j = 0; j < ci->sub_stripes; j++) { 515 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) { 516 Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off, 517 Vcb->superblock.sector_size, sector, false); 518 if (!NT_SUCCESS(Status)) { 519 WARN("sync_read_phys returned %08x\n", Status); 520 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS); 521 } else { 522 uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 523 524 if (crc32b == context->csum[i]) { 525 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 526 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id); 527 recovered = true; 528 529 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad 530 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj, 531 cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size); 532 if (!NT_SUCCESS(Status)) { 533 WARN("write_data_phys returned %08x\n", Status); 534 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS); 535 } 
536 } 537 538 break; 539 } else 540 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 541 } 542 } 543 } 544 545 if (!recovered) { 546 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 547 ExFreePool(sector); 548 return STATUS_CRC_ERROR; 549 } 550 } 551 } 552 553 ExFreePool(sector); 554 } 555 556 return STATUS_SUCCESS; 557 } 558 559 static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci, 560 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) { 561 ULONG i; 562 NTSTATUS Status; 563 bool checksum_error = false; 564 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 565 uint16_t j, stripe; 566 bool no_success = true; 567 568 for (j = 0; j < ci->num_stripes; j++) { 569 if (context->stripes[j].status == ReadDataStatus_Error) { 570 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 571 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 572 return context->stripes[j].iosb.Status; 573 } else if (context->stripes[j].status == ReadDataStatus_Success) { 574 stripe = j; 575 no_success = false; 576 } 577 } 578 579 if (c) { // check partial stripes 580 LIST_ENTRY* le; 581 uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length; 582 583 ExAcquireResourceSharedLite(&c->partial_stripes_lock, true); 584 585 le = c->partial_stripes.Flink; 586 while (le != &c->partial_stripes) { 587 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 588 589 if (ps->address + ps_length > addr && ps->address < addr + length) { 590 ULONG runlength, index; 591 592 runlength = RtlFindFirstRunClear(&ps->bmp, &index); 593 594 while (runlength != 0) { 595 #ifdef __REACTOS__ 596 uint64_t runstart, runend, start, end; 597 #endif 598 if (index >= ps->bmplen) 599 break; 600 601 if (index + runlength >= ps->bmplen) { 602 runlength = ps->bmplen - index; 
603 604 if (runlength == 0) 605 break; 606 } 607 608 #ifndef __REACTOS__ 609 uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size); 610 uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size); 611 uint64_t start = max(runstart, addr); 612 uint64_t end = min(runend, addr + length); 613 #else 614 runstart = ps->address + (index * Vcb->superblock.sector_size); 615 runend = runstart + (runlength * Vcb->superblock.sector_size); 616 start = max(runstart, addr); 617 end = min(runend, addr + length); 618 #endif 619 620 if (end > start) 621 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start)); 622 623 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); 624 } 625 } else if (ps->address >= addr + length) 626 break; 627 628 le = le->Flink; 629 } 630 631 ExReleaseResourceLite(&c->partial_stripes_lock); 632 } 633 634 if (context->tree) { 635 tree_header* th = (tree_header*)buf; 636 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 637 638 if (addr != th->address || crc32 != *((uint32_t*)th->csum)) { 639 checksum_error = true; 640 if (!no_success && !degraded) 641 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 642 } else if (generation != 0 && generation != th->generation) { 643 checksum_error = true; 644 if (!no_success && !degraded) 645 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 646 } 647 } else if (context->csum) { 648 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 649 650 if (Status == STATUS_CRC_ERROR) { 651 if (!degraded) 652 WARN("checksum error\n"); 653 checksum_error = true; 654 } else if (!NT_SUCCESS(Status)) { 655 ERR("check_csum returned %08x\n", Status); 656 return Status; 657 } 658 } else if (degraded) 659 checksum_error = true; 660 661 if (!checksum_error) 662 return STATUS_SUCCESS; 663 664 if (context->tree) { 665 
uint16_t parity; 666 uint64_t off; 667 bool recovered = false, first = true, failed = false; 668 uint8_t* t2; 669 670 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG); 671 if (!t2) { 672 ERR("out of memory\n"); 673 return STATUS_INSUFFICIENT_RESOURCES; 674 } 675 676 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe); 677 678 parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 679 680 stripe = (parity + stripe + 1) % ci->num_stripes; 681 682 for (j = 0; j < ci->num_stripes; j++) { 683 if (j != stripe) { 684 if (devices[j] && devices[j]->devobj) { 685 if (first) { 686 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false); 687 if (!NT_SUCCESS(Status)) { 688 ERR("sync_read_phys returned %08x\n", Status); 689 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 690 failed = true; 691 break; 692 } 693 694 first = false; 695 } else { 696 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false); 697 if (!NT_SUCCESS(Status)) { 698 ERR("sync_read_phys returned %08x\n", Status); 699 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 700 failed = true; 701 break; 702 } 703 704 do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size); 705 } 706 } else { 707 failed = true; 708 break; 709 } 710 } 711 } 712 713 if (!failed) { 714 tree_header* t3 = (tree_header*)t2; 715 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum)); 716 717 if (t3->address == addr && crc32 == *((uint32_t*)t3->csum) && (generation == 0 || t3->generation == generation)) { 718 RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 719 720 if (!degraded) 721 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, 
devices[stripe]->devitem.dev_id); 722 723 recovered = true; 724 725 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 726 Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size); 727 if (!NT_SUCCESS(Status)) { 728 WARN("write_data_phys returned %08x\n", Status); 729 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 730 } 731 } 732 } 733 } 734 735 if (!recovered) { 736 ERR("unrecoverable checksum error at %I64x\n", addr); 737 ExFreePool(t2); 738 return STATUS_CRC_ERROR; 739 } 740 741 ExFreePool(t2); 742 } else { 743 ULONG sectors = length / Vcb->superblock.sector_size; 744 uint8_t* sector; 745 746 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG); 747 if (!sector) { 748 ERR("out of memory\n"); 749 return STATUS_INSUFFICIENT_RESOURCES; 750 } 751 752 for (i = 0; i < sectors; i++) { 753 uint16_t parity; 754 uint64_t off; 755 uint32_t crc32; 756 757 if (context->csum) 758 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 759 760 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 761 ci->num_stripes - 1, &off, &stripe); 762 763 parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 764 765 stripe = (parity + stripe + 1) % ci->num_stripes; 766 767 if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) { 768 bool recovered = false, first = true, failed = false; 769 770 if (devices[stripe] && devices[stripe]->devobj) 771 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS); 772 773 for (j = 0; j < ci->num_stripes; j++) { 774 if (j != stripe) { 775 if (devices[j] && devices[j]->devobj) { 776 if (first) { 
777 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false); 778 if (!NT_SUCCESS(Status)) { 779 ERR("sync_read_phys returned %08x\n", Status); 780 failed = true; 781 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 782 break; 783 } 784 785 first = false; 786 } else { 787 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, 788 sector + Vcb->superblock.sector_size, false); 789 if (!NT_SUCCESS(Status)) { 790 ERR("sync_read_phys returned %08x\n", Status); 791 failed = true; 792 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 793 break; 794 } 795 796 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size); 797 } 798 } else { 799 failed = true; 800 break; 801 } 802 } 803 } 804 805 if (!failed) { 806 if (context->csum) 807 crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 808 809 if (!context->csum || crc32 == context->csum[i]) { 810 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 811 812 if (!degraded) 813 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 814 815 recovered = true; 816 817 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 818 Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, 819 sector, Vcb->superblock.sector_size); 820 if (!NT_SUCCESS(Status)) { 821 WARN("write_data_phys returned %08x\n", Status); 822 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 823 } 824 } 825 } 826 } 827 828 if (!recovered) { 829 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 830 ExFreePool(sector); 831 return STATUS_CRC_ERROR; 
832 } 833 } 834 } 835 836 ExFreePool(sector); 837 } 838 839 return STATUS_SUCCESS; 840 } 841 842 void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) { 843 if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data 844 uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1; 845 uint16_t stripe; 846 847 stripe = num_stripes - 3; 848 849 if (stripe == missing) 850 RtlZeroMemory(out, sector_size); 851 else 852 RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size); 853 854 do { 855 stripe--; 856 857 galois_double(out, sector_size); 858 859 if (stripe != missing) 860 do_xor(out, sectors + (stripe * sector_size), sector_size); 861 } while (stripe > 0); 862 863 do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size); 864 865 if (missing != 0) 866 galois_divpower(out, (uint8_t)missing, sector_size); 867 } else { // reconstruct from p and q 868 uint16_t x, y, stripe; 869 uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; 870 uint32_t j; 871 872 stripe = num_stripes - 3; 873 874 pxy = out + sector_size; 875 qxy = out; 876 877 if (stripe == missing1 || stripe == missing2) { 878 RtlZeroMemory(qxy, sector_size); 879 RtlZeroMemory(pxy, sector_size); 880 881 if (stripe == missing1) 882 x = stripe; 883 else 884 y = stripe; 885 } else { 886 RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size); 887 RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size); 888 } 889 890 do { 891 stripe--; 892 893 galois_double(qxy, sector_size); 894 895 if (stripe != missing1 && stripe != missing2) { 896 do_xor(qxy, sectors + (stripe * sector_size), sector_size); 897 do_xor(pxy, sectors + (stripe * sector_size), sector_size); 898 } else if (stripe == missing1) 899 x = stripe; 900 else if (stripe == missing2) 901 y = stripe; 902 } while (stripe > 0); 903 904 gyx = gpow2(y > x ? 
(y-x) : (255-x+y)); 905 gx = gpow2(255-x); 906 907 denom = gdiv(1, gyx ^ 1); 908 a = gmul(gyx, denom); 909 b = gmul(gx, denom); 910 911 p = sectors + ((num_stripes - 2) * sector_size); 912 q = sectors + ((num_stripes - 1) * sector_size); 913 914 for (j = 0; j < sector_size; j++) { 915 *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); 916 917 p++; 918 q++; 919 pxy++; 920 qxy++; 921 } 922 923 do_xor(out + sector_size, out, sector_size); 924 do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size); 925 } 926 } 927 928 static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci, 929 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) { 930 NTSTATUS Status; 931 ULONG i; 932 bool checksum_error = false; 933 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 934 uint16_t stripe, j; 935 bool no_success = true; 936 937 for (j = 0; j < ci->num_stripes; j++) { 938 if (context->stripes[j].status == ReadDataStatus_Error) { 939 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 940 941 if (devices[j]) 942 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 943 return context->stripes[j].iosb.Status; 944 } else if (context->stripes[j].status == ReadDataStatus_Success) { 945 stripe = j; 946 no_success = false; 947 } 948 } 949 950 if (c) { // check partial stripes 951 LIST_ENTRY* le; 952 uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length; 953 954 ExAcquireResourceSharedLite(&c->partial_stripes_lock, true); 955 956 le = c->partial_stripes.Flink; 957 while (le != &c->partial_stripes) { 958 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 959 960 if (ps->address + ps_length > addr && ps->address < addr + length) { 961 ULONG runlength, index; 962 963 runlength = RtlFindFirstRunClear(&ps->bmp, &index); 964 965 while (runlength != 0) { 966 #ifdef __REACTOS__ 967 uint64_t 
runstart, runend, start, end;
#endif
                    if (index >= ps->bmplen)
                        break;

                    // Clamp the run to the bitmap length.
                    if (index + runlength >= ps->bmplen) {
                        runlength = ps->bmplen - index;

                        if (runlength == 0)
                            break;
                    }

#ifndef __REACTOS__
                    uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
                    uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
                    uint64_t start = max(runstart, addr);
                    uint64_t end = min(runend, addr + length);
#else
                    runstart = ps->address + (index * Vcb->superblock.sector_size);
                    runend = runstart + (runlength * Vcb->superblock.sector_size);
                    start = max(runstart, addr);
                    end = min(runend, addr + length);
#endif

                    // Copy the overlap between this clear run and the
                    // requested range out of the cached partial stripe.
                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break; // presumably the list is address-ordered, so nothing further overlaps

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        // Metadata: validate the tree header's address, embedded crc32 and
        // (if requested) transaction generation.
        tree_header* th = (tree_header*)buf;
        uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

        if (addr != th->address || crc32 != *((uint32_t*)th->csum)) {
            checksum_error = true;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = true;
            if (!no_success && !degraded && devices[stripe])
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        // Data with checksums: verify each sector against the csum array.
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = true;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }
    } else if (degraded)
        checksum_error = true; // no checksum to trust; force a parity rebuild

    if (!checksum_error)
        return STATUS_SUCCESS;

    if (context->tree) {
        // Rebuild a bad metadata node. Scratch buffer holds num_stripes + 2
        // node-sized blocks: re-read stripes of the row (in row order after
        // parity2), then Q at [num_stripes - 1], with the last two blocks
        // used as rebuild work areas.
        uint8_t* sector;
        uint16_t k, physstripe, parity1, parity2, error_stripe;
        uint64_t off;
        bool recovered = false, failed = false;
        ULONG num_errors = 0;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);

        // Parity rotates per row: parity1/parity2 are the physical stripes
        // holding P and Q, physstripe is the one with the bad data.
        parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
        parity2 = (parity1 + 1) % ci->num_stripes;

        physstripe = (parity2 + stripe + 1) % ci->num_stripes;

        j = (parity2 + 1) % ci->num_stripes;

        // Synchronously re-read every other stripe of the row (data and P),
        // tolerating at most one failure; two failures plus the original
        // bad stripe exceed what RAID 6 can reconstruct.
        for (k = 0; k < ci->num_stripes - 1; k++) {
            if (j != physstripe) {
                if (devices[j] && devices[j]->devobj) {
                    Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
                                            sector + (k * Vcb->superblock.node_size), false);
                    if (!NT_SUCCESS(Status)) {
                        ERR("sync_read_phys returned %08x\n", Status);
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                        num_errors++;
                        error_stripe = k;

                        if (num_errors > 1) {
                            failed = true;
                            break;
                        }
                    }
                } else {
                    num_errors++;
                    error_stripe = k;

                    if (num_errors > 1) {
                        failed = true;
                        break;
                    }
                }
            }

            j = (j + 1) % ci->num_stripes;
        }

        if (!failed) {
            if (num_errors == 0) {
                // Everything else read fine: rebuild the bad stripe from P
                // by XORing the surviving data stripes.
                tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
                uint32_t crc32;

                RtlCopyMemory(sector + (stripe *
Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), 1093 Vcb->superblock.node_size); 1094 1095 for (j = 0; j < ci->num_stripes - 2; j++) { 1096 if (j != stripe) 1097 do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size); 1098 } 1099 1100 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1101 1102 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) { 1103 RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1104 1105 if (devices[physstripe] && devices[physstripe]->devobj) 1106 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id); 1107 1108 recovered = true; 1109 1110 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1111 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1112 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1113 if (!NT_SUCCESS(Status)) { 1114 WARN("write_data_phys returned %08x\n", Status); 1115 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1116 } 1117 } 1118 } 1119 } 1120 1121 if (!recovered) { 1122 uint32_t crc32; 1123 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size)); 1124 bool read_q = false; 1125 1126 if (devices[parity2] && devices[parity2]->devobj) { 1127 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off, 1128 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false); 1129 if (!NT_SUCCESS(Status)) { 1130 ERR("sync_read_phys returned %08x\n", Status); 1131 log_device_error(Vcb, devices[j], 
BTRFS_DEV_STAT_READ_ERRORS); 1132 } else 1133 read_q = true; 1134 } 1135 1136 if (read_q) { 1137 if (num_errors == 1) { 1138 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1139 1140 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1141 1142 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) 1143 recovered = true; 1144 } else { 1145 for (j = 0; j < ci->num_stripes - 1; j++) { 1146 if (j != stripe) { 1147 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1148 1149 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1150 1151 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) { 1152 recovered = true; 1153 error_stripe = j; 1154 break; 1155 } 1156 } 1157 } 1158 } 1159 } 1160 1161 if (recovered) { 1162 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1163 1164 if (devices[physstripe] && devices[physstripe]->devobj) 1165 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id); 1166 1167 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1168 1169 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1170 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off, 1171 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1172 if (!NT_SUCCESS(Status)) { 1173 WARN("write_data_phys returned %08x\n", Status); 1174 log_device_error(Vcb, devices[physstripe], 
BTRFS_DEV_STAT_WRITE_ERRORS);
                        }
                    }

                    // The second bad stripe identified during recovery also
                    // gets repaired: recompute P if it was the parity, or
                    // take the reconstructed data from the work area.
                    if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
                        if (error_stripe == ci->num_stripes - 2) {
                            ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);

                            log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                            RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);

                            for (j = 0; j < ci->num_stripes - 2; j++) {
                                if (j == stripe) {
                                    do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
                                           Vcb->superblock.node_size);
                                } else {
                                    do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
                                           Vcb->superblock.node_size);
                                }
                            }
                        } else {
                            ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
                                devices[error_stripe_phys]->devitem.dev_id);

                            log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                            RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
                                          sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
                        }
                    }

                    if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
                        Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
                                                 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
                        if (!NT_SUCCESS(Status)) {
                            WARN("write_data_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
                        }
                    }
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(sector);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(sector);
    } else {
        // Data (non-tree) path: repair sector by sector, using the same
        // scratch-buffer layout as the metadata path above but with
        // sector_size blocks.
        ULONG sectors = length / Vcb->superblock.sector_size;
        uint8_t* sector;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            uint64_t off;
            uint16_t physstripe, parity1, parity2;
            uint32_t crc32;

            if (context->csum)
                crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

            get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
                             ci->num_stripes - 2, &off, &stripe);

            // Per-row parity rotation, as in the metadata branch.
            parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
            parity2 = (parity1 + 1) % ci->num_stripes;

            physstripe = (parity2 + stripe + 1) % ci->num_stripes;

            // Rebuild only sectors whose device is missing or whose
            // checksum mismatches.
            if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
                uint16_t k, error_stripe;
                bool recovered = false, failed = false;
                ULONG num_errors = 0;

                if (devices[physstripe] && devices[physstripe]->devobj)
                    log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);

                j = (parity2 + 1) % ci->num_stripes;

                // Re-read the other stripes of the row, tolerating at most
                // one failure.
                for (k = 0; k < ci->num_stripes - 1; k++) {
                    if (j != physstripe) {
                        if (devices[j] && devices[j]->devobj) {
                            Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
                                                    sector + (k * Vcb->superblock.sector_size), false);
                            if (!NT_SUCCESS(Status)) {
                                ERR("sync_read_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                num_errors++;
                                error_stripe = k;

                                if (num_errors > 1) {
                                    failed = true;
                                    break;
                                }
                            }
                        } else {
                            num_errors++;
                            error_stripe = k;

                            if (num_errors > 1) {
                                failed = true;
                                break;
                            }
                        }
                    }

                    j = (j + 1) % ci->num_stripes;
                }

                if (!failed) {
                    if (num_errors == 0) {
                        // Rebuild from P by XORing the surviving data stripes.
                        RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                        for (j = 0; j < ci->num_stripes - 2; j++) {
                            if (j != stripe)
                                do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                        }

                        if (context->csum)
                            crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                        if (!context->csum || crc32 == context->csum[i]) {
                            RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                            if (devices[physstripe] && devices[physstripe]->devobj)
                                ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
                                    devices[physstripe]->devitem.dev_id);

                            recovered = true;

                            if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
                                Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
                                                         sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                if (!NT_SUCCESS(Status)) {
                                    WARN("write_data_phys returned %08x\n", Status);
                                    log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                }
                            }
                        }
                    }

                    if (!recovered) {
                        // P-based rebuild failed: read Q and do the full
                        // RAID-6 reconstruction, as in the metadata branch.
                        bool read_q = false;

                        if (devices[parity2] && devices[parity2]->devobj) {
                            Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
                                                    Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), false);
                            if (!NT_SUCCESS(Status)) {
                                ERR("sync_read_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
                            } else
                                read_q = true;
                        }

                        if (read_q) {
                            if (num_errors == 1) {
                                raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));

                                if (!devices[physstripe] || !devices[physstripe]->devobj)
                                    recovered = true;
                                else {
                                    crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                                    if (crc32 == context->csum[i])
                                        recovered = true;
                                }
                            } else {
                                // Unknown second bad stripe: try candidates
                                // until one reconstruction validates.
                                for (j = 0; j < ci->num_stripes - 1; j++) {
                                    if (j != stripe) {
                                        raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));

                                        crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                                        if (crc32 == context->csum[i]) {
                                            recovered = true;
                                            error_stripe = j;
                                            break;
                                        }
                                    }
                                }
                            }
                        }

                        if (recovered) {
                            uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;

                            if (devices[physstripe] && devices[physstripe]->devobj)
                                ERR("recovering from checksum error at %I64x, device %I64x\n",
                                    addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);

                            RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                            if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
                                Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
                                                         sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                if (!NT_SUCCESS(Status)) {
                                    WARN("write_data_phys returned %08x\n", Status);
                                    log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                }
                            }

                            // Repair the second bad stripe as well.
                            if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
                                if (error_stripe == ci->num_stripes - 2) {
                                    ERR("recovering from parity error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
                                        devices[error_stripe_phys]->devitem.dev_id);

                                    log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                                    RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                                    for (j = 0; j < ci->num_stripes - 2; j++) {
                                        if (j == stripe) {
                                            do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
                                                   Vcb->superblock.sector_size);
                                        } else {
                                            do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
                                                   Vcb->superblock.sector_size);
                                        }
                                    }
                                } else {
                                    ERR("recovering from checksum error at %I64x, device %I64x\n",
                                        addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
                                        devices[error_stripe_phys]->devitem.dev_id);

                                    log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                                    RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
                                                  sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                }
                            }

                            if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
                                Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
                                                         sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                                if (!NT_SUCCESS(Status)) {
                                    WARN("write_data_phys returned %08x\n", Status);
                                    log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
                                }
                            }
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}

NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) uint32_t* csum,
                   _In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read,
                   _In_ ULONG priority) {
    CHUNK_ITEM* ci;
    CHUNK_ITEM_STRIPE* cis;
    read_data_context context;
    uint64_t type, offset, total_reading = 0;
    NTSTATUS Status;
    device** devices = NULL;
    uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
    uint8_t* dummypage = NULL;
    PMDL dummy_mdl = NULL;
    bool need_to_wait;
    uint64_t lockaddr, locklen;

    if (Vcb->log_to_phys_loaded) {
        if (!c) {
            c = get_chunk_from_address(Vcb, addr);

            if (!c) {
                ERR("get_chunk_from_address failed\n");
                return STATUS_INTERNAL_ERROR;
            }
        }

        ci = c->chunk_item;
        offset = c->offset;
        devices = c->devices;

        if (pc)
            *pc = c;
    } else {
LIST_ENTRY* le = Vcb->sys_chunks.Flink; 1472 1473 ci = NULL; 1474 1475 c = NULL; 1476 while (le != &Vcb->sys_chunks) { 1477 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); 1478 1479 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { 1480 CHUNK_ITEM* chunk_item = sc->data; 1481 1482 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { 1483 ci = chunk_item; 1484 offset = sc->key.offset; 1485 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; 1486 1487 devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); 1488 if (!devices) { 1489 ERR("out of memory\n"); 1490 return STATUS_INSUFFICIENT_RESOURCES; 1491 } 1492 1493 for (i = 0; i < ci->num_stripes; i++) { 1494 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); 1495 } 1496 1497 break; 1498 } 1499 } 1500 1501 le = le->Flink; 1502 } 1503 1504 if (!ci) { 1505 ERR("could not find chunk for %I64x in bootstrap\n", addr); 1506 return STATUS_INTERNAL_ERROR; 1507 } 1508 1509 if (pc) 1510 *pc = NULL; 1511 } 1512 1513 if (ci->type & BLOCK_FLAG_DUPLICATE) { 1514 type = BLOCK_FLAG_DUPLICATE; 1515 allowed_missing = ci->num_stripes - 1; 1516 } else if (ci->type & BLOCK_FLAG_RAID0) { 1517 type = BLOCK_FLAG_RAID0; 1518 allowed_missing = 0; 1519 } else if (ci->type & BLOCK_FLAG_RAID1) { 1520 type = BLOCK_FLAG_DUPLICATE; 1521 allowed_missing = 1; 1522 } else if (ci->type & BLOCK_FLAG_RAID10) { 1523 type = BLOCK_FLAG_RAID10; 1524 allowed_missing = 1; 1525 } else if (ci->type & BLOCK_FLAG_RAID5) { 1526 type = BLOCK_FLAG_RAID5; 1527 allowed_missing = 1; 1528 } else if (ci->type & BLOCK_FLAG_RAID6) { 1529 type = BLOCK_FLAG_RAID6; 1530 allowed_missing = 2; 1531 } else { // SINGLE 1532 type = BLOCK_FLAG_DUPLICATE; 1533 allowed_missing = 0; 1534 } 1535 1536 cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 1537 1538 RtlZeroMemory(&context, sizeof(read_data_context)); 1539 KeInitializeEvent(&context.Event, NotificationEvent, false); 1540 
1541 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); 1542 if (!context.stripes) { 1543 ERR("out of memory\n"); 1544 return STATUS_INSUFFICIENT_RESOURCES; 1545 } 1546 1547 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) { 1548 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen); 1549 chunk_lock_range(Vcb, c, lockaddr, locklen); 1550 } 1551 1552 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes); 1553 1554 context.buflen = length; 1555 context.num_stripes = ci->num_stripes; 1556 context.stripes_left = context.num_stripes; 1557 context.sector_size = Vcb->superblock.sector_size; 1558 context.csum = csum; 1559 context.tree = is_tree; 1560 context.type = type; 1561 1562 if (type == BLOCK_FLAG_RAID0) { 1563 uint64_t startoff, endoff; 1564 uint16_t endoffstripe, stripe; 1565 uint32_t *stripeoff, pos; 1566 PMDL master_mdl; 1567 PFN_NUMBER* pfns; 1568 1569 // FIXME - test this still works if page size isn't the same as sector size 1570 1571 // This relies on the fact that MDLs are followed in memory by the page file numbers, 1572 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0 1573 // data for you without doing a memcpy yourself. 1574 // MDLs are officially opaque, so this might very well break in future versions of Windows. 1575 1576 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); 1577 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); 1578 1579 if (file_read) { 1580 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL 1581 // with duplicated dummy PFNs, which confuse check_csum. Ah well. 1582 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested. 
1583 1584 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1585 1586 if (!context.va) { 1587 ERR("out of memory\n"); 1588 Status = STATUS_INSUFFICIENT_RESOURCES; 1589 goto exit; 1590 } 1591 } else 1592 context.va = buf; 1593 1594 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1595 if (!master_mdl) { 1596 ERR("out of memory\n"); 1597 Status = STATUS_INSUFFICIENT_RESOURCES; 1598 goto exit; 1599 } 1600 1601 Status = STATUS_SUCCESS; 1602 1603 _SEH2_TRY { 1604 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1605 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1606 Status = _SEH2_GetExceptionCode(); 1607 } _SEH2_END; 1608 1609 if (!NT_SUCCESS(Status)) { 1610 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1611 IoFreeMdl(master_mdl); 1612 goto exit; 1613 } 1614 1615 pfns = (PFN_NUMBER*)(master_mdl + 1); 1616 1617 for (i = 0; i < ci->num_stripes; i++) { 1618 if (startoffstripe > i) 1619 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1620 else if (startoffstripe == i) 1621 context.stripes[i].stripestart = startoff; 1622 else 1623 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length); 1624 1625 if (endoffstripe > i) 1626 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1627 else if (endoffstripe == i) 1628 context.stripes[i].stripeend = endoff + 1; 1629 else 1630 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length); 1631 1632 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 1633 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL); 1634 1635 if (!context.stripes[i].mdl) { 1636 ERR("IoAllocateMdl failed\n"); 1637 MmUnlockPages(master_mdl); 1638 IoFreeMdl(master_mdl); 1639 Status = STATUS_INSUFFICIENT_RESOURCES; 1640 goto exit; 1641 } 1642 } 1643 } 1644 1645 stripeoff = 
ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 1646 if (!stripeoff) { 1647 ERR("out of memory\n"); 1648 MmUnlockPages(master_mdl); 1649 IoFreeMdl(master_mdl); 1650 Status = STATUS_INSUFFICIENT_RESOURCES; 1651 goto exit; 1652 } 1653 1654 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 1655 1656 pos = 0; 1657 stripe = startoffstripe; 1658 while (pos < length) { 1659 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 1660 1661 if (pos == 0) { 1662 uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)); 1663 1664 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1665 1666 stripeoff[stripe] += readlen; 1667 pos += readlen; 1668 } else if (length - pos < ci->stripe_length) { 1669 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1670 1671 pos = length; 1672 } else { 1673 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1674 1675 stripeoff[stripe] += (uint32_t)ci->stripe_length; 1676 pos += (uint32_t)ci->stripe_length; 1677 } 1678 1679 stripe = (stripe + 1) % ci->num_stripes; 1680 } 1681 1682 MmUnlockPages(master_mdl); 1683 IoFreeMdl(master_mdl); 1684 1685 ExFreePool(stripeoff); 1686 } else if (type == BLOCK_FLAG_RAID10) { 1687 uint64_t startoff, endoff; 1688 uint16_t endoffstripe, j, stripe; 1689 ULONG orig_ls; 1690 PMDL master_mdl; 1691 PFN_NUMBER* pfns; 1692 uint32_t* stripeoff, pos; 1693 read_data_stripe** stripes; 1694 1695 if (c) 1696 orig_ls = c->last_stripe; 1697 else 1698 orig_ls = 0; 1699 1700 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); 1701 get_raid0_offset(addr + length - 
offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); 1702 1703 if ((ci->num_stripes % ci->sub_stripes) != 0) { 1704 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); 1705 Status = STATUS_INTERNAL_ERROR; 1706 goto exit; 1707 } 1708 1709 if (file_read) { 1710 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1711 1712 if (!context.va) { 1713 ERR("out of memory\n"); 1714 Status = STATUS_INSUFFICIENT_RESOURCES; 1715 goto exit; 1716 } 1717 } else 1718 context.va = buf; 1719 1720 context.firstoff = (uint16_t)((startoff % ci->stripe_length) / Vcb->superblock.sector_size); 1721 context.startoffstripe = startoffstripe; 1722 context.sectors_per_stripe = (uint16_t)(ci->stripe_length / Vcb->superblock.sector_size); 1723 1724 startoffstripe *= ci->sub_stripes; 1725 endoffstripe *= ci->sub_stripes; 1726 1727 if (c) 1728 c->last_stripe = (orig_ls + 1) % ci->sub_stripes; 1729 1730 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1731 if (!master_mdl) { 1732 ERR("out of memory\n"); 1733 Status = STATUS_INSUFFICIENT_RESOURCES; 1734 goto exit; 1735 } 1736 1737 Status = STATUS_SUCCESS; 1738 1739 _SEH2_TRY { 1740 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1741 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1742 Status = _SEH2_GetExceptionCode(); 1743 } _SEH2_END; 1744 1745 if (!NT_SUCCESS(Status)) { 1746 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1747 IoFreeMdl(master_mdl); 1748 goto exit; 1749 } 1750 1751 pfns = (PFN_NUMBER*)(master_mdl + 1); 1752 1753 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1754 if (!stripes) { 1755 ERR("out of memory\n"); 1756 MmUnlockPages(master_mdl); 1757 IoFreeMdl(master_mdl); 1758 Status = STATUS_INSUFFICIENT_RESOURCES; 1759 goto exit; 1760 } 1761 1762 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * 
ci->num_stripes / ci->sub_stripes); 1763 1764 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { 1765 uint64_t sstart, send; 1766 bool stripeset = false; 1767 1768 if (startoffstripe > i) 1769 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1770 else if (startoffstripe == i) 1771 sstart = startoff; 1772 else 1773 sstart = startoff - (startoff % ci->stripe_length); 1774 1775 if (endoffstripe > i) 1776 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1777 else if (endoffstripe == i) 1778 send = endoff + 1; 1779 else 1780 send = endoff - (endoff % ci->stripe_length); 1781 1782 for (j = 0; j < ci->sub_stripes; j++) { 1783 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) { 1784 context.stripes[i+j].stripestart = sstart; 1785 context.stripes[i+j].stripeend = send; 1786 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1787 1788 if (sstart != send) { 1789 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL); 1790 1791 if (!context.stripes[i+j].mdl) { 1792 ERR("IoAllocateMdl failed\n"); 1793 MmUnlockPages(master_mdl); 1794 IoFreeMdl(master_mdl); 1795 Status = STATUS_INSUFFICIENT_RESOURCES; 1796 goto exit; 1797 } 1798 } 1799 1800 stripeset = true; 1801 } else 1802 context.stripes[i+j].status = ReadDataStatus_Skip; 1803 } 1804 1805 if (!stripeset) { 1806 for (j = 0; j < ci->sub_stripes; j++) { 1807 if (devices[i+j] && devices[i+j]->devobj) { 1808 context.stripes[i+j].stripestart = sstart; 1809 context.stripes[i+j].stripeend = send; 1810 context.stripes[i+j].status = ReadDataStatus_Pending; 1811 stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1812 1813 if (sstart != send) { 1814 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL); 1815 1816 if (!context.stripes[i+j].mdl) { 1817 ERR("IoAllocateMdl failed\n"); 1818 MmUnlockPages(master_mdl); 1819 IoFreeMdl(master_mdl); 1820 Status = STATUS_INSUFFICIENT_RESOURCES; 1821 goto 
exit; 1822 } 1823 } 1824 1825 stripeset = true; 1826 break; 1827 } 1828 } 1829 1830 if (!stripeset) { 1831 ERR("could not find stripe to read\n"); 1832 Status = STATUS_DEVICE_NOT_READY; 1833 goto exit; 1834 } 1835 } 1836 } 1837 1838 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1839 if (!stripeoff) { 1840 ERR("out of memory\n"); 1841 MmUnlockPages(master_mdl); 1842 IoFreeMdl(master_mdl); 1843 Status = STATUS_INSUFFICIENT_RESOURCES; 1844 goto exit; 1845 } 1846 1847 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes); 1848 1849 pos = 0; 1850 stripe = startoffstripe / ci->sub_stripes; 1851 while (pos < length) { 1852 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1); 1853 1854 if (pos == 0) { 1855 uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart, 1856 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length)); 1857 1858 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1859 1860 stripeoff[stripe] += readlen; 1861 pos += readlen; 1862 } else if (length - pos < ci->stripe_length) { 1863 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1864 1865 pos = length; 1866 } else { 1867 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1868 1869 stripeoff[stripe] += (ULONG)ci->stripe_length; 1870 pos += (ULONG)ci->stripe_length; 1871 } 1872 1873 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); 1874 } 1875 1876 MmUnlockPages(master_mdl); 1877 IoFreeMdl(master_mdl); 1878 1879 ExFreePool(stripeoff); 1880 ExFreePool(stripes); 1881 } else if (type == BLOCK_FLAG_DUPLICATE) { 1882 uint64_t orig_ls; 1883 1884 if (c) 1885 orig_ls = i = c->last_stripe; 1886 else 1887 orig_ls = i = 0; 1888 1889 while 
(!devices[i] || !devices[i]->devobj) { 1890 i = (i + 1) % ci->num_stripes; 1891 1892 if (i == orig_ls) { 1893 ERR("no devices available to service request\n"); 1894 Status = STATUS_DEVICE_NOT_READY; 1895 goto exit; 1896 } 1897 } 1898 1899 if (c) 1900 c->last_stripe = (i + 1) % ci->num_stripes; 1901 1902 context.stripes[i].stripestart = addr - offset; 1903 context.stripes[i].stripeend = context.stripes[i].stripestart + length; 1904 1905 if (file_read) { 1906 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1907 1908 if (!context.va) { 1909 ERR("out of memory\n"); 1910 Status = STATUS_INSUFFICIENT_RESOURCES; 1911 goto exit; 1912 } 1913 1914 context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1915 if (!context.stripes[i].mdl) { 1916 ERR("IoAllocateMdl failed\n"); 1917 Status = STATUS_INSUFFICIENT_RESOURCES; 1918 goto exit; 1919 } 1920 1921 MmBuildMdlForNonPagedPool(context.stripes[i].mdl); 1922 } else { 1923 context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL); 1924 1925 if (!context.stripes[i].mdl) { 1926 ERR("IoAllocateMdl failed\n"); 1927 Status = STATUS_INSUFFICIENT_RESOURCES; 1928 goto exit; 1929 } 1930 1931 Status = STATUS_SUCCESS; 1932 1933 _SEH2_TRY { 1934 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess); 1935 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1936 Status = _SEH2_GetExceptionCode(); 1937 } _SEH2_END; 1938 1939 if (!NT_SUCCESS(Status)) { 1940 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1941 goto exit; 1942 } 1943 } 1944 } else if (type == BLOCK_FLAG_RAID5) { 1945 uint64_t startoff, endoff; 1946 uint16_t endoffstripe, parity; 1947 uint32_t *stripeoff, pos; 1948 PMDL master_mdl; 1949 PFN_NUMBER *pfns, dummy; 1950 bool need_dummy = false; 1951 1952 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); 1953 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, 
&endoffstripe); 1954 1955 if (file_read) { 1956 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1957 1958 if (!context.va) { 1959 ERR("out of memory\n"); 1960 Status = STATUS_INSUFFICIENT_RESOURCES; 1961 goto exit; 1962 } 1963 } else 1964 context.va = buf; 1965 1966 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 1967 if (!master_mdl) { 1968 ERR("out of memory\n"); 1969 Status = STATUS_INSUFFICIENT_RESOURCES; 1970 goto exit; 1971 } 1972 1973 Status = STATUS_SUCCESS; 1974 1975 _SEH2_TRY { 1976 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1977 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1978 Status = _SEH2_GetExceptionCode(); 1979 } _SEH2_END; 1980 1981 if (!NT_SUCCESS(Status)) { 1982 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1983 IoFreeMdl(master_mdl); 1984 goto exit; 1985 } 1986 1987 pfns = (PFN_NUMBER*)(master_mdl + 1); 1988 1989 pos = 0; 1990 while (pos < length) { 1991 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 1992 1993 if (pos == 0) { 1994 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes; 1995 ULONG skip, readlen; 1996 1997 i = startoffstripe; 1998 while (stripe != parity) { 1999 if (i == startoffstripe) { 2000 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length))); 2001 2002 context.stripes[stripe].stripestart = startoff; 2003 context.stripes[stripe].stripeend = startoff + readlen; 2004 2005 pos += readlen; 2006 2007 if (pos == length) 2008 break; 2009 } else { 2010 readlen = min(length - pos, (ULONG)ci->stripe_length); 2011 2012 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2013 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2014 2015 pos += readlen; 2016 2017 if (pos == length) 2018 break; 2019 } 2020 2021 i++; 2022 stripe = (stripe + 1) % ci->num_stripes; 2023 } 2024 2025 if (pos == length) 
2026 break; 2027 2028 for (i = 0; i < startoffstripe; i++) { 2029 uint16_t stripe2 = (parity + i + 1) % ci->num_stripes; 2030 2031 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2032 } 2033 2034 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2035 2036 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) { 2037 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1); 2038 2039 for (i = 0; i < ci->num_stripes; i++) { 2040 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2041 } 2042 2043 pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length); 2044 need_dummy = true; 2045 } 2046 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2047 for (i = 0; i < ci->num_stripes; i++) { 2048 context.stripes[i].stripeend += ci->stripe_length; 2049 } 2050 2051 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1)); 2052 need_dummy = true; 2053 } else { 2054 uint16_t stripe = (parity + 1) % ci->num_stripes; 2055 2056 i = 0; 2057 while (stripe != parity) { 2058 if (endoffstripe == i) { 2059 context.stripes[stripe].stripeend = endoff + 1; 2060 break; 2061 } else if (endoffstripe > i) 2062 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2063 2064 i++; 2065 stripe = (stripe + 1) % ci->num_stripes; 2066 } 2067 2068 break; 2069 } 2070 } 2071 2072 for (i = 0; i < ci->num_stripes; i++) { 2073 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2074 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), 2075 false, false, NULL); 2076 2077 if (!context.stripes[i].mdl) { 2078 ERR("IoAllocateMdl failed\n"); 2079 
MmUnlockPages(master_mdl); 2080 IoFreeMdl(master_mdl); 2081 Status = STATUS_INSUFFICIENT_RESOURCES; 2082 goto exit; 2083 } 2084 } 2085 } 2086 2087 if (need_dummy) { 2088 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2089 if (!dummypage) { 2090 ERR("out of memory\n"); 2091 MmUnlockPages(master_mdl); 2092 IoFreeMdl(master_mdl); 2093 Status = STATUS_INSUFFICIENT_RESOURCES; 2094 goto exit; 2095 } 2096 2097 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL); 2098 if (!dummy_mdl) { 2099 ERR("IoAllocateMdl failed\n"); 2100 MmUnlockPages(master_mdl); 2101 IoFreeMdl(master_mdl); 2102 Status = STATUS_INSUFFICIENT_RESOURCES; 2103 goto exit; 2104 } 2105 2106 MmBuildMdlForNonPagedPool(dummy_mdl); 2107 2108 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2109 } 2110 2111 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 2112 if (!stripeoff) { 2113 ERR("out of memory\n"); 2114 MmUnlockPages(master_mdl); 2115 IoFreeMdl(master_mdl); 2116 Status = STATUS_INSUFFICIENT_RESOURCES; 2117 goto exit; 2118 } 2119 2120 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 2121 2122 pos = 0; 2123 2124 while (pos < length) { 2125 PFN_NUMBER* stripe_pfns; 2126 2127 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2128 2129 if (pos == 0) { 2130 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2131 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2132 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2133 2134 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2135 2136 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2137 2138 stripeoff[stripe] = readlen; 2139 pos += readlen; 2140 2141 stripe = (stripe + 1) % ci->num_stripes; 2142 2143 while (stripe != parity) { 2144 
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2145 readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2146 2147 if (readlen == 0) 2148 break; 2149 2150 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2151 2152 stripeoff[stripe] = readlen; 2153 pos += readlen; 2154 2155 stripe = (stripe + 1) % ci->num_stripes; 2156 } 2157 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2158 uint16_t stripe = (parity + 1) % ci->num_stripes; 2159 ULONG k; 2160 2161 while (stripe != parity) { 2162 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2163 2164 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2165 2166 stripeoff[stripe] += (uint32_t)ci->stripe_length; 2167 pos += (uint32_t)ci->stripe_length; 2168 2169 stripe = (stripe + 1) % ci->num_stripes; 2170 } 2171 2172 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1); 2173 2174 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2175 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy; 2176 stripeoff[parity] += PAGE_SIZE; 2177 } 2178 } else { 2179 uint16_t stripe = (parity + 1) % ci->num_stripes; 2180 uint32_t readlen; 2181 2182 while (pos < length) { 2183 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2184 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2185 2186 if (readlen == 0) 2187 break; 2188 2189 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2190 2191 stripeoff[stripe] += readlen; 2192 pos += readlen; 2193 2194 stripe = (stripe + 1) % ci->num_stripes; 2195 } 2196 } 2197 } 2198 2199 MmUnlockPages(master_mdl); 2200 IoFreeMdl(master_mdl); 2201 2202 
ExFreePool(stripeoff); 2203 } else if (type == BLOCK_FLAG_RAID6) { 2204 uint64_t startoff, endoff; 2205 uint16_t endoffstripe, parity1; 2206 uint32_t *stripeoff, pos; 2207 PMDL master_mdl; 2208 PFN_NUMBER *pfns, dummy; 2209 bool need_dummy = false; 2210 2211 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); 2212 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); 2213 2214 if (file_read) { 2215 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2216 2217 if (!context.va) { 2218 ERR("out of memory\n"); 2219 Status = STATUS_INSUFFICIENT_RESOURCES; 2220 goto exit; 2221 } 2222 } else 2223 context.va = buf; 2224 2225 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL); 2226 if (!master_mdl) { 2227 ERR("out of memory\n"); 2228 Status = STATUS_INSUFFICIENT_RESOURCES; 2229 goto exit; 2230 } 2231 2232 Status = STATUS_SUCCESS; 2233 2234 _SEH2_TRY { 2235 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2236 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2237 Status = _SEH2_GetExceptionCode(); 2238 } _SEH2_END; 2239 2240 if (!NT_SUCCESS(Status)) { 2241 ERR("MmProbeAndLockPages threw exception %08x\n", Status); 2242 IoFreeMdl(master_mdl); 2243 goto exit; 2244 } 2245 2246 pfns = (PFN_NUMBER*)(master_mdl + 1); 2247 2248 pos = 0; 2249 while (pos < length) { 2250 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2251 2252 if (pos == 0) { 2253 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2; 2254 ULONG skip, readlen; 2255 2256 i = startoffstripe; 2257 while (stripe != parity1) { 2258 if (i == startoffstripe) { 2259 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length)); 2260 2261 context.stripes[stripe].stripestart = startoff; 2262 context.stripes[stripe].stripeend = startoff + readlen; 2263 2264 pos += 
readlen; 2265 2266 if (pos == length) 2267 break; 2268 } else { 2269 readlen = min(length - pos, (ULONG)ci->stripe_length); 2270 2271 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2272 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2273 2274 pos += readlen; 2275 2276 if (pos == length) 2277 break; 2278 } 2279 2280 i++; 2281 stripe = (stripe + 1) % ci->num_stripes; 2282 } 2283 2284 if (pos == length) 2285 break; 2286 2287 for (i = 0; i < startoffstripe; i++) { 2288 uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes; 2289 2290 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2291 } 2292 2293 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2294 2295 parity2 = (parity1 + 1) % ci->num_stripes; 2296 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2297 2298 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) { 2299 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1); 2300 2301 for (i = 0; i < ci->num_stripes; i++) { 2302 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2303 } 2304 2305 pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length); 2306 need_dummy = true; 2307 } 2308 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2309 for (i = 0; i < ci->num_stripes; i++) { 2310 context.stripes[i].stripeend += ci->stripe_length; 2311 } 2312 2313 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2)); 2314 need_dummy = true; 2315 } else { 2316 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2317 2318 i = 0; 2319 while (stripe != parity1) { 2320 if (endoffstripe == i) { 2321 
context.stripes[stripe].stripeend = endoff + 1; 2322 break; 2323 } else if (endoffstripe > i) 2324 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2325 2326 i++; 2327 stripe = (stripe + 1) % ci->num_stripes; 2328 } 2329 2330 break; 2331 } 2332 } 2333 2334 for (i = 0; i < ci->num_stripes; i++) { 2335 if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2336 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL); 2337 2338 if (!context.stripes[i].mdl) { 2339 ERR("IoAllocateMdl failed\n"); 2340 MmUnlockPages(master_mdl); 2341 IoFreeMdl(master_mdl); 2342 Status = STATUS_INSUFFICIENT_RESOURCES; 2343 goto exit; 2344 } 2345 } 2346 } 2347 2348 if (need_dummy) { 2349 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2350 if (!dummypage) { 2351 ERR("out of memory\n"); 2352 MmUnlockPages(master_mdl); 2353 IoFreeMdl(master_mdl); 2354 Status = STATUS_INSUFFICIENT_RESOURCES; 2355 goto exit; 2356 } 2357 2358 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL); 2359 if (!dummy_mdl) { 2360 ERR("IoAllocateMdl failed\n"); 2361 MmUnlockPages(master_mdl); 2362 IoFreeMdl(master_mdl); 2363 Status = STATUS_INSUFFICIENT_RESOURCES; 2364 goto exit; 2365 } 2366 2367 MmBuildMdlForNonPagedPool(dummy_mdl); 2368 2369 dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2370 } 2371 2372 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG); 2373 if (!stripeoff) { 2374 ERR("out of memory\n"); 2375 MmUnlockPages(master_mdl); 2376 IoFreeMdl(master_mdl); 2377 Status = STATUS_INSUFFICIENT_RESOURCES; 2378 goto exit; 2379 } 2380 2381 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes); 2382 2383 pos = 0; 2384 2385 while (pos < length) { 2386 PFN_NUMBER* stripe_pfns; 2387 2388 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % 
ci->num_stripes; 2389 2390 if (pos == 0) { 2391 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes; 2392 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2393 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2394 2395 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2396 2397 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2398 2399 stripeoff[stripe] = readlen; 2400 pos += readlen; 2401 2402 stripe = (stripe + 1) % ci->num_stripes; 2403 2404 while (stripe != parity1) { 2405 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2406 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2407 2408 if (readlen == 0) 2409 break; 2410 2411 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2412 2413 stripeoff[stripe] = readlen; 2414 pos += readlen; 2415 2416 stripe = (stripe + 1) % ci->num_stripes; 2417 } 2418 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2419 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2420 uint16_t parity2 = (parity1 + 1) % ci->num_stripes; 2421 ULONG k; 2422 2423 while (stripe != parity1) { 2424 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2425 2426 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2427 2428 stripeoff[stripe] += (uint32_t)ci->stripe_length; 2429 pos += (uint32_t)ci->stripe_length; 2430 2431 stripe = (stripe + 1) % ci->num_stripes; 2432 } 2433 2434 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1); 2435 2436 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2437 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy; 2438 stripeoff[parity1] += PAGE_SIZE; 2439 } 2440 2441 
stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1); 2442 2443 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2444 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy; 2445 stripeoff[parity2] += PAGE_SIZE; 2446 } 2447 } else { 2448 uint16_t stripe = (parity1 + 2) % ci->num_stripes; 2449 uint32_t readlen; 2450 2451 while (pos < length) { 2452 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2453 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2454 2455 if (readlen == 0) 2456 break; 2457 2458 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2459 2460 stripeoff[stripe] += readlen; 2461 pos += readlen; 2462 2463 stripe = (stripe + 1) % ci->num_stripes; 2464 } 2465 } 2466 } 2467 2468 MmUnlockPages(master_mdl); 2469 IoFreeMdl(master_mdl); 2470 2471 ExFreePool(stripeoff); 2472 } 2473 2474 context.address = addr; 2475 2476 for (i = 0; i < ci->num_stripes; i++) { 2477 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) { 2478 context.stripes[i].status = ReadDataStatus_MissingDevice; 2479 context.stripes_left--; 2480 2481 if (!devices[i] || !devices[i]->devobj) 2482 missing_devices++; 2483 } 2484 } 2485 2486 if (missing_devices > allowed_missing) { 2487 ERR("not enough devices to service request (%u missing)\n", missing_devices); 2488 Status = STATUS_UNEXPECTED_IO_ERROR; 2489 goto exit; 2490 } 2491 2492 for (i = 0; i < ci->num_stripes; i++) { 2493 PIO_STACK_LOCATION IrpSp; 2494 2495 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) { 2496 context.stripes[i].context = (struct read_data_context*)&context; 2497 2498 if (type == BLOCK_FLAG_RAID10) { 2499 context.stripes[i].stripenum = i / ci->sub_stripes; 2500 } 2501 
2502 if (!Irp) { 2503 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false); 2504 2505 if (!context.stripes[i].Irp) { 2506 ERR("IoAllocateIrp failed\n"); 2507 Status = STATUS_INSUFFICIENT_RESOURCES; 2508 goto exit; 2509 } 2510 } else { 2511 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); 2512 2513 if (!context.stripes[i].Irp) { 2514 ERR("IoMakeAssociatedIrp failed\n"); 2515 Status = STATUS_INSUFFICIENT_RESOURCES; 2516 goto exit; 2517 } 2518 } 2519 2520 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2521 IrpSp->MajorFunction = IRP_MJ_READ; 2522 IrpSp->MinorFunction = IRP_MN_NORMAL; 2523 IrpSp->FileObject = devices[i]->fileobj; 2524 2525 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2526 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG); 2527 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2528 ERR("out of memory\n"); 2529 Status = STATUS_INSUFFICIENT_RESOURCES; 2530 goto exit; 2531 } 2532 2533 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2534 2535 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2536 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) 2537 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl; 2538 else 2539 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2540 2541 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart); 2542 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset; 2543 2544 total_reading += IrpSp->Parameters.Read.Length; 2545 2546 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2547 2548 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], 
true, true, true); 2549 2550 context.stripes[i].status = ReadDataStatus_Pending; 2551 } 2552 } 2553 2554 need_to_wait = false; 2555 for (i = 0; i < ci->num_stripes; i++) { 2556 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) { 2557 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp); 2558 need_to_wait = true; 2559 } 2560 } 2561 2562 if (need_to_wait) 2563 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL); 2564 2565 if (diskacc) 2566 fFsRtlUpdateDiskCounters(total_reading, 0); 2567 2568 // check if any of the devices return a "user-induced" error 2569 2570 for (i = 0; i < ci->num_stripes; i++) { 2571 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) { 2572 Status = context.stripes[i].iosb.Status; 2573 goto exit; 2574 } 2575 } 2576 2577 if (type == BLOCK_FLAG_RAID0) { 2578 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2579 if (!NT_SUCCESS(Status)) { 2580 ERR("read_data_raid0 returned %08x\n", Status); 2581 2582 if (file_read) 2583 ExFreePool(context.va); 2584 2585 goto exit; 2586 } 2587 2588 if (file_read) { 2589 RtlCopyMemory(buf, context.va, length); 2590 ExFreePool(context.va); 2591 } 2592 } else if (type == BLOCK_FLAG_RAID10) { 2593 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2594 2595 if (!NT_SUCCESS(Status)) { 2596 ERR("read_data_raid10 returned %08x\n", Status); 2597 2598 if (file_read) 2599 ExFreePool(context.va); 2600 2601 goto exit; 2602 } 2603 2604 if (file_read) { 2605 RtlCopyMemory(buf, context.va, length); 2606 ExFreePool(context.va); 2607 } 2608 } else if (type == BLOCK_FLAG_DUPLICATE) { 2609 Status = read_data_dup(Vcb, file_read ? 
context.va : buf, addr, &context, ci, devices, generation); 2610 if (!NT_SUCCESS(Status)) { 2611 ERR("read_data_dup returned %08x\n", Status); 2612 2613 if (file_read) 2614 ExFreePool(context.va); 2615 2616 goto exit; 2617 } 2618 2619 if (file_read) { 2620 RtlCopyMemory(buf, context.va, length); 2621 ExFreePool(context.va); 2622 } 2623 } else if (type == BLOCK_FLAG_RAID5) { 2624 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false); 2625 if (!NT_SUCCESS(Status)) { 2626 ERR("read_data_raid5 returned %08x\n", Status); 2627 2628 if (file_read) 2629 ExFreePool(context.va); 2630 2631 goto exit; 2632 } 2633 2634 if (file_read) { 2635 RtlCopyMemory(buf, context.va, length); 2636 ExFreePool(context.va); 2637 } 2638 } else if (type == BLOCK_FLAG_RAID6) { 2639 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? 
true : false); 2640 if (!NT_SUCCESS(Status)) { 2641 ERR("read_data_raid6 returned %08x\n", Status); 2642 2643 if (file_read) 2644 ExFreePool(context.va); 2645 2646 goto exit; 2647 } 2648 2649 if (file_read) { 2650 RtlCopyMemory(buf, context.va, length); 2651 ExFreePool(context.va); 2652 } 2653 } 2654 2655 exit: 2656 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) 2657 chunk_unlock_range(Vcb, c, lockaddr, locklen); 2658 2659 if (dummy_mdl) 2660 IoFreeMdl(dummy_mdl); 2661 2662 if (dummypage) 2663 ExFreePool(dummypage); 2664 2665 for (i = 0; i < ci->num_stripes; i++) { 2666 if (context.stripes[i].mdl) { 2667 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED) 2668 MmUnlockPages(context.stripes[i].mdl); 2669 2670 IoFreeMdl(context.stripes[i].mdl); 2671 } 2672 2673 if (context.stripes[i].Irp) 2674 IoFreeIrp(context.stripes[i].Irp); 2675 } 2676 2677 ExFreePool(context.stripes); 2678 2679 if (!Vcb->log_to_phys_loaded) 2680 ExFreePool(devices); 2681 2682 return Status; 2683 } 2684 2685 NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) { 2686 ULONG readlen; 2687 2688 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr); 2689 2690 if (pbr) *pbr = 0; 2691 2692 if (start >= fcb->adsdata.Length) { 2693 TRACE("tried to read beyond end of stream\n"); 2694 return STATUS_END_OF_FILE; 2695 } 2696 2697 if (length == 0) { 2698 WARN("tried to read zero bytes\n"); 2699 return STATUS_SUCCESS; 2700 } 2701 2702 if (start + length < fcb->adsdata.Length) 2703 readlen = length; 2704 else 2705 readlen = fcb->adsdata.Length - (ULONG)start; 2706 2707 if (readlen > 0) 2708 RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen); 2709 2710 if (pbr) *pbr = readlen; 2711 2712 return STATUS_SUCCESS; 2713 } 2714 2715 NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) { 2716 NTSTATUS Status; 2717 EXTENT_DATA* ed; 2718 uint32_t bytes_read = 0; 2719 uint64_t last_end; 2720 
LIST_ENTRY* le; 2721 POOL_TYPE pool_type; 2722 2723 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr); 2724 2725 if (pbr) 2726 *pbr = 0; 2727 2728 if (start >= fcb->inode_item.st_size) { 2729 WARN("Tried to read beyond end of file\n"); 2730 Status = STATUS_END_OF_FILE; 2731 goto exit; 2732 } 2733 2734 pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool; 2735 2736 le = fcb->extents.Flink; 2737 2738 last_end = start; 2739 2740 while (le != &fcb->extents) { 2741 uint64_t len; 2742 extent* ext = CONTAINING_RECORD(le, extent, list_entry); 2743 EXTENT_DATA2* ed2; 2744 2745 if (!ext->ignore) { 2746 ed = &ext->extent_data; 2747 2748 ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL; 2749 2750 len = ed2 ? ed2->num_bytes : ed->decoded_size; 2751 2752 if (ext->offset + len <= start) { 2753 last_end = ext->offset + len; 2754 goto nextitem; 2755 } 2756 2757 if (ext->offset > last_end && ext->offset > start + bytes_read) { 2758 uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end)); 2759 2760 RtlZeroMemory(data + bytes_read, read); 2761 bytes_read += read; 2762 length -= read; 2763 } 2764 2765 if (length == 0 || ext->offset > start + bytes_read + length) 2766 break; 2767 2768 if (ed->encryption != BTRFS_ENCRYPTION_NONE) { 2769 WARN("Encryption not supported\n"); 2770 Status = STATUS_NOT_IMPLEMENTED; 2771 goto exit; 2772 } 2773 2774 if (ed->encoding != BTRFS_ENCODING_NONE) { 2775 WARN("Other encodings not supported\n"); 2776 Status = STATUS_NOT_IMPLEMENTED; 2777 goto exit; 2778 } 2779 2780 switch (ed->type) { 2781 case EXTENT_TYPE_INLINE: 2782 { 2783 uint64_t off = start + bytes_read - ext->offset; 2784 uint32_t read; 2785 2786 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2787 read = (uint32_t)min(min(len, ext->datalen) - off, length); 2788 2789 RtlCopyMemory(data + bytes_read, &ed->data[off], read); 2790 } else if (ed->compression == 
BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) { 2791 uint8_t* decomp; 2792 bool decomp_alloc; 2793 uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]); 2794 2795 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) { 2796 ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size); 2797 Status = STATUS_INTERNAL_ERROR; 2798 goto exit; 2799 } 2800 2801 read = (uint32_t)min(ed->decoded_size - off, length); 2802 2803 if (off > 0) { 2804 decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG); 2805 if (!decomp) { 2806 ERR("out of memory\n"); 2807 Status = STATUS_INSUFFICIENT_RESOURCES; 2808 goto exit; 2809 } 2810 2811 decomp_alloc = true; 2812 } else { 2813 decomp = data + bytes_read; 2814 decomp_alloc = false; 2815 } 2816 2817 if (ed->compression == BTRFS_COMPRESSION_ZLIB) { 2818 Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off)); 2819 if (!NT_SUCCESS(Status)) { 2820 ERR("zlib_decompress returned %08x\n", Status); 2821 if (decomp_alloc) ExFreePool(decomp); 2822 goto exit; 2823 } 2824 } else if (ed->compression == BTRFS_COMPRESSION_LZO) { 2825 if (inlen < sizeof(uint32_t)) { 2826 ERR("extent data was truncated\n"); 2827 Status = STATUS_INTERNAL_ERROR; 2828 if (decomp_alloc) ExFreePool(decomp); 2829 goto exit; 2830 } else 2831 inlen -= sizeof(uint32_t); 2832 2833 Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t)); 2834 if (!NT_SUCCESS(Status)) { 2835 ERR("lzo_decompress returned %08x\n", Status); 2836 if (decomp_alloc) ExFreePool(decomp); 2837 goto exit; 2838 } 2839 } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) { 2840 Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off)); 2841 if (!NT_SUCCESS(Status)) { 2842 ERR("zstd_decompress returned %08x\n", Status); 2843 if (decomp_alloc) ExFreePool(decomp); 2844 goto exit; 2845 } 2846 } 2847 2848 
if (decomp_alloc) { 2849 RtlCopyMemory(data + bytes_read, decomp + off, read); 2850 ExFreePool(decomp); 2851 } 2852 } else { 2853 ERR("unhandled compression type %x\n", ed->compression); 2854 Status = STATUS_NOT_IMPLEMENTED; 2855 goto exit; 2856 } 2857 2858 bytes_read += read; 2859 length -= read; 2860 2861 break; 2862 } 2863 2864 case EXTENT_TYPE_REGULAR: 2865 { 2866 uint64_t off = start + bytes_read - ext->offset; 2867 uint32_t to_read, read; 2868 uint8_t* buf; 2869 bool mdl = (Irp && Irp->MdlAddress) ? true : false; 2870 bool buf_free; 2871 uint32_t bumpoff = 0, *csum; 2872 uint64_t addr; 2873 chunk* c; 2874 2875 read = (uint32_t)(len - off); 2876 if (read > length) read = (uint32_t)length; 2877 2878 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2879 addr = ed2->address + ed2->offset + off; 2880 to_read = (uint32_t)sector_align(read, fcb->Vcb->superblock.sector_size); 2881 2882 if (addr % fcb->Vcb->superblock.sector_size > 0) { 2883 bumpoff = addr % fcb->Vcb->superblock.sector_size; 2884 addr -= bumpoff; 2885 to_read = (uint32_t)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size); 2886 } 2887 } else { 2888 addr = ed2->address; 2889 to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size); 2890 } 2891 2892 if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 && 2893 length % fcb->Vcb->superblock.sector_size == 0) { 2894 buf = data + bytes_read; 2895 buf_free = false; 2896 } else { 2897 buf = ExAllocatePoolWithTag(pool_type, to_read, ALLOC_TAG); 2898 buf_free = true; 2899 2900 if (!buf) { 2901 ERR("out of memory\n"); 2902 Status = STATUS_INSUFFICIENT_RESOURCES; 2903 goto exit; 2904 } 2905 2906 mdl = false; 2907 } 2908 2909 c = get_chunk_from_address(fcb->Vcb, addr); 2910 2911 if (!c) { 2912 ERR("get_chunk_from_address(%I64x) failed\n", addr); 2913 2914 if (buf_free) 2915 ExFreePool(buf); 2916 2917 goto exit; 2918 } 2919 2920 if (ext->csum) { 2921 if (ed->compression == 
BTRFS_COMPRESSION_NONE) 2922 csum = &ext->csum[off / fcb->Vcb->superblock.sector_size]; 2923 else 2924 csum = ext->csum; 2925 } else 2926 csum = NULL; 2927 2928 Status = read_data(fcb->Vcb, addr, to_read, csum, false, buf, c, NULL, Irp, 0, mdl, 2929 fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 2930 if (!NT_SUCCESS(Status)) { 2931 ERR("read_data returned %08x\n", Status); 2932 2933 if (buf_free) 2934 ExFreePool(buf); 2935 2936 goto exit; 2937 } 2938 2939 if (ed->compression == BTRFS_COMPRESSION_NONE) { 2940 if (buf_free) 2941 RtlCopyMemory(data + bytes_read, buf + bumpoff, read); 2942 } else { 2943 uint8_t *decomp = NULL, *buf2; 2944 ULONG outlen, inlen, off2; 2945 uint32_t inpageoff = 0; 2946 2947 off2 = (ULONG)(ed2->offset + off); 2948 buf2 = buf; 2949 inlen = (ULONG)ed2->size; 2950 2951 if (ed->compression == BTRFS_COMPRESSION_LZO) { 2952 ULONG inoff = sizeof(uint32_t); 2953 2954 inlen -= sizeof(uint32_t); 2955 2956 // If reading a few sectors in, skip to the interesting bit 2957 while (off2 > LZO_PAGE_SIZE) { 2958 uint32_t partlen; 2959 2960 if (inlen < sizeof(uint32_t)) 2961 break; 2962 2963 partlen = *(uint32_t*)(buf2 + inoff); 2964 2965 if (partlen < inlen) { 2966 off2 -= LZO_PAGE_SIZE; 2967 inoff += partlen + sizeof(uint32_t); 2968 inlen -= partlen + sizeof(uint32_t); 2969 2970 if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t)) 2971 inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE; 2972 } else 2973 break; 2974 } 2975 2976 buf2 = &buf2[inoff]; 2977 inpageoff = inoff % LZO_PAGE_SIZE; 2978 } 2979 2980 if (off2 != 0) { 2981 outlen = off2 + min(read, (uint32_t)(ed2->num_bytes - off)); 2982 2983 decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG); 2984 if (!decomp) { 2985 ERR("out of memory\n"); 2986 ExFreePool(buf); 2987 Status = STATUS_INSUFFICIENT_RESOURCES; 2988 goto exit; 2989 } 2990 } else 2991 outlen = min(read, (uint32_t)(ed2->num_bytes - off)); 2992 2993 if (ed->compression == 
BTRFS_COMPRESSION_ZLIB) { 2994 Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen); 2995 2996 if (!NT_SUCCESS(Status)) { 2997 ERR("zlib_decompress returned %08x\n", Status); 2998 ExFreePool(buf); 2999 3000 if (decomp) 3001 ExFreePool(decomp); 3002 3003 goto exit; 3004 } 3005 } else if (ed->compression == BTRFS_COMPRESSION_LZO) { 3006 Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff); 3007 3008 if (!NT_SUCCESS(Status)) { 3009 ERR("lzo_decompress returned %08x\n", Status); 3010 ExFreePool(buf); 3011 3012 if (decomp) 3013 ExFreePool(decomp); 3014 3015 goto exit; 3016 } 3017 } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) { 3018 Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen); 3019 3020 if (!NT_SUCCESS(Status)) { 3021 ERR("zstd_decompress returned %08x\n", Status); 3022 ExFreePool(buf); 3023 3024 if (decomp) 3025 ExFreePool(decomp); 3026 3027 goto exit; 3028 } 3029 } else { 3030 ERR("unsupported compression type %x\n", ed->compression); 3031 Status = STATUS_NOT_SUPPORTED; 3032 3033 ExFreePool(buf); 3034 3035 if (decomp) 3036 ExFreePool(decomp); 3037 3038 goto exit; 3039 } 3040 3041 if (decomp) { 3042 RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off)); 3043 ExFreePool(decomp); 3044 } 3045 } 3046 3047 if (buf_free) 3048 ExFreePool(buf); 3049 3050 bytes_read += read; 3051 length -= read; 3052 3053 break; 3054 } 3055 3056 case EXTENT_TYPE_PREALLOC: 3057 { 3058 uint64_t off = start + bytes_read - ext->offset; 3059 uint32_t read = (uint32_t)(len - off); 3060 3061 if (read > length) read = (uint32_t)length; 3062 3063 RtlZeroMemory(data + bytes_read, read); 3064 3065 bytes_read += read; 3066 length -= read; 3067 3068 break; 3069 } 3070 3071 default: 3072 WARN("Unsupported extent data type %u\n", ed->type); 3073 Status = STATUS_NOT_IMPLEMENTED; 3074 goto exit; 3075 } 3076 3077 last_end = ext->offset + len; 3078 3079 if 
(length == 0) 3080 break; 3081 } 3082 3083 nextitem: 3084 le = le->Flink; 3085 } 3086 3087 if (length > 0 && start + bytes_read < fcb->inode_item.st_size) { 3088 uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length); 3089 3090 RtlZeroMemory(data + bytes_read, read); 3091 3092 bytes_read += read; 3093 length -= read; 3094 } 3095 3096 Status = STATUS_SUCCESS; 3097 if (pbr) 3098 *pbr = bytes_read; 3099 3100 exit: 3101 return Status; 3102 } 3103 3104 NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) { 3105 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3106 PFILE_OBJECT FileObject = IrpSp->FileObject; 3107 fcb* fcb = FileObject->FsContext; 3108 uint8_t* data = NULL; 3109 ULONG length = IrpSp->Parameters.Read.Length, addon = 0; 3110 uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart; 3111 3112 *bytes_read = 0; 3113 3114 if (!fcb || !fcb->Vcb || !fcb->subvol) 3115 return STATUS_INTERNAL_ERROR; 3116 3117 TRACE("fcb = %p\n", fcb); 3118 TRACE("offset = %I64x, length = %x\n", start, length); 3119 TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? 
"true" : "false"); 3120 3121 if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY) 3122 return STATUS_INVALID_DEVICE_REQUEST; 3123 3124 if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) { 3125 WARN("tried to read locked region\n"); 3126 return STATUS_FILE_LOCK_CONFLICT; 3127 } 3128 3129 if (length == 0) { 3130 TRACE("tried to read zero bytes\n"); 3131 return STATUS_SUCCESS; 3132 } 3133 3134 if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) { 3135 TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart); 3136 return STATUS_END_OF_FILE; 3137 } 3138 3139 TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size); 3140 3141 if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) { 3142 data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 3143 3144 if (Irp->MdlAddress && !data) { 3145 ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); 3146 return STATUS_INSUFFICIENT_RESOURCES; 3147 } 3148 3149 if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) { 3150 length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3151 RtlZeroMemory(data, length); 3152 Irp->IoStatus.Information = *bytes_read = length; 3153 return STATUS_SUCCESS; 3154 } 3155 3156 if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) { 3157 addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3158 RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon); 3159 length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start); 3160 } 3161 } 3162 3163 if (!(Irp->Flags & IRP_NOCACHE)) { 3164 NTSTATUS Status = STATUS_SUCCESS; 3165 3166 _SEH2_TRY { 3167 if 
(!FileObject->PrivateCacheMap) { 3168 CC_FILE_SIZES ccfs; 3169 3170 ccfs.AllocationSize = fcb->Header.AllocationSize; 3171 ccfs.FileSize = fcb->Header.FileSize; 3172 ccfs.ValidDataLength = fcb->Header.ValidDataLength; 3173 3174 init_file_cache(FileObject, &ccfs); 3175 } 3176 3177 if (IrpSp->MinorFunction & IRP_MN_MDL) { 3178 CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus); 3179 } else { 3180 if (fCcCopyReadEx) { 3181 TRACE("CcCopyReadEx(%p, %I64x, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, 3182 length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread); 3183 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); 3184 if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) { 3185 TRACE("CcCopyReadEx could not wait\n"); 3186 3187 IoMarkIrpPending(Irp); 3188 return STATUS_PENDING; 3189 } 3190 TRACE("CcCopyReadEx finished\n"); 3191 } else { 3192 TRACE("CcCopyRead(%p, %I64x, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus); 3193 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); 3194 if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) { 3195 TRACE("CcCopyRead could not wait\n"); 3196 3197 IoMarkIrpPending(Irp); 3198 return STATUS_PENDING; 3199 } 3200 TRACE("CcCopyRead finished\n"); 3201 } 3202 } 3203 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 3204 Status = _SEH2_GetExceptionCode(); 3205 } _SEH2_END; 3206 3207 if (NT_SUCCESS(Status)) { 3208 Status = Irp->IoStatus.Status; 3209 Irp->IoStatus.Information += addon; 3210 *bytes_read = (ULONG)Irp->IoStatus.Information; 3211 } else 3212 ERR("EXCEPTION - %08x\n", Status); 3213 3214 return Status; 3215 } else { 3216 
NTSTATUS Status; 3217 3218 if (!wait) { 3219 IoMarkIrpPending(Irp); 3220 return STATUS_PENDING; 3221 } 3222 3223 if (fcb->ads) 3224 Status = read_stream(fcb, data, start, length, bytes_read); 3225 else 3226 Status = read_file(fcb, data, start, length, bytes_read, Irp); 3227 3228 *bytes_read += addon; 3229 TRACE("read %u bytes\n", *bytes_read); 3230 3231 Irp->IoStatus.Information = *bytes_read; 3232 3233 if (diskacc && Status != STATUS_PENDING) { 3234 PETHREAD thread = NULL; 3235 3236 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) 3237 thread = Irp->Tail.Overlay.Thread; 3238 else if (!IoIsSystemThread(PsGetCurrentThread())) 3239 thread = PsGetCurrentThread(); 3240 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) 3241 thread = PsGetCurrentThread(); 3242 3243 if (thread) 3244 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); 3245 } 3246 3247 return Status; 3248 } 3249 } 3250 3251 _Dispatch_type_(IRP_MJ_READ) 3252 _Function_class_(DRIVER_DISPATCH) 3253 NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { 3254 device_extension* Vcb = DeviceObject->DeviceExtension; 3255 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3256 PFILE_OBJECT FileObject = IrpSp->FileObject; 3257 ULONG bytes_read = 0; 3258 NTSTATUS Status; 3259 bool top_level; 3260 fcb* fcb; 3261 ccb* ccb; 3262 bool acquired_fcb_lock = false, wait; 3263 3264 FsRtlEnterFileSystem(); 3265 3266 top_level = is_top_level(Irp); 3267 3268 TRACE("read\n"); 3269 3270 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { 3271 Status = vol_read(DeviceObject, Irp); 3272 goto exit2; 3273 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { 3274 Status = STATUS_INVALID_PARAMETER; 3275 goto end; 3276 } 3277 3278 Irp->IoStatus.Information = 0; 3279 3280 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) { 3281 CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress); 3282 3283 Irp->MdlAddress = NULL; 3284 Status = STATUS_SUCCESS; 3285 3286 
goto exit; 3287 } 3288 3289 fcb = FileObject->FsContext; 3290 3291 if (!fcb) { 3292 ERR("fcb was NULL\n"); 3293 Status = STATUS_INVALID_PARAMETER; 3294 goto exit; 3295 } 3296 3297 ccb = FileObject->FsContext2; 3298 3299 if (!ccb) { 3300 ERR("ccb was NULL\n"); 3301 Status = STATUS_INVALID_PARAMETER; 3302 goto exit; 3303 } 3304 3305 if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) { 3306 WARN("insufficient privileges\n"); 3307 Status = STATUS_ACCESS_DENIED; 3308 goto exit; 3309 } 3310 3311 if (fcb == Vcb->volume_fcb) { 3312 TRACE("reading volume FCB\n"); 3313 3314 IoSkipCurrentIrpStackLocation(Irp); 3315 3316 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); 3317 3318 goto exit2; 3319 } 3320 3321 if (!(Irp->Flags & IRP_PAGING_IO)) 3322 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL); 3323 3324 wait = IoIsOperationSynchronous(Irp); 3325 3326 // Don't offload jobs when doing paging IO - otherwise this can lead to 3327 // deadlocks in CcCopyRead. 3328 if (Irp->Flags & IRP_PAGING_IO) 3329 wait = true; 3330 3331 if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) { 3332 IO_STATUS_BLOCK iosb; 3333 3334 CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb); 3335 if (!NT_SUCCESS(iosb.Status)) { 3336 ERR("CcFlushCache returned %08x\n", iosb.Status); 3337 return iosb.Status; 3338 } 3339 } 3340 3341 if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { 3342 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { 3343 Status = STATUS_PENDING; 3344 IoMarkIrpPending(Irp); 3345 goto exit; 3346 } 3347 3348 acquired_fcb_lock = true; 3349 } 3350 3351 Status = do_read(Irp, wait, &bytes_read); 3352 3353 if (acquired_fcb_lock) 3354 ExReleaseResourceLite(fcb->Header.Resource); 3355 3356 exit: 3357 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO)) 3358 
FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0); 3359 3360 end: 3361 Irp->IoStatus.Status = Status; 3362 3363 TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status); 3364 TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information); 3365 TRACE("returning %08x\n", Status); 3366 3367 if (Status != STATUS_PENDING) 3368 IoCompleteRequest(Irp, IO_NO_INCREMENT); 3369 else { 3370 if (!add_thread_job(Vcb, Irp)) 3371 Status = do_read_job(Irp); 3372 } 3373 3374 exit2: 3375 if (top_level) 3376 IoSetTopLevelIrp(NULL); 3377 3378 FsRtlExitFileSystem(); 3379 3380 return Status; 3381 } 3382