1c2c66affSColin Finck /* Copyright (c) Mark Harmstone 2016-17 2c2c66affSColin Finck * 3c2c66affSColin Finck * This file is part of WinBtrfs. 4c2c66affSColin Finck * 5c2c66affSColin Finck * WinBtrfs is free software: you can redistribute it and/or modify 6c2c66affSColin Finck * it under the terms of the GNU Lesser General Public Licence as published by 7c2c66affSColin Finck * the Free Software Foundation, either version 3 of the Licence, or 8c2c66affSColin Finck * (at your option) any later version. 9c2c66affSColin Finck * 10c2c66affSColin Finck * WinBtrfs is distributed in the hope that it will be useful, 11c2c66affSColin Finck * but WITHOUT ANY WARRANTY; without even the implied warranty of 12c2c66affSColin Finck * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13c2c66affSColin Finck * GNU Lesser General Public Licence for more details. 14c2c66affSColin Finck * 15c2c66affSColin Finck * You should have received a copy of the GNU Lesser General Public Licence 16c2c66affSColin Finck * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */ 17c2c66affSColin Finck 18c2c66affSColin Finck #include "btrfs_drv.h" 19c2c66affSColin Finck 20c2c66affSColin Finck enum read_data_status { 21c2c66affSColin Finck ReadDataStatus_Pending, 22c2c66affSColin Finck ReadDataStatus_Success, 23c2c66affSColin Finck ReadDataStatus_Error, 24c2c66affSColin Finck ReadDataStatus_MissingDevice, 25c2c66affSColin Finck ReadDataStatus_Skip 26c2c66affSColin Finck }; 27c2c66affSColin Finck 28c2c66affSColin Finck struct read_data_context; 29c2c66affSColin Finck 30c2c66affSColin Finck typedef struct { 31c2c66affSColin Finck struct read_data_context* context; 32c2c66affSColin Finck UINT16 stripenum; 33c2c66affSColin Finck BOOL rewrite; 34c2c66affSColin Finck PIRP Irp; 35c2c66affSColin Finck IO_STATUS_BLOCK iosb; 36c2c66affSColin Finck enum read_data_status status; 37c2c66affSColin Finck PMDL mdl; 38c2c66affSColin Finck UINT64 stripestart; 39c2c66affSColin Finck UINT64 stripeend; 40c2c66affSColin Finck } read_data_stripe; 41c2c66affSColin Finck 42c2c66affSColin Finck typedef struct { 43c2c66affSColin Finck KEVENT Event; 44c2c66affSColin Finck NTSTATUS Status; 45c2c66affSColin Finck chunk* c; 46c2c66affSColin Finck UINT64 address; 47c2c66affSColin Finck UINT32 buflen; 48c2c66affSColin Finck LONG num_stripes, stripes_left; 49c2c66affSColin Finck UINT64 type; 50c2c66affSColin Finck UINT32 sector_size; 51c2c66affSColin Finck UINT16 firstoff, startoffstripe, sectors_per_stripe; 52c2c66affSColin Finck UINT32* csum; 53c2c66affSColin Finck BOOL tree; 54c2c66affSColin Finck read_data_stripe* stripes; 55c2c66affSColin Finck UINT8* va; 56c2c66affSColin Finck } read_data_context; 57c2c66affSColin Finck 58c2c66affSColin Finck extern BOOL diskacc; 59c2c66affSColin Finck extern tPsUpdateDiskCounters fPsUpdateDiskCounters; 60c2c66affSColin Finck extern tCcCopyReadEx fCcCopyReadEx; 61c2c66affSColin Finck extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters; 62c2c66affSColin Finck 63c2c66affSColin Finck #define LINUX_PAGE_SIZE 4096 64c2c66affSColin Finck 65c2c66affSColin Finck _Function_class_(IO_COMPLETION_ROUTINE) 66c2c66affSColin Finck #ifdef __REACTOS__ 67c2c66affSColin Finck static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 68c2c66affSColin Finck #else 69c2c66affSColin Finck static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { 70c2c66affSColin Finck #endif 71c2c66affSColin Finck read_data_stripe* stripe = conptr; 72c2c66affSColin Finck read_data_context* context = (read_data_context*)stripe->context; 73c2c66affSColin Finck 74c2c66affSColin Finck UNUSED(DeviceObject); 75c2c66affSColin Finck 76c2c66affSColin Finck stripe->iosb = Irp->IoStatus; 77c2c66affSColin Finck 78c2c66affSColin Finck if (NT_SUCCESS(Irp->IoStatus.Status)) 79c2c66affSColin Finck stripe->status = ReadDataStatus_Success; 80c2c66affSColin Finck else 81c2c66affSColin Finck stripe->status = ReadDataStatus_Error; 82c2c66affSColin Finck 83c2c66affSColin Finck if (InterlockedDecrement(&context->stripes_left) == 0) 84c2c66affSColin Finck KeSetEvent(&context->Event, 0, FALSE); 85c2c66affSColin Finck 86c2c66affSColin Finck return STATUS_MORE_PROCESSING_REQUIRED; 87c2c66affSColin Finck } 88c2c66affSColin Finck 89c2c66affSColin Finck NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) { 90c2c66affSColin Finck NTSTATUS Status; 91c2c66affSColin Finck calc_job* cj; 92c2c66affSColin Finck UINT32* csum2; 93c2c66affSColin Finck 94c2c66affSColin Finck // From experimenting, it seems that 40 sectors is roughly the crossover 95c2c66affSColin Finck // point where offloading the crc32 calculation becomes worth it. 96c2c66affSColin Finck 97c2c66affSColin Finck if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) { 98c2c66affSColin Finck ULONG j; 99c2c66affSColin Finck 100c2c66affSColin Finck for (j = 0; j < sectors; j++) { 101c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 102c2c66affSColin Finck 103c2c66affSColin Finck if (crc32 != csum[j]) { 104c2c66affSColin Finck return STATUS_CRC_ERROR; 105c2c66affSColin Finck } 106c2c66affSColin Finck } 107c2c66affSColin Finck 108c2c66affSColin Finck return STATUS_SUCCESS; 109c2c66affSColin Finck } 110c2c66affSColin Finck 111c2c66affSColin Finck csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG); 112c2c66affSColin Finck if (!csum2) { 113c2c66affSColin Finck ERR("out of memory\n"); 114c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 115c2c66affSColin Finck } 116c2c66affSColin Finck 117c2c66affSColin Finck Status = add_calc_job(Vcb, data, sectors, csum2, &cj); 118c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 119c2c66affSColin Finck ERR("add_calc_job returned %08x\n", Status); 120c2c66affSColin Finck ExFreePool(csum2); 121c2c66affSColin Finck return Status; 122c2c66affSColin Finck } 123c2c66affSColin Finck 124c2c66affSColin Finck KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL); 125c2c66affSColin Finck 126c2c66affSColin Finck if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) { 127c2c66affSColin Finck free_calc_job(cj); 128c2c66affSColin Finck ExFreePool(csum2); 129c2c66affSColin Finck return STATUS_CRC_ERROR; 130c2c66affSColin Finck } 131c2c66affSColin Finck 132c2c66affSColin Finck free_calc_job(cj); 133c2c66affSColin Finck ExFreePool(csum2); 134c2c66affSColin Finck 135c2c66affSColin Finck return STATUS_SUCCESS; 136c2c66affSColin Finck } 137c2c66affSColin Finck 138c2c66affSColin Finck static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, read_data_context* context, CHUNK_ITEM* ci, 139c2c66affSColin Finck device** devices, UINT64 generation) { 140c2c66affSColin Finck ULONG i; 141c2c66affSColin Finck BOOL checksum_error = FALSE; 142c2c66affSColin Finck UINT16 j, stripe = 0; 143c2c66affSColin Finck NTSTATUS Status; 144c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 145c2c66affSColin Finck 146c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 147c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) { 148c2c66affSColin Finck WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 149c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 150c2c66affSColin Finck return context->stripes[j].iosb.Status; 151c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success) { 152c2c66affSColin Finck stripe = j; 153c2c66affSColin Finck break; 154c2c66affSColin Finck } 155c2c66affSColin Finck } 156c2c66affSColin Finck 157c2c66affSColin Finck if (context->stripes[stripe].status != ReadDataStatus_Success) 158c2c66affSColin Finck return STATUS_INTERNAL_ERROR; 159c2c66affSColin Finck 160c2c66affSColin Finck if (context->tree) { 161c2c66affSColin Finck tree_header* th = (tree_header*)buf; 162c2c66affSColin Finck UINT32 crc32; 163c2c66affSColin Finck 164c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum)); 165c2c66affSColin Finck 166c2c66affSColin Finck if (th->address != context->address || crc32 != *((UINT32*)th->csum)) { 167c2c66affSColin Finck checksum_error = TRUE; 168c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 169c2c66affSColin Finck } else if (generation != 0 && th->generation != generation) { 170c2c66affSColin Finck checksum_error = TRUE; 171c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 172c2c66affSColin Finck } 173c2c66affSColin Finck } else if (context->csum) { 174c2c66affSColin Finck #ifdef DEBUG_STATS 175c2c66affSColin Finck LARGE_INTEGER time1, time2; 176c2c66affSColin Finck 177c2c66affSColin Finck time1 = KeQueryPerformanceCounter(NULL); 178c2c66affSColin Finck #endif 179c2c66affSColin Finck Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum); 180c2c66affSColin Finck 181c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) { 182c2c66affSColin Finck checksum_error = TRUE; 183c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 184c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) { 185c2c66affSColin Finck ERR("check_csum returned %08x\n", Status); 186c2c66affSColin Finck return Status; 187c2c66affSColin Finck } 188c2c66affSColin Finck #ifdef DEBUG_STATS 189c2c66affSColin Finck time2 = KeQueryPerformanceCounter(NULL); 190c2c66affSColin Finck 191c2c66affSColin Finck Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 192c2c66affSColin Finck #endif 193c2c66affSColin Finck } 194c2c66affSColin Finck 195c2c66affSColin Finck if (!checksum_error) 196c2c66affSColin Finck return STATUS_SUCCESS; 197c2c66affSColin Finck 198c2c66affSColin Finck if (ci->num_stripes == 1) 199c2c66affSColin Finck return STATUS_CRC_ERROR; 200c2c66affSColin Finck 201c2c66affSColin Finck if (context->tree) { 202c2c66affSColin Finck tree_header* t2; 203c2c66affSColin Finck BOOL recovered = FALSE; 204c2c66affSColin Finck 205c2c66affSColin Finck t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 206c2c66affSColin Finck if (!t2) { 207c2c66affSColin Finck ERR("out of memory\n"); 208c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 209c2c66affSColin Finck } 210c2c66affSColin Finck 211c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 212c2c66affSColin Finck if (j != stripe && devices[j] && devices[j]->devobj) { 213c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE); 214c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 215c2c66affSColin Finck WARN("sync_read_phys returned %08x\n", Status); 216c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 217c2c66affSColin Finck } else { 218c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 219c2c66affSColin Finck 220c2c66affSColin Finck if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { 221c2c66affSColin Finck RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 222c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 223c2c66affSColin Finck recovered = TRUE; 224c2c66affSColin Finck 225c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 226c2c66affSColin Finck Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart, 227c2c66affSColin Finck t2, Vcb->superblock.node_size); 228c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 229c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 230c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 231c2c66affSColin Finck } 232c2c66affSColin Finck } 233c2c66affSColin Finck 234c2c66affSColin Finck break; 235c2c66affSColin Finck } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) 236c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 237c2c66affSColin Finck else 238c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS); 239c2c66affSColin Finck } 240c2c66affSColin Finck } 241c2c66affSColin Finck } 242c2c66affSColin Finck 243c2c66affSColin Finck if (!recovered) { 244c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr); 245c2c66affSColin Finck ExFreePool(t2); 246c2c66affSColin Finck return STATUS_CRC_ERROR; 247c2c66affSColin Finck } 248c2c66affSColin Finck 249c2c66affSColin Finck ExFreePool(t2); 250c2c66affSColin Finck } else { 251c2c66affSColin Finck ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size; 252c2c66affSColin Finck UINT8* sector; 253c2c66affSColin Finck 254c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 255c2c66affSColin Finck if (!sector) { 256c2c66affSColin Finck ERR("out of memory\n"); 257c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 258c2c66affSColin Finck } 259c2c66affSColin Finck 260c2c66affSColin Finck for (i = 0; i < sectors; i++) { 261c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 262c2c66affSColin Finck 263c2c66affSColin Finck if (context->csum[i] != crc32) { 264c2c66affSColin Finck BOOL recovered = FALSE; 265c2c66affSColin Finck 266c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 267c2c66affSColin Finck if (j != stripe && devices[j] && devices[j]->devobj) { 268c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 269c2c66affSColin Finck Vcb->superblock.sector_size, sector, FALSE); 270c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 271c2c66affSColin Finck WARN("sync_read_phys returned %08x\n", Status); 272c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 273c2c66affSColin Finck } else { 274c2c66affSColin Finck UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 275c2c66affSColin Finck 276c2c66affSColin Finck if (crc32b == context->csum[i]) { 277c2c66affSColin Finck RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 278c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 279c2c66affSColin Finck recovered = TRUE; 280c2c66affSColin Finck 281c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad 282c2c66affSColin Finck Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size), 283c2c66affSColin Finck sector, Vcb->superblock.sector_size); 284c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 285c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 286c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 287c2c66affSColin Finck } 288c2c66affSColin Finck } 289c2c66affSColin Finck 290c2c66affSColin Finck break; 291c2c66affSColin Finck } else 292c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 293c2c66affSColin Finck } 294c2c66affSColin Finck } 295c2c66affSColin Finck } 296c2c66affSColin Finck 297c2c66affSColin Finck if (!recovered) { 298c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 299c2c66affSColin Finck ExFreePool(sector); 300c2c66affSColin Finck return STATUS_CRC_ERROR; 301c2c66affSColin Finck } 302c2c66affSColin Finck } 303c2c66affSColin Finck } 304c2c66affSColin Finck 305c2c66affSColin Finck ExFreePool(sector); 306c2c66affSColin Finck } 307c2c66affSColin Finck 308c2c66affSColin Finck return STATUS_SUCCESS; 309c2c66affSColin Finck } 310c2c66affSColin Finck 311c2c66affSColin Finck static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, 312c2c66affSColin Finck CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { 313c2c66affSColin Finck UINT64 i; 314c2c66affSColin Finck 315c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 316c2c66affSColin Finck if (context->stripes[i].status == ReadDataStatus_Error) { 317c2c66affSColin Finck WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); 318c2c66affSColin Finck log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS); 319c2c66affSColin Finck return context->stripes[i].iosb.Status; 320c2c66affSColin Finck } 321c2c66affSColin Finck } 322c2c66affSColin Finck 323c2c66affSColin Finck if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries 324c2c66affSColin Finck tree_header* th = (tree_header*)buf; 325c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 326c2c66affSColin Finck 327c2c66affSColin Finck if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) { 328c2c66affSColin Finck UINT64 off; 329c2c66affSColin Finck UINT16 stripe; 330c2c66affSColin Finck 331c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe); 332c2c66affSColin Finck 333c2c66affSColin Finck ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 334c2c66affSColin Finck 335c2c66affSColin Finck if (crc32 != *((UINT32*)th->csum)) { 336c2c66affSColin Finck WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); 337c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 338c2c66affSColin Finck return STATUS_CRC_ERROR; 339c2c66affSColin Finck } else if (addr != th->address) { 340c2c66affSColin Finck WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); 341c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 342c2c66affSColin Finck return STATUS_CRC_ERROR; 343c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) { 344c2c66affSColin Finck WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); 345c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 346c2c66affSColin Finck return STATUS_CRC_ERROR; 347c2c66affSColin Finck } 348c2c66affSColin Finck } 349c2c66affSColin Finck } else if (context->csum) { 350c2c66affSColin Finck NTSTATUS Status; 351c2c66affSColin Finck #ifdef DEBUG_STATS 352c2c66affSColin Finck LARGE_INTEGER time1, time2; 353c2c66affSColin Finck 354c2c66affSColin Finck time1 = KeQueryPerformanceCounter(NULL); 355c2c66affSColin Finck #endif 356c2c66affSColin Finck Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 357c2c66affSColin Finck 358c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) { 359c2c66affSColin Finck for (i = 0; i < length / Vcb->superblock.sector_size; i++) { 360c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 361c2c66affSColin Finck 362c2c66affSColin Finck if (context->csum[i] != crc32) { 363c2c66affSColin Finck UINT64 off; 364c2c66affSColin Finck UINT16 stripe; 365c2c66affSColin Finck 366c2c66affSColin Finck get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe); 367c2c66affSColin Finck 368c2c66affSColin Finck ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 369c2c66affSColin Finck 370c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 371c2c66affSColin Finck 372c2c66affSColin Finck return Status; 373c2c66affSColin Finck } 374c2c66affSColin Finck } 375c2c66affSColin Finck 376c2c66affSColin Finck return Status; 377c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) { 378c2c66affSColin Finck ERR("check_csum returned %08x\n", Status); 379c2c66affSColin Finck return Status; 380c2c66affSColin Finck } 381c2c66affSColin Finck #ifdef DEBUG_STATS 382c2c66affSColin Finck time2 = KeQueryPerformanceCounter(NULL); 383c2c66affSColin Finck 384c2c66affSColin Finck Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 385c2c66affSColin Finck #endif 386c2c66affSColin Finck } 387c2c66affSColin Finck 388c2c66affSColin Finck return STATUS_SUCCESS; 389c2c66affSColin Finck } 390c2c66affSColin Finck 391c2c66affSColin Finck static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, 392c2c66affSColin Finck CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) { 393c2c66affSColin Finck UINT64 i; 394c2c66affSColin Finck UINT16 j, stripe; 395c2c66affSColin Finck NTSTATUS Status; 396c2c66affSColin Finck BOOL checksum_error = FALSE; 397c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 398c2c66affSColin Finck 399c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 400c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) { 401c2c66affSColin Finck WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status); 402c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 403c2c66affSColin Finck return context->stripes[j].iosb.Status; 404c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success) 405c2c66affSColin Finck stripe = j; 406c2c66affSColin Finck } 407c2c66affSColin Finck 408c2c66affSColin Finck if (context->tree) { 409c2c66affSColin Finck tree_header* th = (tree_header*)buf; 410c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 411c2c66affSColin Finck 412c2c66affSColin Finck if (crc32 != *((UINT32*)th->csum)) { 413c2c66affSColin Finck WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum)); 414c2c66affSColin Finck checksum_error = TRUE; 415c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 416c2c66affSColin Finck } else if (addr != th->address) { 417c2c66affSColin Finck WARN("address of tree was %llx, not %llx as expected\n", th->address, addr); 418c2c66affSColin Finck checksum_error = TRUE; 419c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 420c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) { 421c2c66affSColin Finck WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation); 422c2c66affSColin Finck checksum_error = TRUE; 423c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 424c2c66affSColin Finck } 425c2c66affSColin Finck } else if (context->csum) { 426c2c66affSColin Finck #ifdef DEBUG_STATS 427c2c66affSColin Finck LARGE_INTEGER time1, time2; 428c2c66affSColin Finck 429c2c66affSColin Finck time1 = KeQueryPerformanceCounter(NULL); 430c2c66affSColin Finck #endif 431c2c66affSColin Finck Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 432c2c66affSColin Finck 433c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) 434c2c66affSColin Finck checksum_error = TRUE; 435c2c66affSColin Finck else if (!NT_SUCCESS(Status)) { 436c2c66affSColin Finck ERR("check_csum returned %08x\n", Status); 437c2c66affSColin Finck return Status; 438c2c66affSColin Finck } 439c2c66affSColin Finck #ifdef DEBUG_STATS 440c2c66affSColin Finck time2 = KeQueryPerformanceCounter(NULL); 441c2c66affSColin Finck 442c2c66affSColin Finck Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 443c2c66affSColin Finck #endif 444c2c66affSColin Finck } 445c2c66affSColin Finck 446c2c66affSColin Finck if (!checksum_error) 447c2c66affSColin Finck return STATUS_SUCCESS; 448c2c66affSColin Finck 449c2c66affSColin Finck if (context->tree) { 450c2c66affSColin Finck tree_header* t2; 451c2c66affSColin Finck UINT64 off; 452c2c66affSColin Finck UINT16 badsubstripe = 0; 453c2c66affSColin Finck BOOL recovered = FALSE; 454c2c66affSColin Finck 455c2c66affSColin Finck t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG); 456c2c66affSColin Finck if (!t2) { 457c2c66affSColin Finck ERR("out of memory\n"); 458c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 459c2c66affSColin Finck } 460c2c66affSColin Finck 461c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe); 462c2c66affSColin Finck 463c2c66affSColin Finck stripe *= ci->sub_stripes; 464c2c66affSColin Finck 465c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) { 466c2c66affSColin Finck if (context->stripes[stripe + j].status == ReadDataStatus_Success) { 467c2c66affSColin Finck badsubstripe = j; 468c2c66affSColin Finck break; 469c2c66affSColin Finck } 470c2c66affSColin Finck } 471c2c66affSColin Finck 472c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) { 473c2c66affSColin Finck if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) { 474c2c66affSColin Finck Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off, 475c2c66affSColin Finck Vcb->superblock.node_size, (UINT8*)t2, FALSE); 476c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 477c2c66affSColin Finck WARN("sync_read_phys returned %08x\n", Status); 478c2c66affSColin Finck log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS); 479c2c66affSColin Finck } else { 480c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum)); 481c2c66affSColin Finck 482c2c66affSColin Finck if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) { 483c2c66affSColin Finck RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 484c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id); 485c2c66affSColin Finck recovered = TRUE; 486c2c66affSColin Finck 487c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad 488c2c66affSColin Finck Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off, 489c2c66affSColin Finck t2, Vcb->superblock.node_size); 490c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 491c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 492c2c66affSColin Finck log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 493c2c66affSColin Finck } 494c2c66affSColin Finck } 495c2c66affSColin Finck 496c2c66affSColin Finck break; 497c2c66affSColin Finck } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum)) 498c2c66affSColin Finck log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 499c2c66affSColin Finck else 500c2c66affSColin Finck log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS); 501c2c66affSColin Finck } 502c2c66affSColin Finck } 503c2c66affSColin Finck } 504c2c66affSColin Finck 505c2c66affSColin Finck if (!recovered) { 506c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr); 507c2c66affSColin Finck ExFreePool(t2); 508c2c66affSColin Finck return STATUS_CRC_ERROR; 509c2c66affSColin Finck } 510c2c66affSColin Finck 511c2c66affSColin Finck ExFreePool(t2); 512c2c66affSColin Finck } else { 513c2c66affSColin Finck ULONG sectors = length / Vcb->superblock.sector_size; 514c2c66affSColin Finck UINT8* sector; 515c2c66affSColin Finck 516c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG); 517c2c66affSColin Finck if (!sector) { 518c2c66affSColin Finck ERR("out of memory\n"); 519c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 520c2c66affSColin Finck } 521c2c66affSColin Finck 522c2c66affSColin Finck for (i = 0; i < sectors; i++) { 523c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 524c2c66affSColin Finck 525c2c66affSColin Finck if (context->csum[i] != crc32) { 526c2c66affSColin Finck UINT64 off; 527c2c66affSColin Finck UINT16 stripe2, badsubstripe = 0; 528c2c66affSColin Finck BOOL recovered = FALSE; 529c2c66affSColin Finck 530c2c66affSColin Finck get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 531c2c66affSColin Finck ci->num_stripes / ci->sub_stripes, &off, &stripe2); 532c2c66affSColin Finck 533c2c66affSColin Finck stripe2 *= ci->sub_stripes; 534c2c66affSColin Finck 535c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) { 536c2c66affSColin Finck if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) { 537c2c66affSColin Finck badsubstripe = j; 538c2c66affSColin Finck break; 539c2c66affSColin Finck } 540c2c66affSColin Finck } 541c2c66affSColin Finck 542c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 543c2c66affSColin Finck 544c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) { 545c2c66affSColin Finck if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) { 546c2c66affSColin Finck Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off, 547c2c66affSColin Finck Vcb->superblock.sector_size, sector, FALSE); 548c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 549c2c66affSColin Finck WARN("sync_read_phys returned %08x\n", Status); 550c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS); 551c2c66affSColin Finck } else { 552c2c66affSColin Finck UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 553c2c66affSColin Finck 554c2c66affSColin Finck if (crc32b == context->csum[i]) { 555c2c66affSColin Finck RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 556c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id); 557c2c66affSColin Finck recovered = TRUE; 558c2c66affSColin Finck 559c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad 560c2c66affSColin Finck Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off, 561c2c66affSColin Finck sector, Vcb->superblock.sector_size); 562c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 563c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 564c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS); 565c2c66affSColin Finck } 566c2c66affSColin Finck } 567c2c66affSColin Finck 568c2c66affSColin Finck break; 569c2c66affSColin Finck } else 570c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 571c2c66affSColin Finck } 572c2c66affSColin Finck } 573c2c66affSColin Finck } 574c2c66affSColin Finck 575c2c66affSColin Finck if (!recovered) { 576c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 577c2c66affSColin Finck ExFreePool(sector); 578c2c66affSColin Finck return STATUS_CRC_ERROR; 579c2c66affSColin Finck } 580c2c66affSColin Finck } 581c2c66affSColin Finck } 582c2c66affSColin Finck 583c2c66affSColin Finck ExFreePool(sector); 584c2c66affSColin Finck } 585c2c66affSColin Finck 586c2c66affSColin Finck return STATUS_SUCCESS; 587c2c66affSColin Finck } 588c2c66affSColin Finck 589c2c66affSColin Finck static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci, 590c2c66affSColin Finck device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) { 591c2c66affSColin Finck ULONG i; 592c2c66affSColin Finck NTSTATUS Status; 593c2c66affSColin Finck BOOL checksum_error = FALSE; 594c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 595c2c66affSColin Finck UINT16 j, stripe; 596c2c66affSColin Finck BOOL no_success = TRUE; 597c2c66affSColin Finck 598c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 599c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) { 600c2c66affSColin Finck WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 601c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 602c2c66affSColin Finck return context->stripes[j].iosb.Status; 603c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success) { 604c2c66affSColin Finck stripe = j; 605c2c66affSColin Finck no_success = FALSE; 606c2c66affSColin Finck } 607c2c66affSColin Finck } 608c2c66affSColin Finck 609c2c66affSColin Finck if (c) { // check partial stripes 610c2c66affSColin Finck LIST_ENTRY* le; 611c2c66affSColin Finck UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length; 612c2c66affSColin Finck 613c2c66affSColin Finck ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE); 614c2c66affSColin Finck 615c2c66affSColin Finck le = c->partial_stripes.Flink; 616c2c66affSColin Finck while (le != &c->partial_stripes) { 617c2c66affSColin Finck partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 618c2c66affSColin Finck 619c2c66affSColin Finck if (ps->address + ps_length > addr && ps->address < addr + length) { 620c2c66affSColin Finck ULONG runlength, index; 621c2c66affSColin Finck 622c2c66affSColin Finck runlength = RtlFindFirstRunClear(&ps->bmp, &index); 623c2c66affSColin Finck 624c2c66affSColin Finck while (runlength != 0) { 625c2c66affSColin Finck UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size); 626c2c66affSColin Finck UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); 627c2c66affSColin Finck UINT64 start = max(runstart, addr); 628c2c66affSColin Finck UINT64 end = min(runend, addr + length); 629c2c66affSColin Finck 630c2c66affSColin Finck if (end > start) 631c2c66affSColin Finck RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start)); 632c2c66affSColin Finck 633c2c66affSColin Finck runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); 634c2c66affSColin Finck } 635c2c66affSColin Finck } else if (ps->address >= addr + length) 636c2c66affSColin Finck break; 637c2c66affSColin Finck 638c2c66affSColin Finck le = le->Flink; 639c2c66affSColin Finck } 640c2c66affSColin Finck 641c2c66affSColin Finck ExReleaseResourceLite(&c->partial_stripes_lock); 642c2c66affSColin Finck } 643c2c66affSColin Finck 644c2c66affSColin Finck if (context->tree) { 645c2c66affSColin Finck tree_header* th = (tree_header*)buf; 646c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 647c2c66affSColin Finck 648c2c66affSColin Finck if (addr != th->address || crc32 != *((UINT32*)th->csum)) { 649c2c66affSColin Finck checksum_error = TRUE; 650c2c66affSColin Finck if (!no_success && !degraded) 651c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 652c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) { 653c2c66affSColin Finck checksum_error = TRUE; 654c2c66affSColin Finck if (!no_success && !degraded) 655c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 656c2c66affSColin Finck } 657c2c66affSColin Finck } else if (context->csum) { 658c2c66affSColin Finck #ifdef DEBUG_STATS 659c2c66affSColin Finck LARGE_INTEGER time1, time2; 660c2c66affSColin Finck 661c2c66affSColin Finck time1 = KeQueryPerformanceCounter(NULL); 662c2c66affSColin Finck #endif 663c2c66affSColin Finck Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 664c2c66affSColin Finck 665c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) { 666c2c66affSColin Finck if (!degraded) 667c2c66affSColin Finck WARN("checksum error\n"); 668c2c66affSColin Finck checksum_error = TRUE; 669c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) { 670c2c66affSColin Finck ERR("check_csum returned %08x\n", Status); 671c2c66affSColin Finck return Status; 672c2c66affSColin Finck } 673c2c66affSColin Finck 674c2c66affSColin Finck #ifdef DEBUG_STATS 675c2c66affSColin Finck time2 = KeQueryPerformanceCounter(NULL); 676c2c66affSColin Finck 677c2c66affSColin Finck Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 678c2c66affSColin Finck #endif 679c2c66affSColin Finck } else if (degraded) 680c2c66affSColin Finck checksum_error = TRUE; 681c2c66affSColin Finck 682c2c66affSColin Finck if (!checksum_error) 683c2c66affSColin Finck return STATUS_SUCCESS; 684c2c66affSColin Finck 685c2c66affSColin Finck if (context->tree) { 686c2c66affSColin Finck UINT16 parity; 687c2c66affSColin Finck UINT64 off; 688c2c66affSColin Finck BOOL recovered = FALSE, first = TRUE, failed = FALSE; 689c2c66affSColin Finck UINT8* t2; 690c2c66affSColin Finck 691c2c66affSColin Finck t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG); 692c2c66affSColin Finck if (!t2) { 693c2c66affSColin Finck ERR("out of memory\n"); 694c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 695c2c66affSColin Finck } 696c2c66affSColin Finck 697c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe); 698c2c66affSColin Finck 699c2c66affSColin Finck parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 700c2c66affSColin Finck 701c2c66affSColin Finck stripe = (parity + stripe + 1) % ci->num_stripes; 702c2c66affSColin Finck 703c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 704c2c66affSColin Finck if (j != stripe) { 705c2c66affSColin Finck if (devices[j] && devices[j]->devobj) { 706c2c66affSColin Finck if (first) { 707c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE); 708c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 709c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 710c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 711c2c66affSColin Finck failed = TRUE; 712c2c66affSColin Finck break; 713c2c66affSColin Finck } 714c2c66affSColin Finck 715c2c66affSColin Finck first = FALSE; 716c2c66affSColin Finck } else { 717c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE); 718c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 719c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 720c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 721c2c66affSColin Finck failed = TRUE; 722c2c66affSColin Finck break; 723c2c66affSColin Finck } 724c2c66affSColin Finck 725c2c66affSColin Finck do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size); 726c2c66affSColin Finck } 727c2c66affSColin Finck } else { 728c2c66affSColin Finck failed = TRUE; 729c2c66affSColin Finck break; 730c2c66affSColin Finck } 731c2c66affSColin Finck } 732c2c66affSColin Finck } 733c2c66affSColin Finck 734c2c66affSColin Finck if (!failed) { 735c2c66affSColin Finck tree_header* t3 = (tree_header*)t2; 736c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum)); 737c2c66affSColin Finck 738c2c66affSColin Finck if (t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) { 739c2c66affSColin Finck RtlCopyMemory(buf, t2, Vcb->superblock.node_size); 740c2c66affSColin Finck 741c2c66affSColin Finck if (!degraded) 742c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id); 743c2c66affSColin Finck 744c2c66affSColin Finck recovered = TRUE; 745c2c66affSColin Finck 746c2c66affSColin Finck if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 747c2c66affSColin Finck Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size); 748c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 749c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 750c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 751c2c66affSColin Finck } 752c2c66affSColin Finck } 753c2c66affSColin Finck } 754c2c66affSColin Finck } 755c2c66affSColin Finck 756c2c66affSColin Finck if (!recovered) { 757c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr); 758c2c66affSColin Finck ExFreePool(t2); 759c2c66affSColin Finck return STATUS_CRC_ERROR; 760c2c66affSColin Finck } 761c2c66affSColin Finck 762c2c66affSColin Finck ExFreePool(t2); 763c2c66affSColin Finck } else { 764c2c66affSColin Finck ULONG sectors = length / Vcb->superblock.sector_size; 765c2c66affSColin Finck UINT8* sector; 766c2c66affSColin Finck 767c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG); 768c2c66affSColin Finck if (!sector) { 769c2c66affSColin Finck ERR("out of memory\n"); 770c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 771c2c66affSColin Finck } 772c2c66affSColin Finck 773c2c66affSColin Finck for (i = 0; i < sectors; i++) { 774c2c66affSColin Finck UINT16 parity; 775c2c66affSColin Finck UINT64 off; 776c2c66affSColin Finck UINT32 crc32; 777c2c66affSColin Finck 778c2c66affSColin Finck if (context->csum) 779c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 780c2c66affSColin Finck 781c2c66affSColin Finck get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 782c2c66affSColin Finck ci->num_stripes - 1, &off, &stripe); 783c2c66affSColin Finck 784c2c66affSColin Finck parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 785c2c66affSColin Finck 786c2c66affSColin Finck stripe = (parity + stripe + 1) % ci->num_stripes; 787c2c66affSColin Finck 788c2c66affSColin Finck if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) { 789c2c66affSColin Finck BOOL recovered = FALSE, first = TRUE, failed = FALSE; 790c2c66affSColin Finck 791c2c66affSColin Finck if (devices[stripe] && devices[stripe]->devobj) 792c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS); 793c2c66affSColin Finck 794c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 795c2c66affSColin Finck if (j != stripe) { 796c2c66affSColin Finck if (devices[j] && devices[j]->devobj) { 797c2c66affSColin Finck if (first) { 798c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE); 799c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 800c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 801c2c66affSColin Finck failed = TRUE; 802c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 803c2c66affSColin Finck break; 804c2c66affSColin Finck } 805c2c66affSColin Finck 806c2c66affSColin Finck first = FALSE; 807c2c66affSColin Finck } else { 808c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE); 809c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 810c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 811c2c66affSColin Finck failed = TRUE; 812c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 813c2c66affSColin Finck break; 814c2c66affSColin Finck } 815c2c66affSColin Finck 816c2c66affSColin Finck do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size); 817c2c66affSColin Finck } 818c2c66affSColin Finck } else { 819c2c66affSColin Finck failed = TRUE; 820c2c66affSColin Finck break; 821c2c66affSColin Finck } 822c2c66affSColin Finck } 823c2c66affSColin Finck } 824c2c66affSColin Finck 825c2c66affSColin Finck if (!failed) { 826c2c66affSColin Finck if (context->csum) 827c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size); 828c2c66affSColin Finck 829c2c66affSColin Finck if (!context->csum || crc32 == context->csum[i]) { 830c2c66affSColin Finck RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size); 831c2c66affSColin Finck 832c2c66affSColin Finck if (!degraded) 833c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id); 834c2c66affSColin Finck 835c2c66affSColin Finck recovered = TRUE; 836c2c66affSColin Finck 837c2c66affSColin Finck if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad 838c2c66affSColin Finck Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, 839c2c66affSColin Finck sector, Vcb->superblock.sector_size); 840c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 841c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 842c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS); 843c2c66affSColin Finck } 844c2c66affSColin Finck } 845c2c66affSColin Finck } 846c2c66affSColin Finck } 847c2c66affSColin Finck 848c2c66affSColin Finck if (!recovered) { 849c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 850c2c66affSColin Finck ExFreePool(sector); 851c2c66affSColin Finck return STATUS_CRC_ERROR; 852c2c66affSColin Finck } 853c2c66affSColin Finck } 854c2c66affSColin Finck } 855c2c66affSColin Finck 856c2c66affSColin Finck ExFreePool(sector); 857c2c66affSColin Finck } 858c2c66affSColin Finck 859c2c66affSColin Finck return STATUS_SUCCESS; 860c2c66affSColin Finck } 861c2c66affSColin Finck 862c2c66affSColin Finck void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) { 863c2c66affSColin Finck if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data 864c2c66affSColin Finck UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1; 865c2c66affSColin Finck UINT16 stripe; 866c2c66affSColin Finck 867c2c66affSColin Finck stripe = num_stripes - 3; 868c2c66affSColin Finck 869c2c66affSColin Finck if (stripe == missing) 870c2c66affSColin Finck RtlZeroMemory(out, sector_size); 871c2c66affSColin Finck else 872c2c66affSColin Finck RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size); 873c2c66affSColin Finck 874c2c66affSColin Finck do { 875c2c66affSColin Finck stripe--; 876c2c66affSColin Finck 877c2c66affSColin Finck galois_double(out, sector_size); 878c2c66affSColin Finck 879c2c66affSColin Finck if (stripe != missing) 880c2c66affSColin Finck do_xor(out, sectors + (stripe * sector_size), sector_size); 881c2c66affSColin Finck } while (stripe > 0); 882c2c66affSColin Finck 883c2c66affSColin Finck do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size); 884c2c66affSColin Finck 885c2c66affSColin Finck if (missing != 0) 886c2c66affSColin Finck galois_divpower(out, (UINT8)missing, sector_size); 887c2c66affSColin Finck } else { // reconstruct from p and q 888c2c66affSColin Finck UINT16 x, y, stripe; 889c2c66affSColin Finck UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy; 890c2c66affSColin Finck UINT32 j; 891c2c66affSColin Finck 892c2c66affSColin Finck stripe = num_stripes - 3; 893c2c66affSColin Finck 894c2c66affSColin Finck pxy = out + sector_size; 895c2c66affSColin Finck qxy = out; 896c2c66affSColin Finck 897c2c66affSColin Finck if (stripe == missing1 || stripe == missing2) { 898c2c66affSColin Finck RtlZeroMemory(qxy, sector_size); 899c2c66affSColin Finck RtlZeroMemory(pxy, sector_size); 900c2c66affSColin Finck 901c2c66affSColin Finck if (stripe == missing1) 902c2c66affSColin Finck x = stripe; 903c2c66affSColin Finck else 904c2c66affSColin Finck y = stripe; 905c2c66affSColin Finck } else { 906c2c66affSColin Finck RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size); 907c2c66affSColin Finck RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size); 908c2c66affSColin Finck } 909c2c66affSColin Finck 910c2c66affSColin Finck do { 911c2c66affSColin Finck stripe--; 912c2c66affSColin Finck 913c2c66affSColin Finck galois_double(qxy, sector_size); 914c2c66affSColin Finck 915c2c66affSColin Finck if (stripe != missing1 && stripe != missing2) { 916c2c66affSColin Finck do_xor(qxy, sectors + (stripe * sector_size), sector_size); 917c2c66affSColin Finck do_xor(pxy, sectors + (stripe * sector_size), sector_size); 918c2c66affSColin Finck } else if (stripe == missing1) 919c2c66affSColin Finck x = stripe; 920c2c66affSColin Finck else if (stripe == missing2) 921c2c66affSColin Finck y = stripe; 922c2c66affSColin Finck } while (stripe > 0); 923c2c66affSColin Finck 924c2c66affSColin Finck gyx = gpow2(y > x ? (y-x) : (255-x+y)); 925c2c66affSColin Finck gx = gpow2(255-x); 926c2c66affSColin Finck 927c2c66affSColin Finck denom = gdiv(1, gyx ^ 1); 928c2c66affSColin Finck a = gmul(gyx, denom); 929c2c66affSColin Finck b = gmul(gx, denom); 930c2c66affSColin Finck 931c2c66affSColin Finck p = sectors + ((num_stripes - 2) * sector_size); 932c2c66affSColin Finck q = sectors + ((num_stripes - 1) * sector_size); 933c2c66affSColin Finck 934c2c66affSColin Finck for (j = 0; j < sector_size; j++) { 935c2c66affSColin Finck *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy); 936c2c66affSColin Finck 937c2c66affSColin Finck p++; 938c2c66affSColin Finck q++; 939c2c66affSColin Finck pxy++; 940c2c66affSColin Finck qxy++; 941c2c66affSColin Finck } 942c2c66affSColin Finck 943c2c66affSColin Finck do_xor(out + sector_size, out, sector_size); 944c2c66affSColin Finck do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size); 945c2c66affSColin Finck } 946c2c66affSColin Finck } 947c2c66affSColin Finck 948c2c66affSColin Finck static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci, 949c2c66affSColin Finck device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) { 950c2c66affSColin Finck NTSTATUS Status; 951c2c66affSColin Finck ULONG i; 952c2c66affSColin Finck BOOL checksum_error = FALSE; 953c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 954c2c66affSColin Finck UINT16 stripe, j; 955c2c66affSColin Finck BOOL no_success = TRUE; 956c2c66affSColin Finck 957c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) { 958c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) { 959c2c66affSColin Finck WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status); 960c2c66affSColin Finck 961c2c66affSColin Finck if (devices[j]) 962c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 963c2c66affSColin Finck return context->stripes[j].iosb.Status; 964c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success) { 965c2c66affSColin Finck stripe = j; 966c2c66affSColin Finck no_success = FALSE; 967c2c66affSColin Finck } 968c2c66affSColin Finck } 969c2c66affSColin Finck 970c2c66affSColin Finck if (c) { // check partial stripes 971c2c66affSColin Finck LIST_ENTRY* le; 972c2c66affSColin Finck UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length; 973c2c66affSColin Finck 974c2c66affSColin Finck ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE); 975c2c66affSColin Finck 976c2c66affSColin Finck le = c->partial_stripes.Flink; 977c2c66affSColin Finck while (le != &c->partial_stripes) { 978c2c66affSColin Finck partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry); 979c2c66affSColin Finck 980c2c66affSColin Finck if (ps->address + ps_length > addr && ps->address < addr + length) { 981c2c66affSColin Finck ULONG runlength, index; 982c2c66affSColin Finck 983c2c66affSColin Finck runlength = RtlFindFirstRunClear(&ps->bmp, &index); 984c2c66affSColin Finck 985c2c66affSColin Finck while (runlength != 0) { 986c2c66affSColin Finck UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size); 987c2c66affSColin Finck UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size); 988c2c66affSColin Finck UINT64 start = max(runstart, addr); 989c2c66affSColin Finck UINT64 end = min(runend, addr + length); 990c2c66affSColin Finck 991c2c66affSColin Finck if (end > start) 992c2c66affSColin Finck RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start)); 993c2c66affSColin Finck 994c2c66affSColin Finck runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index); 995c2c66affSColin Finck } 996c2c66affSColin Finck } else if (ps->address >= addr + length) 997c2c66affSColin Finck break; 998c2c66affSColin Finck 999c2c66affSColin Finck le = le->Flink; 1000c2c66affSColin Finck } 1001c2c66affSColin Finck 1002c2c66affSColin Finck ExReleaseResourceLite(&c->partial_stripes_lock); 1003c2c66affSColin Finck } 1004c2c66affSColin Finck 1005c2c66affSColin Finck if (context->tree) { 1006c2c66affSColin Finck tree_header* th = (tree_header*)buf; 1007c2c66affSColin Finck UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1008c2c66affSColin Finck 1009c2c66affSColin Finck if (addr != th->address || crc32 != *((UINT32*)th->csum)) { 1010c2c66affSColin Finck checksum_error = TRUE; 1011c2c66affSColin Finck if (!no_success && !degraded && devices[stripe]) 1012c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1013c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) { 1014c2c66affSColin Finck checksum_error = TRUE; 1015c2c66affSColin Finck if (!no_success && !degraded && devices[stripe]) 1016c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS); 1017c2c66affSColin Finck } 1018c2c66affSColin Finck } else if (context->csum) { 1019c2c66affSColin Finck #ifdef DEBUG_STATS 1020c2c66affSColin Finck LARGE_INTEGER time1, time2; 1021c2c66affSColin Finck 1022c2c66affSColin Finck time1 = KeQueryPerformanceCounter(NULL); 1023c2c66affSColin Finck #endif 1024c2c66affSColin Finck Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum); 1025c2c66affSColin Finck 1026c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) { 1027c2c66affSColin Finck if (!degraded) 1028c2c66affSColin Finck WARN("checksum error\n"); 1029c2c66affSColin Finck checksum_error = TRUE; 1030c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) { 1031c2c66affSColin Finck ERR("check_csum returned %08x\n", Status); 1032c2c66affSColin Finck return Status; 1033c2c66affSColin Finck } 1034c2c66affSColin Finck #ifdef DEBUG_STATS 1035c2c66affSColin Finck time2 = KeQueryPerformanceCounter(NULL); 1036c2c66affSColin Finck 1037c2c66affSColin Finck Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart; 1038c2c66affSColin Finck #endif 1039c2c66affSColin Finck } else if (degraded) 1040c2c66affSColin Finck checksum_error = TRUE; 1041c2c66affSColin Finck 1042c2c66affSColin Finck if (!checksum_error) 1043c2c66affSColin Finck return STATUS_SUCCESS; 1044c2c66affSColin Finck 1045c2c66affSColin Finck if (context->tree) { 1046c2c66affSColin Finck UINT8* sector; 1047c2c66affSColin Finck UINT16 k, physstripe, parity1, parity2, error_stripe; 1048c2c66affSColin Finck UINT64 off; 1049c2c66affSColin Finck BOOL recovered = FALSE, failed = FALSE; 1050c2c66affSColin Finck ULONG num_errors = 0; 1051c2c66affSColin Finck 1052c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG); 1053c2c66affSColin Finck if (!sector) { 1054c2c66affSColin Finck ERR("out of memory\n"); 1055c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 1056c2c66affSColin Finck } 1057c2c66affSColin Finck 1058c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe); 1059c2c66affSColin Finck 1060c2c66affSColin Finck parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1061c2c66affSColin Finck parity2 = (parity1 + 1) % ci->num_stripes; 1062c2c66affSColin Finck 1063c2c66affSColin Finck physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1064c2c66affSColin Finck 1065c2c66affSColin Finck j = (parity2 + 1) % ci->num_stripes; 1066c2c66affSColin Finck 1067c2c66affSColin Finck for (k = 0; k < ci->num_stripes - 1; k++) { 1068c2c66affSColin Finck if (j != physstripe) { 1069c2c66affSColin Finck if (devices[j] && devices[j]->devobj) { 1070c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE); 1071c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1072c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 1073c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1074c2c66affSColin Finck num_errors++; 1075c2c66affSColin Finck error_stripe = k; 1076c2c66affSColin Finck 1077c2c66affSColin Finck if (num_errors > 1) { 1078c2c66affSColin Finck failed = TRUE; 1079c2c66affSColin Finck break; 1080c2c66affSColin Finck } 1081c2c66affSColin Finck } 1082c2c66affSColin Finck } else { 1083c2c66affSColin Finck num_errors++; 1084c2c66affSColin Finck error_stripe = k; 1085c2c66affSColin Finck 1086c2c66affSColin Finck if (num_errors > 1) { 1087c2c66affSColin Finck failed = TRUE; 1088c2c66affSColin Finck break; 1089c2c66affSColin Finck } 1090c2c66affSColin Finck } 1091c2c66affSColin Finck } 1092c2c66affSColin Finck 1093c2c66affSColin Finck j = (j + 1) % ci->num_stripes; 1094c2c66affSColin Finck } 1095c2c66affSColin Finck 1096c2c66affSColin Finck if (!failed) { 1097c2c66affSColin Finck if (num_errors == 0) { 1098c2c66affSColin Finck tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size)); 1099c2c66affSColin Finck UINT32 crc32; 1100c2c66affSColin Finck 1101c2c66affSColin Finck RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), 1102c2c66affSColin Finck Vcb->superblock.node_size); 1103c2c66affSColin Finck 1104c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) { 1105c2c66affSColin Finck if (j != stripe) 1106c2c66affSColin Finck do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size); 1107c2c66affSColin Finck } 1108c2c66affSColin Finck 1109c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1110c2c66affSColin Finck 1111c2c66affSColin Finck if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) { 1112c2c66affSColin Finck RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1113c2c66affSColin Finck 1114c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj) 1115c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id); 1116c2c66affSColin Finck 1117c2c66affSColin Finck recovered = TRUE; 1118c2c66affSColin Finck 1119c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1120c2c66affSColin Finck Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1121c2c66affSColin Finck sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1122c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1123c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 1124c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1125c2c66affSColin Finck } 1126c2c66affSColin Finck } 1127c2c66affSColin Finck } 1128c2c66affSColin Finck } 1129c2c66affSColin Finck 1130c2c66affSColin Finck if (!recovered) { 1131c2c66affSColin Finck UINT32 crc32; 1132c2c66affSColin Finck tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size)); 1133c2c66affSColin Finck BOOL read_q = FALSE; 1134c2c66affSColin Finck 1135c2c66affSColin Finck if (devices[parity2] && devices[parity2]->devobj) { 1136c2c66affSColin Finck Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off, 1137c2c66affSColin Finck Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE); 1138c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1139c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 1140c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1141c2c66affSColin Finck } else 1142c2c66affSColin Finck read_q = TRUE; 1143c2c66affSColin Finck } 1144c2c66affSColin Finck 1145c2c66affSColin Finck if (read_q) { 1146c2c66affSColin Finck if (num_errors == 1) { 1147c2c66affSColin Finck raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1148c2c66affSColin Finck 1149c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1150c2c66affSColin Finck 1151c2c66affSColin Finck if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) 1152c2c66affSColin Finck recovered = TRUE; 1153c2c66affSColin Finck } else { 1154c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 1; j++) { 1155c2c66affSColin Finck if (j != stripe) { 1156c2c66affSColin Finck raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size)); 1157c2c66affSColin Finck 1158c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum)); 1159c2c66affSColin Finck 1160c2c66affSColin Finck if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) { 1161c2c66affSColin Finck recovered = TRUE; 1162c2c66affSColin Finck error_stripe = j; 1163c2c66affSColin Finck break; 1164c2c66affSColin Finck } 1165c2c66affSColin Finck } 1166c2c66affSColin Finck } 1167c2c66affSColin Finck } 1168c2c66affSColin Finck } 1169c2c66affSColin Finck 1170c2c66affSColin Finck if (recovered) { 1171c2c66affSColin Finck UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1172c2c66affSColin Finck 1173c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj) 1174c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id); 1175c2c66affSColin Finck 1176c2c66affSColin Finck RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1177c2c66affSColin Finck 1178c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1179c2c66affSColin Finck Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1180c2c66affSColin Finck sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size); 1181c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1182c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 1183c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1184c2c66affSColin Finck } 1185c2c66affSColin Finck } 1186c2c66affSColin Finck 1187c2c66affSColin Finck if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1188c2c66affSColin Finck if (error_stripe == ci->num_stripes - 2) { 1189c2c66affSColin Finck ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id); 1190c2c66affSColin Finck 1191c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1192c2c66affSColin Finck 1193c2c66affSColin Finck RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size); 1194c2c66affSColin Finck 1195c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) { 1196c2c66affSColin Finck if (j == stripe) { 1197c2c66affSColin Finck do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size), 1198c2c66affSColin Finck Vcb->superblock.node_size); 1199c2c66affSColin Finck } else { 1200c2c66affSColin Finck do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), 1201c2c66affSColin Finck Vcb->superblock.node_size); 1202c2c66affSColin Finck } 1203c2c66affSColin Finck } 1204c2c66affSColin Finck } else { 1205c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length), 1206c2c66affSColin Finck devices[error_stripe_phys]->devitem.dev_id); 1207c2c66affSColin Finck 1208c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1209c2c66affSColin Finck 1210c2c66affSColin Finck RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size), 1211c2c66affSColin Finck sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size); 1212c2c66affSColin Finck } 1213c2c66affSColin Finck } 1214c2c66affSColin Finck 1215c2c66affSColin Finck if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1216c2c66affSColin Finck Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off, 1217c2c66affSColin Finck sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size); 1218c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1219c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 1220c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 1221c2c66affSColin Finck } 1222c2c66affSColin Finck } 1223c2c66affSColin Finck } 1224c2c66affSColin Finck } 1225c2c66affSColin Finck } 1226c2c66affSColin Finck 1227c2c66affSColin Finck if (!recovered) { 1228c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr); 1229c2c66affSColin Finck ExFreePool(sector); 1230c2c66affSColin Finck return STATUS_CRC_ERROR; 1231c2c66affSColin Finck } 1232c2c66affSColin Finck 1233c2c66affSColin Finck ExFreePool(sector); 1234c2c66affSColin Finck } else { 1235c2c66affSColin Finck ULONG sectors = length / Vcb->superblock.sector_size; 1236c2c66affSColin Finck UINT8* sector; 1237c2c66affSColin Finck 1238c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG); 1239c2c66affSColin Finck if (!sector) { 1240c2c66affSColin Finck ERR("out of memory\n"); 1241c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 1242c2c66affSColin Finck } 1243c2c66affSColin Finck 1244c2c66affSColin Finck for (i = 0; i < sectors; i++) { 1245c2c66affSColin Finck UINT64 off; 1246c2c66affSColin Finck UINT16 physstripe, parity1, parity2; 1247c2c66affSColin Finck UINT32 crc32; 1248c2c66affSColin Finck 1249c2c66affSColin Finck if (context->csum) 1250c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1251c2c66affSColin Finck 1252c2c66affSColin Finck get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, 1253c2c66affSColin Finck ci->num_stripes - 2, &off, &stripe); 1254c2c66affSColin Finck 1255c2c66affSColin Finck parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 1256c2c66affSColin Finck parity2 = (parity1 + 1) % ci->num_stripes; 1257c2c66affSColin Finck 1258c2c66affSColin Finck physstripe = (parity2 + stripe + 1) % ci->num_stripes; 1259c2c66affSColin Finck 1260c2c66affSColin Finck if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) { 1261c2c66affSColin Finck UINT16 k, error_stripe; 1262c2c66affSColin Finck BOOL recovered = FALSE, failed = FALSE; 1263c2c66affSColin Finck ULONG num_errors = 0; 1264c2c66affSColin Finck 1265c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj) 1266c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS); 1267c2c66affSColin Finck 1268c2c66affSColin Finck j = (parity2 + 1) % ci->num_stripes; 1269c2c66affSColin Finck 1270c2c66affSColin Finck for (k = 0; k < ci->num_stripes - 1; k++) { 1271c2c66affSColin Finck if (j != physstripe) { 1272c2c66affSColin Finck if (devices[j] && devices[j]->devobj) { 1273c2c66affSColin Finck Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE); 1274c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1275c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 1276c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS); 1277c2c66affSColin Finck num_errors++; 1278c2c66affSColin Finck error_stripe = k; 1279c2c66affSColin Finck 1280c2c66affSColin Finck if (num_errors > 1) { 1281c2c66affSColin Finck failed = TRUE; 1282c2c66affSColin Finck break; 1283c2c66affSColin Finck } 1284c2c66affSColin Finck } 1285c2c66affSColin Finck } else { 1286c2c66affSColin Finck num_errors++; 1287c2c66affSColin Finck error_stripe = k; 1288c2c66affSColin Finck 1289c2c66affSColin Finck if (num_errors > 1) { 1290c2c66affSColin Finck failed = TRUE; 1291c2c66affSColin Finck break; 1292c2c66affSColin Finck } 1293c2c66affSColin Finck } 1294c2c66affSColin Finck } 1295c2c66affSColin Finck 1296c2c66affSColin Finck j = (j + 1) % ci->num_stripes; 1297c2c66affSColin Finck } 1298c2c66affSColin Finck 1299c2c66affSColin Finck if (!failed) { 1300c2c66affSColin Finck if (num_errors == 0) { 1301c2c66affSColin Finck RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1302c2c66affSColin Finck 1303c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) { 1304c2c66affSColin Finck if (j != stripe) 1305c2c66affSColin Finck do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1306c2c66affSColin Finck } 1307c2c66affSColin Finck 1308c2c66affSColin Finck if (context->csum) 1309c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1310c2c66affSColin Finck 1311c2c66affSColin Finck if (!context->csum || crc32 == context->csum[i]) { 1312c2c66affSColin Finck RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1313c2c66affSColin Finck 1314c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj) 1315c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), 1316c2c66affSColin Finck devices[physstripe]->devitem.dev_id); 1317c2c66affSColin Finck 1318c2c66affSColin Finck recovered = TRUE; 1319c2c66affSColin Finck 1320c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1321c2c66affSColin Finck Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1322c2c66affSColin Finck sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1323c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1324c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 1325c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1326c2c66affSColin Finck } 1327c2c66affSColin Finck } 1328c2c66affSColin Finck } 1329c2c66affSColin Finck } 1330c2c66affSColin Finck 1331c2c66affSColin Finck if (!recovered) { 1332c2c66affSColin Finck BOOL read_q = FALSE; 1333c2c66affSColin Finck 1334c2c66affSColin Finck if (devices[parity2] && devices[parity2]->devobj) { 1335c2c66affSColin Finck Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off, 1336c2c66affSColin Finck Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE); 1337c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1338c2c66affSColin Finck ERR("sync_read_phys returned %08x\n", Status); 1339c2c66affSColin Finck log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS); 1340c2c66affSColin Finck } else 1341c2c66affSColin Finck read_q = TRUE; 1342c2c66affSColin Finck } 1343c2c66affSColin Finck 1344c2c66affSColin Finck if (read_q) { 1345c2c66affSColin Finck if (num_errors == 1) { 1346c2c66affSColin Finck raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size)); 1347c2c66affSColin Finck 1348c2c66affSColin Finck if (!devices[physstripe] || !devices[physstripe]->devobj) 1349c2c66affSColin Finck recovered = TRUE; 1350c2c66affSColin Finck else { 1351c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1352c2c66affSColin Finck 1353c2c66affSColin Finck if (crc32 == context->csum[i]) 1354c2c66affSColin Finck recovered = TRUE; 1355c2c66affSColin Finck } 1356c2c66affSColin Finck } else { 1357c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 1; j++) { 1358c2c66affSColin Finck if (j != stripe) { 1359c2c66affSColin Finck raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size)); 1360c2c66affSColin Finck 1361c2c66affSColin Finck crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1362c2c66affSColin Finck 1363c2c66affSColin Finck if (crc32 == context->csum[i]) { 1364c2c66affSColin Finck recovered = TRUE; 1365c2c66affSColin Finck error_stripe = j; 1366c2c66affSColin Finck break; 1367c2c66affSColin Finck } 1368c2c66affSColin Finck } 1369c2c66affSColin Finck } 1370c2c66affSColin Finck } 1371c2c66affSColin Finck } 1372c2c66affSColin Finck 1373c2c66affSColin Finck if (recovered) { 1374c2c66affSColin Finck UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes; 1375c2c66affSColin Finck 1376c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj) 1377c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", 1378c2c66affSColin Finck addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id); 1379c2c66affSColin Finck 1380c2c66affSColin Finck RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1381c2c66affSColin Finck 1382c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad 1383c2c66affSColin Finck Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off, 1384c2c66affSColin Finck sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1385c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1386c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 1387c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS); 1388c2c66affSColin Finck } 1389c2c66affSColin Finck } 1390c2c66affSColin Finck 1391c2c66affSColin Finck if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) { 1392c2c66affSColin Finck if (error_stripe == ci->num_stripes - 2) { 1393c2c66affSColin Finck ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), 1394c2c66affSColin Finck devices[error_stripe_phys]->devitem.dev_id); 1395c2c66affSColin Finck 1396c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1397c2c66affSColin Finck 1398c2c66affSColin Finck RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1399c2c66affSColin Finck 1400c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) { 1401c2c66affSColin Finck if (j == stripe) { 1402c2c66affSColin Finck do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), 1403c2c66affSColin Finck Vcb->superblock.sector_size); 1404c2c66affSColin Finck } else { 1405c2c66affSColin Finck do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), 1406c2c66affSColin Finck Vcb->superblock.sector_size); 1407c2c66affSColin Finck } 1408c2c66affSColin Finck } 1409c2c66affSColin Finck } else { 1410c2c66affSColin Finck ERR("recovering from checksum error at %llx, device %llx\n", 1411c2c66affSColin Finck addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length), 1412c2c66affSColin Finck devices[error_stripe_phys]->devitem.dev_id); 1413c2c66affSColin Finck 1414c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS); 1415c2c66affSColin Finck 1416c2c66affSColin Finck RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size), 1417c2c66affSColin Finck sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1418c2c66affSColin Finck } 1419c2c66affSColin Finck } 1420c2c66affSColin Finck 1421c2c66affSColin Finck if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad 1422c2c66affSColin Finck Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off, 1423c2c66affSColin Finck sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size); 1424c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1425c2c66affSColin Finck WARN("write_data_phys returned %08x\n", Status); 1426c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS); 1427c2c66affSColin Finck } 1428c2c66affSColin Finck } 1429c2c66affSColin Finck } 1430c2c66affSColin Finck } 1431c2c66affSColin Finck } 1432c2c66affSColin Finck 1433c2c66affSColin Finck if (!recovered) { 1434c2c66affSColin Finck ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size)); 1435c2c66affSColin Finck ExFreePool(sector); 1436c2c66affSColin Finck return STATUS_CRC_ERROR; 1437c2c66affSColin Finck } 1438c2c66affSColin Finck } 1439c2c66affSColin Finck } 1440c2c66affSColin Finck 1441c2c66affSColin Finck ExFreePool(sector); 1442c2c66affSColin Finck } 1443c2c66affSColin Finck 1444c2c66affSColin Finck return STATUS_SUCCESS; 1445c2c66affSColin Finck } 1446c2c66affSColin Finck 1447c2c66affSColin Finck NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum, 1448c2c66affSColin Finck _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read, 1449c2c66affSColin Finck _In_ ULONG priority) { 1450c2c66affSColin Finck CHUNK_ITEM* ci; 1451c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis; 1452c2c66affSColin Finck read_data_context context; 1453c2c66affSColin Finck UINT64 type, offset, total_reading = 0; 1454c2c66affSColin Finck NTSTATUS Status; 1455c2c66affSColin Finck device** devices = NULL; 1456c2c66affSColin Finck UINT16 i, startoffstripe, allowed_missing, missing_devices = 0; 1457c2c66affSColin Finck UINT8* dummypage = NULL; 1458c2c66affSColin Finck PMDL dummy_mdl = NULL; 1459c2c66affSColin Finck BOOL need_to_wait; 1460c2c66affSColin Finck UINT64 lockaddr, locklen; 1461c2c66affSColin Finck #ifdef DEBUG_STATS 1462c2c66affSColin Finck LARGE_INTEGER time1, time2; 1463c2c66affSColin Finck #endif 1464c2c66affSColin Finck 1465c2c66affSColin Finck if (Vcb->log_to_phys_loaded) { 1466c2c66affSColin Finck if (!c) { 1467c2c66affSColin Finck c = get_chunk_from_address(Vcb, addr); 1468c2c66affSColin Finck 1469c2c66affSColin Finck if (!c) { 1470c2c66affSColin Finck ERR("get_chunk_from_address failed\n"); 1471c2c66affSColin Finck return STATUS_INTERNAL_ERROR; 1472c2c66affSColin Finck } 1473c2c66affSColin Finck } 1474c2c66affSColin Finck 1475c2c66affSColin Finck ci = c->chunk_item; 1476c2c66affSColin Finck offset = c->offset; 1477c2c66affSColin Finck devices = c->devices; 1478c2c66affSColin Finck 1479c2c66affSColin Finck if (pc) 1480c2c66affSColin Finck *pc = c; 1481c2c66affSColin Finck } else { 1482c2c66affSColin Finck LIST_ENTRY* le = Vcb->sys_chunks.Flink; 1483c2c66affSColin Finck 1484c2c66affSColin Finck ci = NULL; 1485c2c66affSColin Finck 1486c2c66affSColin Finck c = NULL; 1487c2c66affSColin Finck while (le != &Vcb->sys_chunks) { 1488c2c66affSColin Finck sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry); 1489c2c66affSColin Finck 1490c2c66affSColin Finck if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) { 1491c2c66affSColin Finck CHUNK_ITEM* chunk_item = sc->data; 1492c2c66affSColin Finck 1493c2c66affSColin Finck if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) { 1494c2c66affSColin Finck ci = chunk_item; 1495c2c66affSColin Finck offset = sc->key.offset; 1496c2c66affSColin Finck cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1]; 1497c2c66affSColin Finck 1498c2c66affSColin Finck devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG); 1499c2c66affSColin Finck if (!devices) { 1500c2c66affSColin Finck ERR("out of memory\n"); 1501c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 1502c2c66affSColin Finck } 1503c2c66affSColin Finck 1504c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 1505c2c66affSColin Finck devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid); 1506c2c66affSColin Finck } 1507c2c66affSColin Finck 1508c2c66affSColin Finck break; 1509c2c66affSColin Finck } 1510c2c66affSColin Finck } 1511c2c66affSColin Finck 1512c2c66affSColin Finck le = le->Flink; 1513c2c66affSColin Finck } 1514c2c66affSColin Finck 1515c2c66affSColin Finck if (!ci) { 1516c2c66affSColin Finck ERR("could not find chunk for %llx in bootstrap\n", addr); 1517c2c66affSColin Finck return STATUS_INTERNAL_ERROR; 1518c2c66affSColin Finck } 1519c2c66affSColin Finck 1520c2c66affSColin Finck if (pc) 1521c2c66affSColin Finck *pc = NULL; 1522c2c66affSColin Finck } 1523c2c66affSColin Finck 1524c2c66affSColin Finck if (ci->type & BLOCK_FLAG_DUPLICATE) { 1525c2c66affSColin Finck type = BLOCK_FLAG_DUPLICATE; 1526c2c66affSColin Finck allowed_missing = ci->num_stripes - 1; 1527c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID0) { 1528c2c66affSColin Finck type = BLOCK_FLAG_RAID0; 1529c2c66affSColin Finck allowed_missing = 0; 1530c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID1) { 1531c2c66affSColin Finck type = BLOCK_FLAG_DUPLICATE; 1532c2c66affSColin Finck allowed_missing = 1; 1533c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID10) { 1534c2c66affSColin Finck type = BLOCK_FLAG_RAID10; 1535c2c66affSColin Finck allowed_missing = 1; 1536c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID5) { 1537c2c66affSColin Finck type = BLOCK_FLAG_RAID5; 1538c2c66affSColin Finck allowed_missing = 1; 1539c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID6) { 1540c2c66affSColin Finck type = BLOCK_FLAG_RAID6; 1541c2c66affSColin Finck allowed_missing = 2; 1542c2c66affSColin Finck } else { // SINGLE 1543c2c66affSColin Finck type = BLOCK_FLAG_DUPLICATE; 1544c2c66affSColin Finck allowed_missing = 0; 1545c2c66affSColin Finck } 1546c2c66affSColin Finck 1547c2c66affSColin Finck cis = (CHUNK_ITEM_STRIPE*)&ci[1]; 1548c2c66affSColin Finck 1549c2c66affSColin Finck RtlZeroMemory(&context, sizeof(read_data_context)); 1550c2c66affSColin Finck KeInitializeEvent(&context.Event, NotificationEvent, FALSE); 1551c2c66affSColin Finck 1552c2c66affSColin Finck context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG); 1553c2c66affSColin Finck if (!context.stripes) { 1554c2c66affSColin Finck ERR("out of memory\n"); 1555c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 1556c2c66affSColin Finck } 1557c2c66affSColin Finck 1558c2c66affSColin Finck if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) { 1559c2c66affSColin Finck get_raid56_lock_range(c, addr, length, &lockaddr, &locklen); 1560c2c66affSColin Finck chunk_lock_range(Vcb, c, lockaddr, locklen); 1561c2c66affSColin Finck } 1562c2c66affSColin Finck 1563c2c66affSColin Finck RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes); 1564c2c66affSColin Finck 1565c2c66affSColin Finck context.buflen = length; 1566c2c66affSColin Finck context.num_stripes = ci->num_stripes; 1567c2c66affSColin Finck context.stripes_left = context.num_stripes; 1568c2c66affSColin Finck context.sector_size = Vcb->superblock.sector_size; 1569c2c66affSColin Finck context.csum = csum; 1570c2c66affSColin Finck context.tree = is_tree; 1571c2c66affSColin Finck context.type = type; 1572c2c66affSColin Finck 1573c2c66affSColin Finck if (type == BLOCK_FLAG_RAID0) { 1574c2c66affSColin Finck UINT64 startoff, endoff; 1575c2c66affSColin Finck UINT16 endoffstripe, stripe; 1576c2c66affSColin Finck UINT32 *stripeoff, pos; 1577c2c66affSColin Finck PMDL master_mdl; 1578c2c66affSColin Finck PFN_NUMBER* pfns; 1579c2c66affSColin Finck 1580c2c66affSColin Finck // FIXME - test this still works if page size isn't the same as sector size 1581c2c66affSColin Finck 1582c2c66affSColin Finck // This relies on the fact that MDLs are followed in memory by the page file numbers, 1583c2c66affSColin Finck // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0 1584c2c66affSColin Finck // data for you without doing a memcpy yourself. 1585c2c66affSColin Finck // MDLs are officially opaque, so this might very well break in future versions of Windows. 1586c2c66affSColin Finck 1587c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe); 1588c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe); 1589c2c66affSColin Finck 1590c2c66affSColin Finck if (file_read) { 1591c2c66affSColin Finck // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL 1592c2c66affSColin Finck // with duplicated dummy PFNs, which confuse check_csum. Ah well. 1593c2c66affSColin Finck // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested. 1594c2c66affSColin Finck 1595c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1596c2c66affSColin Finck 1597c2c66affSColin Finck if (!context.va) { 1598c2c66affSColin Finck ERR("out of memory\n"); 1599c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1600c2c66affSColin Finck goto exit; 1601c2c66affSColin Finck } 1602c2c66affSColin Finck } else 1603c2c66affSColin Finck context.va = buf; 1604c2c66affSColin Finck 1605c2c66affSColin Finck master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1606c2c66affSColin Finck if (!master_mdl) { 1607c2c66affSColin Finck ERR("out of memory\n"); 1608c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1609c2c66affSColin Finck goto exit; 1610c2c66affSColin Finck } 1611c2c66affSColin Finck 1612c2c66affSColin Finck Status = STATUS_SUCCESS; 1613c2c66affSColin Finck 1614c2c66affSColin Finck _SEH2_TRY { 1615c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1616c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1617c2c66affSColin Finck Status = _SEH2_GetExceptionCode(); 1618c2c66affSColin Finck } _SEH2_END; 1619c2c66affSColin Finck 1620c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1621c2c66affSColin Finck ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1622c2c66affSColin Finck IoFreeMdl(master_mdl); 1623c2c66affSColin Finck goto exit; 1624c2c66affSColin Finck } 1625c2c66affSColin Finck 1626c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1); 1627c2c66affSColin Finck 1628c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 1629c2c66affSColin Finck if (startoffstripe > i) 1630c2c66affSColin Finck context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1631c2c66affSColin Finck else if (startoffstripe == i) 1632c2c66affSColin Finck context.stripes[i].stripestart = startoff; 1633c2c66affSColin Finck else 1634c2c66affSColin Finck context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length); 1635c2c66affSColin Finck 1636c2c66affSColin Finck if (endoffstripe > i) 1637c2c66affSColin Finck context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1638c2c66affSColin Finck else if (endoffstripe == i) 1639c2c66affSColin Finck context.stripes[i].stripeend = endoff + 1; 1640c2c66affSColin Finck else 1641c2c66affSColin Finck context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length); 1642c2c66affSColin Finck 1643c2c66affSColin Finck if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 1644c2c66affSColin Finck context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); 1645c2c66affSColin Finck 1646c2c66affSColin Finck if (!context.stripes[i].mdl) { 1647c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 1648*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 1649*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 1650c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1651c2c66affSColin Finck goto exit; 1652c2c66affSColin Finck } 1653c2c66affSColin Finck } 1654c2c66affSColin Finck } 1655c2c66affSColin Finck 1656c2c66affSColin Finck stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 1657c2c66affSColin Finck if (!stripeoff) { 1658c2c66affSColin Finck ERR("out of memory\n"); 1659*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 1660*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 1661c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1662c2c66affSColin Finck goto exit; 1663c2c66affSColin Finck } 1664c2c66affSColin Finck 1665c2c66affSColin Finck RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 1666c2c66affSColin Finck 1667c2c66affSColin Finck pos = 0; 1668c2c66affSColin Finck stripe = startoffstripe; 1669c2c66affSColin Finck while (pos < length) { 1670c2c66affSColin Finck PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 1671c2c66affSColin Finck 1672c2c66affSColin Finck if (pos == 0) { 1673c2c66affSColin Finck UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)); 1674c2c66affSColin Finck 1675c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1676c2c66affSColin Finck 1677c2c66affSColin Finck stripeoff[stripe] += readlen; 1678c2c66affSColin Finck pos += readlen; 1679c2c66affSColin Finck } else if (length - pos < ci->stripe_length) { 1680c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1681c2c66affSColin Finck 1682c2c66affSColin Finck pos = length; 1683c2c66affSColin Finck } else { 1684c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1685c2c66affSColin Finck 1686c2c66affSColin Finck stripeoff[stripe] += (UINT32)ci->stripe_length; 1687c2c66affSColin Finck pos += (UINT32)ci->stripe_length; 1688c2c66affSColin Finck } 1689c2c66affSColin Finck 1690c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 1691c2c66affSColin Finck } 1692c2c66affSColin Finck 1693c2c66affSColin Finck MmUnlockPages(master_mdl); 1694c2c66affSColin Finck IoFreeMdl(master_mdl); 1695c2c66affSColin Finck 1696c2c66affSColin Finck ExFreePool(stripeoff); 1697c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID10) { 1698c2c66affSColin Finck UINT64 startoff, endoff; 1699c2c66affSColin Finck UINT16 endoffstripe, j, stripe; 1700c2c66affSColin Finck ULONG orig_ls; 1701c2c66affSColin Finck PMDL master_mdl; 1702c2c66affSColin Finck PFN_NUMBER* pfns; 1703c2c66affSColin Finck UINT32* stripeoff, pos; 1704c2c66affSColin Finck read_data_stripe** stripes; 1705c2c66affSColin Finck 1706c2c66affSColin Finck if (c) 1707c2c66affSColin Finck orig_ls = c->last_stripe; 1708c2c66affSColin Finck else 1709c2c66affSColin Finck orig_ls = 0; 1710c2c66affSColin Finck 1711c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe); 1712c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe); 1713c2c66affSColin Finck 1714c2c66affSColin Finck if ((ci->num_stripes % ci->sub_stripes) != 0) { 1715c2c66affSColin Finck ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes); 1716c2c66affSColin Finck Status = STATUS_INTERNAL_ERROR; 1717c2c66affSColin Finck goto exit; 1718c2c66affSColin Finck } 1719c2c66affSColin Finck 1720c2c66affSColin Finck if (file_read) { 1721c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1722c2c66affSColin Finck 1723c2c66affSColin Finck if (!context.va) { 1724c2c66affSColin Finck ERR("out of memory\n"); 1725c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1726c2c66affSColin Finck goto exit; 1727c2c66affSColin Finck } 1728c2c66affSColin Finck } else 1729c2c66affSColin Finck context.va = buf; 1730c2c66affSColin Finck 1731c2c66affSColin Finck context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size); 1732c2c66affSColin Finck context.startoffstripe = startoffstripe; 1733c2c66affSColin Finck context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size); 1734c2c66affSColin Finck 1735c2c66affSColin Finck startoffstripe *= ci->sub_stripes; 1736c2c66affSColin Finck endoffstripe *= ci->sub_stripes; 1737c2c66affSColin Finck 1738c2c66affSColin Finck if (c) 1739c2c66affSColin Finck c->last_stripe = (orig_ls + 1) % ci->sub_stripes; 1740c2c66affSColin Finck 1741c2c66affSColin Finck master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1742c2c66affSColin Finck if (!master_mdl) { 1743c2c66affSColin Finck ERR("out of memory\n"); 1744c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1745c2c66affSColin Finck goto exit; 1746c2c66affSColin Finck } 1747c2c66affSColin Finck 1748c2c66affSColin Finck Status = STATUS_SUCCESS; 1749c2c66affSColin Finck 1750c2c66affSColin Finck _SEH2_TRY { 1751c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1752c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1753c2c66affSColin Finck Status = _SEH2_GetExceptionCode(); 1754c2c66affSColin Finck } _SEH2_END; 1755c2c66affSColin Finck 1756c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1757c2c66affSColin Finck ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1758c2c66affSColin Finck IoFreeMdl(master_mdl); 1759c2c66affSColin Finck goto exit; 1760c2c66affSColin Finck } 1761c2c66affSColin Finck 1762c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1); 1763c2c66affSColin Finck 1764c2c66affSColin Finck stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1765c2c66affSColin Finck if (!stripes) { 1766c2c66affSColin Finck ERR("out of memory\n"); 1767*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 1768*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 1769c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1770c2c66affSColin Finck goto exit; 1771c2c66affSColin Finck } 1772c2c66affSColin Finck 1773c2c66affSColin Finck RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes); 1774c2c66affSColin Finck 1775c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) { 1776c2c66affSColin Finck UINT64 sstart, send; 1777c2c66affSColin Finck BOOL stripeset = FALSE; 1778c2c66affSColin Finck 1779c2c66affSColin Finck if (startoffstripe > i) 1780c2c66affSColin Finck sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 1781c2c66affSColin Finck else if (startoffstripe == i) 1782c2c66affSColin Finck sstart = startoff; 1783c2c66affSColin Finck else 1784c2c66affSColin Finck sstart = startoff - (startoff % ci->stripe_length); 1785c2c66affSColin Finck 1786c2c66affSColin Finck if (endoffstripe > i) 1787c2c66affSColin Finck send = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 1788c2c66affSColin Finck else if (endoffstripe == i) 1789c2c66affSColin Finck send = endoff + 1; 1790c2c66affSColin Finck else 1791c2c66affSColin Finck send = endoff - (endoff % ci->stripe_length); 1792c2c66affSColin Finck 1793c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) { 1794c2c66affSColin Finck if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) { 1795c2c66affSColin Finck context.stripes[i+j].stripestart = sstart; 1796c2c66affSColin Finck context.stripes[i+j].stripeend = send; 1797c2c66affSColin Finck stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1798c2c66affSColin Finck 1799c2c66affSColin Finck if (sstart != send) { 1800c2c66affSColin Finck context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); 1801c2c66affSColin Finck 1802c2c66affSColin Finck if (!context.stripes[i+j].mdl) { 1803c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 1804*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 1805*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 1806c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1807c2c66affSColin Finck goto exit; 1808c2c66affSColin Finck } 1809c2c66affSColin Finck } 1810c2c66affSColin Finck 1811c2c66affSColin Finck stripeset = TRUE; 1812c2c66affSColin Finck } else 1813c2c66affSColin Finck context.stripes[i+j].status = ReadDataStatus_Skip; 1814c2c66affSColin Finck } 1815c2c66affSColin Finck 1816c2c66affSColin Finck if (!stripeset) { 1817c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) { 1818c2c66affSColin Finck if (devices[i+j] && devices[i+j]->devobj) { 1819c2c66affSColin Finck context.stripes[i+j].stripestart = sstart; 1820c2c66affSColin Finck context.stripes[i+j].stripeend = send; 1821c2c66affSColin Finck context.stripes[i+j].status = ReadDataStatus_Pending; 1822c2c66affSColin Finck stripes[i / ci->sub_stripes] = &context.stripes[i+j]; 1823c2c66affSColin Finck 1824c2c66affSColin Finck if (sstart != send) { 1825c2c66affSColin Finck context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL); 1826c2c66affSColin Finck 1827c2c66affSColin Finck if (!context.stripes[i+j].mdl) { 1828c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 1829*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 1830*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 1831c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1832c2c66affSColin Finck goto exit; 1833c2c66affSColin Finck } 1834c2c66affSColin Finck } 1835c2c66affSColin Finck 1836c2c66affSColin Finck stripeset = TRUE; 1837c2c66affSColin Finck break; 1838c2c66affSColin Finck } 1839c2c66affSColin Finck } 1840c2c66affSColin Finck 1841c2c66affSColin Finck if (!stripeset) { 1842c2c66affSColin Finck ERR("could not find stripe to read\n"); 1843c2c66affSColin Finck Status = STATUS_DEVICE_NOT_READY; 1844c2c66affSColin Finck goto exit; 1845c2c66affSColin Finck } 1846c2c66affSColin Finck } 1847c2c66affSColin Finck } 1848c2c66affSColin Finck 1849c2c66affSColin Finck stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG); 1850c2c66affSColin Finck if (!stripeoff) { 1851c2c66affSColin Finck ERR("out of memory\n"); 1852*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 1853*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 1854c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1855c2c66affSColin Finck goto exit; 1856c2c66affSColin Finck } 1857c2c66affSColin Finck 1858c2c66affSColin Finck RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes); 1859c2c66affSColin Finck 1860c2c66affSColin Finck pos = 0; 1861c2c66affSColin Finck stripe = startoffstripe / ci->sub_stripes; 1862c2c66affSColin Finck while (pos < length) { 1863c2c66affSColin Finck PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1); 1864c2c66affSColin Finck 1865c2c66affSColin Finck if (pos == 0) { 1866c2c66affSColin Finck UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart, 1867c2c66affSColin Finck ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length)); 1868c2c66affSColin Finck 1869c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1870c2c66affSColin Finck 1871c2c66affSColin Finck stripeoff[stripe] += readlen; 1872c2c66affSColin Finck pos += readlen; 1873c2c66affSColin Finck } else if (length - pos < ci->stripe_length) { 1874c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 1875c2c66affSColin Finck 1876c2c66affSColin Finck pos = length; 1877c2c66affSColin Finck } else { 1878c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 1879c2c66affSColin Finck 1880c2c66affSColin Finck stripeoff[stripe] += (ULONG)ci->stripe_length; 1881c2c66affSColin Finck pos += (ULONG)ci->stripe_length; 1882c2c66affSColin Finck } 1883c2c66affSColin Finck 1884c2c66affSColin Finck stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes); 1885c2c66affSColin Finck } 1886c2c66affSColin Finck 1887c2c66affSColin Finck MmUnlockPages(master_mdl); 1888c2c66affSColin Finck IoFreeMdl(master_mdl); 1889c2c66affSColin Finck 1890c2c66affSColin Finck ExFreePool(stripeoff); 1891c2c66affSColin Finck ExFreePool(stripes); 1892c2c66affSColin Finck } else if (type == BLOCK_FLAG_DUPLICATE) { 1893c2c66affSColin Finck UINT64 orig_ls; 1894c2c66affSColin Finck 1895c2c66affSColin Finck if (c) 1896c2c66affSColin Finck orig_ls = i = c->last_stripe; 1897c2c66affSColin Finck else 1898c2c66affSColin Finck orig_ls = i = 0; 1899c2c66affSColin Finck 1900c2c66affSColin Finck while (!devices[i] || !devices[i]->devobj) { 1901c2c66affSColin Finck i = (i + 1) % ci->num_stripes; 1902c2c66affSColin Finck 1903c2c66affSColin Finck if (i == orig_ls) { 1904c2c66affSColin Finck ERR("no devices available to service request\n"); 1905c2c66affSColin Finck Status = STATUS_DEVICE_NOT_READY; 1906c2c66affSColin Finck goto exit; 1907c2c66affSColin Finck } 1908c2c66affSColin Finck } 1909c2c66affSColin Finck 1910c2c66affSColin Finck if (c) 1911c2c66affSColin Finck c->last_stripe = (i + 1) % ci->num_stripes; 1912c2c66affSColin Finck 1913c2c66affSColin Finck context.stripes[i].stripestart = addr - offset; 1914c2c66affSColin Finck context.stripes[i].stripeend = context.stripes[i].stripestart + length; 1915c2c66affSColin Finck 1916c2c66affSColin Finck if (file_read) { 1917c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1918c2c66affSColin Finck 1919c2c66affSColin Finck if (!context.va) { 1920c2c66affSColin Finck ERR("out of memory\n"); 1921c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1922c2c66affSColin Finck goto exit; 1923c2c66affSColin Finck } 1924c2c66affSColin Finck 1925c2c66affSColin Finck context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1926c2c66affSColin Finck if (!context.stripes[i].mdl) { 1927c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 1928c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1929c2c66affSColin Finck goto exit; 1930c2c66affSColin Finck } 1931c2c66affSColin Finck 1932c2c66affSColin Finck MmBuildMdlForNonPagedPool(context.stripes[i].mdl); 1933c2c66affSColin Finck } else { 1934c2c66affSColin Finck context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL); 1935c2c66affSColin Finck 1936c2c66affSColin Finck if (!context.stripes[i].mdl) { 1937c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 1938c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1939c2c66affSColin Finck goto exit; 1940c2c66affSColin Finck } 1941c2c66affSColin Finck 1942c2c66affSColin Finck Status = STATUS_SUCCESS; 1943c2c66affSColin Finck 1944c2c66affSColin Finck _SEH2_TRY { 1945c2c66affSColin Finck MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess); 1946c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1947c2c66affSColin Finck Status = _SEH2_GetExceptionCode(); 1948c2c66affSColin Finck } _SEH2_END; 1949c2c66affSColin Finck 1950c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1951c2c66affSColin Finck ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1952c2c66affSColin Finck goto exit; 1953c2c66affSColin Finck } 1954c2c66affSColin Finck } 1955c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID5) { 1956c2c66affSColin Finck UINT64 startoff, endoff; 1957c2c66affSColin Finck UINT16 endoffstripe, parity; 1958c2c66affSColin Finck UINT32 *stripeoff, pos; 1959c2c66affSColin Finck PMDL master_mdl; 1960c2c66affSColin Finck PFN_NUMBER *pfns, dummy; 1961c2c66affSColin Finck BOOL need_dummy = FALSE; 1962c2c66affSColin Finck 1963c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe); 1964c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe); 1965c2c66affSColin Finck 1966c2c66affSColin Finck if (file_read) { 1967c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 1968c2c66affSColin Finck 1969c2c66affSColin Finck if (!context.va) { 1970c2c66affSColin Finck ERR("out of memory\n"); 1971c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1972c2c66affSColin Finck goto exit; 1973c2c66affSColin Finck } 1974c2c66affSColin Finck } else 1975c2c66affSColin Finck context.va = buf; 1976c2c66affSColin Finck 1977c2c66affSColin Finck master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 1978c2c66affSColin Finck if (!master_mdl) { 1979c2c66affSColin Finck ERR("out of memory\n"); 1980c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 1981c2c66affSColin Finck goto exit; 1982c2c66affSColin Finck } 1983c2c66affSColin Finck 1984c2c66affSColin Finck Status = STATUS_SUCCESS; 1985c2c66affSColin Finck 1986c2c66affSColin Finck _SEH2_TRY { 1987c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 1988c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 1989c2c66affSColin Finck Status = _SEH2_GetExceptionCode(); 1990c2c66affSColin Finck } _SEH2_END; 1991c2c66affSColin Finck 1992c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 1993c2c66affSColin Finck ERR("MmProbeAndLockPages threw exception %08x\n", Status); 1994c2c66affSColin Finck IoFreeMdl(master_mdl); 1995c2c66affSColin Finck goto exit; 1996c2c66affSColin Finck } 1997c2c66affSColin Finck 1998c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1); 1999c2c66affSColin Finck 2000c2c66affSColin Finck pos = 0; 2001c2c66affSColin Finck while (pos < length) { 2002c2c66affSColin Finck parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2003c2c66affSColin Finck 2004c2c66affSColin Finck if (pos == 0) { 2005c2c66affSColin Finck UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2006c2c66affSColin Finck ULONG skip, readlen; 2007c2c66affSColin Finck 2008c2c66affSColin Finck i = startoffstripe; 2009c2c66affSColin Finck while (stripe != parity) { 2010c2c66affSColin Finck if (i == startoffstripe) { 2011c2c66affSColin Finck readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length))); 2012c2c66affSColin Finck 2013c2c66affSColin Finck context.stripes[stripe].stripestart = startoff; 2014c2c66affSColin Finck context.stripes[stripe].stripeend = startoff + readlen; 2015c2c66affSColin Finck 2016c2c66affSColin Finck pos += readlen; 2017c2c66affSColin Finck 2018c2c66affSColin Finck if (pos == length) 2019c2c66affSColin Finck break; 2020c2c66affSColin Finck } else { 2021c2c66affSColin Finck readlen = min(length - pos, (ULONG)ci->stripe_length); 2022c2c66affSColin Finck 2023c2c66affSColin Finck context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2024c2c66affSColin Finck context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2025c2c66affSColin Finck 2026c2c66affSColin Finck pos += readlen; 2027c2c66affSColin Finck 2028c2c66affSColin Finck if (pos == length) 2029c2c66affSColin Finck break; 2030c2c66affSColin Finck } 2031c2c66affSColin Finck 2032c2c66affSColin Finck i++; 2033c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2034c2c66affSColin Finck } 2035c2c66affSColin Finck 2036c2c66affSColin Finck if (pos == length) 2037c2c66affSColin Finck break; 2038c2c66affSColin Finck 2039c2c66affSColin Finck for (i = 0; i < startoffstripe; i++) { 2040c2c66affSColin Finck UINT16 stripe2 = (parity + i + 1) % ci->num_stripes; 2041c2c66affSColin Finck 2042c2c66affSColin Finck context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2043c2c66affSColin Finck } 2044c2c66affSColin Finck 2045c2c66affSColin Finck context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2046c2c66affSColin Finck 2047c2c66affSColin Finck if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) { 2048c2c66affSColin Finck skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1); 2049c2c66affSColin Finck 2050c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2051c2c66affSColin Finck context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2052c2c66affSColin Finck } 2053c2c66affSColin Finck 2054c2c66affSColin Finck pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length); 2055c2c66affSColin Finck need_dummy = TRUE; 2056c2c66affSColin Finck } 2057c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2058c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2059c2c66affSColin Finck context.stripes[i].stripeend += ci->stripe_length; 2060c2c66affSColin Finck } 2061c2c66affSColin Finck 2062c2c66affSColin Finck pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1)); 2063c2c66affSColin Finck need_dummy = TRUE; 2064c2c66affSColin Finck } else { 2065c2c66affSColin Finck UINT16 stripe = (parity + 1) % ci->num_stripes; 2066c2c66affSColin Finck 2067c2c66affSColin Finck i = 0; 2068c2c66affSColin Finck while (stripe != parity) { 2069c2c66affSColin Finck if (endoffstripe == i) { 2070c2c66affSColin Finck context.stripes[stripe].stripeend = endoff + 1; 2071c2c66affSColin Finck break; 2072c2c66affSColin Finck } else if (endoffstripe > i) 2073c2c66affSColin Finck context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2074c2c66affSColin Finck 2075c2c66affSColin Finck i++; 2076c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2077c2c66affSColin Finck } 2078c2c66affSColin Finck 2079c2c66affSColin Finck break; 2080c2c66affSColin Finck } 2081c2c66affSColin Finck } 2082c2c66affSColin Finck 2083c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2084c2c66affSColin Finck if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2085c2c66affSColin Finck context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), 2086c2c66affSColin Finck FALSE, FALSE, NULL); 2087c2c66affSColin Finck 2088c2c66affSColin Finck if (!context.stripes[i].mdl) { 2089c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 2090*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2091*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2092c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2093c2c66affSColin Finck goto exit; 2094c2c66affSColin Finck } 2095c2c66affSColin Finck } 2096c2c66affSColin Finck } 2097c2c66affSColin Finck 2098c2c66affSColin Finck if (need_dummy) { 2099c2c66affSColin Finck dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2100c2c66affSColin Finck if (!dummypage) { 2101c2c66affSColin Finck ERR("out of memory\n"); 2102*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2103*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2104c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2105c2c66affSColin Finck goto exit; 2106c2c66affSColin Finck } 2107c2c66affSColin Finck 2108c2c66affSColin Finck dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); 2109c2c66affSColin Finck if (!dummy_mdl) { 2110c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 2111*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2112*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2113c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2114c2c66affSColin Finck goto exit; 2115c2c66affSColin Finck } 2116c2c66affSColin Finck 2117c2c66affSColin Finck MmBuildMdlForNonPagedPool(dummy_mdl); 2118c2c66affSColin Finck 2119c2c66affSColin Finck dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2120c2c66affSColin Finck } 2121c2c66affSColin Finck 2122c2c66affSColin Finck stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 2123c2c66affSColin Finck if (!stripeoff) { 2124c2c66affSColin Finck ERR("out of memory\n"); 2125*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2126*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2127c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2128c2c66affSColin Finck goto exit; 2129c2c66affSColin Finck } 2130c2c66affSColin Finck 2131c2c66affSColin Finck RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 2132c2c66affSColin Finck 2133c2c66affSColin Finck pos = 0; 2134c2c66affSColin Finck 2135c2c66affSColin Finck while (pos < length) { 2136c2c66affSColin Finck PFN_NUMBER* stripe_pfns; 2137c2c66affSColin Finck 2138c2c66affSColin Finck parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; 2139c2c66affSColin Finck 2140c2c66affSColin Finck if (pos == 0) { 2141c2c66affSColin Finck UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes; 2142c2c66affSColin Finck UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2143c2c66affSColin Finck ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2144c2c66affSColin Finck 2145c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2146c2c66affSColin Finck 2147c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2148c2c66affSColin Finck 2149c2c66affSColin Finck stripeoff[stripe] = readlen; 2150c2c66affSColin Finck pos += readlen; 2151c2c66affSColin Finck 2152c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2153c2c66affSColin Finck 2154c2c66affSColin Finck while (stripe != parity) { 2155c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2156c2c66affSColin Finck readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2157c2c66affSColin Finck 2158c2c66affSColin Finck if (readlen == 0) 2159c2c66affSColin Finck break; 2160c2c66affSColin Finck 2161c2c66affSColin Finck RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2162c2c66affSColin Finck 2163c2c66affSColin Finck stripeoff[stripe] = readlen; 2164c2c66affSColin Finck pos += readlen; 2165c2c66affSColin Finck 2166c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2167c2c66affSColin Finck } 2168c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) { 2169c2c66affSColin Finck UINT16 stripe = (parity + 1) % ci->num_stripes; 2170c2c66affSColin Finck ULONG k; 2171c2c66affSColin Finck 2172c2c66affSColin Finck while (stripe != parity) { 2173c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2174c2c66affSColin Finck 2175c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2176c2c66affSColin Finck 2177c2c66affSColin Finck stripeoff[stripe] += (UINT32)ci->stripe_length; 2178c2c66affSColin Finck pos += (UINT32)ci->stripe_length; 2179c2c66affSColin Finck 2180c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2181c2c66affSColin Finck } 2182c2c66affSColin Finck 2183c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1); 2184c2c66affSColin Finck 2185c2c66affSColin Finck for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2186c2c66affSColin Finck stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy; 2187c2c66affSColin Finck stripeoff[parity] += PAGE_SIZE; 2188c2c66affSColin Finck } 2189c2c66affSColin Finck } else { 2190c2c66affSColin Finck UINT16 stripe = (parity + 1) % ci->num_stripes; 2191c2c66affSColin Finck UINT32 readlen; 2192c2c66affSColin Finck 2193c2c66affSColin Finck while (pos < length) { 2194c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2195c2c66affSColin Finck readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2196c2c66affSColin Finck 2197c2c66affSColin Finck if (readlen == 0) 2198c2c66affSColin Finck break; 2199c2c66affSColin Finck 2200c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2201c2c66affSColin Finck 2202c2c66affSColin Finck stripeoff[stripe] += readlen; 2203c2c66affSColin Finck pos += readlen; 2204c2c66affSColin Finck 2205c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2206c2c66affSColin Finck } 2207c2c66affSColin Finck } 2208c2c66affSColin Finck } 2209c2c66affSColin Finck 2210c2c66affSColin Finck MmUnlockPages(master_mdl); 2211c2c66affSColin Finck IoFreeMdl(master_mdl); 2212c2c66affSColin Finck 2213c2c66affSColin Finck ExFreePool(stripeoff); 2214c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID6) { 2215c2c66affSColin Finck UINT64 startoff, endoff; 2216c2c66affSColin Finck UINT16 endoffstripe, parity1; 2217c2c66affSColin Finck UINT32 *stripeoff, pos; 2218c2c66affSColin Finck PMDL master_mdl; 2219c2c66affSColin Finck PFN_NUMBER *pfns, dummy; 2220c2c66affSColin Finck BOOL need_dummy = FALSE; 2221c2c66affSColin Finck 2222c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe); 2223c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe); 2224c2c66affSColin Finck 2225c2c66affSColin Finck if (file_read) { 2226c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG); 2227c2c66affSColin Finck 2228c2c66affSColin Finck if (!context.va) { 2229c2c66affSColin Finck ERR("out of memory\n"); 2230c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2231c2c66affSColin Finck goto exit; 2232c2c66affSColin Finck } 2233c2c66affSColin Finck } else 2234c2c66affSColin Finck context.va = buf; 2235c2c66affSColin Finck 2236c2c66affSColin Finck master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL); 2237c2c66affSColin Finck if (!master_mdl) { 2238c2c66affSColin Finck ERR("out of memory\n"); 2239c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2240c2c66affSColin Finck goto exit; 2241c2c66affSColin Finck } 2242c2c66affSColin Finck 2243c2c66affSColin Finck Status = STATUS_SUCCESS; 2244c2c66affSColin Finck 2245c2c66affSColin Finck _SEH2_TRY { 2246c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess); 2247c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 2248c2c66affSColin Finck Status = _SEH2_GetExceptionCode(); 2249c2c66affSColin Finck } _SEH2_END; 2250c2c66affSColin Finck 2251c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2252c2c66affSColin Finck ERR("MmProbeAndLockPages threw exception %08x\n", Status); 2253c2c66affSColin Finck IoFreeMdl(master_mdl); 2254c2c66affSColin Finck goto exit; 2255c2c66affSColin Finck } 2256c2c66affSColin Finck 2257c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1); 2258c2c66affSColin Finck 2259c2c66affSColin Finck pos = 0; 2260c2c66affSColin Finck while (pos < length) { 2261c2c66affSColin Finck parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2262c2c66affSColin Finck 2263c2c66affSColin Finck if (pos == 0) { 2264c2c66affSColin Finck UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2; 2265c2c66affSColin Finck ULONG skip, readlen; 2266c2c66affSColin Finck 2267c2c66affSColin Finck i = startoffstripe; 2268c2c66affSColin Finck while (stripe != parity1) { 2269c2c66affSColin Finck if (i == startoffstripe) { 2270c2c66affSColin Finck readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length)); 2271c2c66affSColin Finck 2272c2c66affSColin Finck context.stripes[stripe].stripestart = startoff; 2273c2c66affSColin Finck context.stripes[stripe].stripeend = startoff + readlen; 2274c2c66affSColin Finck 2275c2c66affSColin Finck pos += readlen; 2276c2c66affSColin Finck 2277c2c66affSColin Finck if (pos == length) 2278c2c66affSColin Finck break; 2279c2c66affSColin Finck } else { 2280c2c66affSColin Finck readlen = min(length - pos, (ULONG)ci->stripe_length); 2281c2c66affSColin Finck 2282c2c66affSColin Finck context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length); 2283c2c66affSColin Finck context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen; 2284c2c66affSColin Finck 2285c2c66affSColin Finck pos += readlen; 2286c2c66affSColin Finck 2287c2c66affSColin Finck if (pos == length) 2288c2c66affSColin Finck break; 2289c2c66affSColin Finck } 2290c2c66affSColin Finck 2291c2c66affSColin Finck i++; 2292c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2293c2c66affSColin Finck } 2294c2c66affSColin Finck 2295c2c66affSColin Finck if (pos == length) 2296c2c66affSColin Finck break; 2297c2c66affSColin Finck 2298c2c66affSColin Finck for (i = 0; i < startoffstripe; i++) { 2299c2c66affSColin Finck UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes; 2300c2c66affSColin Finck 2301c2c66affSColin Finck context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2302c2c66affSColin Finck } 2303c2c66affSColin Finck 2304c2c66affSColin Finck context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2305c2c66affSColin Finck 2306c2c66affSColin Finck parity2 = (parity1 + 1) % ci->num_stripes; 2307c2c66affSColin Finck context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length; 2308c2c66affSColin Finck 2309c2c66affSColin Finck if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) { 2310c2c66affSColin Finck skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1); 2311c2c66affSColin Finck 2312c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2313c2c66affSColin Finck context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length; 2314c2c66affSColin Finck } 2315c2c66affSColin Finck 2316c2c66affSColin Finck pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length); 2317c2c66affSColin Finck need_dummy = TRUE; 2318c2c66affSColin Finck } 2319c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2320c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2321c2c66affSColin Finck context.stripes[i].stripeend += ci->stripe_length; 2322c2c66affSColin Finck } 2323c2c66affSColin Finck 2324c2c66affSColin Finck pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2)); 2325c2c66affSColin Finck need_dummy = TRUE; 2326c2c66affSColin Finck } else { 2327c2c66affSColin Finck UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2328c2c66affSColin Finck 2329c2c66affSColin Finck i = 0; 2330c2c66affSColin Finck while (stripe != parity1) { 2331c2c66affSColin Finck if (endoffstripe == i) { 2332c2c66affSColin Finck context.stripes[stripe].stripeend = endoff + 1; 2333c2c66affSColin Finck break; 2334c2c66affSColin Finck } else if (endoffstripe > i) 2335c2c66affSColin Finck context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length; 2336c2c66affSColin Finck 2337c2c66affSColin Finck i++; 2338c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2339c2c66affSColin Finck } 2340c2c66affSColin Finck 2341c2c66affSColin Finck break; 2342c2c66affSColin Finck } 2343c2c66affSColin Finck } 2344c2c66affSColin Finck 2345c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2346c2c66affSColin Finck if (context.stripes[i].stripestart != context.stripes[i].stripeend) { 2347c2c66affSColin Finck context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL); 2348c2c66affSColin Finck 2349c2c66affSColin Finck if (!context.stripes[i].mdl) { 2350c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 2351*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2352*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2353c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2354c2c66affSColin Finck goto exit; 2355c2c66affSColin Finck } 2356c2c66affSColin Finck } 2357c2c66affSColin Finck } 2358c2c66affSColin Finck 2359c2c66affSColin Finck if (need_dummy) { 2360c2c66affSColin Finck dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG); 2361c2c66affSColin Finck if (!dummypage) { 2362c2c66affSColin Finck ERR("out of memory\n"); 2363*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2364*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2365c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2366c2c66affSColin Finck goto exit; 2367c2c66affSColin Finck } 2368c2c66affSColin Finck 2369c2c66affSColin Finck dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL); 2370c2c66affSColin Finck if (!dummy_mdl) { 2371c2c66affSColin Finck ERR("IoAllocateMdl failed\n"); 2372*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2373*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2374c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2375c2c66affSColin Finck goto exit; 2376c2c66affSColin Finck } 2377c2c66affSColin Finck 2378c2c66affSColin Finck MmBuildMdlForNonPagedPool(dummy_mdl); 2379c2c66affSColin Finck 2380c2c66affSColin Finck dummy = *(PFN_NUMBER*)(dummy_mdl + 1); 2381c2c66affSColin Finck } 2382c2c66affSColin Finck 2383c2c66affSColin Finck stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG); 2384c2c66affSColin Finck if (!stripeoff) { 2385c2c66affSColin Finck ERR("out of memory\n"); 2386*eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl); 2387*eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl); 2388c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2389c2c66affSColin Finck goto exit; 2390c2c66affSColin Finck } 2391c2c66affSColin Finck 2392c2c66affSColin Finck RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes); 2393c2c66affSColin Finck 2394c2c66affSColin Finck pos = 0; 2395c2c66affSColin Finck 2396c2c66affSColin Finck while (pos < length) { 2397c2c66affSColin Finck PFN_NUMBER* stripe_pfns; 2398c2c66affSColin Finck 2399c2c66affSColin Finck parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; 2400c2c66affSColin Finck 2401c2c66affSColin Finck if (pos == 0) { 2402c2c66affSColin Finck UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes; 2403c2c66affSColin Finck UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, 2404c2c66affSColin Finck ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length))); 2405c2c66affSColin Finck 2406c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2407c2c66affSColin Finck 2408c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2409c2c66affSColin Finck 2410c2c66affSColin Finck stripeoff[stripe] = readlen; 2411c2c66affSColin Finck pos += readlen; 2412c2c66affSColin Finck 2413c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2414c2c66affSColin Finck 2415c2c66affSColin Finck while (stripe != parity1) { 2416c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2417c2c66affSColin Finck readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2418c2c66affSColin Finck 2419c2c66affSColin Finck if (readlen == 0) 2420c2c66affSColin Finck break; 2421c2c66affSColin Finck 2422c2c66affSColin Finck RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2423c2c66affSColin Finck 2424c2c66affSColin Finck stripeoff[stripe] = readlen; 2425c2c66affSColin Finck pos += readlen; 2426c2c66affSColin Finck 2427c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2428c2c66affSColin Finck } 2429c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) { 2430c2c66affSColin Finck UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2431c2c66affSColin Finck UINT16 parity2 = (parity1 + 1) % ci->num_stripes; 2432c2c66affSColin Finck ULONG k; 2433c2c66affSColin Finck 2434c2c66affSColin Finck while (stripe != parity1) { 2435c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2436c2c66affSColin Finck 2437c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT)); 2438c2c66affSColin Finck 2439c2c66affSColin Finck stripeoff[stripe] += (UINT32)ci->stripe_length; 2440c2c66affSColin Finck pos += (UINT32)ci->stripe_length; 2441c2c66affSColin Finck 2442c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2443c2c66affSColin Finck } 2444c2c66affSColin Finck 2445c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1); 2446c2c66affSColin Finck 2447c2c66affSColin Finck for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2448c2c66affSColin Finck stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy; 2449c2c66affSColin Finck stripeoff[parity1] += PAGE_SIZE; 2450c2c66affSColin Finck } 2451c2c66affSColin Finck 2452c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1); 2453c2c66affSColin Finck 2454c2c66affSColin Finck for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) { 2455c2c66affSColin Finck stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy; 2456c2c66affSColin Finck stripeoff[parity2] += PAGE_SIZE; 2457c2c66affSColin Finck } 2458c2c66affSColin Finck } else { 2459c2c66affSColin Finck UINT16 stripe = (parity1 + 2) % ci->num_stripes; 2460c2c66affSColin Finck UINT32 readlen; 2461c2c66affSColin Finck 2462c2c66affSColin Finck while (pos < length) { 2463c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1); 2464c2c66affSColin Finck readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length)); 2465c2c66affSColin Finck 2466c2c66affSColin Finck if (readlen == 0) 2467c2c66affSColin Finck break; 2468c2c66affSColin Finck 2469c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT); 2470c2c66affSColin Finck 2471c2c66affSColin Finck stripeoff[stripe] += readlen; 2472c2c66affSColin Finck pos += readlen; 2473c2c66affSColin Finck 2474c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes; 2475c2c66affSColin Finck } 2476c2c66affSColin Finck } 2477c2c66affSColin Finck } 2478c2c66affSColin Finck 2479c2c66affSColin Finck MmUnlockPages(master_mdl); 2480c2c66affSColin Finck IoFreeMdl(master_mdl); 2481c2c66affSColin Finck 2482c2c66affSColin Finck ExFreePool(stripeoff); 2483c2c66affSColin Finck } 2484c2c66affSColin Finck 2485c2c66affSColin Finck context.address = addr; 2486c2c66affSColin Finck 2487c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2488c2c66affSColin Finck if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) { 2489c2c66affSColin Finck context.stripes[i].status = ReadDataStatus_MissingDevice; 2490c2c66affSColin Finck context.stripes_left--; 2491c2c66affSColin Finck 2492c2c66affSColin Finck if (!devices[i] || !devices[i]->devobj) 2493c2c66affSColin Finck missing_devices++; 2494c2c66affSColin Finck } 2495c2c66affSColin Finck } 2496c2c66affSColin Finck 2497c2c66affSColin Finck if (missing_devices > allowed_missing) { 2498c2c66affSColin Finck ERR("not enough devices to service request (%u missing)\n", missing_devices); 2499c2c66affSColin Finck Status = STATUS_UNEXPECTED_IO_ERROR; 2500c2c66affSColin Finck goto exit; 2501c2c66affSColin Finck } 2502c2c66affSColin Finck 2503c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2504c2c66affSColin Finck PIO_STACK_LOCATION IrpSp; 2505c2c66affSColin Finck 2506c2c66affSColin Finck if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) { 2507c2c66affSColin Finck context.stripes[i].context = (struct read_data_context*)&context; 2508c2c66affSColin Finck 2509c2c66affSColin Finck if (type == BLOCK_FLAG_RAID10) { 2510c2c66affSColin Finck context.stripes[i].stripenum = i / ci->sub_stripes; 2511c2c66affSColin Finck } 2512c2c66affSColin Finck 2513c2c66affSColin Finck if (!Irp) { 2514c2c66affSColin Finck context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE); 2515c2c66affSColin Finck 2516c2c66affSColin Finck if (!context.stripes[i].Irp) { 2517c2c66affSColin Finck ERR("IoAllocateIrp failed\n"); 2518c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2519c2c66affSColin Finck goto exit; 2520c2c66affSColin Finck } 2521c2c66affSColin Finck } else { 2522c2c66affSColin Finck context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize); 2523c2c66affSColin Finck 2524c2c66affSColin Finck if (!context.stripes[i].Irp) { 2525c2c66affSColin Finck ERR("IoMakeAssociatedIrp failed\n"); 2526c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2527c2c66affSColin Finck goto exit; 2528c2c66affSColin Finck } 2529c2c66affSColin Finck } 2530c2c66affSColin Finck 2531c2c66affSColin Finck IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp); 2532c2c66affSColin Finck IrpSp->MajorFunction = IRP_MJ_READ; 2533c2c66affSColin Finck 2534c2c66affSColin Finck if (devices[i]->devobj->Flags & DO_BUFFERED_IO) { 2535c2c66affSColin Finck context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG); 2536c2c66affSColin Finck if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) { 2537c2c66affSColin Finck ERR("out of memory\n"); 2538c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2539c2c66affSColin Finck goto exit; 2540c2c66affSColin Finck } 2541c2c66affSColin Finck 2542c2c66affSColin Finck context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION; 2543c2c66affSColin Finck 2544c2c66affSColin Finck context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2545c2c66affSColin Finck } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) 2546c2c66affSColin Finck context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl; 2547c2c66affSColin Finck else 2548c2c66affSColin Finck context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority); 2549c2c66affSColin Finck 2550c2c66affSColin Finck IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart); 2551c2c66affSColin Finck IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset; 2552c2c66affSColin Finck 2553c2c66affSColin Finck total_reading += IrpSp->Parameters.Read.Length; 2554c2c66affSColin Finck 2555c2c66affSColin Finck context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb; 2556c2c66affSColin Finck 2557c2c66affSColin Finck IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE); 2558c2c66affSColin Finck 2559c2c66affSColin Finck context.stripes[i].status = ReadDataStatus_Pending; 2560c2c66affSColin Finck } 2561c2c66affSColin Finck } 2562c2c66affSColin Finck 2563c2c66affSColin Finck #ifdef DEBUG_STATS 2564c2c66affSColin Finck if (!is_tree) 2565c2c66affSColin Finck time1 = KeQueryPerformanceCounter(NULL); 2566c2c66affSColin Finck #endif 2567c2c66affSColin Finck 2568c2c66affSColin Finck need_to_wait = FALSE; 2569c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2570c2c66affSColin Finck if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) { 2571c2c66affSColin Finck IoCallDriver(devices[i]->devobj, context.stripes[i].Irp); 2572c2c66affSColin Finck need_to_wait = TRUE; 2573c2c66affSColin Finck } 2574c2c66affSColin Finck } 2575c2c66affSColin Finck 2576c2c66affSColin Finck if (need_to_wait) 2577c2c66affSColin Finck KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL); 2578c2c66affSColin Finck 2579c2c66affSColin Finck #ifdef DEBUG_STATS 2580c2c66affSColin Finck if (!is_tree) { 2581c2c66affSColin Finck time2 = KeQueryPerformanceCounter(NULL); 2582c2c66affSColin Finck 2583c2c66affSColin Finck Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart; 2584c2c66affSColin Finck } 2585c2c66affSColin Finck #endif 2586c2c66affSColin Finck 2587c2c66affSColin Finck if (diskacc) 2588c2c66affSColin Finck fFsRtlUpdateDiskCounters(total_reading, 0); 2589c2c66affSColin Finck 2590c2c66affSColin Finck // check if any of the devices return a "user-induced" error 2591c2c66affSColin Finck 2592c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2593c2c66affSColin Finck if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) { 2594c2c66affSColin Finck Status = context.stripes[i].iosb.Status; 2595c2c66affSColin Finck goto exit; 2596c2c66affSColin Finck } 2597c2c66affSColin Finck } 2598c2c66affSColin Finck 2599c2c66affSColin Finck if (type == BLOCK_FLAG_RAID0) { 2600c2c66affSColin Finck Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2601c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2602c2c66affSColin Finck ERR("read_data_raid0 returned %08x\n", Status); 2603c2c66affSColin Finck 2604c2c66affSColin Finck if (file_read) 2605c2c66affSColin Finck ExFreePool(context.va); 2606c2c66affSColin Finck 2607c2c66affSColin Finck goto exit; 2608c2c66affSColin Finck } 2609c2c66affSColin Finck 2610c2c66affSColin Finck if (file_read) { 2611c2c66affSColin Finck RtlCopyMemory(buf, context.va, length); 2612c2c66affSColin Finck ExFreePool(context.va); 2613c2c66affSColin Finck } 2614c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID10) { 2615c2c66affSColin Finck Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset); 2616c2c66affSColin Finck 2617c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2618c2c66affSColin Finck ERR("read_data_raid10 returned %08x\n", Status); 2619c2c66affSColin Finck 2620c2c66affSColin Finck if (file_read) 2621c2c66affSColin Finck ExFreePool(context.va); 2622c2c66affSColin Finck 2623c2c66affSColin Finck goto exit; 2624c2c66affSColin Finck } 2625c2c66affSColin Finck 2626c2c66affSColin Finck if (file_read) { 2627c2c66affSColin Finck RtlCopyMemory(buf, context.va, length); 2628c2c66affSColin Finck ExFreePool(context.va); 2629c2c66affSColin Finck } 2630c2c66affSColin Finck } else if (type == BLOCK_FLAG_DUPLICATE) { 2631c2c66affSColin Finck Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation); 2632c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2633c2c66affSColin Finck ERR("read_data_dup returned %08x\n", Status); 2634c2c66affSColin Finck 2635c2c66affSColin Finck if (file_read) 2636c2c66affSColin Finck ExFreePool(context.va); 2637c2c66affSColin Finck 2638c2c66affSColin Finck goto exit; 2639c2c66affSColin Finck } 2640c2c66affSColin Finck 2641c2c66affSColin Finck if (file_read) { 2642c2c66affSColin Finck RtlCopyMemory(buf, context.va, length); 2643c2c66affSColin Finck ExFreePool(context.va); 2644c2c66affSColin Finck } 2645c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID5) { 2646c2c66affSColin Finck Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE); 2647c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2648c2c66affSColin Finck ERR("read_data_raid5 returned %08x\n", Status); 2649c2c66affSColin Finck 2650c2c66affSColin Finck if (file_read) 2651c2c66affSColin Finck ExFreePool(context.va); 2652c2c66affSColin Finck 2653c2c66affSColin Finck goto exit; 2654c2c66affSColin Finck } 2655c2c66affSColin Finck 2656c2c66affSColin Finck if (file_read) { 2657c2c66affSColin Finck RtlCopyMemory(buf, context.va, length); 2658c2c66affSColin Finck ExFreePool(context.va); 2659c2c66affSColin Finck } 2660c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID6) { 2661c2c66affSColin Finck Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE); 2662c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2663c2c66affSColin Finck ERR("read_data_raid6 returned %08x\n", Status); 2664c2c66affSColin Finck 2665c2c66affSColin Finck if (file_read) 2666c2c66affSColin Finck ExFreePool(context.va); 2667c2c66affSColin Finck 2668c2c66affSColin Finck goto exit; 2669c2c66affSColin Finck } 2670c2c66affSColin Finck 2671c2c66affSColin Finck if (file_read) { 2672c2c66affSColin Finck RtlCopyMemory(buf, context.va, length); 2673c2c66affSColin Finck ExFreePool(context.va); 2674c2c66affSColin Finck } 2675c2c66affSColin Finck } 2676c2c66affSColin Finck 2677c2c66affSColin Finck exit: 2678c2c66affSColin Finck if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) 2679c2c66affSColin Finck chunk_unlock_range(Vcb, c, lockaddr, locklen); 2680c2c66affSColin Finck 2681c2c66affSColin Finck if (dummy_mdl) 2682c2c66affSColin Finck IoFreeMdl(dummy_mdl); 2683c2c66affSColin Finck 2684c2c66affSColin Finck if (dummypage) 2685c2c66affSColin Finck ExFreePool(dummypage); 2686c2c66affSColin Finck 2687c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) { 2688c2c66affSColin Finck if (context.stripes[i].mdl) { 2689c2c66affSColin Finck if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED) 2690c2c66affSColin Finck MmUnlockPages(context.stripes[i].mdl); 2691c2c66affSColin Finck 2692c2c66affSColin Finck IoFreeMdl(context.stripes[i].mdl); 2693c2c66affSColin Finck } 2694c2c66affSColin Finck 2695c2c66affSColin Finck if (context.stripes[i].Irp) 2696c2c66affSColin Finck IoFreeIrp(context.stripes[i].Irp); 2697c2c66affSColin Finck } 2698c2c66affSColin Finck 2699c2c66affSColin Finck ExFreePool(context.stripes); 2700c2c66affSColin Finck 2701c2c66affSColin Finck if (!Vcb->log_to_phys_loaded) 2702c2c66affSColin Finck ExFreePool(devices); 2703c2c66affSColin Finck 2704c2c66affSColin Finck return Status; 2705c2c66affSColin Finck } 2706c2c66affSColin Finck 2707c2c66affSColin Finck NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) { 2708c2c66affSColin Finck ULONG readlen; 2709c2c66affSColin Finck 2710c2c66affSColin Finck TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr); 2711c2c66affSColin Finck 2712c2c66affSColin Finck if (pbr) *pbr = 0; 2713c2c66affSColin Finck 2714c2c66affSColin Finck if (start >= fcb->adsdata.Length) { 2715c2c66affSColin Finck TRACE("tried to read beyond end of stream\n"); 2716c2c66affSColin Finck return STATUS_END_OF_FILE; 2717c2c66affSColin Finck } 2718c2c66affSColin Finck 2719c2c66affSColin Finck if (length == 0) { 2720c2c66affSColin Finck WARN("tried to read zero bytes\n"); 2721c2c66affSColin Finck return STATUS_SUCCESS; 2722c2c66affSColin Finck } 2723c2c66affSColin Finck 2724c2c66affSColin Finck if (start + length < fcb->adsdata.Length) 2725c2c66affSColin Finck readlen = length; 2726c2c66affSColin Finck else 2727c2c66affSColin Finck readlen = fcb->adsdata.Length - (ULONG)start; 2728c2c66affSColin Finck 2729c2c66affSColin Finck if (readlen > 0) 2730c2c66affSColin Finck RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen); 2731c2c66affSColin Finck 2732c2c66affSColin Finck if (pbr) *pbr = readlen; 2733c2c66affSColin Finck 2734c2c66affSColin Finck return STATUS_SUCCESS; 2735c2c66affSColin Finck } 2736c2c66affSColin Finck 2737c2c66affSColin Finck NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) { 2738c2c66affSColin Finck NTSTATUS Status; 2739c2c66affSColin Finck EXTENT_DATA* ed; 2740c2c66affSColin Finck UINT32 bytes_read = 0; 2741c2c66affSColin Finck UINT64 last_end; 2742c2c66affSColin Finck LIST_ENTRY* le; 2743c2c66affSColin Finck #ifdef DEBUG_STATS 2744c2c66affSColin Finck LARGE_INTEGER time1, time2; 2745c2c66affSColin Finck #endif 2746c2c66affSColin Finck 2747c2c66affSColin Finck TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr); 2748c2c66affSColin Finck 2749c2c66affSColin Finck if (pbr) 2750c2c66affSColin Finck *pbr = 0; 2751c2c66affSColin Finck 2752c2c66affSColin Finck if (start >= fcb->inode_item.st_size) { 2753c2c66affSColin Finck WARN("Tried to read beyond end of file\n"); 2754c2c66affSColin Finck Status = STATUS_END_OF_FILE; 2755c2c66affSColin Finck goto exit; 2756c2c66affSColin Finck } 2757c2c66affSColin Finck 2758c2c66affSColin Finck #ifdef DEBUG_STATS 2759c2c66affSColin Finck time1 = KeQueryPerformanceCounter(NULL); 2760c2c66affSColin Finck #endif 2761c2c66affSColin Finck 2762c2c66affSColin Finck le = fcb->extents.Flink; 2763c2c66affSColin Finck 2764c2c66affSColin Finck last_end = start; 2765c2c66affSColin Finck 2766c2c66affSColin Finck while (le != &fcb->extents) { 2767c2c66affSColin Finck UINT64 len; 2768c2c66affSColin Finck extent* ext = CONTAINING_RECORD(le, extent, list_entry); 2769c2c66affSColin Finck EXTENT_DATA2* ed2; 2770c2c66affSColin Finck 2771c2c66affSColin Finck if (!ext->ignore) { 2772c2c66affSColin Finck ed = &ext->extent_data; 2773c2c66affSColin Finck 2774c2c66affSColin Finck ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL; 2775c2c66affSColin Finck 2776c2c66affSColin Finck len = ed2 ? ed2->num_bytes : ed->decoded_size; 2777c2c66affSColin Finck 2778c2c66affSColin Finck if (ext->offset + len <= start) { 2779c2c66affSColin Finck last_end = ext->offset + len; 2780c2c66affSColin Finck goto nextitem; 2781c2c66affSColin Finck } 2782c2c66affSColin Finck 2783c2c66affSColin Finck if (ext->offset > last_end && ext->offset > start + bytes_read) { 2784c2c66affSColin Finck UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end)); 2785c2c66affSColin Finck 2786c2c66affSColin Finck RtlZeroMemory(data + bytes_read, read); 2787c2c66affSColin Finck bytes_read += read; 2788c2c66affSColin Finck length -= read; 2789c2c66affSColin Finck } 2790c2c66affSColin Finck 2791c2c66affSColin Finck if (length == 0 || ext->offset > start + bytes_read + length) 2792c2c66affSColin Finck break; 2793c2c66affSColin Finck 2794c2c66affSColin Finck if (ed->encryption != BTRFS_ENCRYPTION_NONE) { 2795c2c66affSColin Finck WARN("Encryption not supported\n"); 2796c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED; 2797c2c66affSColin Finck goto exit; 2798c2c66affSColin Finck } 2799c2c66affSColin Finck 2800c2c66affSColin Finck if (ed->encoding != BTRFS_ENCODING_NONE) { 2801c2c66affSColin Finck WARN("Other encodings not supported\n"); 2802c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED; 2803c2c66affSColin Finck goto exit; 2804c2c66affSColin Finck } 2805c2c66affSColin Finck 2806c2c66affSColin Finck switch (ed->type) { 2807c2c66affSColin Finck case EXTENT_TYPE_INLINE: 2808c2c66affSColin Finck { 2809c2c66affSColin Finck UINT64 off = start + bytes_read - ext->offset; 2810c2c66affSColin Finck UINT32 read; 2811c2c66affSColin Finck 2812c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_NONE) { 2813c2c66affSColin Finck read = (UINT32)min(min(len, ext->datalen) - off, length); 2814c2c66affSColin Finck 2815c2c66affSColin Finck RtlCopyMemory(data + bytes_read, &ed->data[off], read); 2816*eb7fbc25SPierre Schweitzer } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) { 2817c2c66affSColin Finck UINT8* decomp; 2818c2c66affSColin Finck BOOL decomp_alloc; 2819c2c66affSColin Finck UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]); 2820c2c66affSColin Finck 2821c2c66affSColin Finck if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) { 2822c2c66affSColin Finck ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size); 2823c2c66affSColin Finck Status = STATUS_INTERNAL_ERROR; 2824c2c66affSColin Finck goto exit; 2825c2c66affSColin Finck } 2826c2c66affSColin Finck 2827c2c66affSColin Finck read = (UINT32)min(ed->decoded_size - off, length); 2828c2c66affSColin Finck 2829c2c66affSColin Finck if (off > 0) { 2830c2c66affSColin Finck decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG); 2831c2c66affSColin Finck if (!decomp) { 2832c2c66affSColin Finck ERR("out of memory\n"); 2833c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2834c2c66affSColin Finck goto exit; 2835c2c66affSColin Finck } 2836c2c66affSColin Finck 2837c2c66affSColin Finck decomp_alloc = TRUE; 2838c2c66affSColin Finck } else { 2839c2c66affSColin Finck decomp = data + bytes_read; 2840c2c66affSColin Finck decomp_alloc = FALSE; 2841c2c66affSColin Finck } 2842c2c66affSColin Finck 2843c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_ZLIB) { 2844c2c66affSColin Finck Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off)); 2845c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2846c2c66affSColin Finck ERR("zlib_decompress returned %08x\n", Status); 2847c2c66affSColin Finck if (decomp_alloc) ExFreePool(decomp); 2848c2c66affSColin Finck goto exit; 2849c2c66affSColin Finck } 2850c2c66affSColin Finck } else if (ed->compression == BTRFS_COMPRESSION_LZO) { 2851c2c66affSColin Finck if (inlen < sizeof(UINT32)) { 2852c2c66affSColin Finck ERR("extent data was truncated\n"); 2853c2c66affSColin Finck Status = STATUS_INTERNAL_ERROR; 2854c2c66affSColin Finck if (decomp_alloc) ExFreePool(decomp); 2855c2c66affSColin Finck goto exit; 2856c2c66affSColin Finck } else 2857c2c66affSColin Finck inlen -= sizeof(UINT32); 2858c2c66affSColin Finck 2859c2c66affSColin Finck Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32)); 2860c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2861c2c66affSColin Finck ERR("lzo_decompress returned %08x\n", Status); 2862c2c66affSColin Finck if (decomp_alloc) ExFreePool(decomp); 2863c2c66affSColin Finck goto exit; 2864c2c66affSColin Finck } 2865*eb7fbc25SPierre Schweitzer } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) { 2866*eb7fbc25SPierre Schweitzer Status = zstd_decompress(ed->data, inlen, decomp, (UINT32)(read + off)); 2867*eb7fbc25SPierre Schweitzer if (!NT_SUCCESS(Status)) { 2868*eb7fbc25SPierre Schweitzer ERR("zstd_decompress returned %08x\n", Status); 2869*eb7fbc25SPierre Schweitzer if (decomp_alloc) ExFreePool(decomp); 2870*eb7fbc25SPierre Schweitzer goto exit; 2871*eb7fbc25SPierre Schweitzer } 2872c2c66affSColin Finck } 2873c2c66affSColin Finck 2874c2c66affSColin Finck if (decomp_alloc) { 2875c2c66affSColin Finck RtlCopyMemory(data + bytes_read, decomp + off, read); 2876c2c66affSColin Finck ExFreePool(decomp); 2877c2c66affSColin Finck } 2878c2c66affSColin Finck } else { 2879c2c66affSColin Finck ERR("unhandled compression type %x\n", ed->compression); 2880c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED; 2881c2c66affSColin Finck goto exit; 2882c2c66affSColin Finck } 2883c2c66affSColin Finck 2884c2c66affSColin Finck bytes_read += read; 2885c2c66affSColin Finck length -= read; 2886c2c66affSColin Finck 2887c2c66affSColin Finck break; 2888c2c66affSColin Finck } 2889c2c66affSColin Finck 2890c2c66affSColin Finck case EXTENT_TYPE_REGULAR: 2891c2c66affSColin Finck { 2892c2c66affSColin Finck UINT64 off = start + bytes_read - ext->offset; 2893c2c66affSColin Finck UINT32 to_read, read; 2894c2c66affSColin Finck UINT8* buf; 2895c2c66affSColin Finck BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE; 2896c2c66affSColin Finck BOOL buf_free; 2897c2c66affSColin Finck UINT32 bumpoff = 0, *csum; 2898c2c66affSColin Finck UINT64 addr; 2899c2c66affSColin Finck chunk* c; 2900c2c66affSColin Finck 2901c2c66affSColin Finck read = (UINT32)(len - off); 2902c2c66affSColin Finck if (read > length) read = (UINT32)length; 2903c2c66affSColin Finck 2904c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_NONE) { 2905c2c66affSColin Finck addr = ed2->address + ed2->offset + off; 2906c2c66affSColin Finck to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size); 2907c2c66affSColin Finck 2908c2c66affSColin Finck if (addr % fcb->Vcb->superblock.sector_size > 0) { 2909c2c66affSColin Finck bumpoff = addr % fcb->Vcb->superblock.sector_size; 2910c2c66affSColin Finck addr -= bumpoff; 2911c2c66affSColin Finck to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size); 2912c2c66affSColin Finck } 2913c2c66affSColin Finck } else { 2914c2c66affSColin Finck addr = ed2->address; 2915c2c66affSColin Finck to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size); 2916c2c66affSColin Finck } 2917c2c66affSColin Finck 2918c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 && 2919c2c66affSColin Finck length % fcb->Vcb->superblock.sector_size == 0) { 2920c2c66affSColin Finck buf = data + bytes_read; 2921c2c66affSColin Finck buf_free = FALSE; 2922c2c66affSColin Finck } else { 2923c2c66affSColin Finck buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG); 2924c2c66affSColin Finck buf_free = TRUE; 2925c2c66affSColin Finck 2926c2c66affSColin Finck if (!buf) { 2927c2c66affSColin Finck ERR("out of memory\n"); 2928c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 2929c2c66affSColin Finck goto exit; 2930c2c66affSColin Finck } 2931c2c66affSColin Finck 2932c2c66affSColin Finck mdl = FALSE; 2933c2c66affSColin Finck } 2934c2c66affSColin Finck 2935c2c66affSColin Finck c = get_chunk_from_address(fcb->Vcb, addr); 2936c2c66affSColin Finck 2937c2c66affSColin Finck if (!c) { 2938c2c66affSColin Finck ERR("get_chunk_from_address(%llx) failed\n", addr); 2939c2c66affSColin Finck 2940c2c66affSColin Finck if (buf_free) 2941c2c66affSColin Finck ExFreePool(buf); 2942c2c66affSColin Finck 2943c2c66affSColin Finck goto exit; 2944c2c66affSColin Finck } 2945c2c66affSColin Finck 2946c2c66affSColin Finck if (ext->csum) { 2947c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_NONE) 2948c2c66affSColin Finck csum = &ext->csum[off / fcb->Vcb->superblock.sector_size]; 2949c2c66affSColin Finck else 2950c2c66affSColin Finck csum = ext->csum; 2951c2c66affSColin Finck } else 2952c2c66affSColin Finck csum = NULL; 2953c2c66affSColin Finck 2954c2c66affSColin Finck Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl, 2955c2c66affSColin Finck fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 2956c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 2957c2c66affSColin Finck ERR("read_data returned %08x\n", Status); 2958c2c66affSColin Finck 2959c2c66affSColin Finck if (buf_free) 2960c2c66affSColin Finck ExFreePool(buf); 2961c2c66affSColin Finck 2962c2c66affSColin Finck goto exit; 2963c2c66affSColin Finck } 2964c2c66affSColin Finck 2965c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_NONE) { 2966c2c66affSColin Finck if (buf_free) 2967c2c66affSColin Finck RtlCopyMemory(data + bytes_read, buf + bumpoff, read); 2968c2c66affSColin Finck } else { 2969c2c66affSColin Finck UINT8 *decomp = NULL, *buf2; 2970c2c66affSColin Finck ULONG outlen, inlen, off2; 2971c2c66affSColin Finck UINT32 inpageoff = 0; 2972c2c66affSColin Finck 2973c2c66affSColin Finck off2 = (ULONG)(ed2->offset + off); 2974c2c66affSColin Finck buf2 = buf; 2975c2c66affSColin Finck inlen = (ULONG)ed2->size; 2976c2c66affSColin Finck 2977c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_LZO) { 2978c2c66affSColin Finck ULONG inoff = sizeof(UINT32); 2979c2c66affSColin Finck 2980c2c66affSColin Finck inlen -= sizeof(UINT32); 2981c2c66affSColin Finck 2982c2c66affSColin Finck // If reading a few sectors in, skip to the interesting bit 2983c2c66affSColin Finck while (off2 > LINUX_PAGE_SIZE) { 2984c2c66affSColin Finck UINT32 partlen; 2985c2c66affSColin Finck 2986c2c66affSColin Finck if (inlen < sizeof(UINT32)) 2987c2c66affSColin Finck break; 2988c2c66affSColin Finck 2989c2c66affSColin Finck partlen = *(UINT32*)(buf2 + inoff); 2990c2c66affSColin Finck 2991c2c66affSColin Finck if (partlen < inlen) { 2992c2c66affSColin Finck off2 -= LINUX_PAGE_SIZE; 2993c2c66affSColin Finck inoff += partlen + sizeof(UINT32); 2994c2c66affSColin Finck inlen -= partlen + sizeof(UINT32); 2995c2c66affSColin Finck 2996c2c66affSColin Finck if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32)) 2997c2c66affSColin Finck inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE; 2998c2c66affSColin Finck } else 2999c2c66affSColin Finck break; 3000c2c66affSColin Finck } 3001c2c66affSColin Finck 3002c2c66affSColin Finck buf2 = &buf2[inoff]; 3003c2c66affSColin Finck inpageoff = inoff % LINUX_PAGE_SIZE; 3004c2c66affSColin Finck } 3005c2c66affSColin Finck 3006c2c66affSColin Finck if (off2 != 0) { 3007c2c66affSColin Finck outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off)); 3008c2c66affSColin Finck 3009c2c66affSColin Finck decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG); 3010c2c66affSColin Finck if (!decomp) { 3011c2c66affSColin Finck ERR("out of memory\n"); 3012c2c66affSColin Finck ExFreePool(buf); 3013c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES; 3014c2c66affSColin Finck goto exit; 3015c2c66affSColin Finck } 3016c2c66affSColin Finck } else 3017c2c66affSColin Finck outlen = min(read, (UINT32)(ed2->num_bytes - off)); 3018c2c66affSColin Finck 3019c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_ZLIB) { 3020c2c66affSColin Finck Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen); 3021c2c66affSColin Finck 3022c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 3023c2c66affSColin Finck ERR("zlib_decompress returned %08x\n", Status); 3024c2c66affSColin Finck ExFreePool(buf); 3025c2c66affSColin Finck 3026c2c66affSColin Finck if (decomp) 3027c2c66affSColin Finck ExFreePool(decomp); 3028c2c66affSColin Finck 3029c2c66affSColin Finck goto exit; 3030c2c66affSColin Finck } 3031c2c66affSColin Finck } else if (ed->compression == BTRFS_COMPRESSION_LZO) { 3032c2c66affSColin Finck Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff); 3033c2c66affSColin Finck 3034c2c66affSColin Finck if (!NT_SUCCESS(Status)) { 3035c2c66affSColin Finck ERR("lzo_decompress returned %08x\n", Status); 3036c2c66affSColin Finck ExFreePool(buf); 3037c2c66affSColin Finck 3038c2c66affSColin Finck if (decomp) 3039c2c66affSColin Finck ExFreePool(decomp); 3040c2c66affSColin Finck 3041c2c66affSColin Finck goto exit; 3042c2c66affSColin Finck } 3043*eb7fbc25SPierre Schweitzer } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) { 3044*eb7fbc25SPierre Schweitzer Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen); 3045*eb7fbc25SPierre Schweitzer 3046*eb7fbc25SPierre Schweitzer if (!NT_SUCCESS(Status)) { 3047*eb7fbc25SPierre Schweitzer ERR("zstd_decompress returned %08x\n", Status); 3048*eb7fbc25SPierre Schweitzer ExFreePool(buf); 3049*eb7fbc25SPierre Schweitzer 3050*eb7fbc25SPierre Schweitzer if (decomp) 3051*eb7fbc25SPierre Schweitzer ExFreePool(decomp); 3052*eb7fbc25SPierre Schweitzer 3053*eb7fbc25SPierre Schweitzer goto exit; 3054*eb7fbc25SPierre Schweitzer } 3055c2c66affSColin Finck } else { 3056c2c66affSColin Finck ERR("unsupported compression type %x\n", ed->compression); 3057c2c66affSColin Finck Status = STATUS_NOT_SUPPORTED; 3058c2c66affSColin Finck 3059c2c66affSColin Finck ExFreePool(buf); 3060c2c66affSColin Finck 3061c2c66affSColin Finck if (decomp) 3062c2c66affSColin Finck ExFreePool(decomp); 3063c2c66affSColin Finck 3064c2c66affSColin Finck goto exit; 3065c2c66affSColin Finck } 3066c2c66affSColin Finck 3067c2c66affSColin Finck if (decomp) { 3068c2c66affSColin Finck RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off)); 3069c2c66affSColin Finck ExFreePool(decomp); 3070c2c66affSColin Finck } 3071c2c66affSColin Finck } 3072c2c66affSColin Finck 3073c2c66affSColin Finck if (buf_free) 3074c2c66affSColin Finck ExFreePool(buf); 3075c2c66affSColin Finck 3076c2c66affSColin Finck bytes_read += read; 3077c2c66affSColin Finck length -= read; 3078c2c66affSColin Finck 3079c2c66affSColin Finck break; 3080c2c66affSColin Finck } 3081c2c66affSColin Finck 3082c2c66affSColin Finck case EXTENT_TYPE_PREALLOC: 3083c2c66affSColin Finck { 3084c2c66affSColin Finck UINT64 off = start + bytes_read - ext->offset; 3085c2c66affSColin Finck UINT32 read = (UINT32)(len - off); 3086c2c66affSColin Finck 3087c2c66affSColin Finck if (read > length) read = (UINT32)length; 3088c2c66affSColin Finck 3089c2c66affSColin Finck RtlZeroMemory(data + bytes_read, read); 3090c2c66affSColin Finck 3091c2c66affSColin Finck bytes_read += read; 3092c2c66affSColin Finck length -= read; 3093c2c66affSColin Finck 3094c2c66affSColin Finck break; 3095c2c66affSColin Finck } 3096c2c66affSColin Finck 3097c2c66affSColin Finck default: 3098c2c66affSColin Finck WARN("Unsupported extent data type %u\n", ed->type); 3099c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED; 3100c2c66affSColin Finck goto exit; 3101c2c66affSColin Finck } 3102c2c66affSColin Finck 3103c2c66affSColin Finck last_end = ext->offset + len; 3104c2c66affSColin Finck 3105c2c66affSColin Finck if (length == 0) 3106c2c66affSColin Finck break; 3107c2c66affSColin Finck } 3108c2c66affSColin Finck 3109c2c66affSColin Finck nextitem: 3110c2c66affSColin Finck le = le->Flink; 3111c2c66affSColin Finck } 3112c2c66affSColin Finck 3113c2c66affSColin Finck if (length > 0 && start + bytes_read < fcb->inode_item.st_size) { 3114c2c66affSColin Finck UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length); 3115c2c66affSColin Finck 3116c2c66affSColin Finck RtlZeroMemory(data + bytes_read, read); 3117c2c66affSColin Finck 3118c2c66affSColin Finck bytes_read += read; 3119c2c66affSColin Finck length -= read; 3120c2c66affSColin Finck } 3121c2c66affSColin Finck 3122c2c66affSColin Finck Status = STATUS_SUCCESS; 3123c2c66affSColin Finck if (pbr) 3124c2c66affSColin Finck *pbr = bytes_read; 3125c2c66affSColin Finck 3126c2c66affSColin Finck #ifdef DEBUG_STATS 3127c2c66affSColin Finck time2 = KeQueryPerformanceCounter(NULL); 3128c2c66affSColin Finck 3129c2c66affSColin Finck fcb->Vcb->stats.num_reads++; 3130c2c66affSColin Finck fcb->Vcb->stats.data_read += bytes_read; 3131c2c66affSColin Finck fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart; 3132c2c66affSColin Finck #endif 3133c2c66affSColin Finck 3134c2c66affSColin Finck exit: 3135c2c66affSColin Finck return Status; 3136c2c66affSColin Finck } 3137c2c66affSColin Finck 3138c2c66affSColin Finck NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) { 3139c2c66affSColin Finck PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3140c2c66affSColin Finck PFILE_OBJECT FileObject = IrpSp->FileObject; 3141c2c66affSColin Finck fcb* fcb = FileObject->FsContext; 3142c2c66affSColin Finck UINT8* data = NULL; 3143c2c66affSColin Finck ULONG length = IrpSp->Parameters.Read.Length, addon = 0; 3144c2c66affSColin Finck UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart; 3145c2c66affSColin Finck 3146c2c66affSColin Finck *bytes_read = 0; 3147c2c66affSColin Finck 3148c2c66affSColin Finck if (!fcb || !fcb->Vcb || !fcb->subvol) 3149c2c66affSColin Finck return STATUS_INTERNAL_ERROR; 3150c2c66affSColin Finck 3151c2c66affSColin Finck TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb); 3152c2c66affSColin Finck TRACE("offset = %llx, length = %x\n", start, length); 3153c2c66affSColin Finck TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE"); 3154c2c66affSColin Finck 3155c2c66affSColin Finck if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY) 3156c2c66affSColin Finck return STATUS_INVALID_DEVICE_REQUEST; 3157c2c66affSColin Finck 3158c2c66affSColin Finck if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) { 3159c2c66affSColin Finck WARN("tried to read locked region\n"); 3160c2c66affSColin Finck return STATUS_FILE_LOCK_CONFLICT; 3161c2c66affSColin Finck } 3162c2c66affSColin Finck 3163c2c66affSColin Finck if (length == 0) { 3164c2c66affSColin Finck TRACE("tried to read zero bytes\n"); 3165c2c66affSColin Finck return STATUS_SUCCESS; 3166c2c66affSColin Finck } 3167c2c66affSColin Finck 3168c2c66affSColin Finck if (start >= (UINT64)fcb->Header.FileSize.QuadPart) { 3169c2c66affSColin Finck TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart); 3170c2c66affSColin Finck return STATUS_END_OF_FILE; 3171c2c66affSColin Finck } 3172c2c66affSColin Finck 3173c2c66affSColin Finck TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size); 3174c2c66affSColin Finck 3175c2c66affSColin Finck if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) { 3176c2c66affSColin Finck data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority); 3177c2c66affSColin Finck 3178c2c66affSColin Finck if (Irp->MdlAddress && !data) { 3179c2c66affSColin Finck ERR("MmGetSystemAddressForMdlSafe returned NULL\n"); 3180c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES; 3181c2c66affSColin Finck } 3182c2c66affSColin Finck 3183c2c66affSColin Finck if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) { 3184c2c66affSColin Finck length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3185c2c66affSColin Finck RtlZeroMemory(data, length); 3186c2c66affSColin Finck Irp->IoStatus.Information = *bytes_read = length; 3187c2c66affSColin Finck return STATUS_SUCCESS; 3188c2c66affSColin Finck } 3189c2c66affSColin Finck 3190c2c66affSColin Finck if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) { 3191c2c66affSColin Finck addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart); 3192c2c66affSColin Finck RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon); 3193c2c66affSColin Finck length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start); 3194c2c66affSColin Finck } 3195c2c66affSColin Finck } 3196c2c66affSColin Finck 3197c2c66affSColin Finck if (!(Irp->Flags & IRP_NOCACHE)) { 3198c2c66affSColin Finck NTSTATUS Status = STATUS_SUCCESS; 3199c2c66affSColin Finck 3200c2c66affSColin Finck _SEH2_TRY { 3201c2c66affSColin Finck if (!FileObject->PrivateCacheMap) { 3202c2c66affSColin Finck CC_FILE_SIZES ccfs; 3203c2c66affSColin Finck 3204c2c66affSColin Finck ccfs.AllocationSize = fcb->Header.AllocationSize; 3205c2c66affSColin Finck ccfs.FileSize = fcb->Header.FileSize; 3206c2c66affSColin Finck ccfs.ValidDataLength = fcb->Header.ValidDataLength; 3207c2c66affSColin Finck 3208c2c66affSColin Finck init_file_cache(FileObject, &ccfs); 3209c2c66affSColin Finck } 3210c2c66affSColin Finck 3211c2c66affSColin Finck if (IrpSp->MinorFunction & IRP_MN_MDL) { 3212c2c66affSColin Finck CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus); 3213c2c66affSColin Finck } else { 3214c2c66affSColin Finck if (fCcCopyReadEx) { 3215c2c66affSColin Finck TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, 3216c2c66affSColin Finck length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread); 3217c2c66affSColin Finck TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); 3218c2c66affSColin Finck if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) { 3219c2c66affSColin Finck TRACE("CcCopyReadEx could not wait\n"); 3220c2c66affSColin Finck 3221c2c66affSColin Finck IoMarkIrpPending(Irp); 3222c2c66affSColin Finck return STATUS_PENDING; 3223c2c66affSColin Finck } 3224c2c66affSColin Finck TRACE("CcCopyReadEx finished\n"); 3225c2c66affSColin Finck } else { 3226c2c66affSColin Finck TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus); 3227c2c66affSColin Finck TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength); 3228c2c66affSColin Finck if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) { 3229c2c66affSColin Finck TRACE("CcCopyRead could not wait\n"); 3230c2c66affSColin Finck 3231c2c66affSColin Finck IoMarkIrpPending(Irp); 3232c2c66affSColin Finck return STATUS_PENDING; 3233c2c66affSColin Finck } 3234c2c66affSColin Finck TRACE("CcCopyRead finished\n"); 3235c2c66affSColin Finck } 3236c2c66affSColin Finck } 3237c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) { 3238c2c66affSColin Finck Status = _SEH2_GetExceptionCode(); 3239c2c66affSColin Finck } _SEH2_END; 3240c2c66affSColin Finck 3241c2c66affSColin Finck if (NT_SUCCESS(Status)) { 3242c2c66affSColin Finck Status = Irp->IoStatus.Status; 3243c2c66affSColin Finck Irp->IoStatus.Information += addon; 3244c2c66affSColin Finck *bytes_read = (ULONG)Irp->IoStatus.Information; 3245c2c66affSColin Finck } else 3246c2c66affSColin Finck ERR("EXCEPTION - %08x\n", Status); 3247c2c66affSColin Finck 3248c2c66affSColin Finck return Status; 3249c2c66affSColin Finck } else { 3250c2c66affSColin Finck NTSTATUS Status; 3251c2c66affSColin Finck 3252c2c66affSColin Finck if (!wait) { 3253c2c66affSColin Finck IoMarkIrpPending(Irp); 3254c2c66affSColin Finck return STATUS_PENDING; 3255c2c66affSColin Finck } 3256c2c66affSColin Finck 3257c2c66affSColin Finck if (fcb->ads) 3258c2c66affSColin Finck Status = read_stream(fcb, data, start, length, bytes_read); 3259c2c66affSColin Finck else 3260c2c66affSColin Finck Status = read_file(fcb, data, start, length, bytes_read, Irp); 3261c2c66affSColin Finck 3262c2c66affSColin Finck *bytes_read += addon; 3263c2c66affSColin Finck TRACE("read %u bytes\n", *bytes_read); 3264c2c66affSColin Finck 3265c2c66affSColin Finck Irp->IoStatus.Information = *bytes_read; 3266c2c66affSColin Finck 3267c2c66affSColin Finck if (diskacc && Status != STATUS_PENDING) { 3268c2c66affSColin Finck PETHREAD thread = NULL; 3269c2c66affSColin Finck 3270c2c66affSColin Finck if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread)) 3271c2c66affSColin Finck thread = Irp->Tail.Overlay.Thread; 3272c2c66affSColin Finck else if (!IoIsSystemThread(PsGetCurrentThread())) 3273c2c66affSColin Finck thread = PsGetCurrentThread(); 3274c2c66affSColin Finck else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp) 3275c2c66affSColin Finck thread = PsGetCurrentThread(); 3276c2c66affSColin Finck 3277c2c66affSColin Finck if (thread) 3278c2c66affSColin Finck fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0); 3279c2c66affSColin Finck } 3280c2c66affSColin Finck 3281c2c66affSColin Finck return Status; 3282c2c66affSColin Finck } 3283c2c66affSColin Finck } 3284c2c66affSColin Finck 3285c2c66affSColin Finck _Dispatch_type_(IRP_MJ_READ) 3286c2c66affSColin Finck _Function_class_(DRIVER_DISPATCH) 32876c75215bSMark Jansen NTSTATUS NTAPI drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) { 3288c2c66affSColin Finck device_extension* Vcb = DeviceObject->DeviceExtension; 3289c2c66affSColin Finck PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp); 3290c2c66affSColin Finck PFILE_OBJECT FileObject = IrpSp->FileObject; 3291c2c66affSColin Finck ULONG bytes_read = 0; 3292c2c66affSColin Finck NTSTATUS Status; 3293c2c66affSColin Finck BOOL top_level; 3294c2c66affSColin Finck fcb* fcb; 3295c2c66affSColin Finck ccb* ccb; 3296c2c66affSColin Finck BOOLEAN fcb_lock = FALSE, wait; 3297c2c66affSColin Finck 3298c2c66affSColin Finck FsRtlEnterFileSystem(); 3299c2c66affSColin Finck 3300c2c66affSColin Finck top_level = is_top_level(Irp); 3301c2c66affSColin Finck 3302c2c66affSColin Finck TRACE("read\n"); 3303c2c66affSColin Finck 3304c2c66affSColin Finck if (Vcb && Vcb->type == VCB_TYPE_VOLUME) { 3305c2c66affSColin Finck Status = vol_read(DeviceObject, Irp); 3306c2c66affSColin Finck goto exit2; 3307c2c66affSColin Finck } else if (!Vcb || Vcb->type != VCB_TYPE_FS) { 3308c2c66affSColin Finck Status = STATUS_INVALID_PARAMETER; 3309c2c66affSColin Finck goto end; 3310c2c66affSColin Finck } 3311c2c66affSColin Finck 3312c2c66affSColin Finck Irp->IoStatus.Information = 0; 3313c2c66affSColin Finck 3314c2c66affSColin Finck if (IrpSp->MinorFunction & IRP_MN_COMPLETE) { 3315c2c66affSColin Finck CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress); 3316c2c66affSColin Finck 3317c2c66affSColin Finck Irp->MdlAddress = NULL; 3318c2c66affSColin Finck Status = STATUS_SUCCESS; 3319c2c66affSColin Finck 3320c2c66affSColin Finck goto exit; 3321c2c66affSColin Finck } 3322c2c66affSColin Finck 3323c2c66affSColin Finck fcb = FileObject->FsContext; 3324c2c66affSColin Finck 3325c2c66affSColin Finck if (!fcb) { 3326c2c66affSColin Finck ERR("fcb was NULL\n"); 3327c2c66affSColin Finck Status = STATUS_INVALID_PARAMETER; 3328c2c66affSColin Finck goto exit; 3329c2c66affSColin Finck } 3330c2c66affSColin Finck 3331c2c66affSColin Finck ccb = FileObject->FsContext2; 3332c2c66affSColin Finck 3333c2c66affSColin Finck if (!ccb) { 3334c2c66affSColin Finck ERR("ccb was NULL\n"); 3335c2c66affSColin Finck Status = STATUS_INVALID_PARAMETER; 3336c2c66affSColin Finck goto exit; 3337c2c66affSColin Finck } 3338c2c66affSColin Finck 3339c2c66affSColin Finck if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) { 3340c2c66affSColin Finck WARN("insufficient privileges\n"); 3341c2c66affSColin Finck Status = STATUS_ACCESS_DENIED; 3342c2c66affSColin Finck goto exit; 3343c2c66affSColin Finck } 3344c2c66affSColin Finck 3345c2c66affSColin Finck if (fcb == Vcb->volume_fcb) { 3346c2c66affSColin Finck TRACE("reading volume FCB\n"); 3347c2c66affSColin Finck 3348c2c66affSColin Finck IoSkipCurrentIrpStackLocation(Irp); 3349c2c66affSColin Finck 3350c2c66affSColin Finck Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp); 3351c2c66affSColin Finck 3352c2c66affSColin Finck goto exit2; 3353c2c66affSColin Finck } 3354c2c66affSColin Finck 3355c2c66affSColin Finck wait = IoIsOperationSynchronous(Irp); 3356c2c66affSColin Finck 3357c2c66affSColin Finck // Don't offload jobs when doing paging IO - otherwise this can lead to 3358c2c66affSColin Finck // deadlocks in CcCopyRead. 3359c2c66affSColin Finck if (Irp->Flags & IRP_PAGING_IO) 3360c2c66affSColin Finck wait = TRUE; 3361c2c66affSColin Finck 33624672b2baSPierre Schweitzer if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) { 33634672b2baSPierre Schweitzer IO_STATUS_BLOCK iosb; 33644672b2baSPierre Schweitzer 33654672b2baSPierre Schweitzer CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb); 33664672b2baSPierre Schweitzer if (!NT_SUCCESS(iosb.Status)) { 33674672b2baSPierre Schweitzer ERR("CcFlushCache returned %08x\n", iosb.Status); 33684672b2baSPierre Schweitzer return iosb.Status; 33694672b2baSPierre Schweitzer } 33704672b2baSPierre Schweitzer } 33714672b2baSPierre Schweitzer 3372c2c66affSColin Finck if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) { 3373c2c66affSColin Finck if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) { 3374c2c66affSColin Finck Status = STATUS_PENDING; 3375c2c66affSColin Finck IoMarkIrpPending(Irp); 3376c2c66affSColin Finck goto exit; 3377c2c66affSColin Finck } 3378c2c66affSColin Finck 3379c2c66affSColin Finck fcb_lock = TRUE; 3380c2c66affSColin Finck } 3381c2c66affSColin Finck 3382c2c66affSColin Finck Status = do_read(Irp, wait, &bytes_read); 3383c2c66affSColin Finck 3384c2c66affSColin Finck if (fcb_lock) 3385c2c66affSColin Finck ExReleaseResourceLite(fcb->Header.Resource); 3386c2c66affSColin Finck 3387c2c66affSColin Finck exit: 3388c2c66affSColin Finck if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO)) 3389c2c66affSColin Finck FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0); 3390c2c66affSColin Finck 3391c2c66affSColin Finck end: 3392c2c66affSColin Finck Irp->IoStatus.Status = Status; 3393c2c66affSColin Finck 3394c2c66affSColin Finck TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status); 3395c2c66affSColin Finck TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information); 3396c2c66affSColin Finck TRACE("returning %08x\n", Status); 3397c2c66affSColin Finck 3398c2c66affSColin Finck if (Status != STATUS_PENDING) 3399c2c66affSColin Finck IoCompleteRequest(Irp, IO_NO_INCREMENT); 3400c2c66affSColin Finck else { 3401c2c66affSColin Finck if (!add_thread_job(Vcb, Irp)) 3402c2c66affSColin Finck do_read_job(Irp); 3403c2c66affSColin Finck } 3404c2c66affSColin Finck 3405c2c66affSColin Finck exit2: 3406c2c66affSColin Finck if (top_level) 3407c2c66affSColin Finck IoSetTopLevelIrp(NULL); 3408c2c66affSColin Finck 3409c2c66affSColin Finck FsRtlExitFileSystem(); 3410c2c66affSColin Finck 3411c2c66affSColin Finck return Status; 3412c2c66affSColin Finck } 3413