xref: /reactos/drivers/filesystems/btrfs/read.c (revision eb7fbc25)
/* Copyright (c) Mark Harmstone 2016-17
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17c2c66affSColin Finck 
18c2c66affSColin Finck #include "btrfs_drv.h"
19c2c66affSColin Finck 
/* Per-stripe state of a read request. The completion routine in this file
 * moves a stripe to Success or Error; the remaining states are assigned by
 * code outside this section (presumably the request set-up - TODO confirm). */
enum read_data_status {
    ReadDataStatus_Pending       = 0,
    ReadDataStatus_Success       = 1, /* IRP completed with a success status */
    ReadDataStatus_Error         = 2, /* IRP completed with a failure status */
    ReadDataStatus_MissingDevice = 3,
    ReadDataStatus_Skip          = 4
};
27c2c66affSColin Finck 
28c2c66affSColin Finck struct read_data_context;
29c2c66affSColin Finck 
30c2c66affSColin Finck typedef struct {
31c2c66affSColin Finck     struct read_data_context* context;
32c2c66affSColin Finck     UINT16 stripenum;
33c2c66affSColin Finck     BOOL rewrite;
34c2c66affSColin Finck     PIRP Irp;
35c2c66affSColin Finck     IO_STATUS_BLOCK iosb;
36c2c66affSColin Finck     enum read_data_status status;
37c2c66affSColin Finck     PMDL mdl;
38c2c66affSColin Finck     UINT64 stripestart;
39c2c66affSColin Finck     UINT64 stripeend;
40c2c66affSColin Finck } read_data_stripe;
41c2c66affSColin Finck 
42c2c66affSColin Finck typedef struct {
43c2c66affSColin Finck     KEVENT Event;
44c2c66affSColin Finck     NTSTATUS Status;
45c2c66affSColin Finck     chunk* c;
46c2c66affSColin Finck     UINT64 address;
47c2c66affSColin Finck     UINT32 buflen;
48c2c66affSColin Finck     LONG num_stripes, stripes_left;
49c2c66affSColin Finck     UINT64 type;
50c2c66affSColin Finck     UINT32 sector_size;
51c2c66affSColin Finck     UINT16 firstoff, startoffstripe, sectors_per_stripe;
52c2c66affSColin Finck     UINT32* csum;
53c2c66affSColin Finck     BOOL tree;
54c2c66affSColin Finck     read_data_stripe* stripes;
55c2c66affSColin Finck     UINT8* va;
56c2c66affSColin Finck } read_data_context;
57c2c66affSColin Finck 
58c2c66affSColin Finck extern BOOL diskacc;
59c2c66affSColin Finck extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
60c2c66affSColin Finck extern tCcCopyReadEx fCcCopyReadEx;
61c2c66affSColin Finck extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
62c2c66affSColin Finck 
63c2c66affSColin Finck #define LINUX_PAGE_SIZE 4096
64c2c66affSColin Finck 
65c2c66affSColin Finck _Function_class_(IO_COMPLETION_ROUTINE)
66c2c66affSColin Finck #ifdef __REACTOS__
67c2c66affSColin Finck static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
68c2c66affSColin Finck #else
69c2c66affSColin Finck static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
70c2c66affSColin Finck #endif
71c2c66affSColin Finck     read_data_stripe* stripe = conptr;
72c2c66affSColin Finck     read_data_context* context = (read_data_context*)stripe->context;
73c2c66affSColin Finck 
74c2c66affSColin Finck     UNUSED(DeviceObject);
75c2c66affSColin Finck 
76c2c66affSColin Finck     stripe->iosb = Irp->IoStatus;
77c2c66affSColin Finck 
78c2c66affSColin Finck     if (NT_SUCCESS(Irp->IoStatus.Status))
79c2c66affSColin Finck         stripe->status = ReadDataStatus_Success;
80c2c66affSColin Finck     else
81c2c66affSColin Finck         stripe->status = ReadDataStatus_Error;
82c2c66affSColin Finck 
83c2c66affSColin Finck     if (InterlockedDecrement(&context->stripes_left) == 0)
84c2c66affSColin Finck         KeSetEvent(&context->Event, 0, FALSE);
85c2c66affSColin Finck 
86c2c66affSColin Finck     return STATUS_MORE_PROCESSING_REQUIRED;
87c2c66affSColin Finck }
88c2c66affSColin Finck 
89c2c66affSColin Finck NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) {
90c2c66affSColin Finck     NTSTATUS Status;
91c2c66affSColin Finck     calc_job* cj;
92c2c66affSColin Finck     UINT32* csum2;
93c2c66affSColin Finck 
94c2c66affSColin Finck     // From experimenting, it seems that 40 sectors is roughly the crossover
95c2c66affSColin Finck     // point where offloading the crc32 calculation becomes worth it.
96c2c66affSColin Finck 
97c2c66affSColin Finck     if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) {
98c2c66affSColin Finck         ULONG j;
99c2c66affSColin Finck 
100c2c66affSColin Finck         for (j = 0; j < sectors; j++) {
101c2c66affSColin Finck             UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
102c2c66affSColin Finck 
103c2c66affSColin Finck             if (crc32 != csum[j]) {
104c2c66affSColin Finck                 return STATUS_CRC_ERROR;
105c2c66affSColin Finck             }
106c2c66affSColin Finck         }
107c2c66affSColin Finck 
108c2c66affSColin Finck         return STATUS_SUCCESS;
109c2c66affSColin Finck     }
110c2c66affSColin Finck 
111c2c66affSColin Finck     csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG);
112c2c66affSColin Finck     if (!csum2) {
113c2c66affSColin Finck         ERR("out of memory\n");
114c2c66affSColin Finck         return STATUS_INSUFFICIENT_RESOURCES;
115c2c66affSColin Finck     }
116c2c66affSColin Finck 
117c2c66affSColin Finck     Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
118c2c66affSColin Finck     if (!NT_SUCCESS(Status)) {
119c2c66affSColin Finck         ERR("add_calc_job returned %08x\n", Status);
120c2c66affSColin Finck         ExFreePool(csum2);
121c2c66affSColin Finck         return Status;
122c2c66affSColin Finck     }
123c2c66affSColin Finck 
124c2c66affSColin Finck     KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
125c2c66affSColin Finck 
126c2c66affSColin Finck     if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) {
127c2c66affSColin Finck         free_calc_job(cj);
128c2c66affSColin Finck         ExFreePool(csum2);
129c2c66affSColin Finck         return STATUS_CRC_ERROR;
130c2c66affSColin Finck     }
131c2c66affSColin Finck 
132c2c66affSColin Finck     free_calc_job(cj);
133c2c66affSColin Finck     ExFreePool(csum2);
134c2c66affSColin Finck 
135c2c66affSColin Finck     return STATUS_SUCCESS;
136c2c66affSColin Finck }
137c2c66affSColin Finck 
138c2c66affSColin Finck static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, read_data_context* context, CHUNK_ITEM* ci,
139c2c66affSColin Finck                               device** devices, UINT64 generation) {
140c2c66affSColin Finck     ULONG i;
141c2c66affSColin Finck     BOOL checksum_error = FALSE;
142c2c66affSColin Finck     UINT16 j, stripe = 0;
143c2c66affSColin Finck     NTSTATUS Status;
144c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
145c2c66affSColin Finck 
146c2c66affSColin Finck     for (j = 0; j < ci->num_stripes; j++) {
147c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
148c2c66affSColin Finck             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
149c2c66affSColin Finck             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
150c2c66affSColin Finck             return context->stripes[j].iosb.Status;
151c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success) {
152c2c66affSColin Finck             stripe = j;
153c2c66affSColin Finck             break;
154c2c66affSColin Finck         }
155c2c66affSColin Finck     }
156c2c66affSColin Finck 
157c2c66affSColin Finck     if (context->stripes[stripe].status != ReadDataStatus_Success)
158c2c66affSColin Finck         return STATUS_INTERNAL_ERROR;
159c2c66affSColin Finck 
160c2c66affSColin Finck     if (context->tree) {
161c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
162c2c66affSColin Finck         UINT32 crc32;
163c2c66affSColin Finck 
164c2c66affSColin Finck         crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
165c2c66affSColin Finck 
166c2c66affSColin Finck         if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
167c2c66affSColin Finck             checksum_error = TRUE;
168c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
169c2c66affSColin Finck         } else if (generation != 0 && th->generation != generation) {
170c2c66affSColin Finck             checksum_error = TRUE;
171c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
172c2c66affSColin Finck         }
173c2c66affSColin Finck     } else if (context->csum) {
174c2c66affSColin Finck #ifdef DEBUG_STATS
175c2c66affSColin Finck         LARGE_INTEGER time1, time2;
176c2c66affSColin Finck 
177c2c66affSColin Finck         time1 = KeQueryPerformanceCounter(NULL);
178c2c66affSColin Finck #endif
179c2c66affSColin Finck         Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
180c2c66affSColin Finck 
181c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
182c2c66affSColin Finck             checksum_error = TRUE;
183c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
184c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
185c2c66affSColin Finck             ERR("check_csum returned %08x\n", Status);
186c2c66affSColin Finck             return Status;
187c2c66affSColin Finck         }
188c2c66affSColin Finck #ifdef DEBUG_STATS
189c2c66affSColin Finck         time2 = KeQueryPerformanceCounter(NULL);
190c2c66affSColin Finck 
191c2c66affSColin Finck         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
192c2c66affSColin Finck #endif
193c2c66affSColin Finck     }
194c2c66affSColin Finck 
195c2c66affSColin Finck     if (!checksum_error)
196c2c66affSColin Finck         return STATUS_SUCCESS;
197c2c66affSColin Finck 
198c2c66affSColin Finck     if (ci->num_stripes == 1)
199c2c66affSColin Finck         return STATUS_CRC_ERROR;
200c2c66affSColin Finck 
201c2c66affSColin Finck     if (context->tree) {
202c2c66affSColin Finck         tree_header* t2;
203c2c66affSColin Finck         BOOL recovered = FALSE;
204c2c66affSColin Finck 
205c2c66affSColin Finck         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
206c2c66affSColin Finck         if (!t2) {
207c2c66affSColin Finck             ERR("out of memory\n");
208c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
209c2c66affSColin Finck         }
210c2c66affSColin Finck 
211c2c66affSColin Finck         for (j = 0; j < ci->num_stripes; j++) {
212c2c66affSColin Finck             if (j != stripe && devices[j] && devices[j]->devobj) {
213c2c66affSColin Finck                 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE);
214c2c66affSColin Finck                 if (!NT_SUCCESS(Status)) {
215c2c66affSColin Finck                     WARN("sync_read_phys returned %08x\n", Status);
216c2c66affSColin Finck                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
217c2c66affSColin Finck                 } else {
218c2c66affSColin Finck                     UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
219c2c66affSColin Finck 
220c2c66affSColin Finck                     if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
221c2c66affSColin Finck                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
222c2c66affSColin Finck                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
223c2c66affSColin Finck                         recovered = TRUE;
224c2c66affSColin Finck 
225c2c66affSColin Finck                         if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
226c2c66affSColin Finck                             Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart,
227c2c66affSColin Finck                                                      t2, Vcb->superblock.node_size);
228c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
229c2c66affSColin Finck                                 WARN("write_data_phys returned %08x\n", Status);
230c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
231c2c66affSColin Finck                             }
232c2c66affSColin Finck                         }
233c2c66affSColin Finck 
234c2c66affSColin Finck                         break;
235c2c66affSColin Finck                     } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
236c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
237c2c66affSColin Finck                     else
238c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
239c2c66affSColin Finck                 }
240c2c66affSColin Finck             }
241c2c66affSColin Finck         }
242c2c66affSColin Finck 
243c2c66affSColin Finck         if (!recovered) {
244c2c66affSColin Finck             ERR("unrecoverable checksum error at %llx\n", addr);
245c2c66affSColin Finck             ExFreePool(t2);
246c2c66affSColin Finck             return STATUS_CRC_ERROR;
247c2c66affSColin Finck         }
248c2c66affSColin Finck 
249c2c66affSColin Finck         ExFreePool(t2);
250c2c66affSColin Finck     } else {
251c2c66affSColin Finck         ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
252c2c66affSColin Finck         UINT8* sector;
253c2c66affSColin Finck 
254c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
255c2c66affSColin Finck         if (!sector) {
256c2c66affSColin Finck             ERR("out of memory\n");
257c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
258c2c66affSColin Finck         }
259c2c66affSColin Finck 
260c2c66affSColin Finck         for (i = 0; i < sectors; i++) {
261c2c66affSColin Finck             UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
262c2c66affSColin Finck 
263c2c66affSColin Finck             if (context->csum[i] != crc32) {
264c2c66affSColin Finck                 BOOL recovered = FALSE;
265c2c66affSColin Finck 
266c2c66affSColin Finck                 for (j = 0; j < ci->num_stripes; j++) {
267c2c66affSColin Finck                     if (j != stripe && devices[j] && devices[j]->devobj) {
268c2c66affSColin Finck                         Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
269c2c66affSColin Finck                                                 Vcb->superblock.sector_size, sector, FALSE);
270c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
271c2c66affSColin Finck                             WARN("sync_read_phys returned %08x\n", Status);
272c2c66affSColin Finck                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
273c2c66affSColin Finck                         } else {
274c2c66affSColin Finck                             UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
275c2c66affSColin Finck 
276c2c66affSColin Finck                             if (crc32b == context->csum[i]) {
277c2c66affSColin Finck                                 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
278c2c66affSColin Finck                                 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
279c2c66affSColin Finck                                 recovered = TRUE;
280c2c66affSColin Finck 
281c2c66affSColin Finck                                 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
282c2c66affSColin Finck                                     Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
283c2c66affSColin Finck                                                              sector, Vcb->superblock.sector_size);
284c2c66affSColin Finck                                     if (!NT_SUCCESS(Status)) {
285c2c66affSColin Finck                                         WARN("write_data_phys returned %08x\n", Status);
286c2c66affSColin Finck                                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
287c2c66affSColin Finck                                     }
288c2c66affSColin Finck                                 }
289c2c66affSColin Finck 
290c2c66affSColin Finck                                 break;
291c2c66affSColin Finck                             } else
292c2c66affSColin Finck                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
293c2c66affSColin Finck                         }
294c2c66affSColin Finck                     }
295c2c66affSColin Finck                 }
296c2c66affSColin Finck 
297c2c66affSColin Finck                 if (!recovered) {
298c2c66affSColin Finck                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
299c2c66affSColin Finck                     ExFreePool(sector);
300c2c66affSColin Finck                     return STATUS_CRC_ERROR;
301c2c66affSColin Finck                 }
302c2c66affSColin Finck             }
303c2c66affSColin Finck         }
304c2c66affSColin Finck 
305c2c66affSColin Finck         ExFreePool(sector);
306c2c66affSColin Finck     }
307c2c66affSColin Finck 
308c2c66affSColin Finck     return STATUS_SUCCESS;
309c2c66affSColin Finck }
310c2c66affSColin Finck 
311c2c66affSColin Finck static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
312c2c66affSColin Finck                                 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
313c2c66affSColin Finck     UINT64 i;
314c2c66affSColin Finck 
315c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
316c2c66affSColin Finck         if (context->stripes[i].status == ReadDataStatus_Error) {
317c2c66affSColin Finck             WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
318c2c66affSColin Finck             log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
319c2c66affSColin Finck             return context->stripes[i].iosb.Status;
320c2c66affSColin Finck         }
321c2c66affSColin Finck     }
322c2c66affSColin Finck 
323c2c66affSColin Finck     if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
324c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
325c2c66affSColin Finck         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
326c2c66affSColin Finck 
327c2c66affSColin Finck         if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
328c2c66affSColin Finck             UINT64 off;
329c2c66affSColin Finck             UINT16 stripe;
330c2c66affSColin Finck 
331c2c66affSColin Finck             get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
332c2c66affSColin Finck 
333c2c66affSColin Finck             ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
334c2c66affSColin Finck 
335c2c66affSColin Finck             if (crc32 != *((UINT32*)th->csum)) {
336c2c66affSColin Finck                 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
337c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
338c2c66affSColin Finck                 return STATUS_CRC_ERROR;
339c2c66affSColin Finck             } else if (addr != th->address) {
340c2c66affSColin Finck                 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
341c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
342c2c66affSColin Finck                 return STATUS_CRC_ERROR;
343c2c66affSColin Finck             } else if (generation != 0 && generation != th->generation) {
344c2c66affSColin Finck                 WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
345c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
346c2c66affSColin Finck                 return STATUS_CRC_ERROR;
347c2c66affSColin Finck             }
348c2c66affSColin Finck         }
349c2c66affSColin Finck     } else if (context->csum) {
350c2c66affSColin Finck         NTSTATUS Status;
351c2c66affSColin Finck #ifdef DEBUG_STATS
352c2c66affSColin Finck         LARGE_INTEGER time1, time2;
353c2c66affSColin Finck 
354c2c66affSColin Finck         time1 = KeQueryPerformanceCounter(NULL);
355c2c66affSColin Finck #endif
356c2c66affSColin Finck         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
357c2c66affSColin Finck 
358c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
359c2c66affSColin Finck             for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
360c2c66affSColin Finck                 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
361c2c66affSColin Finck 
362c2c66affSColin Finck                 if (context->csum[i] != crc32) {
363c2c66affSColin Finck                     UINT64 off;
364c2c66affSColin Finck                     UINT16 stripe;
365c2c66affSColin Finck 
366c2c66affSColin Finck                     get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);
367c2c66affSColin Finck 
368c2c66affSColin Finck                     ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
369c2c66affSColin Finck 
370c2c66affSColin Finck                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
371c2c66affSColin Finck 
372c2c66affSColin Finck                     return Status;
373c2c66affSColin Finck                 }
374c2c66affSColin Finck             }
375c2c66affSColin Finck 
376c2c66affSColin Finck             return Status;
377c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
378c2c66affSColin Finck             ERR("check_csum returned %08x\n", Status);
379c2c66affSColin Finck             return Status;
380c2c66affSColin Finck         }
381c2c66affSColin Finck #ifdef DEBUG_STATS
382c2c66affSColin Finck         time2 = KeQueryPerformanceCounter(NULL);
383c2c66affSColin Finck 
384c2c66affSColin Finck         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
385c2c66affSColin Finck #endif
386c2c66affSColin Finck     }
387c2c66affSColin Finck 
388c2c66affSColin Finck     return STATUS_SUCCESS;
389c2c66affSColin Finck }
390c2c66affSColin Finck 
391c2c66affSColin Finck static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
392c2c66affSColin Finck                                  CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
393c2c66affSColin Finck     UINT64 i;
394c2c66affSColin Finck     UINT16 j, stripe;
395c2c66affSColin Finck     NTSTATUS Status;
396c2c66affSColin Finck     BOOL checksum_error = FALSE;
397c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
398c2c66affSColin Finck 
399c2c66affSColin Finck     for (j = 0; j < ci->num_stripes; j++) {
400c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
401c2c66affSColin Finck             WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status);
402c2c66affSColin Finck             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
403c2c66affSColin Finck             return context->stripes[j].iosb.Status;
404c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success)
405c2c66affSColin Finck             stripe = j;
406c2c66affSColin Finck     }
407c2c66affSColin Finck 
408c2c66affSColin Finck     if (context->tree) {
409c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
410c2c66affSColin Finck         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
411c2c66affSColin Finck 
412c2c66affSColin Finck         if (crc32 != *((UINT32*)th->csum)) {
413c2c66affSColin Finck             WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
414c2c66affSColin Finck             checksum_error = TRUE;
415c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
416c2c66affSColin Finck         } else if (addr != th->address) {
417c2c66affSColin Finck             WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
418c2c66affSColin Finck             checksum_error = TRUE;
419c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
420c2c66affSColin Finck         } else if (generation != 0 && generation != th->generation) {
421c2c66affSColin Finck             WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
422c2c66affSColin Finck             checksum_error = TRUE;
423c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
424c2c66affSColin Finck         }
425c2c66affSColin Finck     } else if (context->csum) {
426c2c66affSColin Finck #ifdef DEBUG_STATS
427c2c66affSColin Finck         LARGE_INTEGER time1, time2;
428c2c66affSColin Finck 
429c2c66affSColin Finck         time1 = KeQueryPerformanceCounter(NULL);
430c2c66affSColin Finck #endif
431c2c66affSColin Finck         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
432c2c66affSColin Finck 
433c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR)
434c2c66affSColin Finck             checksum_error = TRUE;
435c2c66affSColin Finck         else if (!NT_SUCCESS(Status)) {
436c2c66affSColin Finck             ERR("check_csum returned %08x\n", Status);
437c2c66affSColin Finck             return Status;
438c2c66affSColin Finck         }
439c2c66affSColin Finck #ifdef DEBUG_STATS
440c2c66affSColin Finck         time2 = KeQueryPerformanceCounter(NULL);
441c2c66affSColin Finck 
442c2c66affSColin Finck         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
443c2c66affSColin Finck #endif
444c2c66affSColin Finck     }
445c2c66affSColin Finck 
446c2c66affSColin Finck     if (!checksum_error)
447c2c66affSColin Finck         return STATUS_SUCCESS;
448c2c66affSColin Finck 
449c2c66affSColin Finck     if (context->tree) {
450c2c66affSColin Finck         tree_header* t2;
451c2c66affSColin Finck         UINT64 off;
452c2c66affSColin Finck         UINT16 badsubstripe = 0;
453c2c66affSColin Finck         BOOL recovered = FALSE;
454c2c66affSColin Finck 
455c2c66affSColin Finck         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
456c2c66affSColin Finck         if (!t2) {
457c2c66affSColin Finck             ERR("out of memory\n");
458c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
459c2c66affSColin Finck         }
460c2c66affSColin Finck 
461c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
462c2c66affSColin Finck 
463c2c66affSColin Finck         stripe *= ci->sub_stripes;
464c2c66affSColin Finck 
465c2c66affSColin Finck         for (j = 0; j < ci->sub_stripes; j++) {
466c2c66affSColin Finck             if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
467c2c66affSColin Finck                 badsubstripe = j;
468c2c66affSColin Finck                 break;
469c2c66affSColin Finck             }
470c2c66affSColin Finck         }
471c2c66affSColin Finck 
472c2c66affSColin Finck         for (j = 0; j < ci->sub_stripes; j++) {
473c2c66affSColin Finck             if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
474c2c66affSColin Finck                 Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off,
475c2c66affSColin Finck                                         Vcb->superblock.node_size, (UINT8*)t2, FALSE);
476c2c66affSColin Finck                 if (!NT_SUCCESS(Status)) {
477c2c66affSColin Finck                     WARN("sync_read_phys returned %08x\n", Status);
478c2c66affSColin Finck                     log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
479c2c66affSColin Finck                 } else {
480c2c66affSColin Finck                     UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
481c2c66affSColin Finck 
482c2c66affSColin Finck                     if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
483c2c66affSColin Finck                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
484c2c66affSColin Finck                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id);
485c2c66affSColin Finck                         recovered = TRUE;
486c2c66affSColin Finck 
487c2c66affSColin Finck                         if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
488c2c66affSColin Finck                             Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off,
489c2c66affSColin Finck                                                      t2, Vcb->superblock.node_size);
490c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
491c2c66affSColin Finck                                 WARN("write_data_phys returned %08x\n", Status);
492c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
493c2c66affSColin Finck                             }
494c2c66affSColin Finck                         }
495c2c66affSColin Finck 
496c2c66affSColin Finck                         break;
497c2c66affSColin Finck                     } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
498c2c66affSColin Finck                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
499c2c66affSColin Finck                     else
500c2c66affSColin Finck                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
501c2c66affSColin Finck                 }
502c2c66affSColin Finck             }
503c2c66affSColin Finck         }
504c2c66affSColin Finck 
505c2c66affSColin Finck         if (!recovered) {
506c2c66affSColin Finck             ERR("unrecoverable checksum error at %llx\n", addr);
507c2c66affSColin Finck             ExFreePool(t2);
508c2c66affSColin Finck             return STATUS_CRC_ERROR;
509c2c66affSColin Finck         }
510c2c66affSColin Finck 
511c2c66affSColin Finck         ExFreePool(t2);
512c2c66affSColin Finck     } else {
513c2c66affSColin Finck         ULONG sectors = length / Vcb->superblock.sector_size;
514c2c66affSColin Finck         UINT8* sector;
515c2c66affSColin Finck 
516c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
517c2c66affSColin Finck         if (!sector) {
518c2c66affSColin Finck             ERR("out of memory\n");
519c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
520c2c66affSColin Finck         }
521c2c66affSColin Finck 
522c2c66affSColin Finck         for (i = 0; i < sectors; i++) {
523c2c66affSColin Finck             UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
524c2c66affSColin Finck 
525c2c66affSColin Finck             if (context->csum[i] != crc32) {
526c2c66affSColin Finck                 UINT64 off;
527c2c66affSColin Finck                 UINT16 stripe2, badsubstripe = 0;
528c2c66affSColin Finck                 BOOL recovered = FALSE;
529c2c66affSColin Finck 
530c2c66affSColin Finck                 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
531c2c66affSColin Finck                                  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
532c2c66affSColin Finck 
533c2c66affSColin Finck                 stripe2 *= ci->sub_stripes;
534c2c66affSColin Finck 
535c2c66affSColin Finck                 for (j = 0; j < ci->sub_stripes; j++) {
536c2c66affSColin Finck                     if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
537c2c66affSColin Finck                         badsubstripe = j;
538c2c66affSColin Finck                         break;
539c2c66affSColin Finck                     }
540c2c66affSColin Finck                 }
541c2c66affSColin Finck 
542c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
543c2c66affSColin Finck 
544c2c66affSColin Finck                 for (j = 0; j < ci->sub_stripes; j++) {
545c2c66affSColin Finck                     if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
546c2c66affSColin Finck                         Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off,
547c2c66affSColin Finck                                                 Vcb->superblock.sector_size, sector, FALSE);
548c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
549c2c66affSColin Finck                             WARN("sync_read_phys returned %08x\n", Status);
550c2c66affSColin Finck                             log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
551c2c66affSColin Finck                         } else {
552c2c66affSColin Finck                             UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
553c2c66affSColin Finck 
554c2c66affSColin Finck                             if (crc32b == context->csum[i]) {
555c2c66affSColin Finck                                 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
556c2c66affSColin Finck                                 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
557c2c66affSColin Finck                                 recovered = TRUE;
558c2c66affSColin Finck 
559c2c66affSColin Finck                                 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
560c2c66affSColin Finck                                     Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off,
561c2c66affSColin Finck                                                              sector, Vcb->superblock.sector_size);
562c2c66affSColin Finck                                     if (!NT_SUCCESS(Status)) {
563c2c66affSColin Finck                                         WARN("write_data_phys returned %08x\n", Status);
564c2c66affSColin Finck                                         log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
565c2c66affSColin Finck                                     }
566c2c66affSColin Finck                                 }
567c2c66affSColin Finck 
568c2c66affSColin Finck                                 break;
569c2c66affSColin Finck                             } else
570c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
571c2c66affSColin Finck                         }
572c2c66affSColin Finck                     }
573c2c66affSColin Finck                 }
574c2c66affSColin Finck 
575c2c66affSColin Finck                 if (!recovered) {
576c2c66affSColin Finck                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
577c2c66affSColin Finck                     ExFreePool(sector);
578c2c66affSColin Finck                     return STATUS_CRC_ERROR;
579c2c66affSColin Finck                 }
580c2c66affSColin Finck             }
581c2c66affSColin Finck         }
582c2c66affSColin Finck 
583c2c66affSColin Finck         ExFreePool(sector);
584c2c66affSColin Finck     }
585c2c66affSColin Finck 
586c2c66affSColin Finck     return STATUS_SUCCESS;
587c2c66affSColin Finck }
588c2c66affSColin Finck 
589c2c66affSColin Finck static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
590c2c66affSColin Finck                                 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
591c2c66affSColin Finck     ULONG i;
592c2c66affSColin Finck     NTSTATUS Status;
593c2c66affSColin Finck     BOOL checksum_error = FALSE;
594c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
595c2c66affSColin Finck     UINT16 j, stripe;
596c2c66affSColin Finck     BOOL no_success = TRUE;
597c2c66affSColin Finck 
598c2c66affSColin Finck     for (j = 0; j < ci->num_stripes; j++) {
599c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
600c2c66affSColin Finck             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
601c2c66affSColin Finck             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
602c2c66affSColin Finck             return context->stripes[j].iosb.Status;
603c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success) {
604c2c66affSColin Finck             stripe = j;
605c2c66affSColin Finck             no_success = FALSE;
606c2c66affSColin Finck         }
607c2c66affSColin Finck     }
608c2c66affSColin Finck 
609c2c66affSColin Finck     if (c) {    // check partial stripes
610c2c66affSColin Finck         LIST_ENTRY* le;
611c2c66affSColin Finck         UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length;
612c2c66affSColin Finck 
613c2c66affSColin Finck         ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
614c2c66affSColin Finck 
615c2c66affSColin Finck         le = c->partial_stripes.Flink;
616c2c66affSColin Finck         while (le != &c->partial_stripes) {
617c2c66affSColin Finck             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
618c2c66affSColin Finck 
619c2c66affSColin Finck             if (ps->address + ps_length > addr && ps->address < addr + length) {
620c2c66affSColin Finck                 ULONG runlength, index;
621c2c66affSColin Finck 
622c2c66affSColin Finck                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
623c2c66affSColin Finck 
624c2c66affSColin Finck                 while (runlength != 0) {
625c2c66affSColin Finck                     UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
626c2c66affSColin Finck                     UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
627c2c66affSColin Finck                     UINT64 start = max(runstart, addr);
628c2c66affSColin Finck                     UINT64 end = min(runend, addr + length);
629c2c66affSColin Finck 
630c2c66affSColin Finck                     if (end > start)
631c2c66affSColin Finck                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
632c2c66affSColin Finck 
633c2c66affSColin Finck                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
634c2c66affSColin Finck                 }
635c2c66affSColin Finck             } else if (ps->address >= addr + length)
636c2c66affSColin Finck                 break;
637c2c66affSColin Finck 
638c2c66affSColin Finck             le = le->Flink;
639c2c66affSColin Finck         }
640c2c66affSColin Finck 
641c2c66affSColin Finck         ExReleaseResourceLite(&c->partial_stripes_lock);
642c2c66affSColin Finck     }
643c2c66affSColin Finck 
644c2c66affSColin Finck     if (context->tree) {
645c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
646c2c66affSColin Finck         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
647c2c66affSColin Finck 
648c2c66affSColin Finck         if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
649c2c66affSColin Finck             checksum_error = TRUE;
650c2c66affSColin Finck             if (!no_success && !degraded)
651c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
652c2c66affSColin Finck         } else if (generation != 0 && generation != th->generation) {
653c2c66affSColin Finck             checksum_error = TRUE;
654c2c66affSColin Finck             if (!no_success && !degraded)
655c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
656c2c66affSColin Finck         }
657c2c66affSColin Finck     } else if (context->csum) {
658c2c66affSColin Finck #ifdef DEBUG_STATS
659c2c66affSColin Finck         LARGE_INTEGER time1, time2;
660c2c66affSColin Finck 
661c2c66affSColin Finck         time1 = KeQueryPerformanceCounter(NULL);
662c2c66affSColin Finck #endif
663c2c66affSColin Finck         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
664c2c66affSColin Finck 
665c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
666c2c66affSColin Finck             if (!degraded)
667c2c66affSColin Finck                 WARN("checksum error\n");
668c2c66affSColin Finck             checksum_error = TRUE;
669c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
670c2c66affSColin Finck             ERR("check_csum returned %08x\n", Status);
671c2c66affSColin Finck             return Status;
672c2c66affSColin Finck         }
673c2c66affSColin Finck 
674c2c66affSColin Finck #ifdef DEBUG_STATS
675c2c66affSColin Finck         time2 = KeQueryPerformanceCounter(NULL);
676c2c66affSColin Finck 
677c2c66affSColin Finck         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
678c2c66affSColin Finck #endif
679c2c66affSColin Finck     } else if (degraded)
680c2c66affSColin Finck         checksum_error = TRUE;
681c2c66affSColin Finck 
682c2c66affSColin Finck     if (!checksum_error)
683c2c66affSColin Finck         return STATUS_SUCCESS;
684c2c66affSColin Finck 
685c2c66affSColin Finck     if (context->tree) {
686c2c66affSColin Finck         UINT16 parity;
687c2c66affSColin Finck         UINT64 off;
688c2c66affSColin Finck         BOOL recovered = FALSE, first = TRUE, failed = FALSE;
689c2c66affSColin Finck         UINT8* t2;
690c2c66affSColin Finck 
691c2c66affSColin Finck         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
692c2c66affSColin Finck         if (!t2) {
693c2c66affSColin Finck             ERR("out of memory\n");
694c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
695c2c66affSColin Finck         }
696c2c66affSColin Finck 
697c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
698c2c66affSColin Finck 
699c2c66affSColin Finck         parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
700c2c66affSColin Finck 
701c2c66affSColin Finck         stripe = (parity + stripe + 1) % ci->num_stripes;
702c2c66affSColin Finck 
703c2c66affSColin Finck         for (j = 0; j < ci->num_stripes; j++) {
704c2c66affSColin Finck             if (j != stripe) {
705c2c66affSColin Finck                 if (devices[j] && devices[j]->devobj) {
706c2c66affSColin Finck                     if (first) {
707c2c66affSColin Finck                         Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE);
708c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
709c2c66affSColin Finck                             ERR("sync_read_phys returned %08x\n", Status);
710c2c66affSColin Finck                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
711c2c66affSColin Finck                             failed = TRUE;
712c2c66affSColin Finck                             break;
713c2c66affSColin Finck                         }
714c2c66affSColin Finck 
715c2c66affSColin Finck                         first = FALSE;
716c2c66affSColin Finck                     } else {
717c2c66affSColin Finck                         Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE);
718c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
719c2c66affSColin Finck                             ERR("sync_read_phys returned %08x\n", Status);
720c2c66affSColin Finck                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
721c2c66affSColin Finck                             failed = TRUE;
722c2c66affSColin Finck                             break;
723c2c66affSColin Finck                         }
724c2c66affSColin Finck 
725c2c66affSColin Finck                         do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
726c2c66affSColin Finck                     }
727c2c66affSColin Finck                 } else {
728c2c66affSColin Finck                     failed = TRUE;
729c2c66affSColin Finck                     break;
730c2c66affSColin Finck                 }
731c2c66affSColin Finck             }
732c2c66affSColin Finck         }
733c2c66affSColin Finck 
734c2c66affSColin Finck         if (!failed) {
735c2c66affSColin Finck             tree_header* t3 = (tree_header*)t2;
736c2c66affSColin Finck             UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));
737c2c66affSColin Finck 
738c2c66affSColin Finck             if (t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) {
739c2c66affSColin Finck                 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
740c2c66affSColin Finck 
741c2c66affSColin Finck                 if (!degraded)
742c2c66affSColin Finck                     ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
743c2c66affSColin Finck 
744c2c66affSColin Finck                 recovered = TRUE;
745c2c66affSColin Finck 
746c2c66affSColin Finck                 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
747c2c66affSColin Finck                     Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
748c2c66affSColin Finck                     if (!NT_SUCCESS(Status)) {
749c2c66affSColin Finck                         WARN("write_data_phys returned %08x\n", Status);
750c2c66affSColin Finck                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
751c2c66affSColin Finck                     }
752c2c66affSColin Finck                 }
753c2c66affSColin Finck             }
754c2c66affSColin Finck         }
755c2c66affSColin Finck 
756c2c66affSColin Finck         if (!recovered) {
757c2c66affSColin Finck             ERR("unrecoverable checksum error at %llx\n", addr);
758c2c66affSColin Finck             ExFreePool(t2);
759c2c66affSColin Finck             return STATUS_CRC_ERROR;
760c2c66affSColin Finck         }
761c2c66affSColin Finck 
762c2c66affSColin Finck         ExFreePool(t2);
763c2c66affSColin Finck     } else {
764c2c66affSColin Finck         ULONG sectors = length / Vcb->superblock.sector_size;
765c2c66affSColin Finck         UINT8* sector;
766c2c66affSColin Finck 
767c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
768c2c66affSColin Finck         if (!sector) {
769c2c66affSColin Finck             ERR("out of memory\n");
770c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
771c2c66affSColin Finck         }
772c2c66affSColin Finck 
773c2c66affSColin Finck         for (i = 0; i < sectors; i++) {
774c2c66affSColin Finck             UINT16 parity;
775c2c66affSColin Finck             UINT64 off;
776c2c66affSColin Finck             UINT32 crc32;
777c2c66affSColin Finck 
778c2c66affSColin Finck             if (context->csum)
779c2c66affSColin Finck                 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
780c2c66affSColin Finck 
781c2c66affSColin Finck             get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
782c2c66affSColin Finck                              ci->num_stripes - 1, &off, &stripe);
783c2c66affSColin Finck 
784c2c66affSColin Finck             parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
785c2c66affSColin Finck 
786c2c66affSColin Finck             stripe = (parity + stripe + 1) % ci->num_stripes;
787c2c66affSColin Finck 
788c2c66affSColin Finck             if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
789c2c66affSColin Finck                 BOOL recovered = FALSE, first = TRUE, failed = FALSE;
790c2c66affSColin Finck 
791c2c66affSColin Finck                 if (devices[stripe] && devices[stripe]->devobj)
792c2c66affSColin Finck                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);
793c2c66affSColin Finck 
794c2c66affSColin Finck                 for (j = 0; j < ci->num_stripes; j++) {
795c2c66affSColin Finck                     if (j != stripe) {
796c2c66affSColin Finck                         if (devices[j] && devices[j]->devobj) {
797c2c66affSColin Finck                             if (first) {
798c2c66affSColin Finck                                 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE);
799c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
800c2c66affSColin Finck                                     ERR("sync_read_phys returned %08x\n", Status);
801c2c66affSColin Finck                                     failed = TRUE;
802c2c66affSColin Finck                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
803c2c66affSColin Finck                                     break;
804c2c66affSColin Finck                                 }
805c2c66affSColin Finck 
806c2c66affSColin Finck                                 first = FALSE;
807c2c66affSColin Finck                             } else {
808c2c66affSColin Finck                                 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE);
809c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
810c2c66affSColin Finck                                     ERR("sync_read_phys returned %08x\n", Status);
811c2c66affSColin Finck                                     failed = TRUE;
812c2c66affSColin Finck                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
813c2c66affSColin Finck                                     break;
814c2c66affSColin Finck                                 }
815c2c66affSColin Finck 
816c2c66affSColin Finck                                 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
817c2c66affSColin Finck                             }
818c2c66affSColin Finck                         } else {
819c2c66affSColin Finck                             failed = TRUE;
820c2c66affSColin Finck                             break;
821c2c66affSColin Finck                         }
822c2c66affSColin Finck                     }
823c2c66affSColin Finck                 }
824c2c66affSColin Finck 
825c2c66affSColin Finck                 if (!failed) {
826c2c66affSColin Finck                     if (context->csum)
827c2c66affSColin Finck                         crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
828c2c66affSColin Finck 
829c2c66affSColin Finck                     if (!context->csum || crc32 == context->csum[i]) {
830c2c66affSColin Finck                         RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
831c2c66affSColin Finck 
832c2c66affSColin Finck                         if (!degraded)
833c2c66affSColin Finck                             ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
834c2c66affSColin Finck 
835c2c66affSColin Finck                         recovered = TRUE;
836c2c66affSColin Finck 
837c2c66affSColin Finck                         if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
838c2c66affSColin Finck                             Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off,
839c2c66affSColin Finck                                                      sector, Vcb->superblock.sector_size);
840c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
841c2c66affSColin Finck                                 WARN("write_data_phys returned %08x\n", Status);
842c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
843c2c66affSColin Finck                             }
844c2c66affSColin Finck                         }
845c2c66affSColin Finck                     }
846c2c66affSColin Finck                 }
847c2c66affSColin Finck 
848c2c66affSColin Finck                 if (!recovered) {
849c2c66affSColin Finck                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
850c2c66affSColin Finck                     ExFreePool(sector);
851c2c66affSColin Finck                     return STATUS_CRC_ERROR;
852c2c66affSColin Finck                 }
853c2c66affSColin Finck             }
854c2c66affSColin Finck         }
855c2c66affSColin Finck 
856c2c66affSColin Finck         ExFreePool(sector);
857c2c66affSColin Finck     }
858c2c66affSColin Finck 
859c2c66affSColin Finck     return STATUS_SUCCESS;
860c2c66affSColin Finck }
861c2c66affSColin Finck 
862c2c66affSColin Finck void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) {
863c2c66affSColin Finck     if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
864c2c66affSColin Finck         UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
865c2c66affSColin Finck         UINT16 stripe;
866c2c66affSColin Finck 
867c2c66affSColin Finck         stripe = num_stripes - 3;
868c2c66affSColin Finck 
869c2c66affSColin Finck         if (stripe == missing)
870c2c66affSColin Finck             RtlZeroMemory(out, sector_size);
871c2c66affSColin Finck         else
872c2c66affSColin Finck             RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);
873c2c66affSColin Finck 
874c2c66affSColin Finck         do {
875c2c66affSColin Finck             stripe--;
876c2c66affSColin Finck 
877c2c66affSColin Finck             galois_double(out, sector_size);
878c2c66affSColin Finck 
879c2c66affSColin Finck             if (stripe != missing)
880c2c66affSColin Finck                 do_xor(out, sectors + (stripe * sector_size), sector_size);
881c2c66affSColin Finck         } while (stripe > 0);
882c2c66affSColin Finck 
883c2c66affSColin Finck         do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
884c2c66affSColin Finck 
885c2c66affSColin Finck         if (missing != 0)
886c2c66affSColin Finck             galois_divpower(out, (UINT8)missing, sector_size);
887c2c66affSColin Finck     } else { // reconstruct from p and q
888c2c66affSColin Finck         UINT16 x, y, stripe;
889c2c66affSColin Finck         UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
890c2c66affSColin Finck         UINT32 j;
891c2c66affSColin Finck 
892c2c66affSColin Finck         stripe = num_stripes - 3;
893c2c66affSColin Finck 
894c2c66affSColin Finck         pxy = out + sector_size;
895c2c66affSColin Finck         qxy = out;
896c2c66affSColin Finck 
897c2c66affSColin Finck         if (stripe == missing1 || stripe == missing2) {
898c2c66affSColin Finck             RtlZeroMemory(qxy, sector_size);
899c2c66affSColin Finck             RtlZeroMemory(pxy, sector_size);
900c2c66affSColin Finck 
901c2c66affSColin Finck             if (stripe == missing1)
902c2c66affSColin Finck                 x = stripe;
903c2c66affSColin Finck             else
904c2c66affSColin Finck                 y = stripe;
905c2c66affSColin Finck         } else {
906c2c66affSColin Finck             RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
907c2c66affSColin Finck             RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
908c2c66affSColin Finck         }
909c2c66affSColin Finck 
910c2c66affSColin Finck         do {
911c2c66affSColin Finck             stripe--;
912c2c66affSColin Finck 
913c2c66affSColin Finck             galois_double(qxy, sector_size);
914c2c66affSColin Finck 
915c2c66affSColin Finck             if (stripe != missing1 && stripe != missing2) {
916c2c66affSColin Finck                 do_xor(qxy, sectors + (stripe * sector_size), sector_size);
917c2c66affSColin Finck                 do_xor(pxy, sectors + (stripe * sector_size), sector_size);
918c2c66affSColin Finck             } else if (stripe == missing1)
919c2c66affSColin Finck                 x = stripe;
920c2c66affSColin Finck             else if (stripe == missing2)
921c2c66affSColin Finck                 y = stripe;
922c2c66affSColin Finck         } while (stripe > 0);
923c2c66affSColin Finck 
924c2c66affSColin Finck         gyx = gpow2(y > x ? (y-x) : (255-x+y));
925c2c66affSColin Finck         gx = gpow2(255-x);
926c2c66affSColin Finck 
927c2c66affSColin Finck         denom = gdiv(1, gyx ^ 1);
928c2c66affSColin Finck         a = gmul(gyx, denom);
929c2c66affSColin Finck         b = gmul(gx, denom);
930c2c66affSColin Finck 
931c2c66affSColin Finck         p = sectors + ((num_stripes - 2) * sector_size);
932c2c66affSColin Finck         q = sectors + ((num_stripes - 1) * sector_size);
933c2c66affSColin Finck 
934c2c66affSColin Finck         for (j = 0; j < sector_size; j++) {
935c2c66affSColin Finck             *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
936c2c66affSColin Finck 
937c2c66affSColin Finck             p++;
938c2c66affSColin Finck             q++;
939c2c66affSColin Finck             pxy++;
940c2c66affSColin Finck             qxy++;
941c2c66affSColin Finck         }
942c2c66affSColin Finck 
943c2c66affSColin Finck         do_xor(out + sector_size, out, sector_size);
944c2c66affSColin Finck         do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
945c2c66affSColin Finck     }
946c2c66affSColin Finck }
947c2c66affSColin Finck 
948c2c66affSColin Finck static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
949c2c66affSColin Finck                                 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
950c2c66affSColin Finck     NTSTATUS Status;
951c2c66affSColin Finck     ULONG i;
952c2c66affSColin Finck     BOOL checksum_error = FALSE;
953c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
954c2c66affSColin Finck     UINT16 stripe, j;
955c2c66affSColin Finck     BOOL no_success = TRUE;
956c2c66affSColin Finck 
957c2c66affSColin Finck     for (j = 0; j < ci->num_stripes; j++) {
958c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
959c2c66affSColin Finck             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
960c2c66affSColin Finck 
961c2c66affSColin Finck             if (devices[j])
962c2c66affSColin Finck                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
963c2c66affSColin Finck             return context->stripes[j].iosb.Status;
964c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success) {
965c2c66affSColin Finck             stripe = j;
966c2c66affSColin Finck             no_success = FALSE;
967c2c66affSColin Finck         }
968c2c66affSColin Finck     }
969c2c66affSColin Finck 
970c2c66affSColin Finck     if (c) {    // check partial stripes
971c2c66affSColin Finck         LIST_ENTRY* le;
972c2c66affSColin Finck         UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length;
973c2c66affSColin Finck 
974c2c66affSColin Finck         ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
975c2c66affSColin Finck 
976c2c66affSColin Finck         le = c->partial_stripes.Flink;
977c2c66affSColin Finck         while (le != &c->partial_stripes) {
978c2c66affSColin Finck             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
979c2c66affSColin Finck 
980c2c66affSColin Finck             if (ps->address + ps_length > addr && ps->address < addr + length) {
981c2c66affSColin Finck                 ULONG runlength, index;
982c2c66affSColin Finck 
983c2c66affSColin Finck                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
984c2c66affSColin Finck 
985c2c66affSColin Finck                 while (runlength != 0) {
986c2c66affSColin Finck                     UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
987c2c66affSColin Finck                     UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
988c2c66affSColin Finck                     UINT64 start = max(runstart, addr);
989c2c66affSColin Finck                     UINT64 end = min(runend, addr + length);
990c2c66affSColin Finck 
991c2c66affSColin Finck                     if (end > start)
992c2c66affSColin Finck                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993c2c66affSColin Finck 
994c2c66affSColin Finck                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995c2c66affSColin Finck                 }
996c2c66affSColin Finck             } else if (ps->address >= addr + length)
997c2c66affSColin Finck                 break;
998c2c66affSColin Finck 
999c2c66affSColin Finck             le = le->Flink;
1000c2c66affSColin Finck         }
1001c2c66affSColin Finck 
1002c2c66affSColin Finck         ExReleaseResourceLite(&c->partial_stripes_lock);
1003c2c66affSColin Finck     }
1004c2c66affSColin Finck 
1005c2c66affSColin Finck     if (context->tree) {
1006c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
1007c2c66affSColin Finck         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008c2c66affSColin Finck 
1009c2c66affSColin Finck         if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
1010c2c66affSColin Finck             checksum_error = TRUE;
1011c2c66affSColin Finck             if (!no_success && !degraded && devices[stripe])
1012c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1013c2c66affSColin Finck         } else if (generation != 0 && generation != th->generation) {
1014c2c66affSColin Finck             checksum_error = TRUE;
1015c2c66affSColin Finck             if (!no_success && !degraded && devices[stripe])
1016c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1017c2c66affSColin Finck         }
1018c2c66affSColin Finck     } else if (context->csum) {
1019c2c66affSColin Finck #ifdef DEBUG_STATS
1020c2c66affSColin Finck         LARGE_INTEGER time1, time2;
1021c2c66affSColin Finck 
1022c2c66affSColin Finck         time1 = KeQueryPerformanceCounter(NULL);
1023c2c66affSColin Finck #endif
1024c2c66affSColin Finck         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1025c2c66affSColin Finck 
1026c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
1027c2c66affSColin Finck             if (!degraded)
1028c2c66affSColin Finck                 WARN("checksum error\n");
1029c2c66affSColin Finck             checksum_error = TRUE;
1030c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
1031c2c66affSColin Finck             ERR("check_csum returned %08x\n", Status);
1032c2c66affSColin Finck             return Status;
1033c2c66affSColin Finck         }
1034c2c66affSColin Finck #ifdef DEBUG_STATS
1035c2c66affSColin Finck         time2 = KeQueryPerformanceCounter(NULL);
1036c2c66affSColin Finck 
1037c2c66affSColin Finck         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
1038c2c66affSColin Finck #endif
1039c2c66affSColin Finck     } else if (degraded)
1040c2c66affSColin Finck         checksum_error = TRUE;
1041c2c66affSColin Finck 
1042c2c66affSColin Finck     if (!checksum_error)
1043c2c66affSColin Finck         return STATUS_SUCCESS;
1044c2c66affSColin Finck 
1045c2c66affSColin Finck     if (context->tree) {
1046c2c66affSColin Finck         UINT8* sector;
1047c2c66affSColin Finck         UINT16 k, physstripe, parity1, parity2, error_stripe;
1048c2c66affSColin Finck         UINT64 off;
1049c2c66affSColin Finck         BOOL recovered = FALSE, failed = FALSE;
1050c2c66affSColin Finck         ULONG num_errors = 0;
1051c2c66affSColin Finck 
1052c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1053c2c66affSColin Finck         if (!sector) {
1054c2c66affSColin Finck             ERR("out of memory\n");
1055c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
1056c2c66affSColin Finck         }
1057c2c66affSColin Finck 
1058c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1059c2c66affSColin Finck 
1060c2c66affSColin Finck         parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1061c2c66affSColin Finck         parity2 = (parity1 + 1) % ci->num_stripes;
1062c2c66affSColin Finck 
1063c2c66affSColin Finck         physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1064c2c66affSColin Finck 
1065c2c66affSColin Finck         j = (parity2 + 1) % ci->num_stripes;
1066c2c66affSColin Finck 
1067c2c66affSColin Finck         for (k = 0; k < ci->num_stripes - 1; k++) {
1068c2c66affSColin Finck             if (j != physstripe) {
1069c2c66affSColin Finck                 if (devices[j] && devices[j]->devobj) {
1070c2c66affSColin Finck                     Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE);
1071c2c66affSColin Finck                     if (!NT_SUCCESS(Status)) {
1072c2c66affSColin Finck                         ERR("sync_read_phys returned %08x\n", Status);
1073c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1074c2c66affSColin Finck                         num_errors++;
1075c2c66affSColin Finck                         error_stripe = k;
1076c2c66affSColin Finck 
1077c2c66affSColin Finck                         if (num_errors > 1) {
1078c2c66affSColin Finck                             failed = TRUE;
1079c2c66affSColin Finck                             break;
1080c2c66affSColin Finck                         }
1081c2c66affSColin Finck                     }
1082c2c66affSColin Finck                 } else {
1083c2c66affSColin Finck                     num_errors++;
1084c2c66affSColin Finck                     error_stripe = k;
1085c2c66affSColin Finck 
1086c2c66affSColin Finck                     if (num_errors > 1) {
1087c2c66affSColin Finck                         failed = TRUE;
1088c2c66affSColin Finck                         break;
1089c2c66affSColin Finck                     }
1090c2c66affSColin Finck                 }
1091c2c66affSColin Finck             }
1092c2c66affSColin Finck 
1093c2c66affSColin Finck             j = (j + 1) % ci->num_stripes;
1094c2c66affSColin Finck         }
1095c2c66affSColin Finck 
1096c2c66affSColin Finck         if (!failed) {
1097c2c66affSColin Finck             if (num_errors == 0) {
1098c2c66affSColin Finck                 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1099c2c66affSColin Finck                 UINT32 crc32;
1100c2c66affSColin Finck 
1101c2c66affSColin Finck                 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1102c2c66affSColin Finck                               Vcb->superblock.node_size);
1103c2c66affSColin Finck 
1104c2c66affSColin Finck                 for (j = 0; j < ci->num_stripes - 2; j++) {
1105c2c66affSColin Finck                     if (j != stripe)
1106c2c66affSColin Finck                         do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1107c2c66affSColin Finck                 }
1108c2c66affSColin Finck 
1109c2c66affSColin Finck                 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1110c2c66affSColin Finck 
1111c2c66affSColin Finck                 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1112c2c66affSColin Finck                     RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113c2c66affSColin Finck 
1114c2c66affSColin Finck                     if (devices[physstripe] && devices[physstripe]->devobj)
1115c2c66affSColin Finck                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1116c2c66affSColin Finck 
1117c2c66affSColin Finck                     recovered = TRUE;
1118c2c66affSColin Finck 
1119c2c66affSColin Finck                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1120c2c66affSColin Finck                         Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1121c2c66affSColin Finck                                                  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1122c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
1123c2c66affSColin Finck                             WARN("write_data_phys returned %08x\n", Status);
1124c2c66affSColin Finck                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1125c2c66affSColin Finck                         }
1126c2c66affSColin Finck                     }
1127c2c66affSColin Finck                 }
1128c2c66affSColin Finck             }
1129c2c66affSColin Finck 
1130c2c66affSColin Finck             if (!recovered) {
1131c2c66affSColin Finck                 UINT32 crc32;
1132c2c66affSColin Finck                 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1133c2c66affSColin Finck                 BOOL read_q = FALSE;
1134c2c66affSColin Finck 
1135c2c66affSColin Finck                 if (devices[parity2] && devices[parity2]->devobj) {
1136c2c66affSColin Finck                     Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1137c2c66affSColin Finck                                             Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE);
1138c2c66affSColin Finck                     if (!NT_SUCCESS(Status)) {
1139c2c66affSColin Finck                         ERR("sync_read_phys returned %08x\n", Status);
1140c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1141c2c66affSColin Finck                     } else
1142c2c66affSColin Finck                         read_q = TRUE;
1143c2c66affSColin Finck                 }
1144c2c66affSColin Finck 
1145c2c66affSColin Finck                 if (read_q) {
1146c2c66affSColin Finck                     if (num_errors == 1) {
1147c2c66affSColin Finck                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148c2c66affSColin Finck 
1149c2c66affSColin Finck                         crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150c2c66affSColin Finck 
1151c2c66affSColin Finck                         if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation))
1152c2c66affSColin Finck                             recovered = TRUE;
1153c2c66affSColin Finck                     } else {
1154c2c66affSColin Finck                         for (j = 0; j < ci->num_stripes - 1; j++) {
1155c2c66affSColin Finck                             if (j != stripe) {
1156c2c66affSColin Finck                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1157c2c66affSColin Finck 
1158c2c66affSColin Finck                                 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1159c2c66affSColin Finck 
1160c2c66affSColin Finck                                 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1161c2c66affSColin Finck                                     recovered = TRUE;
1162c2c66affSColin Finck                                     error_stripe = j;
1163c2c66affSColin Finck                                     break;
1164c2c66affSColin Finck                                 }
1165c2c66affSColin Finck                             }
1166c2c66affSColin Finck                         }
1167c2c66affSColin Finck                     }
1168c2c66affSColin Finck                 }
1169c2c66affSColin Finck 
1170c2c66affSColin Finck                 if (recovered) {
1171c2c66affSColin Finck                     UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1172c2c66affSColin Finck 
1173c2c66affSColin Finck                     if (devices[physstripe] && devices[physstripe]->devobj)
1174c2c66affSColin Finck                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1175c2c66affSColin Finck 
1176c2c66affSColin Finck                     RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1177c2c66affSColin Finck 
1178c2c66affSColin Finck                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1179c2c66affSColin Finck                         Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1180c2c66affSColin Finck                                                  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1181c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
1182c2c66affSColin Finck                             WARN("write_data_phys returned %08x\n", Status);
1183c2c66affSColin Finck                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1184c2c66affSColin Finck                         }
1185c2c66affSColin Finck                     }
1186c2c66affSColin Finck 
1187c2c66affSColin Finck                     if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1188c2c66affSColin Finck                         if (error_stripe == ci->num_stripes - 2) {
1189c2c66affSColin Finck                             ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1190c2c66affSColin Finck 
1191c2c66affSColin Finck                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1192c2c66affSColin Finck 
1193c2c66affSColin Finck                             RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1194c2c66affSColin Finck 
1195c2c66affSColin Finck                             for (j = 0; j < ci->num_stripes - 2; j++) {
1196c2c66affSColin Finck                                 if (j == stripe) {
1197c2c66affSColin Finck                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1198c2c66affSColin Finck                                            Vcb->superblock.node_size);
1199c2c66affSColin Finck                                 } else {
1200c2c66affSColin Finck                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1201c2c66affSColin Finck                                             Vcb->superblock.node_size);
1202c2c66affSColin Finck                                 }
1203c2c66affSColin Finck                             }
1204c2c66affSColin Finck                         } else {
1205c2c66affSColin Finck                             ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1206c2c66affSColin Finck                                 devices[error_stripe_phys]->devitem.dev_id);
1207c2c66affSColin Finck 
1208c2c66affSColin Finck                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1209c2c66affSColin Finck 
1210c2c66affSColin Finck                             RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1211c2c66affSColin Finck                                           sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1212c2c66affSColin Finck                         }
1213c2c66affSColin Finck                     }
1214c2c66affSColin Finck 
1215c2c66affSColin Finck                     if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1216c2c66affSColin Finck                         Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1217c2c66affSColin Finck                                                  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1218c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
1219c2c66affSColin Finck                             WARN("write_data_phys returned %08x\n", Status);
1220c2c66affSColin Finck                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1221c2c66affSColin Finck                         }
1222c2c66affSColin Finck                     }
1223c2c66affSColin Finck                 }
1224c2c66affSColin Finck             }
1225c2c66affSColin Finck         }
1226c2c66affSColin Finck 
1227c2c66affSColin Finck         if (!recovered) {
1228c2c66affSColin Finck             ERR("unrecoverable checksum error at %llx\n", addr);
1229c2c66affSColin Finck             ExFreePool(sector);
1230c2c66affSColin Finck             return STATUS_CRC_ERROR;
1231c2c66affSColin Finck         }
1232c2c66affSColin Finck 
1233c2c66affSColin Finck         ExFreePool(sector);
1234c2c66affSColin Finck     } else {
1235c2c66affSColin Finck         ULONG sectors = length / Vcb->superblock.sector_size;
1236c2c66affSColin Finck         UINT8* sector;
1237c2c66affSColin Finck 
1238c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1239c2c66affSColin Finck         if (!sector) {
1240c2c66affSColin Finck             ERR("out of memory\n");
1241c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
1242c2c66affSColin Finck         }
1243c2c66affSColin Finck 
1244c2c66affSColin Finck         for (i = 0; i < sectors; i++) {
1245c2c66affSColin Finck             UINT64 off;
1246c2c66affSColin Finck             UINT16 physstripe, parity1, parity2;
1247c2c66affSColin Finck             UINT32 crc32;
1248c2c66affSColin Finck 
1249c2c66affSColin Finck             if (context->csum)
1250c2c66affSColin Finck                 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1251c2c66affSColin Finck 
1252c2c66affSColin Finck             get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1253c2c66affSColin Finck                              ci->num_stripes - 2, &off, &stripe);
1254c2c66affSColin Finck 
1255c2c66affSColin Finck             parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1256c2c66affSColin Finck             parity2 = (parity1 + 1) % ci->num_stripes;
1257c2c66affSColin Finck 
1258c2c66affSColin Finck             physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1259c2c66affSColin Finck 
1260c2c66affSColin Finck             if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1261c2c66affSColin Finck                 UINT16 k, error_stripe;
1262c2c66affSColin Finck                 BOOL recovered = FALSE, failed = FALSE;
1263c2c66affSColin Finck                 ULONG num_errors = 0;
1264c2c66affSColin Finck 
1265c2c66affSColin Finck                 if (devices[physstripe] && devices[physstripe]->devobj)
1266c2c66affSColin Finck                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1267c2c66affSColin Finck 
1268c2c66affSColin Finck                 j = (parity2 + 1) % ci->num_stripes;
1269c2c66affSColin Finck 
1270c2c66affSColin Finck                 for (k = 0; k < ci->num_stripes - 1; k++) {
1271c2c66affSColin Finck                     if (j != physstripe) {
1272c2c66affSColin Finck                         if (devices[j] && devices[j]->devobj) {
1273c2c66affSColin Finck                             Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE);
1274c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
1275c2c66affSColin Finck                                 ERR("sync_read_phys returned %08x\n", Status);
1276c2c66affSColin Finck                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1277c2c66affSColin Finck                                 num_errors++;
1278c2c66affSColin Finck                                 error_stripe = k;
1279c2c66affSColin Finck 
1280c2c66affSColin Finck                                 if (num_errors > 1) {
1281c2c66affSColin Finck                                     failed = TRUE;
1282c2c66affSColin Finck                                     break;
1283c2c66affSColin Finck                                 }
1284c2c66affSColin Finck                             }
1285c2c66affSColin Finck                         } else {
1286c2c66affSColin Finck                             num_errors++;
1287c2c66affSColin Finck                             error_stripe = k;
1288c2c66affSColin Finck 
1289c2c66affSColin Finck                             if (num_errors > 1) {
1290c2c66affSColin Finck                                 failed = TRUE;
1291c2c66affSColin Finck                                 break;
1292c2c66affSColin Finck                             }
1293c2c66affSColin Finck                         }
1294c2c66affSColin Finck                     }
1295c2c66affSColin Finck 
1296c2c66affSColin Finck                     j = (j + 1) % ci->num_stripes;
1297c2c66affSColin Finck                 }
1298c2c66affSColin Finck 
1299c2c66affSColin Finck                 if (!failed) {
1300c2c66affSColin Finck                     if (num_errors == 0) {
1301c2c66affSColin Finck                         RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302c2c66affSColin Finck 
1303c2c66affSColin Finck                         for (j = 0; j < ci->num_stripes - 2; j++) {
1304c2c66affSColin Finck                             if (j != stripe)
1305c2c66affSColin Finck                                 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1306c2c66affSColin Finck                         }
1307c2c66affSColin Finck 
1308c2c66affSColin Finck                         if (context->csum)
1309c2c66affSColin Finck                             crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1310c2c66affSColin Finck 
1311c2c66affSColin Finck                         if (!context->csum || crc32 == context->csum[i]) {
1312c2c66affSColin Finck                             RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1313c2c66affSColin Finck 
1314c2c66affSColin Finck                             if (devices[physstripe] && devices[physstripe]->devobj)
1315c2c66affSColin Finck                                 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1316c2c66affSColin Finck                                     devices[physstripe]->devitem.dev_id);
1317c2c66affSColin Finck 
1318c2c66affSColin Finck                             recovered = TRUE;
1319c2c66affSColin Finck 
1320c2c66affSColin Finck                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1321c2c66affSColin Finck                                 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1322c2c66affSColin Finck                                                          sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1323c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
1324c2c66affSColin Finck                                     WARN("write_data_phys returned %08x\n", Status);
1325c2c66affSColin Finck                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1326c2c66affSColin Finck                                 }
1327c2c66affSColin Finck                             }
1328c2c66affSColin Finck                         }
1329c2c66affSColin Finck                     }
1330c2c66affSColin Finck 
1331c2c66affSColin Finck                     if (!recovered) {
1332c2c66affSColin Finck                         BOOL read_q = FALSE;
1333c2c66affSColin Finck 
1334c2c66affSColin Finck                         if (devices[parity2] && devices[parity2]->devobj) {
1335c2c66affSColin Finck                             Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1336c2c66affSColin Finck                                                     Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE);
1337c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
1338c2c66affSColin Finck                                 ERR("sync_read_phys returned %08x\n", Status);
1339c2c66affSColin Finck                                 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1340c2c66affSColin Finck                             } else
1341c2c66affSColin Finck                                 read_q = TRUE;
1342c2c66affSColin Finck                         }
1343c2c66affSColin Finck 
1344c2c66affSColin Finck                         if (read_q) {
1345c2c66affSColin Finck                             if (num_errors == 1) {
1346c2c66affSColin Finck                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1347c2c66affSColin Finck 
1348c2c66affSColin Finck                                 if (!devices[physstripe] || !devices[physstripe]->devobj)
1349c2c66affSColin Finck                                     recovered = TRUE;
1350c2c66affSColin Finck                                 else {
1351c2c66affSColin Finck                                     crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1352c2c66affSColin Finck 
1353c2c66affSColin Finck                                     if (crc32 == context->csum[i])
1354c2c66affSColin Finck                                         recovered = TRUE;
1355c2c66affSColin Finck                                 }
1356c2c66affSColin Finck                             } else {
1357c2c66affSColin Finck                                 for (j = 0; j < ci->num_stripes - 1; j++) {
1358c2c66affSColin Finck                                     if (j != stripe) {
1359c2c66affSColin Finck                                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1360c2c66affSColin Finck 
1361c2c66affSColin Finck                                         crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1362c2c66affSColin Finck 
1363c2c66affSColin Finck                                         if (crc32 == context->csum[i]) {
1364c2c66affSColin Finck                                             recovered = TRUE;
1365c2c66affSColin Finck                                             error_stripe = j;
1366c2c66affSColin Finck                                             break;
1367c2c66affSColin Finck                                         }
1368c2c66affSColin Finck                                     }
1369c2c66affSColin Finck                                 }
1370c2c66affSColin Finck                             }
1371c2c66affSColin Finck                         }
1372c2c66affSColin Finck 
1373c2c66affSColin Finck                         if (recovered) {
1374c2c66affSColin Finck                             UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1375c2c66affSColin Finck 
1376c2c66affSColin Finck                             if (devices[physstripe] && devices[physstripe]->devobj)
1377c2c66affSColin Finck                                 ERR("recovering from checksum error at %llx, device %llx\n",
1378c2c66affSColin Finck                                     addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1379c2c66affSColin Finck 
1380c2c66affSColin Finck                             RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1381c2c66affSColin Finck 
1382c2c66affSColin Finck                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1383c2c66affSColin Finck                                 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1384c2c66affSColin Finck                                                          sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1385c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
1386c2c66affSColin Finck                                     WARN("write_data_phys returned %08x\n", Status);
1387c2c66affSColin Finck                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1388c2c66affSColin Finck                                 }
1389c2c66affSColin Finck                             }
1390c2c66affSColin Finck 
1391c2c66affSColin Finck                             if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1392c2c66affSColin Finck                                 if (error_stripe == ci->num_stripes - 2) {
1393c2c66affSColin Finck                                     ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1394c2c66affSColin Finck                                         devices[error_stripe_phys]->devitem.dev_id);
1395c2c66affSColin Finck 
1396c2c66affSColin Finck                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1397c2c66affSColin Finck 
1398c2c66affSColin Finck                                     RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1399c2c66affSColin Finck 
1400c2c66affSColin Finck                                     for (j = 0; j < ci->num_stripes - 2; j++) {
1401c2c66affSColin Finck                                         if (j == stripe) {
1402c2c66affSColin Finck                                             do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1403c2c66affSColin Finck                                                    Vcb->superblock.sector_size);
1404c2c66affSColin Finck                                         } else {
1405c2c66affSColin Finck                                             do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1406c2c66affSColin Finck                                                    Vcb->superblock.sector_size);
1407c2c66affSColin Finck                                         }
1408c2c66affSColin Finck                                     }
1409c2c66affSColin Finck                                 } else {
1410c2c66affSColin Finck                                     ERR("recovering from checksum error at %llx, device %llx\n",
1411c2c66affSColin Finck                                         addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1412c2c66affSColin Finck                                         devices[error_stripe_phys]->devitem.dev_id);
1413c2c66affSColin Finck 
1414c2c66affSColin Finck                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1415c2c66affSColin Finck 
1416c2c66affSColin Finck                                     RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1417c2c66affSColin Finck                                                   sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1418c2c66affSColin Finck                                 }
1419c2c66affSColin Finck                             }
1420c2c66affSColin Finck 
1421c2c66affSColin Finck                             if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1422c2c66affSColin Finck                                 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1423c2c66affSColin Finck                                                          sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1424c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
1425c2c66affSColin Finck                                     WARN("write_data_phys returned %08x\n", Status);
1426c2c66affSColin Finck                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1427c2c66affSColin Finck                                 }
1428c2c66affSColin Finck                             }
1429c2c66affSColin Finck                         }
1430c2c66affSColin Finck                     }
1431c2c66affSColin Finck                 }
1432c2c66affSColin Finck 
1433c2c66affSColin Finck                 if (!recovered) {
1434c2c66affSColin Finck                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1435c2c66affSColin Finck                     ExFreePool(sector);
1436c2c66affSColin Finck                     return STATUS_CRC_ERROR;
1437c2c66affSColin Finck                 }
1438c2c66affSColin Finck             }
1439c2c66affSColin Finck         }
1440c2c66affSColin Finck 
1441c2c66affSColin Finck         ExFreePool(sector);
1442c2c66affSColin Finck     }
1443c2c66affSColin Finck 
1444c2c66affSColin Finck     return STATUS_SUCCESS;
1445c2c66affSColin Finck }
1446c2c66affSColin Finck 
1447c2c66affSColin Finck NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum,
1448c2c66affSColin Finck                    _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read,
1449c2c66affSColin Finck                    _In_ ULONG priority) {
1450c2c66affSColin Finck     CHUNK_ITEM* ci;
1451c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis;
1452c2c66affSColin Finck     read_data_context context;
1453c2c66affSColin Finck     UINT64 type, offset, total_reading = 0;
1454c2c66affSColin Finck     NTSTATUS Status;
1455c2c66affSColin Finck     device** devices = NULL;
1456c2c66affSColin Finck     UINT16 i, startoffstripe, allowed_missing, missing_devices = 0;
1457c2c66affSColin Finck     UINT8* dummypage = NULL;
1458c2c66affSColin Finck     PMDL dummy_mdl = NULL;
1459c2c66affSColin Finck     BOOL need_to_wait;
1460c2c66affSColin Finck     UINT64 lockaddr, locklen;
1461c2c66affSColin Finck #ifdef DEBUG_STATS
1462c2c66affSColin Finck     LARGE_INTEGER time1, time2;
1463c2c66affSColin Finck #endif
1464c2c66affSColin Finck 
1465c2c66affSColin Finck     if (Vcb->log_to_phys_loaded) {
1466c2c66affSColin Finck         if (!c) {
1467c2c66affSColin Finck             c = get_chunk_from_address(Vcb, addr);
1468c2c66affSColin Finck 
1469c2c66affSColin Finck             if (!c) {
1470c2c66affSColin Finck                 ERR("get_chunk_from_address failed\n");
1471c2c66affSColin Finck                 return STATUS_INTERNAL_ERROR;
1472c2c66affSColin Finck             }
1473c2c66affSColin Finck         }
1474c2c66affSColin Finck 
1475c2c66affSColin Finck         ci = c->chunk_item;
1476c2c66affSColin Finck         offset = c->offset;
1477c2c66affSColin Finck         devices = c->devices;
1478c2c66affSColin Finck 
1479c2c66affSColin Finck         if (pc)
1480c2c66affSColin Finck             *pc = c;
1481c2c66affSColin Finck     } else {
1482c2c66affSColin Finck         LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1483c2c66affSColin Finck 
1484c2c66affSColin Finck         ci = NULL;
1485c2c66affSColin Finck 
1486c2c66affSColin Finck         c = NULL;
1487c2c66affSColin Finck         while (le != &Vcb->sys_chunks) {
1488c2c66affSColin Finck             sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1489c2c66affSColin Finck 
1490c2c66affSColin Finck             if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1491c2c66affSColin Finck                 CHUNK_ITEM* chunk_item = sc->data;
1492c2c66affSColin Finck 
1493c2c66affSColin Finck                 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1494c2c66affSColin Finck                     ci = chunk_item;
1495c2c66affSColin Finck                     offset = sc->key.offset;
1496c2c66affSColin Finck                     cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1497c2c66affSColin Finck 
1498c2c66affSColin Finck                     devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1499c2c66affSColin Finck                     if (!devices) {
1500c2c66affSColin Finck                         ERR("out of memory\n");
1501c2c66affSColin Finck                         return STATUS_INSUFFICIENT_RESOURCES;
1502c2c66affSColin Finck                     }
1503c2c66affSColin Finck 
1504c2c66affSColin Finck                     for (i = 0; i < ci->num_stripes; i++) {
1505c2c66affSColin Finck                         devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1506c2c66affSColin Finck                     }
1507c2c66affSColin Finck 
1508c2c66affSColin Finck                     break;
1509c2c66affSColin Finck                 }
1510c2c66affSColin Finck             }
1511c2c66affSColin Finck 
1512c2c66affSColin Finck             le = le->Flink;
1513c2c66affSColin Finck         }
1514c2c66affSColin Finck 
1515c2c66affSColin Finck         if (!ci) {
1516c2c66affSColin Finck             ERR("could not find chunk for %llx in bootstrap\n", addr);
1517c2c66affSColin Finck             return STATUS_INTERNAL_ERROR;
1518c2c66affSColin Finck         }
1519c2c66affSColin Finck 
1520c2c66affSColin Finck         if (pc)
1521c2c66affSColin Finck             *pc = NULL;
1522c2c66affSColin Finck     }
1523c2c66affSColin Finck 
1524c2c66affSColin Finck     if (ci->type & BLOCK_FLAG_DUPLICATE) {
1525c2c66affSColin Finck         type = BLOCK_FLAG_DUPLICATE;
1526c2c66affSColin Finck         allowed_missing = ci->num_stripes - 1;
1527c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID0) {
1528c2c66affSColin Finck         type = BLOCK_FLAG_RAID0;
1529c2c66affSColin Finck         allowed_missing = 0;
1530c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID1) {
1531c2c66affSColin Finck         type = BLOCK_FLAG_DUPLICATE;
1532c2c66affSColin Finck         allowed_missing = 1;
1533c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID10) {
1534c2c66affSColin Finck         type = BLOCK_FLAG_RAID10;
1535c2c66affSColin Finck         allowed_missing = 1;
1536c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID5) {
1537c2c66affSColin Finck         type = BLOCK_FLAG_RAID5;
1538c2c66affSColin Finck         allowed_missing = 1;
1539c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID6) {
1540c2c66affSColin Finck         type = BLOCK_FLAG_RAID6;
1541c2c66affSColin Finck         allowed_missing = 2;
1542c2c66affSColin Finck     } else { // SINGLE
1543c2c66affSColin Finck         type = BLOCK_FLAG_DUPLICATE;
1544c2c66affSColin Finck         allowed_missing = 0;
1545c2c66affSColin Finck     }
1546c2c66affSColin Finck 
1547c2c66affSColin Finck     cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1548c2c66affSColin Finck 
1549c2c66affSColin Finck     RtlZeroMemory(&context, sizeof(read_data_context));
1550c2c66affSColin Finck     KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
1551c2c66affSColin Finck 
1552c2c66affSColin Finck     context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1553c2c66affSColin Finck     if (!context.stripes) {
1554c2c66affSColin Finck         ERR("out of memory\n");
1555c2c66affSColin Finck         return STATUS_INSUFFICIENT_RESOURCES;
1556c2c66affSColin Finck     }
1557c2c66affSColin Finck 
1558c2c66affSColin Finck     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1559c2c66affSColin Finck         get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1560c2c66affSColin Finck         chunk_lock_range(Vcb, c, lockaddr, locklen);
1561c2c66affSColin Finck     }
1562c2c66affSColin Finck 
1563c2c66affSColin Finck     RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1564c2c66affSColin Finck 
1565c2c66affSColin Finck     context.buflen = length;
1566c2c66affSColin Finck     context.num_stripes = ci->num_stripes;
1567c2c66affSColin Finck     context.stripes_left = context.num_stripes;
1568c2c66affSColin Finck     context.sector_size = Vcb->superblock.sector_size;
1569c2c66affSColin Finck     context.csum = csum;
1570c2c66affSColin Finck     context.tree = is_tree;
1571c2c66affSColin Finck     context.type = type;
1572c2c66affSColin Finck 
1573c2c66affSColin Finck     if (type == BLOCK_FLAG_RAID0) {
1574c2c66affSColin Finck         UINT64 startoff, endoff;
1575c2c66affSColin Finck         UINT16 endoffstripe, stripe;
1576c2c66affSColin Finck         UINT32 *stripeoff, pos;
1577c2c66affSColin Finck         PMDL master_mdl;
1578c2c66affSColin Finck         PFN_NUMBER* pfns;
1579c2c66affSColin Finck 
1580c2c66affSColin Finck         // FIXME - test this still works if page size isn't the same as sector size
1581c2c66affSColin Finck 
1582c2c66affSColin Finck         // This relies on the fact that MDLs are followed in memory by the page frame numbers (PFNs),
1583c2c66affSColin Finck         // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1584c2c66affSColin Finck         // data for you without doing a memcpy yourself.
1585c2c66affSColin Finck         // MDLs are officially opaque, so this might very well break in future versions of Windows.
1586c2c66affSColin Finck 
1587c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1588c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1589c2c66affSColin Finck 
1590c2c66affSColin Finck         if (file_read) {
1591c2c66affSColin Finck             // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1592c2c66affSColin Finck             // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1593c2c66affSColin Finck             // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1594c2c66affSColin Finck 
1595c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1596c2c66affSColin Finck 
1597c2c66affSColin Finck             if (!context.va) {
1598c2c66affSColin Finck                 ERR("out of memory\n");
1599c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1600c2c66affSColin Finck                 goto exit;
1601c2c66affSColin Finck             }
1602c2c66affSColin Finck         } else
1603c2c66affSColin Finck             context.va = buf;
1604c2c66affSColin Finck 
1605c2c66affSColin Finck         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1606c2c66affSColin Finck         if (!master_mdl) {
1607c2c66affSColin Finck             ERR("out of memory\n");
1608c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1609c2c66affSColin Finck             goto exit;
1610c2c66affSColin Finck         }
1611c2c66affSColin Finck 
1612c2c66affSColin Finck         Status = STATUS_SUCCESS;
1613c2c66affSColin Finck 
1614c2c66affSColin Finck         _SEH2_TRY {
1615c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1616c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1617c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
1618c2c66affSColin Finck         } _SEH2_END;
1619c2c66affSColin Finck 
1620c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
1621c2c66affSColin Finck             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1622c2c66affSColin Finck             IoFreeMdl(master_mdl);
1623c2c66affSColin Finck             goto exit;
1624c2c66affSColin Finck         }
1625c2c66affSColin Finck 
1626c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
1627c2c66affSColin Finck 
1628c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i++) {
1629c2c66affSColin Finck             if (startoffstripe > i)
1630c2c66affSColin Finck                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1631c2c66affSColin Finck             else if (startoffstripe == i)
1632c2c66affSColin Finck                 context.stripes[i].stripestart = startoff;
1633c2c66affSColin Finck             else
1634c2c66affSColin Finck                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1635c2c66affSColin Finck 
1636c2c66affSColin Finck             if (endoffstripe > i)
1637c2c66affSColin Finck                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1638c2c66affSColin Finck             else if (endoffstripe == i)
1639c2c66affSColin Finck                 context.stripes[i].stripeend = endoff + 1;
1640c2c66affSColin Finck             else
1641c2c66affSColin Finck                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1642c2c66affSColin Finck 
1643c2c66affSColin Finck             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1644c2c66affSColin Finck                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
1645c2c66affSColin Finck 
1646c2c66affSColin Finck                 if (!context.stripes[i].mdl) {
1647c2c66affSColin Finck                     ERR("IoAllocateMdl failed\n");
1648*eb7fbc25SPierre Schweitzer                     MmUnlockPages(master_mdl);
1649*eb7fbc25SPierre Schweitzer                     IoFreeMdl(master_mdl);
1650c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
1651c2c66affSColin Finck                     goto exit;
1652c2c66affSColin Finck                 }
1653c2c66affSColin Finck             }
1654c2c66affSColin Finck         }
1655c2c66affSColin Finck 
1656c2c66affSColin Finck         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
1657c2c66affSColin Finck         if (!stripeoff) {
1658c2c66affSColin Finck             ERR("out of memory\n");
1659*eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
1660*eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
1661c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1662c2c66affSColin Finck             goto exit;
1663c2c66affSColin Finck         }
1664c2c66affSColin Finck 
1665c2c66affSColin Finck         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
1666c2c66affSColin Finck 
1667c2c66affSColin Finck         pos = 0;
1668c2c66affSColin Finck         stripe = startoffstripe;
1669c2c66affSColin Finck         while (pos < length) {
1670c2c66affSColin Finck             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1671c2c66affSColin Finck 
1672c2c66affSColin Finck             if (pos == 0) {
1673c2c66affSColin Finck                 UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1674c2c66affSColin Finck 
1675c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1676c2c66affSColin Finck 
1677c2c66affSColin Finck                 stripeoff[stripe] += readlen;
1678c2c66affSColin Finck                 pos += readlen;
1679c2c66affSColin Finck             } else if (length - pos < ci->stripe_length) {
1680c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1681c2c66affSColin Finck 
1682c2c66affSColin Finck                 pos = length;
1683c2c66affSColin Finck             } else {
1684c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1685c2c66affSColin Finck 
1686c2c66affSColin Finck                 stripeoff[stripe] += (UINT32)ci->stripe_length;
1687c2c66affSColin Finck                 pos += (UINT32)ci->stripe_length;
1688c2c66affSColin Finck             }
1689c2c66affSColin Finck 
1690c2c66affSColin Finck             stripe = (stripe + 1) % ci->num_stripes;
1691c2c66affSColin Finck         }
1692c2c66affSColin Finck 
1693c2c66affSColin Finck         MmUnlockPages(master_mdl);
1694c2c66affSColin Finck         IoFreeMdl(master_mdl);
1695c2c66affSColin Finck 
1696c2c66affSColin Finck         ExFreePool(stripeoff);
1697c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID10) {
1698c2c66affSColin Finck         UINT64 startoff, endoff;
1699c2c66affSColin Finck         UINT16 endoffstripe, j, stripe;
1700c2c66affSColin Finck         ULONG orig_ls;
1701c2c66affSColin Finck         PMDL master_mdl;
1702c2c66affSColin Finck         PFN_NUMBER* pfns;
1703c2c66affSColin Finck         UINT32* stripeoff, pos;
1704c2c66affSColin Finck         read_data_stripe** stripes;
1705c2c66affSColin Finck 
1706c2c66affSColin Finck         if (c)
1707c2c66affSColin Finck             orig_ls = c->last_stripe;
1708c2c66affSColin Finck         else
1709c2c66affSColin Finck             orig_ls = 0;
1710c2c66affSColin Finck 
1711c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1712c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1713c2c66affSColin Finck 
1714c2c66affSColin Finck         if ((ci->num_stripes % ci->sub_stripes) != 0) {
1715c2c66affSColin Finck             ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1716c2c66affSColin Finck             Status = STATUS_INTERNAL_ERROR;
1717c2c66affSColin Finck             goto exit;
1718c2c66affSColin Finck         }
1719c2c66affSColin Finck 
1720c2c66affSColin Finck         if (file_read) {
1721c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1722c2c66affSColin Finck 
1723c2c66affSColin Finck             if (!context.va) {
1724c2c66affSColin Finck                 ERR("out of memory\n");
1725c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1726c2c66affSColin Finck                 goto exit;
1727c2c66affSColin Finck             }
1728c2c66affSColin Finck         } else
1729c2c66affSColin Finck             context.va = buf;
1730c2c66affSColin Finck 
1731c2c66affSColin Finck         context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1732c2c66affSColin Finck         context.startoffstripe = startoffstripe;
1733c2c66affSColin Finck         context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size);
1734c2c66affSColin Finck 
1735c2c66affSColin Finck         startoffstripe *= ci->sub_stripes;
1736c2c66affSColin Finck         endoffstripe *= ci->sub_stripes;
1737c2c66affSColin Finck 
1738c2c66affSColin Finck         if (c)
1739c2c66affSColin Finck             c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1740c2c66affSColin Finck 
1741c2c66affSColin Finck         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1742c2c66affSColin Finck         if (!master_mdl) {
1743c2c66affSColin Finck             ERR("out of memory\n");
1744c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1745c2c66affSColin Finck             goto exit;
1746c2c66affSColin Finck         }
1747c2c66affSColin Finck 
1748c2c66affSColin Finck         Status = STATUS_SUCCESS;
1749c2c66affSColin Finck 
1750c2c66affSColin Finck         _SEH2_TRY {
1751c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1752c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1753c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
1754c2c66affSColin Finck         } _SEH2_END;
1755c2c66affSColin Finck 
1756c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
1757c2c66affSColin Finck             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1758c2c66affSColin Finck             IoFreeMdl(master_mdl);
1759c2c66affSColin Finck             goto exit;
1760c2c66affSColin Finck         }
1761c2c66affSColin Finck 
1762c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
1763c2c66affSColin Finck 
1764c2c66affSColin Finck         stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1765c2c66affSColin Finck         if (!stripes) {
1766c2c66affSColin Finck             ERR("out of memory\n");
1767*eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
1768*eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
1769c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1770c2c66affSColin Finck             goto exit;
1771c2c66affSColin Finck         }
1772c2c66affSColin Finck 
1773c2c66affSColin Finck         RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1774c2c66affSColin Finck 
1775c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1776c2c66affSColin Finck             UINT64 sstart, send;
1777c2c66affSColin Finck             BOOL stripeset = FALSE;
1778c2c66affSColin Finck 
1779c2c66affSColin Finck             if (startoffstripe > i)
1780c2c66affSColin Finck                 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1781c2c66affSColin Finck             else if (startoffstripe == i)
1782c2c66affSColin Finck                 sstart = startoff;
1783c2c66affSColin Finck             else
1784c2c66affSColin Finck                 sstart = startoff - (startoff % ci->stripe_length);
1785c2c66affSColin Finck 
1786c2c66affSColin Finck             if (endoffstripe > i)
1787c2c66affSColin Finck                 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1788c2c66affSColin Finck             else if (endoffstripe == i)
1789c2c66affSColin Finck                 send = endoff + 1;
1790c2c66affSColin Finck             else
1791c2c66affSColin Finck                 send = endoff - (endoff % ci->stripe_length);
1792c2c66affSColin Finck 
1793c2c66affSColin Finck             for (j = 0; j < ci->sub_stripes; j++) {
1794c2c66affSColin Finck                 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1795c2c66affSColin Finck                     context.stripes[i+j].stripestart = sstart;
1796c2c66affSColin Finck                     context.stripes[i+j].stripeend = send;
1797c2c66affSColin Finck                     stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1798c2c66affSColin Finck 
1799c2c66affSColin Finck                     if (sstart != send) {
1800c2c66affSColin Finck                         context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1801c2c66affSColin Finck 
1802c2c66affSColin Finck                         if (!context.stripes[i+j].mdl) {
1803c2c66affSColin Finck                             ERR("IoAllocateMdl failed\n");
1804*eb7fbc25SPierre Schweitzer                             MmUnlockPages(master_mdl);
1805*eb7fbc25SPierre Schweitzer                             IoFreeMdl(master_mdl);
1806c2c66affSColin Finck                             Status = STATUS_INSUFFICIENT_RESOURCES;
1807c2c66affSColin Finck                             goto exit;
1808c2c66affSColin Finck                         }
1809c2c66affSColin Finck                     }
1810c2c66affSColin Finck 
1811c2c66affSColin Finck                     stripeset = TRUE;
1812c2c66affSColin Finck                 } else
1813c2c66affSColin Finck                     context.stripes[i+j].status = ReadDataStatus_Skip;
1814c2c66affSColin Finck             }
1815c2c66affSColin Finck 
1816c2c66affSColin Finck             if (!stripeset) {
1817c2c66affSColin Finck                 for (j = 0; j < ci->sub_stripes; j++) {
1818c2c66affSColin Finck                     if (devices[i+j] && devices[i+j]->devobj) {
1819c2c66affSColin Finck                         context.stripes[i+j].stripestart = sstart;
1820c2c66affSColin Finck                         context.stripes[i+j].stripeend = send;
1821c2c66affSColin Finck                         context.stripes[i+j].status = ReadDataStatus_Pending;
1822c2c66affSColin Finck                         stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1823c2c66affSColin Finck 
1824c2c66affSColin Finck                         if (sstart != send) {
1825c2c66affSColin Finck                             context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1826c2c66affSColin Finck 
1827c2c66affSColin Finck                             if (!context.stripes[i+j].mdl) {
1828c2c66affSColin Finck                                 ERR("IoAllocateMdl failed\n");
1829*eb7fbc25SPierre Schweitzer                                 MmUnlockPages(master_mdl);
1830*eb7fbc25SPierre Schweitzer                                 IoFreeMdl(master_mdl);
1831c2c66affSColin Finck                                 Status = STATUS_INSUFFICIENT_RESOURCES;
1832c2c66affSColin Finck                                 goto exit;
1833c2c66affSColin Finck                             }
1834c2c66affSColin Finck                         }
1835c2c66affSColin Finck 
1836c2c66affSColin Finck                         stripeset = TRUE;
1837c2c66affSColin Finck                         break;
1838c2c66affSColin Finck                     }
1839c2c66affSColin Finck                 }
1840c2c66affSColin Finck 
1841c2c66affSColin Finck                 if (!stripeset) {
1842c2c66affSColin Finck                     ERR("could not find stripe to read\n");
1843c2c66affSColin Finck                     Status = STATUS_DEVICE_NOT_READY;
1844c2c66affSColin Finck                     goto exit;
1845c2c66affSColin Finck                 }
1846c2c66affSColin Finck             }
1847c2c66affSColin Finck         }
1848c2c66affSColin Finck 
1849c2c66affSColin Finck         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1850c2c66affSColin Finck         if (!stripeoff) {
1851c2c66affSColin Finck             ERR("out of memory\n");
1852*eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
1853*eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
1854c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1855c2c66affSColin Finck             goto exit;
1856c2c66affSColin Finck         }
1857c2c66affSColin Finck 
1858c2c66affSColin Finck         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
1859c2c66affSColin Finck 
1860c2c66affSColin Finck         pos = 0;
1861c2c66affSColin Finck         stripe = startoffstripe / ci->sub_stripes;
1862c2c66affSColin Finck         while (pos < length) {
1863c2c66affSColin Finck             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1864c2c66affSColin Finck 
1865c2c66affSColin Finck             if (pos == 0) {
1866c2c66affSColin Finck                 UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1867c2c66affSColin Finck                                              ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1868c2c66affSColin Finck 
1869c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1870c2c66affSColin Finck 
1871c2c66affSColin Finck                 stripeoff[stripe] += readlen;
1872c2c66affSColin Finck                 pos += readlen;
1873c2c66affSColin Finck             } else if (length - pos < ci->stripe_length) {
1874c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1875c2c66affSColin Finck 
1876c2c66affSColin Finck                 pos = length;
1877c2c66affSColin Finck             } else {
1878c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1879c2c66affSColin Finck 
1880c2c66affSColin Finck                 stripeoff[stripe] += (ULONG)ci->stripe_length;
1881c2c66affSColin Finck                 pos += (ULONG)ci->stripe_length;
1882c2c66affSColin Finck             }
1883c2c66affSColin Finck 
1884c2c66affSColin Finck             stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1885c2c66affSColin Finck         }
1886c2c66affSColin Finck 
1887c2c66affSColin Finck         MmUnlockPages(master_mdl);
1888c2c66affSColin Finck         IoFreeMdl(master_mdl);
1889c2c66affSColin Finck 
1890c2c66affSColin Finck         ExFreePool(stripeoff);
1891c2c66affSColin Finck         ExFreePool(stripes);
1892c2c66affSColin Finck     } else if (type == BLOCK_FLAG_DUPLICATE) {
1893c2c66affSColin Finck         UINT64 orig_ls;
1894c2c66affSColin Finck 
1895c2c66affSColin Finck         if (c)
1896c2c66affSColin Finck             orig_ls = i = c->last_stripe;
1897c2c66affSColin Finck         else
1898c2c66affSColin Finck             orig_ls = i = 0;
1899c2c66affSColin Finck 
1900c2c66affSColin Finck         while (!devices[i] || !devices[i]->devobj) {
1901c2c66affSColin Finck             i = (i + 1) % ci->num_stripes;
1902c2c66affSColin Finck 
1903c2c66affSColin Finck             if (i == orig_ls) {
1904c2c66affSColin Finck                 ERR("no devices available to service request\n");
1905c2c66affSColin Finck                 Status = STATUS_DEVICE_NOT_READY;
1906c2c66affSColin Finck                 goto exit;
1907c2c66affSColin Finck             }
1908c2c66affSColin Finck         }
1909c2c66affSColin Finck 
1910c2c66affSColin Finck         if (c)
1911c2c66affSColin Finck             c->last_stripe = (i + 1) % ci->num_stripes;
1912c2c66affSColin Finck 
1913c2c66affSColin Finck         context.stripes[i].stripestart = addr - offset;
1914c2c66affSColin Finck         context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1915c2c66affSColin Finck 
1916c2c66affSColin Finck         if (file_read) {
1917c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1918c2c66affSColin Finck 
1919c2c66affSColin Finck             if (!context.va) {
1920c2c66affSColin Finck                 ERR("out of memory\n");
1921c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1922c2c66affSColin Finck                 goto exit;
1923c2c66affSColin Finck             }
1924c2c66affSColin Finck 
1925c2c66affSColin Finck             context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1926c2c66affSColin Finck             if (!context.stripes[i].mdl) {
1927c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
1928c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1929c2c66affSColin Finck                 goto exit;
1930c2c66affSColin Finck             }
1931c2c66affSColin Finck 
1932c2c66affSColin Finck             MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1933c2c66affSColin Finck         } else {
1934c2c66affSColin Finck             context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL);
1935c2c66affSColin Finck 
1936c2c66affSColin Finck             if (!context.stripes[i].mdl) {
1937c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
1938c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1939c2c66affSColin Finck                 goto exit;
1940c2c66affSColin Finck             }
1941c2c66affSColin Finck 
1942c2c66affSColin Finck             Status = STATUS_SUCCESS;
1943c2c66affSColin Finck 
1944c2c66affSColin Finck             _SEH2_TRY {
1945c2c66affSColin Finck                 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1946c2c66affSColin Finck             } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1947c2c66affSColin Finck                 Status = _SEH2_GetExceptionCode();
1948c2c66affSColin Finck             } _SEH2_END;
1949c2c66affSColin Finck 
1950c2c66affSColin Finck             if (!NT_SUCCESS(Status)) {
1951c2c66affSColin Finck                 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1952c2c66affSColin Finck                 goto exit;
1953c2c66affSColin Finck             }
1954c2c66affSColin Finck         }
1955c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID5) {
1956c2c66affSColin Finck         UINT64 startoff, endoff;
1957c2c66affSColin Finck         UINT16 endoffstripe, parity;
1958c2c66affSColin Finck         UINT32 *stripeoff, pos;
1959c2c66affSColin Finck         PMDL master_mdl;
1960c2c66affSColin Finck         PFN_NUMBER *pfns, dummy;
1961c2c66affSColin Finck         BOOL need_dummy = FALSE;
1962c2c66affSColin Finck 
1963c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1964c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1965c2c66affSColin Finck 
1966c2c66affSColin Finck         if (file_read) {
1967c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1968c2c66affSColin Finck 
1969c2c66affSColin Finck             if (!context.va) {
1970c2c66affSColin Finck                 ERR("out of memory\n");
1971c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1972c2c66affSColin Finck                 goto exit;
1973c2c66affSColin Finck             }
1974c2c66affSColin Finck         } else
1975c2c66affSColin Finck             context.va = buf;
1976c2c66affSColin Finck 
1977c2c66affSColin Finck         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1978c2c66affSColin Finck         if (!master_mdl) {
1979c2c66affSColin Finck             ERR("out of memory\n");
1980c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1981c2c66affSColin Finck             goto exit;
1982c2c66affSColin Finck         }
1983c2c66affSColin Finck 
1984c2c66affSColin Finck         Status = STATUS_SUCCESS;
1985c2c66affSColin Finck 
1986c2c66affSColin Finck         _SEH2_TRY {
1987c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1988c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1989c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
1990c2c66affSColin Finck         } _SEH2_END;
1991c2c66affSColin Finck 
1992c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
1993c2c66affSColin Finck             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1994c2c66affSColin Finck             IoFreeMdl(master_mdl);
1995c2c66affSColin Finck             goto exit;
1996c2c66affSColin Finck         }
1997c2c66affSColin Finck 
1998c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
1999c2c66affSColin Finck 
2000c2c66affSColin Finck         pos = 0;
2001c2c66affSColin Finck         while (pos < length) {
2002c2c66affSColin Finck             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2003c2c66affSColin Finck 
2004c2c66affSColin Finck             if (pos == 0) {
2005c2c66affSColin Finck                 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2006c2c66affSColin Finck                 ULONG skip, readlen;
2007c2c66affSColin Finck 
2008c2c66affSColin Finck                 i = startoffstripe;
2009c2c66affSColin Finck                 while (stripe != parity) {
2010c2c66affSColin Finck                     if (i == startoffstripe) {
2011c2c66affSColin Finck                         readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2012c2c66affSColin Finck 
2013c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff;
2014c2c66affSColin Finck                         context.stripes[stripe].stripeend = startoff + readlen;
2015c2c66affSColin Finck 
2016c2c66affSColin Finck                         pos += readlen;
2017c2c66affSColin Finck 
2018c2c66affSColin Finck                         if (pos == length)
2019c2c66affSColin Finck                             break;
2020c2c66affSColin Finck                     } else {
2021c2c66affSColin Finck                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2022c2c66affSColin Finck 
2023c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2024c2c66affSColin Finck                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2025c2c66affSColin Finck 
2026c2c66affSColin Finck                         pos += readlen;
2027c2c66affSColin Finck 
2028c2c66affSColin Finck                         if (pos == length)
2029c2c66affSColin Finck                             break;
2030c2c66affSColin Finck                     }
2031c2c66affSColin Finck 
2032c2c66affSColin Finck                     i++;
2033c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2034c2c66affSColin Finck                 }
2035c2c66affSColin Finck 
2036c2c66affSColin Finck                 if (pos == length)
2037c2c66affSColin Finck                     break;
2038c2c66affSColin Finck 
2039c2c66affSColin Finck                 for (i = 0; i < startoffstripe; i++) {
2040c2c66affSColin Finck                     UINT16 stripe2 = (parity + i + 1) % ci->num_stripes;
2041c2c66affSColin Finck 
2042c2c66affSColin Finck                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2043c2c66affSColin Finck                 }
2044c2c66affSColin Finck 
2045c2c66affSColin Finck                 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2046c2c66affSColin Finck 
2047c2c66affSColin Finck                 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2048c2c66affSColin Finck                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2049c2c66affSColin Finck 
2050c2c66affSColin Finck                     for (i = 0; i < ci->num_stripes; i++) {
2051c2c66affSColin Finck                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2052c2c66affSColin Finck                     }
2053c2c66affSColin Finck 
2054c2c66affSColin Finck                     pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2055c2c66affSColin Finck                     need_dummy = TRUE;
2056c2c66affSColin Finck                 }
2057c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2058c2c66affSColin Finck                 for (i = 0; i < ci->num_stripes; i++) {
2059c2c66affSColin Finck                     context.stripes[i].stripeend += ci->stripe_length;
2060c2c66affSColin Finck                 }
2061c2c66affSColin Finck 
2062c2c66affSColin Finck                 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1));
2063c2c66affSColin Finck                 need_dummy = TRUE;
2064c2c66affSColin Finck             } else {
2065c2c66affSColin Finck                 UINT16 stripe = (parity + 1) % ci->num_stripes;
2066c2c66affSColin Finck 
2067c2c66affSColin Finck                 i = 0;
2068c2c66affSColin Finck                 while (stripe != parity) {
2069c2c66affSColin Finck                     if (endoffstripe == i) {
2070c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff + 1;
2071c2c66affSColin Finck                         break;
2072c2c66affSColin Finck                     } else if (endoffstripe > i)
2073c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2074c2c66affSColin Finck 
2075c2c66affSColin Finck                     i++;
2076c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2077c2c66affSColin Finck                 }
2078c2c66affSColin Finck 
2079c2c66affSColin Finck                 break;
2080c2c66affSColin Finck             }
2081c2c66affSColin Finck         }
2082c2c66affSColin Finck 
2083c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i++) {
2084c2c66affSColin Finck             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2085c2c66affSColin Finck                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2086c2c66affSColin Finck                                                        FALSE, FALSE, NULL);
2087c2c66affSColin Finck 
2088c2c66affSColin Finck                 if (!context.stripes[i].mdl) {
2089c2c66affSColin Finck                     ERR("IoAllocateMdl failed\n");
2090*eb7fbc25SPierre Schweitzer                     MmUnlockPages(master_mdl);
2091*eb7fbc25SPierre Schweitzer                     IoFreeMdl(master_mdl);
2092c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2093c2c66affSColin Finck                     goto exit;
2094c2c66affSColin Finck                 }
2095c2c66affSColin Finck             }
2096c2c66affSColin Finck         }
2097c2c66affSColin Finck 
2098c2c66affSColin Finck         if (need_dummy) {
2099c2c66affSColin Finck             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2100c2c66affSColin Finck             if (!dummypage) {
2101c2c66affSColin Finck                 ERR("out of memory\n");
2102*eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2103*eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2104c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2105c2c66affSColin Finck                 goto exit;
2106c2c66affSColin Finck             }
2107c2c66affSColin Finck 
2108c2c66affSColin Finck             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2109c2c66affSColin Finck             if (!dummy_mdl) {
2110c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
2111*eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2112*eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2113c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2114c2c66affSColin Finck                 goto exit;
2115c2c66affSColin Finck             }
2116c2c66affSColin Finck 
2117c2c66affSColin Finck             MmBuildMdlForNonPagedPool(dummy_mdl);
2118c2c66affSColin Finck 
2119c2c66affSColin Finck             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2120c2c66affSColin Finck         }
2121c2c66affSColin Finck 
2122c2c66affSColin Finck         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2123c2c66affSColin Finck         if (!stripeoff) {
2124c2c66affSColin Finck             ERR("out of memory\n");
2125*eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
2126*eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
2127c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
2128c2c66affSColin Finck             goto exit;
2129c2c66affSColin Finck         }
2130c2c66affSColin Finck 
2131c2c66affSColin Finck         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2132c2c66affSColin Finck 
2133c2c66affSColin Finck         pos = 0;
2134c2c66affSColin Finck 
2135c2c66affSColin Finck         while (pos < length) {
2136c2c66affSColin Finck             PFN_NUMBER* stripe_pfns;
2137c2c66affSColin Finck 
2138c2c66affSColin Finck             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2139c2c66affSColin Finck 
2140c2c66affSColin Finck             if (pos == 0) {
2141c2c66affSColin Finck                 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2142c2c66affSColin Finck                 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2143c2c66affSColin Finck                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2144c2c66affSColin Finck 
2145c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2146c2c66affSColin Finck 
2147c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2148c2c66affSColin Finck 
2149c2c66affSColin Finck                 stripeoff[stripe] = readlen;
2150c2c66affSColin Finck                 pos += readlen;
2151c2c66affSColin Finck 
2152c2c66affSColin Finck                 stripe = (stripe + 1) % ci->num_stripes;
2153c2c66affSColin Finck 
2154c2c66affSColin Finck                 while (stripe != parity) {
2155c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2156c2c66affSColin Finck                     readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2157c2c66affSColin Finck 
2158c2c66affSColin Finck                     if (readlen == 0)
2159c2c66affSColin Finck                         break;
2160c2c66affSColin Finck 
2161c2c66affSColin Finck                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2162c2c66affSColin Finck 
2163c2c66affSColin Finck                     stripeoff[stripe] = readlen;
2164c2c66affSColin Finck                     pos += readlen;
2165c2c66affSColin Finck 
2166c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2167c2c66affSColin Finck                 }
2168c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2169c2c66affSColin Finck                 UINT16 stripe = (parity + 1) % ci->num_stripes;
2170c2c66affSColin Finck                 ULONG k;
2171c2c66affSColin Finck 
2172c2c66affSColin Finck                 while (stripe != parity) {
2173c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2174c2c66affSColin Finck 
2175c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2176c2c66affSColin Finck 
2177c2c66affSColin Finck                     stripeoff[stripe] += (UINT32)ci->stripe_length;
2178c2c66affSColin Finck                     pos += (UINT32)ci->stripe_length;
2179c2c66affSColin Finck 
2180c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2181c2c66affSColin Finck                 }
2182c2c66affSColin Finck 
2183c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2184c2c66affSColin Finck 
2185c2c66affSColin Finck                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2186c2c66affSColin Finck                     stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2187c2c66affSColin Finck                     stripeoff[parity] += PAGE_SIZE;
2188c2c66affSColin Finck                 }
2189c2c66affSColin Finck             } else {
2190c2c66affSColin Finck                 UINT16 stripe = (parity + 1) % ci->num_stripes;
2191c2c66affSColin Finck                 UINT32 readlen;
2192c2c66affSColin Finck 
2193c2c66affSColin Finck                 while (pos < length) {
2194c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2195c2c66affSColin Finck                     readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2196c2c66affSColin Finck 
2197c2c66affSColin Finck                     if (readlen == 0)
2198c2c66affSColin Finck                         break;
2199c2c66affSColin Finck 
2200c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2201c2c66affSColin Finck 
2202c2c66affSColin Finck                     stripeoff[stripe] += readlen;
2203c2c66affSColin Finck                     pos += readlen;
2204c2c66affSColin Finck 
2205c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2206c2c66affSColin Finck                 }
2207c2c66affSColin Finck             }
2208c2c66affSColin Finck         }
2209c2c66affSColin Finck 
2210c2c66affSColin Finck         MmUnlockPages(master_mdl);
2211c2c66affSColin Finck         IoFreeMdl(master_mdl);
2212c2c66affSColin Finck 
2213c2c66affSColin Finck         ExFreePool(stripeoff);
2214c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID6) {
2215c2c66affSColin Finck         UINT64 startoff, endoff;
2216c2c66affSColin Finck         UINT16 endoffstripe, parity1;
2217c2c66affSColin Finck         UINT32 *stripeoff, pos;
2218c2c66affSColin Finck         PMDL master_mdl;
2219c2c66affSColin Finck         PFN_NUMBER *pfns, dummy;
2220c2c66affSColin Finck         BOOL need_dummy = FALSE;
2221c2c66affSColin Finck 
2222c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2223c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2224c2c66affSColin Finck 
2225c2c66affSColin Finck         if (file_read) {
2226c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2227c2c66affSColin Finck 
2228c2c66affSColin Finck             if (!context.va) {
2229c2c66affSColin Finck                 ERR("out of memory\n");
2230c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2231c2c66affSColin Finck                 goto exit;
2232c2c66affSColin Finck             }
2233c2c66affSColin Finck         } else
2234c2c66affSColin Finck             context.va = buf;
2235c2c66affSColin Finck 
2236c2c66affSColin Finck         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
2237c2c66affSColin Finck         if (!master_mdl) {
2238c2c66affSColin Finck             ERR("out of memory\n");
2239c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
2240c2c66affSColin Finck             goto exit;
2241c2c66affSColin Finck         }
2242c2c66affSColin Finck 
2243c2c66affSColin Finck         Status = STATUS_SUCCESS;
2244c2c66affSColin Finck 
2245c2c66affSColin Finck         _SEH2_TRY {
2246c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2247c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2248c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
2249c2c66affSColin Finck         } _SEH2_END;
2250c2c66affSColin Finck 
2251c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2252c2c66affSColin Finck             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2253c2c66affSColin Finck             IoFreeMdl(master_mdl);
2254c2c66affSColin Finck             goto exit;
2255c2c66affSColin Finck         }
2256c2c66affSColin Finck 
2257c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
2258c2c66affSColin Finck 
2259c2c66affSColin Finck         pos = 0;
2260c2c66affSColin Finck         while (pos < length) {
2261c2c66affSColin Finck             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2262c2c66affSColin Finck 
2263c2c66affSColin Finck             if (pos == 0) {
2264c2c66affSColin Finck                 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2265c2c66affSColin Finck                 ULONG skip, readlen;
2266c2c66affSColin Finck 
2267c2c66affSColin Finck                 i = startoffstripe;
2268c2c66affSColin Finck                 while (stripe != parity1) {
2269c2c66affSColin Finck                     if (i == startoffstripe) {
2270c2c66affSColin Finck                         readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2271c2c66affSColin Finck 
2272c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff;
2273c2c66affSColin Finck                         context.stripes[stripe].stripeend = startoff + readlen;
2274c2c66affSColin Finck 
2275c2c66affSColin Finck                         pos += readlen;
2276c2c66affSColin Finck 
2277c2c66affSColin Finck                         if (pos == length)
2278c2c66affSColin Finck                             break;
2279c2c66affSColin Finck                     } else {
2280c2c66affSColin Finck                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2281c2c66affSColin Finck 
2282c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2283c2c66affSColin Finck                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2284c2c66affSColin Finck 
2285c2c66affSColin Finck                         pos += readlen;
2286c2c66affSColin Finck 
2287c2c66affSColin Finck                         if (pos == length)
2288c2c66affSColin Finck                             break;
2289c2c66affSColin Finck                     }
2290c2c66affSColin Finck 
2291c2c66affSColin Finck                     i++;
2292c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2293c2c66affSColin Finck                 }
2294c2c66affSColin Finck 
2295c2c66affSColin Finck                 if (pos == length)
2296c2c66affSColin Finck                     break;
2297c2c66affSColin Finck 
2298c2c66affSColin Finck                 for (i = 0; i < startoffstripe; i++) {
2299c2c66affSColin Finck                     UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes;
2300c2c66affSColin Finck 
2301c2c66affSColin Finck                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2302c2c66affSColin Finck                 }
2303c2c66affSColin Finck 
2304c2c66affSColin Finck                 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2305c2c66affSColin Finck 
2306c2c66affSColin Finck                 parity2 = (parity1 + 1) % ci->num_stripes;
2307c2c66affSColin Finck                 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2308c2c66affSColin Finck 
2309c2c66affSColin Finck                 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2310c2c66affSColin Finck                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2311c2c66affSColin Finck 
2312c2c66affSColin Finck                     for (i = 0; i < ci->num_stripes; i++) {
2313c2c66affSColin Finck                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2314c2c66affSColin Finck                     }
2315c2c66affSColin Finck 
2316c2c66affSColin Finck                     pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2317c2c66affSColin Finck                     need_dummy = TRUE;
2318c2c66affSColin Finck                 }
2319c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2320c2c66affSColin Finck                 for (i = 0; i < ci->num_stripes; i++) {
2321c2c66affSColin Finck                     context.stripes[i].stripeend += ci->stripe_length;
2322c2c66affSColin Finck                 }
2323c2c66affSColin Finck 
2324c2c66affSColin Finck                 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2));
2325c2c66affSColin Finck                 need_dummy = TRUE;
2326c2c66affSColin Finck             } else {
2327c2c66affSColin Finck                 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2328c2c66affSColin Finck 
2329c2c66affSColin Finck                 i = 0;
2330c2c66affSColin Finck                 while (stripe != parity1) {
2331c2c66affSColin Finck                     if (endoffstripe == i) {
2332c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff + 1;
2333c2c66affSColin Finck                         break;
2334c2c66affSColin Finck                     } else if (endoffstripe > i)
2335c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2336c2c66affSColin Finck 
2337c2c66affSColin Finck                     i++;
2338c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2339c2c66affSColin Finck                 }
2340c2c66affSColin Finck 
2341c2c66affSColin Finck                 break;
2342c2c66affSColin Finck             }
2343c2c66affSColin Finck         }
2344c2c66affSColin Finck 
2345c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i++) {
2346c2c66affSColin Finck             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2347c2c66affSColin Finck                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
2348c2c66affSColin Finck 
2349c2c66affSColin Finck                 if (!context.stripes[i].mdl) {
2350c2c66affSColin Finck                     ERR("IoAllocateMdl failed\n");
2351*eb7fbc25SPierre Schweitzer                     MmUnlockPages(master_mdl);
2352*eb7fbc25SPierre Schweitzer                     IoFreeMdl(master_mdl);
2353c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2354c2c66affSColin Finck                     goto exit;
2355c2c66affSColin Finck                 }
2356c2c66affSColin Finck             }
2357c2c66affSColin Finck         }
2358c2c66affSColin Finck 
2359c2c66affSColin Finck         if (need_dummy) {
2360c2c66affSColin Finck             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2361c2c66affSColin Finck             if (!dummypage) {
2362c2c66affSColin Finck                 ERR("out of memory\n");
2363*eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2364*eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2365c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2366c2c66affSColin Finck                 goto exit;
2367c2c66affSColin Finck             }
2368c2c66affSColin Finck 
2369c2c66affSColin Finck             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2370c2c66affSColin Finck             if (!dummy_mdl) {
2371c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
2372*eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2373*eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2374c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2375c2c66affSColin Finck                 goto exit;
2376c2c66affSColin Finck             }
2377c2c66affSColin Finck 
2378c2c66affSColin Finck             MmBuildMdlForNonPagedPool(dummy_mdl);
2379c2c66affSColin Finck 
2380c2c66affSColin Finck             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2381c2c66affSColin Finck         }
2382c2c66affSColin Finck 
2383c2c66affSColin Finck         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2384c2c66affSColin Finck         if (!stripeoff) {
2385c2c66affSColin Finck             ERR("out of memory\n");
2386*eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
2387*eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
2388c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
2389c2c66affSColin Finck             goto exit;
2390c2c66affSColin Finck         }
2391c2c66affSColin Finck 
2392c2c66affSColin Finck         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2393c2c66affSColin Finck 
2394c2c66affSColin Finck         pos = 0;
2395c2c66affSColin Finck 
2396c2c66affSColin Finck         while (pos < length) {
2397c2c66affSColin Finck             PFN_NUMBER* stripe_pfns;
2398c2c66affSColin Finck 
2399c2c66affSColin Finck             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2400c2c66affSColin Finck 
2401c2c66affSColin Finck             if (pos == 0) {
2402c2c66affSColin Finck                 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2403c2c66affSColin Finck                 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2404c2c66affSColin Finck                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2405c2c66affSColin Finck 
2406c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2407c2c66affSColin Finck 
2408c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2409c2c66affSColin Finck 
2410c2c66affSColin Finck                 stripeoff[stripe] = readlen;
2411c2c66affSColin Finck                 pos += readlen;
2412c2c66affSColin Finck 
2413c2c66affSColin Finck                 stripe = (stripe + 1) % ci->num_stripes;
2414c2c66affSColin Finck 
2415c2c66affSColin Finck                 while (stripe != parity1) {
2416c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2417c2c66affSColin Finck                     readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2418c2c66affSColin Finck 
2419c2c66affSColin Finck                     if (readlen == 0)
2420c2c66affSColin Finck                         break;
2421c2c66affSColin Finck 
2422c2c66affSColin Finck                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2423c2c66affSColin Finck 
2424c2c66affSColin Finck                     stripeoff[stripe] = readlen;
2425c2c66affSColin Finck                     pos += readlen;
2426c2c66affSColin Finck 
2427c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2428c2c66affSColin Finck                 }
2429c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2430c2c66affSColin Finck                 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2431c2c66affSColin Finck                 UINT16 parity2 = (parity1 + 1) % ci->num_stripes;
2432c2c66affSColin Finck                 ULONG k;
2433c2c66affSColin Finck 
2434c2c66affSColin Finck                 while (stripe != parity1) {
2435c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2436c2c66affSColin Finck 
2437c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2438c2c66affSColin Finck 
2439c2c66affSColin Finck                     stripeoff[stripe] += (UINT32)ci->stripe_length;
2440c2c66affSColin Finck                     pos += (UINT32)ci->stripe_length;
2441c2c66affSColin Finck 
2442c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2443c2c66affSColin Finck                 }
2444c2c66affSColin Finck 
2445c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2446c2c66affSColin Finck 
2447c2c66affSColin Finck                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2448c2c66affSColin Finck                     stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2449c2c66affSColin Finck                     stripeoff[parity1] += PAGE_SIZE;
2450c2c66affSColin Finck                 }
2451c2c66affSColin Finck 
2452c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2453c2c66affSColin Finck 
2454c2c66affSColin Finck                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2455c2c66affSColin Finck                     stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2456c2c66affSColin Finck                     stripeoff[parity2] += PAGE_SIZE;
2457c2c66affSColin Finck                 }
2458c2c66affSColin Finck             } else {
2459c2c66affSColin Finck                 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2460c2c66affSColin Finck                 UINT32 readlen;
2461c2c66affSColin Finck 
2462c2c66affSColin Finck                 while (pos < length) {
2463c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2464c2c66affSColin Finck                     readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2465c2c66affSColin Finck 
2466c2c66affSColin Finck                     if (readlen == 0)
2467c2c66affSColin Finck                         break;
2468c2c66affSColin Finck 
2469c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2470c2c66affSColin Finck 
2471c2c66affSColin Finck                     stripeoff[stripe] += readlen;
2472c2c66affSColin Finck                     pos += readlen;
2473c2c66affSColin Finck 
2474c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2475c2c66affSColin Finck                 }
2476c2c66affSColin Finck             }
2477c2c66affSColin Finck         }
2478c2c66affSColin Finck 
2479c2c66affSColin Finck         MmUnlockPages(master_mdl);
2480c2c66affSColin Finck         IoFreeMdl(master_mdl);
2481c2c66affSColin Finck 
2482c2c66affSColin Finck         ExFreePool(stripeoff);
2483c2c66affSColin Finck     }
2484c2c66affSColin Finck 
2485c2c66affSColin Finck     context.address = addr;
2486c2c66affSColin Finck 
2487c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2488c2c66affSColin Finck         if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2489c2c66affSColin Finck             context.stripes[i].status = ReadDataStatus_MissingDevice;
2490c2c66affSColin Finck             context.stripes_left--;
2491c2c66affSColin Finck 
2492c2c66affSColin Finck             if (!devices[i] || !devices[i]->devobj)
2493c2c66affSColin Finck                 missing_devices++;
2494c2c66affSColin Finck         }
2495c2c66affSColin Finck     }
2496c2c66affSColin Finck 
2497c2c66affSColin Finck     if (missing_devices > allowed_missing) {
2498c2c66affSColin Finck         ERR("not enough devices to service request (%u missing)\n", missing_devices);
2499c2c66affSColin Finck         Status = STATUS_UNEXPECTED_IO_ERROR;
2500c2c66affSColin Finck         goto exit;
2501c2c66affSColin Finck     }
2502c2c66affSColin Finck 
2503c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2504c2c66affSColin Finck         PIO_STACK_LOCATION IrpSp;
2505c2c66affSColin Finck 
2506c2c66affSColin Finck         if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2507c2c66affSColin Finck             context.stripes[i].context = (struct read_data_context*)&context;
2508c2c66affSColin Finck 
2509c2c66affSColin Finck             if (type == BLOCK_FLAG_RAID10) {
2510c2c66affSColin Finck                 context.stripes[i].stripenum = i / ci->sub_stripes;
2511c2c66affSColin Finck             }
2512c2c66affSColin Finck 
2513c2c66affSColin Finck             if (!Irp) {
2514c2c66affSColin Finck                 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
2515c2c66affSColin Finck 
2516c2c66affSColin Finck                 if (!context.stripes[i].Irp) {
2517c2c66affSColin Finck                     ERR("IoAllocateIrp failed\n");
2518c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2519c2c66affSColin Finck                     goto exit;
2520c2c66affSColin Finck                 }
2521c2c66affSColin Finck             } else {
2522c2c66affSColin Finck                 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2523c2c66affSColin Finck 
2524c2c66affSColin Finck                 if (!context.stripes[i].Irp) {
2525c2c66affSColin Finck                     ERR("IoMakeAssociatedIrp failed\n");
2526c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2527c2c66affSColin Finck                     goto exit;
2528c2c66affSColin Finck                 }
2529c2c66affSColin Finck             }
2530c2c66affSColin Finck 
2531c2c66affSColin Finck             IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2532c2c66affSColin Finck             IrpSp->MajorFunction = IRP_MJ_READ;
2533c2c66affSColin Finck 
2534c2c66affSColin Finck             if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2535c2c66affSColin Finck                 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2536c2c66affSColin Finck                 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2537c2c66affSColin Finck                     ERR("out of memory\n");
2538c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2539c2c66affSColin Finck                     goto exit;
2540c2c66affSColin Finck                 }
2541c2c66affSColin Finck 
2542c2c66affSColin Finck                 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2543c2c66affSColin Finck 
2544c2c66affSColin Finck                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2545c2c66affSColin Finck             } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2546c2c66affSColin Finck                 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2547c2c66affSColin Finck             else
2548c2c66affSColin Finck                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2549c2c66affSColin Finck 
2550c2c66affSColin Finck             IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2551c2c66affSColin Finck             IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2552c2c66affSColin Finck 
2553c2c66affSColin Finck             total_reading += IrpSp->Parameters.Read.Length;
2554c2c66affSColin Finck 
2555c2c66affSColin Finck             context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2556c2c66affSColin Finck 
2557c2c66affSColin Finck             IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE);
2558c2c66affSColin Finck 
2559c2c66affSColin Finck             context.stripes[i].status = ReadDataStatus_Pending;
2560c2c66affSColin Finck         }
2561c2c66affSColin Finck     }
2562c2c66affSColin Finck 
2563c2c66affSColin Finck #ifdef DEBUG_STATS
2564c2c66affSColin Finck     if (!is_tree)
2565c2c66affSColin Finck         time1 = KeQueryPerformanceCounter(NULL);
2566c2c66affSColin Finck #endif
2567c2c66affSColin Finck 
2568c2c66affSColin Finck     need_to_wait = FALSE;
2569c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2570c2c66affSColin Finck         if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2571c2c66affSColin Finck             IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2572c2c66affSColin Finck             need_to_wait = TRUE;
2573c2c66affSColin Finck         }
2574c2c66affSColin Finck     }
2575c2c66affSColin Finck 
2576c2c66affSColin Finck     if (need_to_wait)
2577c2c66affSColin Finck         KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2578c2c66affSColin Finck 
2579c2c66affSColin Finck #ifdef DEBUG_STATS
2580c2c66affSColin Finck     if (!is_tree) {
2581c2c66affSColin Finck         time2 = KeQueryPerformanceCounter(NULL);
2582c2c66affSColin Finck 
2583c2c66affSColin Finck         Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
2584c2c66affSColin Finck     }
2585c2c66affSColin Finck #endif
2586c2c66affSColin Finck 
2587c2c66affSColin Finck     if (diskacc)
2588c2c66affSColin Finck         fFsRtlUpdateDiskCounters(total_reading, 0);
2589c2c66affSColin Finck 
2590c2c66affSColin Finck     // check if any of the devices return a "user-induced" error
2591c2c66affSColin Finck 
2592c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2593c2c66affSColin Finck         if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2594c2c66affSColin Finck             Status = context.stripes[i].iosb.Status;
2595c2c66affSColin Finck             goto exit;
2596c2c66affSColin Finck         }
2597c2c66affSColin Finck     }
2598c2c66affSColin Finck 
2599c2c66affSColin Finck     if (type == BLOCK_FLAG_RAID0) {
2600c2c66affSColin Finck         Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2601c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2602c2c66affSColin Finck             ERR("read_data_raid0 returned %08x\n", Status);
2603c2c66affSColin Finck 
2604c2c66affSColin Finck             if (file_read)
2605c2c66affSColin Finck                 ExFreePool(context.va);
2606c2c66affSColin Finck 
2607c2c66affSColin Finck             goto exit;
2608c2c66affSColin Finck         }
2609c2c66affSColin Finck 
2610c2c66affSColin Finck         if (file_read) {
2611c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2612c2c66affSColin Finck             ExFreePool(context.va);
2613c2c66affSColin Finck         }
2614c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID10) {
2615c2c66affSColin Finck         Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2616c2c66affSColin Finck 
2617c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2618c2c66affSColin Finck             ERR("read_data_raid10 returned %08x\n", Status);
2619c2c66affSColin Finck 
2620c2c66affSColin Finck             if (file_read)
2621c2c66affSColin Finck                 ExFreePool(context.va);
2622c2c66affSColin Finck 
2623c2c66affSColin Finck             goto exit;
2624c2c66affSColin Finck         }
2625c2c66affSColin Finck 
2626c2c66affSColin Finck         if (file_read) {
2627c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2628c2c66affSColin Finck             ExFreePool(context.va);
2629c2c66affSColin Finck         }
2630c2c66affSColin Finck     } else if (type == BLOCK_FLAG_DUPLICATE) {
2631c2c66affSColin Finck         Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2632c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2633c2c66affSColin Finck             ERR("read_data_dup returned %08x\n", Status);
2634c2c66affSColin Finck 
2635c2c66affSColin Finck             if (file_read)
2636c2c66affSColin Finck                 ExFreePool(context.va);
2637c2c66affSColin Finck 
2638c2c66affSColin Finck             goto exit;
2639c2c66affSColin Finck         }
2640c2c66affSColin Finck 
2641c2c66affSColin Finck         if (file_read) {
2642c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2643c2c66affSColin Finck             ExFreePool(context.va);
2644c2c66affSColin Finck         }
2645c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID5) {
2646c2c66affSColin Finck         Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2647c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2648c2c66affSColin Finck             ERR("read_data_raid5 returned %08x\n", Status);
2649c2c66affSColin Finck 
2650c2c66affSColin Finck             if (file_read)
2651c2c66affSColin Finck                 ExFreePool(context.va);
2652c2c66affSColin Finck 
2653c2c66affSColin Finck             goto exit;
2654c2c66affSColin Finck         }
2655c2c66affSColin Finck 
2656c2c66affSColin Finck         if (file_read) {
2657c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2658c2c66affSColin Finck             ExFreePool(context.va);
2659c2c66affSColin Finck         }
2660c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID6) {
2661c2c66affSColin Finck         Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2662c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2663c2c66affSColin Finck             ERR("read_data_raid6 returned %08x\n", Status);
2664c2c66affSColin Finck 
2665c2c66affSColin Finck             if (file_read)
2666c2c66affSColin Finck                 ExFreePool(context.va);
2667c2c66affSColin Finck 
2668c2c66affSColin Finck             goto exit;
2669c2c66affSColin Finck         }
2670c2c66affSColin Finck 
2671c2c66affSColin Finck         if (file_read) {
2672c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2673c2c66affSColin Finck             ExFreePool(context.va);
2674c2c66affSColin Finck         }
2675c2c66affSColin Finck     }
2676c2c66affSColin Finck 
2677c2c66affSColin Finck exit:
2678c2c66affSColin Finck     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2679c2c66affSColin Finck         chunk_unlock_range(Vcb, c, lockaddr, locklen);
2680c2c66affSColin Finck 
2681c2c66affSColin Finck     if (dummy_mdl)
2682c2c66affSColin Finck         IoFreeMdl(dummy_mdl);
2683c2c66affSColin Finck 
2684c2c66affSColin Finck     if (dummypage)
2685c2c66affSColin Finck         ExFreePool(dummypage);
2686c2c66affSColin Finck 
2687c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2688c2c66affSColin Finck         if (context.stripes[i].mdl) {
2689c2c66affSColin Finck             if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2690c2c66affSColin Finck                 MmUnlockPages(context.stripes[i].mdl);
2691c2c66affSColin Finck 
2692c2c66affSColin Finck             IoFreeMdl(context.stripes[i].mdl);
2693c2c66affSColin Finck         }
2694c2c66affSColin Finck 
2695c2c66affSColin Finck         if (context.stripes[i].Irp)
2696c2c66affSColin Finck             IoFreeIrp(context.stripes[i].Irp);
2697c2c66affSColin Finck     }
2698c2c66affSColin Finck 
2699c2c66affSColin Finck     ExFreePool(context.stripes);
2700c2c66affSColin Finck 
2701c2c66affSColin Finck     if (!Vcb->log_to_phys_loaded)
2702c2c66affSColin Finck         ExFreePool(devices);
2703c2c66affSColin Finck 
2704c2c66affSColin Finck     return Status;
2705c2c66affSColin Finck }
2706c2c66affSColin Finck 
2707c2c66affSColin Finck NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) {
2708c2c66affSColin Finck     ULONG readlen;
2709c2c66affSColin Finck 
2710c2c66affSColin Finck     TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2711c2c66affSColin Finck 
2712c2c66affSColin Finck     if (pbr) *pbr = 0;
2713c2c66affSColin Finck 
2714c2c66affSColin Finck     if (start >= fcb->adsdata.Length) {
2715c2c66affSColin Finck         TRACE("tried to read beyond end of stream\n");
2716c2c66affSColin Finck         return STATUS_END_OF_FILE;
2717c2c66affSColin Finck     }
2718c2c66affSColin Finck 
2719c2c66affSColin Finck     if (length == 0) {
2720c2c66affSColin Finck         WARN("tried to read zero bytes\n");
2721c2c66affSColin Finck         return STATUS_SUCCESS;
2722c2c66affSColin Finck     }
2723c2c66affSColin Finck 
2724c2c66affSColin Finck     if (start + length < fcb->adsdata.Length)
2725c2c66affSColin Finck         readlen = length;
2726c2c66affSColin Finck     else
2727c2c66affSColin Finck         readlen = fcb->adsdata.Length - (ULONG)start;
2728c2c66affSColin Finck 
2729c2c66affSColin Finck     if (readlen > 0)
2730c2c66affSColin Finck         RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen);
2731c2c66affSColin Finck 
2732c2c66affSColin Finck     if (pbr) *pbr = readlen;
2733c2c66affSColin Finck 
2734c2c66affSColin Finck     return STATUS_SUCCESS;
2735c2c66affSColin Finck }
2736c2c66affSColin Finck 
2737c2c66affSColin Finck NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) {
2738c2c66affSColin Finck     NTSTATUS Status;
2739c2c66affSColin Finck     EXTENT_DATA* ed;
2740c2c66affSColin Finck     UINT32 bytes_read = 0;
2741c2c66affSColin Finck     UINT64 last_end;
2742c2c66affSColin Finck     LIST_ENTRY* le;
2743c2c66affSColin Finck #ifdef DEBUG_STATS
2744c2c66affSColin Finck     LARGE_INTEGER time1, time2;
2745c2c66affSColin Finck #endif
2746c2c66affSColin Finck 
2747c2c66affSColin Finck     TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2748c2c66affSColin Finck 
2749c2c66affSColin Finck     if (pbr)
2750c2c66affSColin Finck         *pbr = 0;
2751c2c66affSColin Finck 
2752c2c66affSColin Finck     if (start >= fcb->inode_item.st_size) {
2753c2c66affSColin Finck         WARN("Tried to read beyond end of file\n");
2754c2c66affSColin Finck         Status = STATUS_END_OF_FILE;
2755c2c66affSColin Finck         goto exit;
2756c2c66affSColin Finck     }
2757c2c66affSColin Finck 
2758c2c66affSColin Finck #ifdef DEBUG_STATS
2759c2c66affSColin Finck     time1 = KeQueryPerformanceCounter(NULL);
2760c2c66affSColin Finck #endif
2761c2c66affSColin Finck 
2762c2c66affSColin Finck     le = fcb->extents.Flink;
2763c2c66affSColin Finck 
2764c2c66affSColin Finck     last_end = start;
2765c2c66affSColin Finck 
2766c2c66affSColin Finck     while (le != &fcb->extents) {
2767c2c66affSColin Finck         UINT64 len;
2768c2c66affSColin Finck         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2769c2c66affSColin Finck         EXTENT_DATA2* ed2;
2770c2c66affSColin Finck 
2771c2c66affSColin Finck         if (!ext->ignore) {
2772c2c66affSColin Finck             ed = &ext->extent_data;
2773c2c66affSColin Finck 
2774c2c66affSColin Finck             ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2775c2c66affSColin Finck 
2776c2c66affSColin Finck             len = ed2 ? ed2->num_bytes : ed->decoded_size;
2777c2c66affSColin Finck 
2778c2c66affSColin Finck             if (ext->offset + len <= start) {
2779c2c66affSColin Finck                 last_end = ext->offset + len;
2780c2c66affSColin Finck                 goto nextitem;
2781c2c66affSColin Finck             }
2782c2c66affSColin Finck 
2783c2c66affSColin Finck             if (ext->offset > last_end && ext->offset > start + bytes_read) {
2784c2c66affSColin Finck                 UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end));
2785c2c66affSColin Finck 
2786c2c66affSColin Finck                 RtlZeroMemory(data + bytes_read, read);
2787c2c66affSColin Finck                 bytes_read += read;
2788c2c66affSColin Finck                 length -= read;
2789c2c66affSColin Finck             }
2790c2c66affSColin Finck 
2791c2c66affSColin Finck             if (length == 0 || ext->offset > start + bytes_read + length)
2792c2c66affSColin Finck                 break;
2793c2c66affSColin Finck 
2794c2c66affSColin Finck             if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2795c2c66affSColin Finck                 WARN("Encryption not supported\n");
2796c2c66affSColin Finck                 Status = STATUS_NOT_IMPLEMENTED;
2797c2c66affSColin Finck                 goto exit;
2798c2c66affSColin Finck             }
2799c2c66affSColin Finck 
2800c2c66affSColin Finck             if (ed->encoding != BTRFS_ENCODING_NONE) {
2801c2c66affSColin Finck                 WARN("Other encodings not supported\n");
2802c2c66affSColin Finck                 Status = STATUS_NOT_IMPLEMENTED;
2803c2c66affSColin Finck                 goto exit;
2804c2c66affSColin Finck             }
2805c2c66affSColin Finck 
2806c2c66affSColin Finck             switch (ed->type) {
2807c2c66affSColin Finck                 case EXTENT_TYPE_INLINE:
2808c2c66affSColin Finck                 {
2809c2c66affSColin Finck                     UINT64 off = start + bytes_read - ext->offset;
2810c2c66affSColin Finck                     UINT32 read;
2811c2c66affSColin Finck 
2812c2c66affSColin Finck                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2813c2c66affSColin Finck                         read = (UINT32)min(min(len, ext->datalen) - off, length);
2814c2c66affSColin Finck 
2815c2c66affSColin Finck                         RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2816*eb7fbc25SPierre Schweitzer                     } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
2817c2c66affSColin Finck                         UINT8* decomp;
2818c2c66affSColin Finck                         BOOL decomp_alloc;
2819c2c66affSColin Finck                         UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]);
2820c2c66affSColin Finck 
2821c2c66affSColin Finck                         if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2822c2c66affSColin Finck                             ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size);
2823c2c66affSColin Finck                             Status = STATUS_INTERNAL_ERROR;
2824c2c66affSColin Finck                             goto exit;
2825c2c66affSColin Finck                         }
2826c2c66affSColin Finck 
2827c2c66affSColin Finck                         read = (UINT32)min(ed->decoded_size - off, length);
2828c2c66affSColin Finck 
2829c2c66affSColin Finck                         if (off > 0) {
2830c2c66affSColin Finck                             decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG);
2831c2c66affSColin Finck                             if (!decomp) {
2832c2c66affSColin Finck                                 ERR("out of memory\n");
2833c2c66affSColin Finck                                 Status = STATUS_INSUFFICIENT_RESOURCES;
2834c2c66affSColin Finck                                 goto exit;
2835c2c66affSColin Finck                             }
2836c2c66affSColin Finck 
2837c2c66affSColin Finck                             decomp_alloc = TRUE;
2838c2c66affSColin Finck                         } else {
2839c2c66affSColin Finck                             decomp = data + bytes_read;
2840c2c66affSColin Finck                             decomp_alloc = FALSE;
2841c2c66affSColin Finck                         }
2842c2c66affSColin Finck 
2843c2c66affSColin Finck                         if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2844c2c66affSColin Finck                             Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
2845c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
2846c2c66affSColin Finck                                 ERR("zlib_decompress returned %08x\n", Status);
2847c2c66affSColin Finck                                 if (decomp_alloc) ExFreePool(decomp);
2848c2c66affSColin Finck                                 goto exit;
2849c2c66affSColin Finck                             }
2850c2c66affSColin Finck                         } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2851c2c66affSColin Finck                             if (inlen < sizeof(UINT32)) {
2852c2c66affSColin Finck                                 ERR("extent data was truncated\n");
2853c2c66affSColin Finck                                 Status = STATUS_INTERNAL_ERROR;
2854c2c66affSColin Finck                                 if (decomp_alloc) ExFreePool(decomp);
2855c2c66affSColin Finck                                 goto exit;
2856c2c66affSColin Finck                             } else
2857c2c66affSColin Finck                                 inlen -= sizeof(UINT32);
2858c2c66affSColin Finck 
2859c2c66affSColin Finck                             Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32));
2860c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
2861c2c66affSColin Finck                                 ERR("lzo_decompress returned %08x\n", Status);
2862c2c66affSColin Finck                                 if (decomp_alloc) ExFreePool(decomp);
2863c2c66affSColin Finck                                 goto exit;
2864c2c66affSColin Finck                             }
2865*eb7fbc25SPierre Schweitzer                         } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2866*eb7fbc25SPierre Schweitzer                             Status = zstd_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
2867*eb7fbc25SPierre Schweitzer                             if (!NT_SUCCESS(Status)) {
2868*eb7fbc25SPierre Schweitzer                                 ERR("zstd_decompress returned %08x\n", Status);
2869*eb7fbc25SPierre Schweitzer                                 if (decomp_alloc) ExFreePool(decomp);
2870*eb7fbc25SPierre Schweitzer                                 goto exit;
2871*eb7fbc25SPierre Schweitzer                             }
2872c2c66affSColin Finck                         }
2873c2c66affSColin Finck 
2874c2c66affSColin Finck                         if (decomp_alloc) {
2875c2c66affSColin Finck                             RtlCopyMemory(data + bytes_read, decomp + off, read);
2876c2c66affSColin Finck                             ExFreePool(decomp);
2877c2c66affSColin Finck                         }
2878c2c66affSColin Finck                     } else {
2879c2c66affSColin Finck                         ERR("unhandled compression type %x\n", ed->compression);
2880c2c66affSColin Finck                         Status = STATUS_NOT_IMPLEMENTED;
2881c2c66affSColin Finck                         goto exit;
2882c2c66affSColin Finck                     }
2883c2c66affSColin Finck 
2884c2c66affSColin Finck                     bytes_read += read;
2885c2c66affSColin Finck                     length -= read;
2886c2c66affSColin Finck 
2887c2c66affSColin Finck                     break;
2888c2c66affSColin Finck                 }
2889c2c66affSColin Finck 
2890c2c66affSColin Finck                 case EXTENT_TYPE_REGULAR:
2891c2c66affSColin Finck                 {
2892c2c66affSColin Finck                     UINT64 off = start + bytes_read - ext->offset;
2893c2c66affSColin Finck                     UINT32 to_read, read;
2894c2c66affSColin Finck                     UINT8* buf;
2895c2c66affSColin Finck                     BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE;
2896c2c66affSColin Finck                     BOOL buf_free;
2897c2c66affSColin Finck                     UINT32 bumpoff = 0, *csum;
2898c2c66affSColin Finck                     UINT64 addr;
2899c2c66affSColin Finck                     chunk* c;
2900c2c66affSColin Finck 
2901c2c66affSColin Finck                     read = (UINT32)(len - off);
2902c2c66affSColin Finck                     if (read > length) read = (UINT32)length;
2903c2c66affSColin Finck 
2904c2c66affSColin Finck                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2905c2c66affSColin Finck                         addr = ed2->address + ed2->offset + off;
2906c2c66affSColin Finck                         to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size);
2907c2c66affSColin Finck 
2908c2c66affSColin Finck                         if (addr % fcb->Vcb->superblock.sector_size > 0) {
2909c2c66affSColin Finck                             bumpoff = addr % fcb->Vcb->superblock.sector_size;
2910c2c66affSColin Finck                             addr -= bumpoff;
2911c2c66affSColin Finck                             to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2912c2c66affSColin Finck                         }
2913c2c66affSColin Finck                     } else {
2914c2c66affSColin Finck                         addr = ed2->address;
2915c2c66affSColin Finck                         to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2916c2c66affSColin Finck                     }
2917c2c66affSColin Finck 
2918c2c66affSColin Finck                     if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2919c2c66affSColin Finck                         length % fcb->Vcb->superblock.sector_size == 0) {
2920c2c66affSColin Finck                         buf = data + bytes_read;
2921c2c66affSColin Finck                         buf_free = FALSE;
2922c2c66affSColin Finck                     } else {
2923c2c66affSColin Finck                         buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
2924c2c66affSColin Finck                         buf_free = TRUE;
2925c2c66affSColin Finck 
2926c2c66affSColin Finck                         if (!buf) {
2927c2c66affSColin Finck                             ERR("out of memory\n");
2928c2c66affSColin Finck                             Status = STATUS_INSUFFICIENT_RESOURCES;
2929c2c66affSColin Finck                             goto exit;
2930c2c66affSColin Finck                         }
2931c2c66affSColin Finck 
2932c2c66affSColin Finck                         mdl = FALSE;
2933c2c66affSColin Finck                     }
2934c2c66affSColin Finck 
2935c2c66affSColin Finck                     c = get_chunk_from_address(fcb->Vcb, addr);
2936c2c66affSColin Finck 
2937c2c66affSColin Finck                     if (!c) {
2938c2c66affSColin Finck                         ERR("get_chunk_from_address(%llx) failed\n", addr);
2939c2c66affSColin Finck 
2940c2c66affSColin Finck                         if (buf_free)
2941c2c66affSColin Finck                             ExFreePool(buf);
2942c2c66affSColin Finck 
2943c2c66affSColin Finck                         goto exit;
2944c2c66affSColin Finck                     }
2945c2c66affSColin Finck 
2946c2c66affSColin Finck                     if (ext->csum) {
2947c2c66affSColin Finck                         if (ed->compression == BTRFS_COMPRESSION_NONE)
2948c2c66affSColin Finck                             csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2949c2c66affSColin Finck                         else
2950c2c66affSColin Finck                             csum = ext->csum;
2951c2c66affSColin Finck                     } else
2952c2c66affSColin Finck                         csum = NULL;
2953c2c66affSColin Finck 
2954c2c66affSColin Finck                     Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl,
2955c2c66affSColin Finck                                        fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2956c2c66affSColin Finck                     if (!NT_SUCCESS(Status)) {
2957c2c66affSColin Finck                         ERR("read_data returned %08x\n", Status);
2958c2c66affSColin Finck 
2959c2c66affSColin Finck                         if (buf_free)
2960c2c66affSColin Finck                             ExFreePool(buf);
2961c2c66affSColin Finck 
2962c2c66affSColin Finck                         goto exit;
2963c2c66affSColin Finck                     }
2964c2c66affSColin Finck 
2965c2c66affSColin Finck                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2966c2c66affSColin Finck                         if (buf_free)
2967c2c66affSColin Finck                             RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2968c2c66affSColin Finck                     } else {
2969c2c66affSColin Finck                         UINT8 *decomp = NULL, *buf2;
2970c2c66affSColin Finck                         ULONG outlen, inlen, off2;
2971c2c66affSColin Finck                         UINT32 inpageoff = 0;
2972c2c66affSColin Finck 
2973c2c66affSColin Finck                         off2 = (ULONG)(ed2->offset + off);
2974c2c66affSColin Finck                         buf2 = buf;
2975c2c66affSColin Finck                         inlen = (ULONG)ed2->size;
2976c2c66affSColin Finck 
2977c2c66affSColin Finck                         if (ed->compression == BTRFS_COMPRESSION_LZO) {
2978c2c66affSColin Finck                             ULONG inoff = sizeof(UINT32);
2979c2c66affSColin Finck 
2980c2c66affSColin Finck                             inlen -= sizeof(UINT32);
2981c2c66affSColin Finck 
2982c2c66affSColin Finck                             // If reading a few sectors in, skip to the interesting bit
2983c2c66affSColin Finck                             while (off2 > LINUX_PAGE_SIZE) {
2984c2c66affSColin Finck                                 UINT32 partlen;
2985c2c66affSColin Finck 
2986c2c66affSColin Finck                                 if (inlen < sizeof(UINT32))
2987c2c66affSColin Finck                                     break;
2988c2c66affSColin Finck 
2989c2c66affSColin Finck                                 partlen = *(UINT32*)(buf2 + inoff);
2990c2c66affSColin Finck 
2991c2c66affSColin Finck                                 if (partlen < inlen) {
2992c2c66affSColin Finck                                     off2 -= LINUX_PAGE_SIZE;
2993c2c66affSColin Finck                                     inoff += partlen + sizeof(UINT32);
2994c2c66affSColin Finck                                     inlen -= partlen + sizeof(UINT32);
2995c2c66affSColin Finck 
2996c2c66affSColin Finck                                     if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32))
2997c2c66affSColin Finck                                         inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE;
2998c2c66affSColin Finck                                 } else
2999c2c66affSColin Finck                                     break;
3000c2c66affSColin Finck                             }
3001c2c66affSColin Finck 
3002c2c66affSColin Finck                             buf2 = &buf2[inoff];
3003c2c66affSColin Finck                             inpageoff = inoff % LINUX_PAGE_SIZE;
3004c2c66affSColin Finck                         }
3005c2c66affSColin Finck 
3006c2c66affSColin Finck                         if (off2 != 0) {
3007c2c66affSColin Finck                             outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off));
3008c2c66affSColin Finck 
3009c2c66affSColin Finck                             decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG);
3010c2c66affSColin Finck                             if (!decomp) {
3011c2c66affSColin Finck                                 ERR("out of memory\n");
3012c2c66affSColin Finck                                 ExFreePool(buf);
3013c2c66affSColin Finck                                 Status = STATUS_INSUFFICIENT_RESOURCES;
3014c2c66affSColin Finck                                 goto exit;
3015c2c66affSColin Finck                             }
3016c2c66affSColin Finck                         } else
3017c2c66affSColin Finck                             outlen = min(read, (UINT32)(ed2->num_bytes - off));
3018c2c66affSColin Finck 
3019c2c66affSColin Finck                         if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
3020c2c66affSColin Finck                             Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
3021c2c66affSColin Finck 
3022c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
3023c2c66affSColin Finck                                 ERR("zlib_decompress returned %08x\n", Status);
3024c2c66affSColin Finck                                 ExFreePool(buf);
3025c2c66affSColin Finck 
3026c2c66affSColin Finck                                 if (decomp)
3027c2c66affSColin Finck                                     ExFreePool(decomp);
3028c2c66affSColin Finck 
3029c2c66affSColin Finck                                 goto exit;
3030c2c66affSColin Finck                             }
3031c2c66affSColin Finck                         } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
3032c2c66affSColin Finck                             Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
3033c2c66affSColin Finck 
3034c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
3035c2c66affSColin Finck                                 ERR("lzo_decompress returned %08x\n", Status);
3036c2c66affSColin Finck                                 ExFreePool(buf);
3037c2c66affSColin Finck 
3038c2c66affSColin Finck                                 if (decomp)
3039c2c66affSColin Finck                                     ExFreePool(decomp);
3040c2c66affSColin Finck 
3041c2c66affSColin Finck                                 goto exit;
3042c2c66affSColin Finck                             }
3043*eb7fbc25SPierre Schweitzer                         } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
3044*eb7fbc25SPierre Schweitzer                             Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
3045*eb7fbc25SPierre Schweitzer 
3046*eb7fbc25SPierre Schweitzer                             if (!NT_SUCCESS(Status)) {
3047*eb7fbc25SPierre Schweitzer                                 ERR("zstd_decompress returned %08x\n", Status);
3048*eb7fbc25SPierre Schweitzer                                 ExFreePool(buf);
3049*eb7fbc25SPierre Schweitzer 
3050*eb7fbc25SPierre Schweitzer                                 if (decomp)
3051*eb7fbc25SPierre Schweitzer                                     ExFreePool(decomp);
3052*eb7fbc25SPierre Schweitzer 
3053*eb7fbc25SPierre Schweitzer                                 goto exit;
3054*eb7fbc25SPierre Schweitzer                             }
3055c2c66affSColin Finck                         } else {
3056c2c66affSColin Finck                             ERR("unsupported compression type %x\n", ed->compression);
3057c2c66affSColin Finck                             Status = STATUS_NOT_SUPPORTED;
3058c2c66affSColin Finck 
3059c2c66affSColin Finck                             ExFreePool(buf);
3060c2c66affSColin Finck 
3061c2c66affSColin Finck                             if (decomp)
3062c2c66affSColin Finck                                 ExFreePool(decomp);
3063c2c66affSColin Finck 
3064c2c66affSColin Finck                             goto exit;
3065c2c66affSColin Finck                         }
3066c2c66affSColin Finck 
3067c2c66affSColin Finck                         if (decomp) {
3068c2c66affSColin Finck                             RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3069c2c66affSColin Finck                             ExFreePool(decomp);
3070c2c66affSColin Finck                         }
3071c2c66affSColin Finck                     }
3072c2c66affSColin Finck 
3073c2c66affSColin Finck                     if (buf_free)
3074c2c66affSColin Finck                         ExFreePool(buf);
3075c2c66affSColin Finck 
3076c2c66affSColin Finck                     bytes_read += read;
3077c2c66affSColin Finck                     length -= read;
3078c2c66affSColin Finck 
3079c2c66affSColin Finck                     break;
3080c2c66affSColin Finck                 }
3081c2c66affSColin Finck 
3082c2c66affSColin Finck                 case EXTENT_TYPE_PREALLOC:
3083c2c66affSColin Finck                 {
3084c2c66affSColin Finck                     UINT64 off = start + bytes_read - ext->offset;
3085c2c66affSColin Finck                     UINT32 read = (UINT32)(len - off);
3086c2c66affSColin Finck 
3087c2c66affSColin Finck                     if (read > length) read = (UINT32)length;
3088c2c66affSColin Finck 
3089c2c66affSColin Finck                     RtlZeroMemory(data + bytes_read, read);
3090c2c66affSColin Finck 
3091c2c66affSColin Finck                     bytes_read += read;
3092c2c66affSColin Finck                     length -= read;
3093c2c66affSColin Finck 
3094c2c66affSColin Finck                     break;
3095c2c66affSColin Finck                 }
3096c2c66affSColin Finck 
3097c2c66affSColin Finck                 default:
3098c2c66affSColin Finck                     WARN("Unsupported extent data type %u\n", ed->type);
3099c2c66affSColin Finck                     Status = STATUS_NOT_IMPLEMENTED;
3100c2c66affSColin Finck                     goto exit;
3101c2c66affSColin Finck             }
3102c2c66affSColin Finck 
3103c2c66affSColin Finck             last_end = ext->offset + len;
3104c2c66affSColin Finck 
3105c2c66affSColin Finck             if (length == 0)
3106c2c66affSColin Finck                 break;
3107c2c66affSColin Finck         }
3108c2c66affSColin Finck 
3109c2c66affSColin Finck nextitem:
3110c2c66affSColin Finck         le = le->Flink;
3111c2c66affSColin Finck     }
3112c2c66affSColin Finck 
3113c2c66affSColin Finck     if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3114c2c66affSColin Finck         UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length);
3115c2c66affSColin Finck 
3116c2c66affSColin Finck         RtlZeroMemory(data + bytes_read, read);
3117c2c66affSColin Finck 
3118c2c66affSColin Finck         bytes_read += read;
3119c2c66affSColin Finck         length -= read;
3120c2c66affSColin Finck     }
3121c2c66affSColin Finck 
3122c2c66affSColin Finck     Status = STATUS_SUCCESS;
3123c2c66affSColin Finck     if (pbr)
3124c2c66affSColin Finck         *pbr = bytes_read;
3125c2c66affSColin Finck 
3126c2c66affSColin Finck #ifdef DEBUG_STATS
3127c2c66affSColin Finck     time2 = KeQueryPerformanceCounter(NULL);
3128c2c66affSColin Finck 
3129c2c66affSColin Finck     fcb->Vcb->stats.num_reads++;
3130c2c66affSColin Finck     fcb->Vcb->stats.data_read += bytes_read;
3131c2c66affSColin Finck     fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
3132c2c66affSColin Finck #endif
3133c2c66affSColin Finck 
3134c2c66affSColin Finck exit:
3135c2c66affSColin Finck     return Status;
3136c2c66affSColin Finck }
3137c2c66affSColin Finck 
3138c2c66affSColin Finck NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) {
3139c2c66affSColin Finck     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3140c2c66affSColin Finck     PFILE_OBJECT FileObject = IrpSp->FileObject;
3141c2c66affSColin Finck     fcb* fcb = FileObject->FsContext;
3142c2c66affSColin Finck     UINT8* data = NULL;
3143c2c66affSColin Finck     ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3144c2c66affSColin Finck     UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3145c2c66affSColin Finck 
3146c2c66affSColin Finck     *bytes_read = 0;
3147c2c66affSColin Finck 
3148c2c66affSColin Finck     if (!fcb || !fcb->Vcb || !fcb->subvol)
3149c2c66affSColin Finck         return STATUS_INTERNAL_ERROR;
3150c2c66affSColin Finck 
3151c2c66affSColin Finck     TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
3152c2c66affSColin Finck     TRACE("offset = %llx, length = %x\n", start, length);
3153c2c66affSColin Finck     TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");
3154c2c66affSColin Finck 
3155c2c66affSColin Finck     if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3156c2c66affSColin Finck         return STATUS_INVALID_DEVICE_REQUEST;
3157c2c66affSColin Finck 
3158c2c66affSColin Finck     if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3159c2c66affSColin Finck         WARN("tried to read locked region\n");
3160c2c66affSColin Finck         return STATUS_FILE_LOCK_CONFLICT;
3161c2c66affSColin Finck     }
3162c2c66affSColin Finck 
3163c2c66affSColin Finck     if (length == 0) {
3164c2c66affSColin Finck         TRACE("tried to read zero bytes\n");
3165c2c66affSColin Finck         return STATUS_SUCCESS;
3166c2c66affSColin Finck     }
3167c2c66affSColin Finck 
3168c2c66affSColin Finck     if (start >= (UINT64)fcb->Header.FileSize.QuadPart) {
3169c2c66affSColin Finck         TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart);
3170c2c66affSColin Finck         return STATUS_END_OF_FILE;
3171c2c66affSColin Finck     }
3172c2c66affSColin Finck 
3173c2c66affSColin Finck     TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3174c2c66affSColin Finck 
3175c2c66affSColin Finck     if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
3176c2c66affSColin Finck         data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3177c2c66affSColin Finck 
3178c2c66affSColin Finck         if (Irp->MdlAddress && !data) {
3179c2c66affSColin Finck             ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3180c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
3181c2c66affSColin Finck         }
3182c2c66affSColin Finck 
3183c2c66affSColin Finck         if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3184c2c66affSColin Finck             length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3185c2c66affSColin Finck             RtlZeroMemory(data, length);
3186c2c66affSColin Finck             Irp->IoStatus.Information = *bytes_read = length;
3187c2c66affSColin Finck             return STATUS_SUCCESS;
3188c2c66affSColin Finck         }
3189c2c66affSColin Finck 
3190c2c66affSColin Finck         if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3191c2c66affSColin Finck             addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3192c2c66affSColin Finck             RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3193c2c66affSColin Finck             length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3194c2c66affSColin Finck         }
3195c2c66affSColin Finck     }
3196c2c66affSColin Finck 
3197c2c66affSColin Finck     if (!(Irp->Flags & IRP_NOCACHE)) {
3198c2c66affSColin Finck         NTSTATUS Status = STATUS_SUCCESS;
3199c2c66affSColin Finck 
3200c2c66affSColin Finck         _SEH2_TRY {
3201c2c66affSColin Finck             if (!FileObject->PrivateCacheMap) {
3202c2c66affSColin Finck                 CC_FILE_SIZES ccfs;
3203c2c66affSColin Finck 
3204c2c66affSColin Finck                 ccfs.AllocationSize = fcb->Header.AllocationSize;
3205c2c66affSColin Finck                 ccfs.FileSize = fcb->Header.FileSize;
3206c2c66affSColin Finck                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3207c2c66affSColin Finck 
3208c2c66affSColin Finck                 init_file_cache(FileObject, &ccfs);
3209c2c66affSColin Finck             }
3210c2c66affSColin Finck 
3211c2c66affSColin Finck             if (IrpSp->MinorFunction & IRP_MN_MDL) {
3212c2c66affSColin Finck                 CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3213c2c66affSColin Finck             } else {
3214c2c66affSColin Finck                 if (fCcCopyReadEx) {
3215c2c66affSColin Finck                     TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3216c2c66affSColin Finck                           length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3217c2c66affSColin Finck                     TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3218c2c66affSColin Finck                     if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3219c2c66affSColin Finck                         TRACE("CcCopyReadEx could not wait\n");
3220c2c66affSColin Finck 
3221c2c66affSColin Finck                         IoMarkIrpPending(Irp);
3222c2c66affSColin Finck                         return STATUS_PENDING;
3223c2c66affSColin Finck                     }
3224c2c66affSColin Finck                     TRACE("CcCopyReadEx finished\n");
3225c2c66affSColin Finck                 } else {
3226c2c66affSColin Finck                     TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3227c2c66affSColin Finck                     TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3228c2c66affSColin Finck                     if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3229c2c66affSColin Finck                         TRACE("CcCopyRead could not wait\n");
3230c2c66affSColin Finck 
3231c2c66affSColin Finck                         IoMarkIrpPending(Irp);
3232c2c66affSColin Finck                         return STATUS_PENDING;
3233c2c66affSColin Finck                     }
3234c2c66affSColin Finck                     TRACE("CcCopyRead finished\n");
3235c2c66affSColin Finck                 }
3236c2c66affSColin Finck             }
3237c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3238c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
3239c2c66affSColin Finck         } _SEH2_END;
3240c2c66affSColin Finck 
3241c2c66affSColin Finck         if (NT_SUCCESS(Status)) {
3242c2c66affSColin Finck             Status = Irp->IoStatus.Status;
3243c2c66affSColin Finck             Irp->IoStatus.Information += addon;
3244c2c66affSColin Finck             *bytes_read = (ULONG)Irp->IoStatus.Information;
3245c2c66affSColin Finck         } else
3246c2c66affSColin Finck             ERR("EXCEPTION - %08x\n", Status);
3247c2c66affSColin Finck 
3248c2c66affSColin Finck         return Status;
3249c2c66affSColin Finck     } else {
3250c2c66affSColin Finck         NTSTATUS Status;
3251c2c66affSColin Finck 
3252c2c66affSColin Finck         if (!wait) {
3253c2c66affSColin Finck             IoMarkIrpPending(Irp);
3254c2c66affSColin Finck             return STATUS_PENDING;
3255c2c66affSColin Finck         }
3256c2c66affSColin Finck 
3257c2c66affSColin Finck         if (fcb->ads)
3258c2c66affSColin Finck             Status = read_stream(fcb, data, start, length, bytes_read);
3259c2c66affSColin Finck         else
3260c2c66affSColin Finck             Status = read_file(fcb, data, start, length, bytes_read, Irp);
3261c2c66affSColin Finck 
3262c2c66affSColin Finck         *bytes_read += addon;
3263c2c66affSColin Finck         TRACE("read %u bytes\n", *bytes_read);
3264c2c66affSColin Finck 
3265c2c66affSColin Finck         Irp->IoStatus.Information = *bytes_read;
3266c2c66affSColin Finck 
3267c2c66affSColin Finck         if (diskacc && Status != STATUS_PENDING) {
3268c2c66affSColin Finck             PETHREAD thread = NULL;
3269c2c66affSColin Finck 
3270c2c66affSColin Finck             if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3271c2c66affSColin Finck                 thread = Irp->Tail.Overlay.Thread;
3272c2c66affSColin Finck             else if (!IoIsSystemThread(PsGetCurrentThread()))
3273c2c66affSColin Finck                 thread = PsGetCurrentThread();
3274c2c66affSColin Finck             else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
3275c2c66affSColin Finck                 thread = PsGetCurrentThread();
3276c2c66affSColin Finck 
3277c2c66affSColin Finck             if (thread)
3278c2c66affSColin Finck                 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3279c2c66affSColin Finck         }
3280c2c66affSColin Finck 
3281c2c66affSColin Finck         return Status;
3282c2c66affSColin Finck     }
3283c2c66affSColin Finck }
3284c2c66affSColin Finck 
3285c2c66affSColin Finck _Dispatch_type_(IRP_MJ_READ)
3286c2c66affSColin Finck _Function_class_(DRIVER_DISPATCH)
32876c75215bSMark Jansen NTSTATUS NTAPI drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3288c2c66affSColin Finck     device_extension* Vcb = DeviceObject->DeviceExtension;
3289c2c66affSColin Finck     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3290c2c66affSColin Finck     PFILE_OBJECT FileObject = IrpSp->FileObject;
3291c2c66affSColin Finck     ULONG bytes_read = 0;
3292c2c66affSColin Finck     NTSTATUS Status;
3293c2c66affSColin Finck     BOOL top_level;
3294c2c66affSColin Finck     fcb* fcb;
3295c2c66affSColin Finck     ccb* ccb;
3296c2c66affSColin Finck     BOOLEAN fcb_lock = FALSE, wait;
3297c2c66affSColin Finck 
3298c2c66affSColin Finck     FsRtlEnterFileSystem();
3299c2c66affSColin Finck 
3300c2c66affSColin Finck     top_level = is_top_level(Irp);
3301c2c66affSColin Finck 
3302c2c66affSColin Finck     TRACE("read\n");
3303c2c66affSColin Finck 
3304c2c66affSColin Finck     if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3305c2c66affSColin Finck         Status = vol_read(DeviceObject, Irp);
3306c2c66affSColin Finck         goto exit2;
3307c2c66affSColin Finck     } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3308c2c66affSColin Finck         Status = STATUS_INVALID_PARAMETER;
3309c2c66affSColin Finck         goto end;
3310c2c66affSColin Finck     }
3311c2c66affSColin Finck 
3312c2c66affSColin Finck     Irp->IoStatus.Information = 0;
3313c2c66affSColin Finck 
3314c2c66affSColin Finck     if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3315c2c66affSColin Finck         CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3316c2c66affSColin Finck 
3317c2c66affSColin Finck         Irp->MdlAddress = NULL;
3318c2c66affSColin Finck         Status = STATUS_SUCCESS;
3319c2c66affSColin Finck 
3320c2c66affSColin Finck         goto exit;
3321c2c66affSColin Finck     }
3322c2c66affSColin Finck 
3323c2c66affSColin Finck     fcb = FileObject->FsContext;
3324c2c66affSColin Finck 
3325c2c66affSColin Finck     if (!fcb) {
3326c2c66affSColin Finck         ERR("fcb was NULL\n");
3327c2c66affSColin Finck         Status = STATUS_INVALID_PARAMETER;
3328c2c66affSColin Finck         goto exit;
3329c2c66affSColin Finck     }
3330c2c66affSColin Finck 
3331c2c66affSColin Finck     ccb = FileObject->FsContext2;
3332c2c66affSColin Finck 
3333c2c66affSColin Finck     if (!ccb) {
3334c2c66affSColin Finck         ERR("ccb was NULL\n");
3335c2c66affSColin Finck         Status = STATUS_INVALID_PARAMETER;
3336c2c66affSColin Finck         goto exit;
3337c2c66affSColin Finck     }
3338c2c66affSColin Finck 
3339c2c66affSColin Finck     if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3340c2c66affSColin Finck         WARN("insufficient privileges\n");
3341c2c66affSColin Finck         Status = STATUS_ACCESS_DENIED;
3342c2c66affSColin Finck         goto exit;
3343c2c66affSColin Finck     }
3344c2c66affSColin Finck 
3345c2c66affSColin Finck     if (fcb == Vcb->volume_fcb) {
3346c2c66affSColin Finck         TRACE("reading volume FCB\n");
3347c2c66affSColin Finck 
3348c2c66affSColin Finck         IoSkipCurrentIrpStackLocation(Irp);
3349c2c66affSColin Finck 
3350c2c66affSColin Finck         Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3351c2c66affSColin Finck 
3352c2c66affSColin Finck         goto exit2;
3353c2c66affSColin Finck     }
3354c2c66affSColin Finck 
3355c2c66affSColin Finck     wait = IoIsOperationSynchronous(Irp);
3356c2c66affSColin Finck 
3357c2c66affSColin Finck     // Don't offload jobs when doing paging IO - otherwise this can lead to
3358c2c66affSColin Finck     // deadlocks in CcCopyRead.
3359c2c66affSColin Finck     if (Irp->Flags & IRP_PAGING_IO)
3360c2c66affSColin Finck         wait = TRUE;
3361c2c66affSColin Finck 
33624672b2baSPierre Schweitzer     if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) {
33634672b2baSPierre Schweitzer         IO_STATUS_BLOCK iosb;
33644672b2baSPierre Schweitzer 
33654672b2baSPierre Schweitzer         CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
33664672b2baSPierre Schweitzer         if (!NT_SUCCESS(iosb.Status)) {
33674672b2baSPierre Schweitzer             ERR("CcFlushCache returned %08x\n", iosb.Status);
33684672b2baSPierre Schweitzer             return iosb.Status;
33694672b2baSPierre Schweitzer         }
33704672b2baSPierre Schweitzer     }
33714672b2baSPierre Schweitzer 
3372c2c66affSColin Finck     if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3373c2c66affSColin Finck         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3374c2c66affSColin Finck             Status = STATUS_PENDING;
3375c2c66affSColin Finck             IoMarkIrpPending(Irp);
3376c2c66affSColin Finck             goto exit;
3377c2c66affSColin Finck         }
3378c2c66affSColin Finck 
3379c2c66affSColin Finck         fcb_lock = TRUE;
3380c2c66affSColin Finck     }
3381c2c66affSColin Finck 
3382c2c66affSColin Finck     Status = do_read(Irp, wait, &bytes_read);
3383c2c66affSColin Finck 
3384c2c66affSColin Finck     if (fcb_lock)
3385c2c66affSColin Finck         ExReleaseResourceLite(fcb->Header.Resource);
3386c2c66affSColin Finck 
3387c2c66affSColin Finck exit:
3388c2c66affSColin Finck     if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3389c2c66affSColin Finck         FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3390c2c66affSColin Finck 
3391c2c66affSColin Finck end:
3392c2c66affSColin Finck     Irp->IoStatus.Status = Status;
3393c2c66affSColin Finck 
3394c2c66affSColin Finck     TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
3395c2c66affSColin Finck     TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
3396c2c66affSColin Finck     TRACE("returning %08x\n", Status);
3397c2c66affSColin Finck 
3398c2c66affSColin Finck     if (Status != STATUS_PENDING)
3399c2c66affSColin Finck         IoCompleteRequest(Irp, IO_NO_INCREMENT);
3400c2c66affSColin Finck     else {
3401c2c66affSColin Finck         if (!add_thread_job(Vcb, Irp))
3402c2c66affSColin Finck             do_read_job(Irp);
3403c2c66affSColin Finck     }
3404c2c66affSColin Finck 
3405c2c66affSColin Finck exit2:
3406c2c66affSColin Finck     if (top_level)
3407c2c66affSColin Finck         IoSetTopLevelIrp(NULL);
3408c2c66affSColin Finck 
3409c2c66affSColin Finck     FsRtlExitFileSystem();
3410c2c66affSColin Finck 
3411c2c66affSColin Finck     return Status;
3412c2c66affSColin Finck }
3413