/* xref: /reactos/drivers/filesystems/btrfs/read.c (revision 6e0cf03d) */

/* Copyright (c) Mark Harmstone 2016-17
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */

#include "btrfs_drv.h"
#include "xxhash.h"
#include "crc32c.h"

/* State of the read issued against a single stripe.
 * Enumerators keep their implicit values 0..4. */
enum read_data_status {
    ReadDataStatus_Pending,       /* presumably the state before the IRP completes - confirm at the issuing site */
    ReadDataStatus_Success,       /* set by read_data_completion when the IRP status is a success code */
    ReadDataStatus_Error,         /* set by read_data_completion when the IRP status is a failure code */
    ReadDataStatus_MissingDevice, /* not assigned in the code visible here */
    ReadDataStatus_Skip           /* not assigned in the code visible here */
};

30c2c66affSColin Finck struct read_data_context;
31c2c66affSColin Finck 
32c2c66affSColin Finck typedef struct {
33c2c66affSColin Finck     struct read_data_context* context;
34318da0c1SPierre Schweitzer     uint16_t stripenum;
35318da0c1SPierre Schweitzer     bool rewrite;
36c2c66affSColin Finck     PIRP Irp;
37c2c66affSColin Finck     IO_STATUS_BLOCK iosb;
38c2c66affSColin Finck     enum read_data_status status;
39c2c66affSColin Finck     PMDL mdl;
40318da0c1SPierre Schweitzer     uint64_t stripestart;
41318da0c1SPierre Schweitzer     uint64_t stripeend;
42c2c66affSColin Finck } read_data_stripe;
43c2c66affSColin Finck 
44c2c66affSColin Finck typedef struct {
45c2c66affSColin Finck     KEVENT Event;
46c2c66affSColin Finck     NTSTATUS Status;
47c2c66affSColin Finck     chunk* c;
48318da0c1SPierre Schweitzer     uint64_t address;
49318da0c1SPierre Schweitzer     uint32_t buflen;
50c2c66affSColin Finck     LONG num_stripes, stripes_left;
51318da0c1SPierre Schweitzer     uint64_t type;
52318da0c1SPierre Schweitzer     uint32_t sector_size;
53318da0c1SPierre Schweitzer     uint16_t firstoff, startoffstripe, sectors_per_stripe;
54194ea909SVictor Perevertkin     void* csum;
55318da0c1SPierre Schweitzer     bool tree;
56c2c66affSColin Finck     read_data_stripe* stripes;
57318da0c1SPierre Schweitzer     uint8_t* va;
58c2c66affSColin Finck } read_data_context;
59c2c66affSColin Finck 
60318da0c1SPierre Schweitzer extern bool diskacc;
61c2c66affSColin Finck extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
62c2c66affSColin Finck extern tCcCopyReadEx fCcCopyReadEx;
63c2c66affSColin Finck extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
64c2c66affSColin Finck 
/* NOTE(review): presumably the block granularity of LZO-compressed data - confirm at the use sites. */
#define LZO_PAGE_SIZE 4096

_Function_class_(IO_COMPLETION_ROUTINE)67c2c66affSColin Finck _Function_class_(IO_COMPLETION_ROUTINE)
68318da0c1SPierre Schweitzer static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
69c2c66affSColin Finck     read_data_stripe* stripe = conptr;
70c2c66affSColin Finck     read_data_context* context = (read_data_context*)stripe->context;
71c2c66affSColin Finck 
72c2c66affSColin Finck     UNUSED(DeviceObject);
73c2c66affSColin Finck 
74c2c66affSColin Finck     stripe->iosb = Irp->IoStatus;
75c2c66affSColin Finck 
76c2c66affSColin Finck     if (NT_SUCCESS(Irp->IoStatus.Status))
77c2c66affSColin Finck         stripe->status = ReadDataStatus_Success;
78c2c66affSColin Finck     else
79c2c66affSColin Finck         stripe->status = ReadDataStatus_Error;
80c2c66affSColin Finck 
81c2c66affSColin Finck     if (InterlockedDecrement(&context->stripes_left) == 0)
82318da0c1SPierre Schweitzer         KeSetEvent(&context->Event, 0, false);
83c2c66affSColin Finck 
84c2c66affSColin Finck     return STATUS_MORE_PROCESSING_REQUIRED;
85c2c66affSColin Finck }
86c2c66affSColin Finck 
check_csum(device_extension * Vcb,uint8_t * data,uint32_t sectors,void * csum)87194ea909SVictor Perevertkin NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, void* csum) {
88194ea909SVictor Perevertkin     void* csum2;
89c2c66affSColin Finck 
90194ea909SVictor Perevertkin     csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
91c2c66affSColin Finck     if (!csum2) {
92c2c66affSColin Finck         ERR("out of memory\n");
93c2c66affSColin Finck         return STATUS_INSUFFICIENT_RESOURCES;
94c2c66affSColin Finck     }
95c2c66affSColin Finck 
96194ea909SVictor Perevertkin     do_calc_job(Vcb, data, sectors, csum2);
97c2c66affSColin Finck 
98194ea909SVictor Perevertkin     if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
99c2c66affSColin Finck         ExFreePool(csum2);
100c2c66affSColin Finck         return STATUS_CRC_ERROR;
101c2c66affSColin Finck     }
102c2c66affSColin Finck 
103c2c66affSColin Finck     ExFreePool(csum2);
104c2c66affSColin Finck 
105c2c66affSColin Finck     return STATUS_SUCCESS;
106c2c66affSColin Finck }
107c2c66affSColin Finck 
/* Computes the checksum of a tree node using the volume's checksum type.
 *
 * Vcb  - volume; supplies superblock.csum_type and superblock.node_size.
 * th   - header at the start of the node buffer.
 * csum - receives the checksum: 4 bytes for CRC32C, 8 for XXHASH, and the
 *        full hash for SHA256 / BLAKE2.
 *
 * The hashed range starts at th->fs_uuid and is node_size minus
 * sizeof(th->csum) bytes long. For an unrecognised checksum type csum is
 * left untouched. */
void get_tree_checksum(device_extension* Vcb, tree_header* th, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *(uint32_t*)csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
        break;

        case CSUM_TYPE_XXHASH:
            *(uint64_t*)csum = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
        break;

        case CSUM_TYPE_SHA256:
            calc_sha256(csum, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
        break;

        case CSUM_TYPE_BLAKE2:
            blake2b(csum, BLAKE2_HASH_SIZE, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
        break;
    }
}

/* Checks the checksum stored in a tree node against a freshly computed one.
 *
 * Vcb - volume; supplies superblock.csum_type and superblock.node_size.
 * th  - header at the start of the node buffer; th->csum holds the stored value.
 *
 * The hashed range starts at th->fs_uuid and is node_size minus
 * sizeof(th->csum) bytes long.
 *
 * Returns true on a match. Returns false on a mismatch (after logging a
 * warning) or when the checksum type is not one of the four handled here. */
bool check_tree_checksum(device_extension* Vcb, tree_header* th) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            if (crc32 == *((uint32_t*)th->csum))
                return true;

            WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));

            break;
        }

        case CSUM_TYPE_XXHASH: {
            uint64_t hash = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);

            if (hash == *((uint64_t*)th->csum))
                return true;

            WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));

            break;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t hash[SHA256_HASH_SIZE];

            calc_sha256(hash, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            /* compare against the stored checksum field explicitly, not the start of the header */
            if (RtlCompareMemory(hash, th->csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t hash[BLAKE2_HASH_SIZE];

            blake2b(hash, sizeof(hash), (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

            /* compare against the stored checksum field explicitly, not the start of the header */
            if (RtlCompareMemory(hash, th->csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }
    }

    return false;
}

/* Computes the checksum of one sector using the volume's checksum type.
 *
 * Vcb  - volume; supplies superblock.csum_type and superblock.sector_size.
 * buf  - sector data, superblock.sector_size bytes.
 * csum - receives the checksum: 4 bytes for CRC32C, 8 for XXHASH, and the
 *        full hash for SHA256 / BLAKE2.
 *
 * For an unrecognised checksum type csum is left untouched. */
void get_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
        break;

        case CSUM_TYPE_XXHASH:
            *(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
        break;

        case CSUM_TYPE_SHA256:
            calc_sha256(csum, buf, Vcb->superblock.sector_size);
        break;

        case CSUM_TYPE_BLAKE2:
            blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
        break;
    }
}

/* Checks one sector against its expected checksum.
 *
 * Vcb  - volume; supplies superblock.csum_type and superblock.sector_size.
 * buf  - sector data, superblock.sector_size bytes.
 * csum - expected checksum for this sector.
 *
 * Returns true when the computed checksum equals *csum; false on a mismatch
 * or when the checksum type is not one of the four handled here. */
bool check_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);

            return *(uint32_t*)csum == crc32;
        }

        case CSUM_TYPE_XXHASH: {
            uint64_t hash = XXH64(buf, Vcb->superblock.sector_size, 0);

            return *(uint64_t*)csum == hash;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t hash[SHA256_HASH_SIZE];

            calc_sha256(hash, buf, Vcb->superblock.sector_size);

            return RtlCompareMemory(hash, csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t hash[BLAKE2_HASH_SIZE];

            blake2b(hash, sizeof(hash), buf, Vcb->superblock.sector_size);

            return RtlCompareMemory(hash, csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE;
        }
    }

    return false;
}

read_data_dup(device_extension * Vcb,uint8_t * buf,uint64_t addr,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t generation)236318da0c1SPierre Schweitzer static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
237318da0c1SPierre Schweitzer                               device** devices, uint64_t generation) {
238318da0c1SPierre Schweitzer     bool checksum_error = false;
239318da0c1SPierre Schweitzer     uint16_t j, stripe = 0;
240c2c66affSColin Finck     NTSTATUS Status;
241c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
242c2c66affSColin Finck 
243c2c66affSColin Finck     for (j = 0; j < ci->num_stripes; j++) {
244c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
245194ea909SVictor Perevertkin             WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
246c2c66affSColin Finck             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
247c2c66affSColin Finck             return context->stripes[j].iosb.Status;
248c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success) {
249c2c66affSColin Finck             stripe = j;
250c2c66affSColin Finck             break;
251c2c66affSColin Finck         }
252c2c66affSColin Finck     }
253c2c66affSColin Finck 
254c2c66affSColin Finck     if (context->stripes[stripe].status != ReadDataStatus_Success)
255c2c66affSColin Finck         return STATUS_INTERNAL_ERROR;
256c2c66affSColin Finck 
257c2c66affSColin Finck     if (context->tree) {
258c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
259c2c66affSColin Finck 
260194ea909SVictor Perevertkin         if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
261318da0c1SPierre Schweitzer             checksum_error = true;
262c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
263c2c66affSColin Finck         } else if (generation != 0 && th->generation != generation) {
264318da0c1SPierre Schweitzer             checksum_error = true;
265c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
266c2c66affSColin Finck         }
267c2c66affSColin Finck     } else if (context->csum) {
268c2c66affSColin Finck         Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
269c2c66affSColin Finck 
270c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
271318da0c1SPierre Schweitzer             checksum_error = true;
272c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
273c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
274194ea909SVictor Perevertkin             ERR("check_csum returned %08lx\n", Status);
275c2c66affSColin Finck             return Status;
276c2c66affSColin Finck         }
277c2c66affSColin Finck     }
278c2c66affSColin Finck 
279c2c66affSColin Finck     if (!checksum_error)
280c2c66affSColin Finck         return STATUS_SUCCESS;
281c2c66affSColin Finck 
282c2c66affSColin Finck     if (ci->num_stripes == 1)
283c2c66affSColin Finck         return STATUS_CRC_ERROR;
284c2c66affSColin Finck 
285c2c66affSColin Finck     if (context->tree) {
286c2c66affSColin Finck         tree_header* t2;
287318da0c1SPierre Schweitzer         bool recovered = false;
288c2c66affSColin Finck 
289c2c66affSColin Finck         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
290c2c66affSColin Finck         if (!t2) {
291c2c66affSColin Finck             ERR("out of memory\n");
292c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
293c2c66affSColin Finck         }
294c2c66affSColin Finck 
295c2c66affSColin Finck         for (j = 0; j < ci->num_stripes; j++) {
296c2c66affSColin Finck             if (j != stripe && devices[j] && devices[j]->devobj) {
297318da0c1SPierre Schweitzer                 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
298318da0c1SPierre Schweitzer                                         Vcb->superblock.node_size, (uint8_t*)t2, false);
299c2c66affSColin Finck                 if (!NT_SUCCESS(Status)) {
300194ea909SVictor Perevertkin                     WARN("sync_read_phys returned %08lx\n", Status);
301c2c66affSColin Finck                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
302c2c66affSColin Finck                 } else {
303194ea909SVictor Perevertkin                     bool checksum_error = !check_tree_checksum(Vcb, t2);
304c2c66affSColin Finck 
305194ea909SVictor Perevertkin                     if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
306c2c66affSColin Finck                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
307318da0c1SPierre Schweitzer                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
308318da0c1SPierre Schweitzer                         recovered = true;
309c2c66affSColin Finck 
310c2c66affSColin Finck                         if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
311318da0c1SPierre Schweitzer                             Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
312c2c66affSColin Finck                                                      t2, Vcb->superblock.node_size);
313c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
314194ea909SVictor Perevertkin                                 WARN("write_data_phys returned %08lx\n", Status);
315c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
316c2c66affSColin Finck                             }
317c2c66affSColin Finck                         }
318c2c66affSColin Finck 
319c2c66affSColin Finck                         break;
320194ea909SVictor Perevertkin                     } else if (t2->address != addr || checksum_error)
321c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
322c2c66affSColin Finck                     else
323c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
324c2c66affSColin Finck                 }
325c2c66affSColin Finck             }
326c2c66affSColin Finck         }
327c2c66affSColin Finck 
328c2c66affSColin Finck         if (!recovered) {
329318da0c1SPierre Schweitzer             ERR("unrecoverable checksum error at %I64x\n", addr);
330c2c66affSColin Finck             ExFreePool(t2);
331c2c66affSColin Finck             return STATUS_CRC_ERROR;
332c2c66affSColin Finck         }
333c2c66affSColin Finck 
334c2c66affSColin Finck         ExFreePool(t2);
335c2c66affSColin Finck     } else {
336174dfab6SVincent Franchomme         ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information >> Vcb->sector_shift;
337318da0c1SPierre Schweitzer         uint8_t* sector;
338194ea909SVictor Perevertkin         void* ptr = context->csum;
339c2c66affSColin Finck 
340c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
341c2c66affSColin Finck         if (!sector) {
342c2c66affSColin Finck             ERR("out of memory\n");
343c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
344c2c66affSColin Finck         }
345c2c66affSColin Finck 
346174dfab6SVincent Franchomme         for (ULONG i = 0; i < sectors; i++) {
347174dfab6SVincent Franchomme             if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
348318da0c1SPierre Schweitzer                 bool recovered = false;
349c2c66affSColin Finck 
350c2c66affSColin Finck                 for (j = 0; j < ci->num_stripes; j++) {
351c2c66affSColin Finck                     if (j != stripe && devices[j] && devices[j]->devobj) {
352318da0c1SPierre Schweitzer                         Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
353174dfab6SVincent Franchomme                                                 cis[j].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
354318da0c1SPierre Schweitzer                                                 Vcb->superblock.sector_size, sector, false);
355c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
356194ea909SVictor Perevertkin                             WARN("sync_read_phys returned %08lx\n", Status);
357c2c66affSColin Finck                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
358c2c66affSColin Finck                         } else {
359194ea909SVictor Perevertkin                             if (check_sector_csum(Vcb, sector, ptr)) {
360174dfab6SVincent Franchomme                                 RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
361174dfab6SVincent Franchomme                                 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
362318da0c1SPierre Schweitzer                                 recovered = true;
363c2c66affSColin Finck 
364c2c66affSColin Finck                                 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
365318da0c1SPierre Schweitzer                                     Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
366174dfab6SVincent Franchomme                                                              cis[stripe].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
367c2c66affSColin Finck                                                              sector, Vcb->superblock.sector_size);
368c2c66affSColin Finck                                     if (!NT_SUCCESS(Status)) {
369194ea909SVictor Perevertkin                                         WARN("write_data_phys returned %08lx\n", Status);
370c2c66affSColin Finck                                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
371c2c66affSColin Finck                                     }
372c2c66affSColin Finck                                 }
373c2c66affSColin Finck 
374c2c66affSColin Finck                                 break;
375c2c66affSColin Finck                             } else
376c2c66affSColin Finck                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
377c2c66affSColin Finck                         }
378c2c66affSColin Finck                     }
379c2c66affSColin Finck                 }
380c2c66affSColin Finck 
381c2c66affSColin Finck                 if (!recovered) {
382174dfab6SVincent Franchomme                     ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
383c2c66affSColin Finck                     ExFreePool(sector);
384c2c66affSColin Finck                     return STATUS_CRC_ERROR;
385c2c66affSColin Finck                 }
386c2c66affSColin Finck             }
387194ea909SVictor Perevertkin 
388194ea909SVictor Perevertkin             ptr = (uint8_t*)ptr + Vcb->csum_size;
389c2c66affSColin Finck         }
390c2c66affSColin Finck 
391c2c66affSColin Finck         ExFreePool(sector);
392c2c66affSColin Finck     }
393c2c66affSColin Finck 
394c2c66affSColin Finck     return STATUS_SUCCESS;
395c2c66affSColin Finck }
396c2c66affSColin Finck 
read_data_raid0(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t generation,uint64_t offset)397318da0c1SPierre Schweitzer static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
398318da0c1SPierre Schweitzer                                 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
399174dfab6SVincent Franchomme     for (uint16_t i = 0; i < ci->num_stripes; i++) {
400c2c66affSColin Finck         if (context->stripes[i].status == ReadDataStatus_Error) {
401174dfab6SVincent Franchomme             WARN("stripe %u returned error %08lx\n", i, context->stripes[i].iosb.Status);
402c2c66affSColin Finck             log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
403c2c66affSColin Finck             return context->stripes[i].iosb.Status;
404c2c66affSColin Finck         }
405c2c66affSColin Finck     }
406c2c66affSColin Finck 
407c2c66affSColin Finck     if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
408c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
409194ea909SVictor Perevertkin         bool checksum_error = !check_tree_checksum(Vcb, th);
410c2c66affSColin Finck 
411194ea909SVictor Perevertkin         if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
412318da0c1SPierre Schweitzer             uint64_t off;
413318da0c1SPierre Schweitzer             uint16_t stripe;
414c2c66affSColin Finck 
415c2c66affSColin Finck             get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
416c2c66affSColin Finck 
417318da0c1SPierre Schweitzer             ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
418c2c66affSColin Finck 
419194ea909SVictor Perevertkin             if (checksum_error) {
420c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
421c2c66affSColin Finck                 return STATUS_CRC_ERROR;
422c2c66affSColin Finck             } else if (addr != th->address) {
423318da0c1SPierre Schweitzer                 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
424c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
425c2c66affSColin Finck                 return STATUS_CRC_ERROR;
426c2c66affSColin Finck             } else if (generation != 0 && generation != th->generation) {
427318da0c1SPierre Schweitzer                 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
428c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
429c2c66affSColin Finck                 return STATUS_CRC_ERROR;
430c2c66affSColin Finck             }
431c2c66affSColin Finck         }
432c2c66affSColin Finck     } else if (context->csum) {
433c2c66affSColin Finck         NTSTATUS Status;
434c2c66affSColin Finck 
435174dfab6SVincent Franchomme         Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
436c2c66affSColin Finck 
437c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
438194ea909SVictor Perevertkin             void* ptr = context->csum;
439c2c66affSColin Finck 
440174dfab6SVincent Franchomme             for (uint32_t i = 0; i < length >> Vcb->sector_shift; i++) {
441174dfab6SVincent Franchomme                 if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
442318da0c1SPierre Schweitzer                     uint64_t off;
443318da0c1SPierre Schweitzer                     uint16_t stripe;
444c2c66affSColin Finck 
445174dfab6SVincent Franchomme                     get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length, ci->num_stripes, &off, &stripe);
446c2c66affSColin Finck 
447318da0c1SPierre Schweitzer                     ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
448c2c66affSColin Finck 
449c2c66affSColin Finck                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
450c2c66affSColin Finck 
451c2c66affSColin Finck                     return Status;
452c2c66affSColin Finck                 }
453194ea909SVictor Perevertkin 
454194ea909SVictor Perevertkin                 ptr = (uint8_t*)ptr + Vcb->csum_size;
455c2c66affSColin Finck             }
456c2c66affSColin Finck 
457c2c66affSColin Finck             return Status;
458c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
459194ea909SVictor Perevertkin             ERR("check_csum returned %08lx\n", Status);
460c2c66affSColin Finck             return Status;
461c2c66affSColin Finck         }
462c2c66affSColin Finck     }
463c2c66affSColin Finck 
464c2c66affSColin Finck     return STATUS_SUCCESS;
465c2c66affSColin Finck }
466c2c66affSColin Finck 
read_data_raid10(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t generation,uint64_t offset)467318da0c1SPierre Schweitzer static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
468318da0c1SPierre Schweitzer                                  CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
46906042735SVincent Franchomme     uint16_t stripe = 0;
470c2c66affSColin Finck     NTSTATUS Status;
471318da0c1SPierre Schweitzer     bool checksum_error = false;
472c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
473c2c66affSColin Finck 
474174dfab6SVincent Franchomme     for (uint16_t j = 0; j < ci->num_stripes; j++) {
475c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
476194ea909SVictor Perevertkin             WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
477c2c66affSColin Finck             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
478c2c66affSColin Finck             return context->stripes[j].iosb.Status;
479c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success)
480c2c66affSColin Finck             stripe = j;
481c2c66affSColin Finck     }
482c2c66affSColin Finck 
483c2c66affSColin Finck     if (context->tree) {
484c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
485c2c66affSColin Finck 
486194ea909SVictor Perevertkin         if (!check_tree_checksum(Vcb, th)) {
487318da0c1SPierre Schweitzer             checksum_error = true;
488c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
489c2c66affSColin Finck         } else if (addr != th->address) {
490318da0c1SPierre Schweitzer             WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
491318da0c1SPierre Schweitzer             checksum_error = true;
492c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
493c2c66affSColin Finck         } else if (generation != 0 && generation != th->generation) {
494318da0c1SPierre Schweitzer             WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
495318da0c1SPierre Schweitzer             checksum_error = true;
496c2c66affSColin Finck             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
497c2c66affSColin Finck         }
498c2c66affSColin Finck     } else if (context->csum) {
499174dfab6SVincent Franchomme         Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
500c2c66affSColin Finck 
501c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR)
502318da0c1SPierre Schweitzer             checksum_error = true;
503c2c66affSColin Finck         else if (!NT_SUCCESS(Status)) {
504194ea909SVictor Perevertkin             ERR("check_csum returned %08lx\n", Status);
505c2c66affSColin Finck             return Status;
506c2c66affSColin Finck         }
507c2c66affSColin Finck     }
508c2c66affSColin Finck 
509c2c66affSColin Finck     if (!checksum_error)
510c2c66affSColin Finck         return STATUS_SUCCESS;
511c2c66affSColin Finck 
512c2c66affSColin Finck     if (context->tree) {
513c2c66affSColin Finck         tree_header* t2;
514318da0c1SPierre Schweitzer         uint64_t off;
515318da0c1SPierre Schweitzer         uint16_t badsubstripe = 0;
516318da0c1SPierre Schweitzer         bool recovered = false;
517c2c66affSColin Finck 
518c2c66affSColin Finck         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
519c2c66affSColin Finck         if (!t2) {
520c2c66affSColin Finck             ERR("out of memory\n");
521c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
522c2c66affSColin Finck         }
523c2c66affSColin Finck 
524c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
525c2c66affSColin Finck 
526c2c66affSColin Finck         stripe *= ci->sub_stripes;
527c2c66affSColin Finck 
528174dfab6SVincent Franchomme         for (uint16_t j = 0; j < ci->sub_stripes; j++) {
529c2c66affSColin Finck             if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
530c2c66affSColin Finck                 badsubstripe = j;
531c2c66affSColin Finck                 break;
532c2c66affSColin Finck             }
533c2c66affSColin Finck         }
534c2c66affSColin Finck 
535174dfab6SVincent Franchomme         for (uint16_t j = 0; j < ci->sub_stripes; j++) {
536c2c66affSColin Finck             if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
537318da0c1SPierre Schweitzer                 Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
538318da0c1SPierre Schweitzer                                         Vcb->superblock.node_size, (uint8_t*)t2, false);
539c2c66affSColin Finck                 if (!NT_SUCCESS(Status)) {
540194ea909SVictor Perevertkin                     WARN("sync_read_phys returned %08lx\n", Status);
541c2c66affSColin Finck                     log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
542c2c66affSColin Finck                 } else {
543194ea909SVictor Perevertkin                     bool checksum_error = !check_tree_checksum(Vcb, t2);
544c2c66affSColin Finck 
545194ea909SVictor Perevertkin                     if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
546c2c66affSColin Finck                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
547318da0c1SPierre Schweitzer                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
548318da0c1SPierre Schweitzer                         recovered = true;
549c2c66affSColin Finck 
550c2c66affSColin Finck                         if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
551318da0c1SPierre Schweitzer                             Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
552318da0c1SPierre Schweitzer                                                      cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
553c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
554194ea909SVictor Perevertkin                                 WARN("write_data_phys returned %08lx\n", Status);
555c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
556c2c66affSColin Finck                             }
557c2c66affSColin Finck                         }
558c2c66affSColin Finck 
559c2c66affSColin Finck                         break;
560194ea909SVictor Perevertkin                     } else if (t2->address != addr || checksum_error)
561c2c66affSColin Finck                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
562c2c66affSColin Finck                     else
563c2c66affSColin Finck                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
564c2c66affSColin Finck                 }
565c2c66affSColin Finck             }
566c2c66affSColin Finck         }
567c2c66affSColin Finck 
568c2c66affSColin Finck         if (!recovered) {
569318da0c1SPierre Schweitzer             ERR("unrecoverable checksum error at %I64x\n", addr);
570c2c66affSColin Finck             ExFreePool(t2);
571c2c66affSColin Finck             return STATUS_CRC_ERROR;
572c2c66affSColin Finck         }
573c2c66affSColin Finck 
574c2c66affSColin Finck         ExFreePool(t2);
575c2c66affSColin Finck     } else {
576174dfab6SVincent Franchomme         ULONG sectors = length >> Vcb->sector_shift;
577318da0c1SPierre Schweitzer         uint8_t* sector;
578194ea909SVictor Perevertkin         void* ptr = context->csum;
579c2c66affSColin Finck 
580c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
581c2c66affSColin Finck         if (!sector) {
582c2c66affSColin Finck             ERR("out of memory\n");
583c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
584c2c66affSColin Finck         }
585c2c66affSColin Finck 
586174dfab6SVincent Franchomme         for (ULONG i = 0; i < sectors; i++) {
587174dfab6SVincent Franchomme             if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
588318da0c1SPierre Schweitzer                 uint64_t off;
589318da0c1SPierre Schweitzer                 uint16_t stripe2, badsubstripe = 0;
590318da0c1SPierre Schweitzer                 bool recovered = false;
591c2c66affSColin Finck 
592174dfab6SVincent Franchomme                 get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
593c2c66affSColin Finck                                  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
594c2c66affSColin Finck 
595c2c66affSColin Finck                 stripe2 *= ci->sub_stripes;
596c2c66affSColin Finck 
597174dfab6SVincent Franchomme                 for (uint16_t j = 0; j < ci->sub_stripes; j++) {
598c2c66affSColin Finck                     if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
599c2c66affSColin Finck                         badsubstripe = j;
600c2c66affSColin Finck                         break;
601c2c66affSColin Finck                     }
602c2c66affSColin Finck                 }
603c2c66affSColin Finck 
604c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
605c2c66affSColin Finck 
606174dfab6SVincent Franchomme                 for (uint16_t j = 0; j < ci->sub_stripes; j++) {
607c2c66affSColin Finck                     if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
608318da0c1SPierre Schweitzer                         Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
609318da0c1SPierre Schweitzer                                                 Vcb->superblock.sector_size, sector, false);
610c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
611194ea909SVictor Perevertkin                             WARN("sync_read_phys returned %08lx\n", Status);
612c2c66affSColin Finck                             log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
613c2c66affSColin Finck                         } else {
614194ea909SVictor Perevertkin                             if (check_sector_csum(Vcb, sector, ptr)) {
615174dfab6SVincent Franchomme                                 RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
616174dfab6SVincent Franchomme                                 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe2 + j]->devitem.dev_id);
617318da0c1SPierre Schweitzer                                 recovered = true;
618c2c66affSColin Finck 
619c2c66affSColin Finck                                 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
620318da0c1SPierre Schweitzer                                     Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
621318da0c1SPierre Schweitzer                                                              cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
622c2c66affSColin Finck                                     if (!NT_SUCCESS(Status)) {
623194ea909SVictor Perevertkin                                         WARN("write_data_phys returned %08lx\n", Status);
624c2c66affSColin Finck                                         log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
625c2c66affSColin Finck                                     }
626c2c66affSColin Finck                                 }
627c2c66affSColin Finck 
628c2c66affSColin Finck                                 break;
629c2c66affSColin Finck                             } else
630c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
631c2c66affSColin Finck                         }
632c2c66affSColin Finck                     }
633c2c66affSColin Finck                 }
634c2c66affSColin Finck 
635c2c66affSColin Finck                 if (!recovered) {
636174dfab6SVincent Franchomme                     ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
637c2c66affSColin Finck                     ExFreePool(sector);
638c2c66affSColin Finck                     return STATUS_CRC_ERROR;
639c2c66affSColin Finck                 }
640c2c66affSColin Finck             }
641194ea909SVictor Perevertkin 
642194ea909SVictor Perevertkin             ptr = (uint8_t*)ptr + Vcb->csum_size;
643c2c66affSColin Finck         }
644c2c66affSColin Finck 
645c2c66affSColin Finck         ExFreePool(sector);
646c2c66affSColin Finck     }
647c2c66affSColin Finck 
648c2c66affSColin Finck     return STATUS_SUCCESS;
649c2c66affSColin Finck }
650c2c66affSColin Finck 
read_data_raid5(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t offset,uint64_t generation,chunk * c,bool degraded)651318da0c1SPierre Schweitzer static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
652318da0c1SPierre Schweitzer                                 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
653c2c66affSColin Finck     NTSTATUS Status;
654318da0c1SPierre Schweitzer     bool checksum_error = false;
655c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
65606042735SVincent Franchomme     uint16_t j, stripe = 0;
657318da0c1SPierre Schweitzer     bool no_success = true;
658c2c66affSColin Finck 
659c2c66affSColin Finck     for (j = 0; j < ci->num_stripes; j++) {
660c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
661194ea909SVictor Perevertkin             WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
662c2c66affSColin Finck             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
663c2c66affSColin Finck             return context->stripes[j].iosb.Status;
664c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success) {
665c2c66affSColin Finck             stripe = j;
666318da0c1SPierre Schweitzer             no_success = false;
667c2c66affSColin Finck         }
668c2c66affSColin Finck     }
669c2c66affSColin Finck 
670c2c66affSColin Finck     if (c) {    // check partial stripes
671c2c66affSColin Finck         LIST_ENTRY* le;
672318da0c1SPierre Schweitzer         uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;
673c2c66affSColin Finck 
674318da0c1SPierre Schweitzer         ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
675c2c66affSColin Finck 
676c2c66affSColin Finck         le = c->partial_stripes.Flink;
677c2c66affSColin Finck         while (le != &c->partial_stripes) {
678c2c66affSColin Finck             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
679c2c66affSColin Finck 
680c2c66affSColin Finck             if (ps->address + ps_length > addr && ps->address < addr + length) {
681c2c66affSColin Finck                 ULONG runlength, index;
682c2c66affSColin Finck 
683c2c66affSColin Finck                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
684c2c66affSColin Finck 
685c2c66affSColin Finck                 while (runlength != 0) {
686318da0c1SPierre Schweitzer                     if (index >= ps->bmplen)
687318da0c1SPierre Schweitzer                         break;
688318da0c1SPierre Schweitzer 
689318da0c1SPierre Schweitzer                     if (index + runlength >= ps->bmplen) {
690318da0c1SPierre Schweitzer                         runlength = ps->bmplen - index;
691318da0c1SPierre Schweitzer 
692318da0c1SPierre Schweitzer                         if (runlength == 0)
693318da0c1SPierre Schweitzer                             break;
694318da0c1SPierre Schweitzer                     }
695*6e0cf03dSVincent Franchomme 
696174dfab6SVincent Franchomme                     uint64_t runstart = ps->address + (index << Vcb->sector_shift);
697174dfab6SVincent Franchomme                     uint64_t runend = runstart + (runlength << Vcb->sector_shift);
698318da0c1SPierre Schweitzer                     uint64_t start = max(runstart, addr);
699318da0c1SPierre Schweitzer                     uint64_t end = min(runend, addr + length);
700c2c66affSColin Finck 
701c2c66affSColin Finck                     if (end > start)
702c2c66affSColin Finck                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
703c2c66affSColin Finck 
704c2c66affSColin Finck                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
705c2c66affSColin Finck                 }
706c2c66affSColin Finck             } else if (ps->address >= addr + length)
707c2c66affSColin Finck                 break;
708c2c66affSColin Finck 
709c2c66affSColin Finck             le = le->Flink;
710c2c66affSColin Finck         }
711c2c66affSColin Finck 
712c2c66affSColin Finck         ExReleaseResourceLite(&c->partial_stripes_lock);
713c2c66affSColin Finck     }
714c2c66affSColin Finck 
715c2c66affSColin Finck     if (context->tree) {
716c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
717c2c66affSColin Finck 
718194ea909SVictor Perevertkin         if (addr != th->address || !check_tree_checksum(Vcb, th)) {
719318da0c1SPierre Schweitzer             checksum_error = true;
720c2c66affSColin Finck             if (!no_success && !degraded)
721c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
722c2c66affSColin Finck         } else if (generation != 0 && generation != th->generation) {
723318da0c1SPierre Schweitzer             checksum_error = true;
724c2c66affSColin Finck             if (!no_success && !degraded)
725c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
726c2c66affSColin Finck         }
727c2c66affSColin Finck     } else if (context->csum) {
728174dfab6SVincent Franchomme         Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
729c2c66affSColin Finck 
730c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
731c2c66affSColin Finck             if (!degraded)
732c2c66affSColin Finck                 WARN("checksum error\n");
733318da0c1SPierre Schweitzer             checksum_error = true;
734c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
735194ea909SVictor Perevertkin             ERR("check_csum returned %08lx\n", Status);
736c2c66affSColin Finck             return Status;
737c2c66affSColin Finck         }
738c2c66affSColin Finck     } else if (degraded)
739318da0c1SPierre Schweitzer         checksum_error = true;
740c2c66affSColin Finck 
741c2c66affSColin Finck     if (!checksum_error)
742c2c66affSColin Finck         return STATUS_SUCCESS;
743c2c66affSColin Finck 
744c2c66affSColin Finck     if (context->tree) {
745318da0c1SPierre Schweitzer         uint16_t parity;
746318da0c1SPierre Schweitzer         uint64_t off;
747318da0c1SPierre Schweitzer         bool recovered = false, first = true, failed = false;
748318da0c1SPierre Schweitzer         uint8_t* t2;
749c2c66affSColin Finck 
750c2c66affSColin Finck         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
751c2c66affSColin Finck         if (!t2) {
752c2c66affSColin Finck             ERR("out of memory\n");
753c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
754c2c66affSColin Finck         }
755c2c66affSColin Finck 
756c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
757c2c66affSColin Finck 
758c2c66affSColin Finck         parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
759c2c66affSColin Finck 
760c2c66affSColin Finck         stripe = (parity + stripe + 1) % ci->num_stripes;
761c2c66affSColin Finck 
762c2c66affSColin Finck         for (j = 0; j < ci->num_stripes; j++) {
763c2c66affSColin Finck             if (j != stripe) {
764c2c66affSColin Finck                 if (devices[j] && devices[j]->devobj) {
765c2c66affSColin Finck                     if (first) {
766318da0c1SPierre Schweitzer                         Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
767c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
768194ea909SVictor Perevertkin                             ERR("sync_read_phys returned %08lx\n", Status);
769c2c66affSColin Finck                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
770318da0c1SPierre Schweitzer                             failed = true;
771c2c66affSColin Finck                             break;
772c2c66affSColin Finck                         }
773c2c66affSColin Finck 
774318da0c1SPierre Schweitzer                         first = false;
775c2c66affSColin Finck                     } else {
776318da0c1SPierre Schweitzer                         Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
777c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
778194ea909SVictor Perevertkin                             ERR("sync_read_phys returned %08lx\n", Status);
779c2c66affSColin Finck                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
780318da0c1SPierre Schweitzer                             failed = true;
781c2c66affSColin Finck                             break;
782c2c66affSColin Finck                         }
783c2c66affSColin Finck 
784c2c66affSColin Finck                         do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
785c2c66affSColin Finck                     }
786c2c66affSColin Finck                 } else {
787318da0c1SPierre Schweitzer                     failed = true;
788c2c66affSColin Finck                     break;
789c2c66affSColin Finck                 }
790c2c66affSColin Finck             }
791c2c66affSColin Finck         }
792c2c66affSColin Finck 
793c2c66affSColin Finck         if (!failed) {
794c2c66affSColin Finck             tree_header* t3 = (tree_header*)t2;
795c2c66affSColin Finck 
796194ea909SVictor Perevertkin             if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
797c2c66affSColin Finck                 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
798c2c66affSColin Finck 
799c2c66affSColin Finck                 if (!degraded)
800318da0c1SPierre Schweitzer                     ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
801c2c66affSColin Finck 
802318da0c1SPierre Schweitzer                 recovered = true;
803c2c66affSColin Finck 
804c2c66affSColin Finck                 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
805318da0c1SPierre Schweitzer                     Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
806c2c66affSColin Finck                     if (!NT_SUCCESS(Status)) {
807194ea909SVictor Perevertkin                         WARN("write_data_phys returned %08lx\n", Status);
808c2c66affSColin Finck                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
809c2c66affSColin Finck                     }
810c2c66affSColin Finck                 }
811c2c66affSColin Finck             }
812c2c66affSColin Finck         }
813c2c66affSColin Finck 
814c2c66affSColin Finck         if (!recovered) {
815318da0c1SPierre Schweitzer             ERR("unrecoverable checksum error at %I64x\n", addr);
816c2c66affSColin Finck             ExFreePool(t2);
817c2c66affSColin Finck             return STATUS_CRC_ERROR;
818c2c66affSColin Finck         }
819c2c66affSColin Finck 
820c2c66affSColin Finck         ExFreePool(t2);
821c2c66affSColin Finck     } else {
822174dfab6SVincent Franchomme         ULONG sectors = length >> Vcb->sector_shift;
823318da0c1SPierre Schweitzer         uint8_t* sector;
824194ea909SVictor Perevertkin         void* ptr = context->csum;
825c2c66affSColin Finck 
826c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
827c2c66affSColin Finck         if (!sector) {
828c2c66affSColin Finck             ERR("out of memory\n");
829c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
830c2c66affSColin Finck         }
831c2c66affSColin Finck 
832174dfab6SVincent Franchomme         for (ULONG i = 0; i < sectors; i++) {
833318da0c1SPierre Schweitzer             uint16_t parity;
834318da0c1SPierre Schweitzer             uint64_t off;
835c2c66affSColin Finck 
836174dfab6SVincent Franchomme             get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
837c2c66affSColin Finck                              ci->num_stripes - 1, &off, &stripe);
838c2c66affSColin Finck 
839174dfab6SVincent Franchomme             parity = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
840c2c66affSColin Finck 
841c2c66affSColin Finck             stripe = (parity + stripe + 1) % ci->num_stripes;
842c2c66affSColin Finck 
843174dfab6SVincent Franchomme             if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
844318da0c1SPierre Schweitzer                 bool recovered = false, first = true, failed = false;
845c2c66affSColin Finck 
846c2c66affSColin Finck                 if (devices[stripe] && devices[stripe]->devobj)
847c2c66affSColin Finck                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);
848c2c66affSColin Finck 
849c2c66affSColin Finck                 for (j = 0; j < ci->num_stripes; j++) {
850c2c66affSColin Finck                     if (j != stripe) {
851c2c66affSColin Finck                         if (devices[j] && devices[j]->devobj) {
852c2c66affSColin Finck                             if (first) {
853318da0c1SPierre Schweitzer                                 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
854c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
855194ea909SVictor Perevertkin                                     ERR("sync_read_phys returned %08lx\n", Status);
856318da0c1SPierre Schweitzer                                     failed = true;
857c2c66affSColin Finck                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
858c2c66affSColin Finck                                     break;
859c2c66affSColin Finck                                 }
860c2c66affSColin Finck 
861318da0c1SPierre Schweitzer                                 first = false;
862c2c66affSColin Finck                             } else {
863318da0c1SPierre Schweitzer                                 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
864318da0c1SPierre Schweitzer                                                         sector + Vcb->superblock.sector_size, false);
865c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
866194ea909SVictor Perevertkin                                     ERR("sync_read_phys returned %08lx\n", Status);
867318da0c1SPierre Schweitzer                                     failed = true;
868c2c66affSColin Finck                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
869c2c66affSColin Finck                                     break;
870c2c66affSColin Finck                                 }
871c2c66affSColin Finck 
872c2c66affSColin Finck                                 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
873c2c66affSColin Finck                             }
874c2c66affSColin Finck                         } else {
875318da0c1SPierre Schweitzer                             failed = true;
876c2c66affSColin Finck                             break;
877c2c66affSColin Finck                         }
878c2c66affSColin Finck                     }
879c2c66affSColin Finck                 }
880c2c66affSColin Finck 
881c2c66affSColin Finck                 if (!failed) {
882194ea909SVictor Perevertkin                     if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
883174dfab6SVincent Franchomme                         RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
884c2c66affSColin Finck 
885c2c66affSColin Finck                         if (!degraded)
886174dfab6SVincent Franchomme                             ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
887c2c66affSColin Finck 
888318da0c1SPierre Schweitzer                         recovered = true;
889c2c66affSColin Finck 
890c2c66affSColin Finck                         if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
891318da0c1SPierre Schweitzer                             Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
892c2c66affSColin Finck                                                      sector, Vcb->superblock.sector_size);
893c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
894194ea909SVictor Perevertkin                                 WARN("write_data_phys returned %08lx\n", Status);
895c2c66affSColin Finck                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
896c2c66affSColin Finck                             }
897c2c66affSColin Finck                         }
898c2c66affSColin Finck                     }
899c2c66affSColin Finck                 }
900c2c66affSColin Finck 
901c2c66affSColin Finck                 if (!recovered) {
902174dfab6SVincent Franchomme                     ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
903c2c66affSColin Finck                     ExFreePool(sector);
904c2c66affSColin Finck                     return STATUS_CRC_ERROR;
905c2c66affSColin Finck                 }
906c2c66affSColin Finck             }
907194ea909SVictor Perevertkin 
908194ea909SVictor Perevertkin             if (ptr)
909194ea909SVictor Perevertkin                 ptr = (uint8_t*)ptr + Vcb->csum_size;
910c2c66affSColin Finck         }
911c2c66affSColin Finck 
912c2c66affSColin Finck         ExFreePool(sector);
913c2c66affSColin Finck     }
914c2c66affSColin Finck 
915c2c66affSColin Finck     return STATUS_SUCCESS;
916c2c66affSColin Finck }
917c2c66affSColin Finck 
/* Rebuilds lost blocks of a RAID6 stripe from the surviving blocks.
 *
 * sectors     - num_stripes consecutive blocks of sector_size bytes each.
 *               Indices 0 .. num_stripes-3 are data blocks, index num_stripes-2
 *               is P and index num_stripes-1 is Q. The data blocks named by
 *               missing1/missing2 are never read; P is not read when it is one
 *               of the missing blocks.
 * num_stripes - number of blocks in the stripe, both parities included. Must be
 *               at least 3.
 * sector_size - size of each block in bytes.
 * missing1,
 * missing2    - indices (into sectors) of the two blocks to treat as lost. They
 *               must differ: equal data indices would make the p+q branch divide
 *               by zero in gdiv.
 * out         - receives the rebuilt data.
 *               If one of the missing blocks is P: sector_size bytes, holding
 *               the other missing block (presumably always a data block - the
 *               callers in this file never pass the Q index).
 *               Otherwise: 2 * sector_size bytes, the block for missing1 first,
 *               followed by the block for missing2.
 *
 * The accumulation loops test the index before decrementing it. With a single
 * data block (num_stripes == 3) the starting index is already 0, and an
 * unconditional decrement would wrap the uint16_t to 65535 and read far outside
 * the sectors buffer.
 */
void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
        uint16_t stripe;

        // Start from the highest-numbered data block and work down to block 0.
        stripe = num_stripes - 3;

        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        // Accumulate the surviving data blocks, doubling the running value
        // before each lower-numbered block is folded in (presumably a
        // multiply-by-generator in GF(2^8), giving the Q contribution of the
        // surviving data). The missing block contributes nothing.
        while (stripe > 0) {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        }

        // XOR with the stored Q leaves only the missing block's term...
        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        // ...which still carries the factor for its position; block 0 has none.
        if (missing != 0)
            galois_divpower(out, (uint8_t)missing, sector_size);
    } else { // reconstruct from p and q
        uint16_t x = missing1, y = missing2, stripe;
        uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
        uint32_t j;

        stripe = num_stripes - 3;

        // out is used as scratch space first: the lower half accumulates the
        // Q-style sum of the surviving data, the upper half the plain XOR.
        pxy = out + sector_size;
        qxy = out;

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        while (stripe > 0) {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            }
        }

        // Coefficients of the two-failure solution. The exponents are kept
        // non-negative by adding 255 where needed (presumably the order of the
        // generator, so the value is unchanged).
        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        p = sectors + ((num_stripes - 2) * sector_size);
        q = sectors + ((num_stripes - 1) * sector_size);

        // Solve for block x byte by byte, overwriting the Q-style scratch sum.
        for (j = 0; j < sector_size; j++) {
            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

            p++;
            q++;
            pxy++;
            qxy++;
        }

        // Block y = (XOR of surviving data) ^ block x ^ P.
        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}
995c2c66affSColin Finck 
read_data_raid6(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t offset,uint64_t generation,chunk * c,bool degraded)996318da0c1SPierre Schweitzer static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
997318da0c1SPierre Schweitzer                                 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
998c2c66affSColin Finck     NTSTATUS Status;
999318da0c1SPierre Schweitzer     bool checksum_error = false;
1000c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
100106042735SVincent Franchomme     uint16_t stripe = 0, j;
1002318da0c1SPierre Schweitzer     bool no_success = true;
1003c2c66affSColin Finck 
1004c2c66affSColin Finck     for (j = 0; j < ci->num_stripes; j++) {
1005c2c66affSColin Finck         if (context->stripes[j].status == ReadDataStatus_Error) {
1006194ea909SVictor Perevertkin             WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
1007c2c66affSColin Finck 
1008c2c66affSColin Finck             if (devices[j])
1009c2c66affSColin Finck                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1010c2c66affSColin Finck             return context->stripes[j].iosb.Status;
1011c2c66affSColin Finck         } else if (context->stripes[j].status == ReadDataStatus_Success) {
1012c2c66affSColin Finck             stripe = j;
1013318da0c1SPierre Schweitzer             no_success = false;
1014c2c66affSColin Finck         }
1015c2c66affSColin Finck     }
1016c2c66affSColin Finck 
1017c2c66affSColin Finck     if (c) {    // check partial stripes
1018c2c66affSColin Finck         LIST_ENTRY* le;
1019318da0c1SPierre Schweitzer         uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
1020c2c66affSColin Finck 
1021318da0c1SPierre Schweitzer         ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
1022c2c66affSColin Finck 
1023c2c66affSColin Finck         le = c->partial_stripes.Flink;
1024c2c66affSColin Finck         while (le != &c->partial_stripes) {
1025c2c66affSColin Finck             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
1026c2c66affSColin Finck 
1027c2c66affSColin Finck             if (ps->address + ps_length > addr && ps->address < addr + length) {
1028c2c66affSColin Finck                 ULONG runlength, index;
1029c2c66affSColin Finck 
1030c2c66affSColin Finck                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
1031c2c66affSColin Finck 
1032c2c66affSColin Finck                 while (runlength != 0) {
1033318da0c1SPierre Schweitzer                     if (index >= ps->bmplen)
1034318da0c1SPierre Schweitzer                         break;
1035318da0c1SPierre Schweitzer 
1036318da0c1SPierre Schweitzer                     if (index + runlength >= ps->bmplen) {
1037318da0c1SPierre Schweitzer                         runlength = ps->bmplen - index;
1038318da0c1SPierre Schweitzer 
1039318da0c1SPierre Schweitzer                         if (runlength == 0)
1040318da0c1SPierre Schweitzer                             break;
1041318da0c1SPierre Schweitzer                     }
1042318da0c1SPierre Schweitzer 
1043174dfab6SVincent Franchomme                     uint64_t runstart = ps->address + (index << Vcb->sector_shift);
1044174dfab6SVincent Franchomme                     uint64_t runend = runstart + (runlength << Vcb->sector_shift);
1045318da0c1SPierre Schweitzer                     uint64_t start = max(runstart, addr);
1046318da0c1SPierre Schweitzer                     uint64_t end = min(runend, addr + length);
1047c2c66affSColin Finck 
1048c2c66affSColin Finck                     if (end > start)
1049c2c66affSColin Finck                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
1050c2c66affSColin Finck 
1051c2c66affSColin Finck                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
1052c2c66affSColin Finck                 }
1053c2c66affSColin Finck             } else if (ps->address >= addr + length)
1054c2c66affSColin Finck                 break;
1055c2c66affSColin Finck 
1056c2c66affSColin Finck             le = le->Flink;
1057c2c66affSColin Finck         }
1058c2c66affSColin Finck 
1059c2c66affSColin Finck         ExReleaseResourceLite(&c->partial_stripes_lock);
1060c2c66affSColin Finck     }
1061c2c66affSColin Finck 
1062c2c66affSColin Finck     if (context->tree) {
1063c2c66affSColin Finck         tree_header* th = (tree_header*)buf;
1064c2c66affSColin Finck 
1065194ea909SVictor Perevertkin         if (addr != th->address || !check_tree_checksum(Vcb, th)) {
1066318da0c1SPierre Schweitzer             checksum_error = true;
1067c2c66affSColin Finck             if (!no_success && !degraded && devices[stripe])
1068c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1069c2c66affSColin Finck         } else if (generation != 0 && generation != th->generation) {
1070318da0c1SPierre Schweitzer             checksum_error = true;
1071c2c66affSColin Finck             if (!no_success && !degraded && devices[stripe])
1072c2c66affSColin Finck                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1073c2c66affSColin Finck         }
1074c2c66affSColin Finck     } else if (context->csum) {
1075174dfab6SVincent Franchomme         Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
1076c2c66affSColin Finck 
1077c2c66affSColin Finck         if (Status == STATUS_CRC_ERROR) {
1078c2c66affSColin Finck             if (!degraded)
1079c2c66affSColin Finck                 WARN("checksum error\n");
1080318da0c1SPierre Schweitzer             checksum_error = true;
1081c2c66affSColin Finck         } else if (!NT_SUCCESS(Status)) {
1082194ea909SVictor Perevertkin             ERR("check_csum returned %08lx\n", Status);
1083c2c66affSColin Finck             return Status;
1084c2c66affSColin Finck         }
1085c2c66affSColin Finck     } else if (degraded)
1086318da0c1SPierre Schweitzer         checksum_error = true;
1087c2c66affSColin Finck 
1088c2c66affSColin Finck     if (!checksum_error)
1089c2c66affSColin Finck         return STATUS_SUCCESS;
1090c2c66affSColin Finck 
1091c2c66affSColin Finck     if (context->tree) {
1092318da0c1SPierre Schweitzer         uint8_t* sector;
109306042735SVincent Franchomme         uint16_t k, physstripe, parity1, parity2, error_stripe = 0;
1094318da0c1SPierre Schweitzer         uint64_t off;
1095318da0c1SPierre Schweitzer         bool recovered = false, failed = false;
1096c2c66affSColin Finck         ULONG num_errors = 0;
1097c2c66affSColin Finck 
1098c2c66affSColin Finck         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1099c2c66affSColin Finck         if (!sector) {
1100c2c66affSColin Finck             ERR("out of memory\n");
1101c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
1102c2c66affSColin Finck         }
1103c2c66affSColin Finck 
1104c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1105c2c66affSColin Finck 
1106c2c66affSColin Finck         parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1107c2c66affSColin Finck         parity2 = (parity1 + 1) % ci->num_stripes;
1108c2c66affSColin Finck 
1109c2c66affSColin Finck         physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1110c2c66affSColin Finck 
1111c2c66affSColin Finck         j = (parity2 + 1) % ci->num_stripes;
1112c2c66affSColin Finck 
1113c2c66affSColin Finck         for (k = 0; k < ci->num_stripes - 1; k++) {
1114c2c66affSColin Finck             if (j != physstripe) {
1115c2c66affSColin Finck                 if (devices[j] && devices[j]->devobj) {
1116318da0c1SPierre Schweitzer                     Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1117318da0c1SPierre Schweitzer                                             sector + (k * Vcb->superblock.node_size), false);
1118c2c66affSColin Finck                     if (!NT_SUCCESS(Status)) {
1119194ea909SVictor Perevertkin                         ERR("sync_read_phys returned %08lx\n", Status);
1120c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1121c2c66affSColin Finck                         num_errors++;
1122c2c66affSColin Finck                         error_stripe = k;
1123c2c66affSColin Finck 
1124c2c66affSColin Finck                         if (num_errors > 1) {
1125318da0c1SPierre Schweitzer                             failed = true;
1126c2c66affSColin Finck                             break;
1127c2c66affSColin Finck                         }
1128c2c66affSColin Finck                     }
1129c2c66affSColin Finck                 } else {
1130c2c66affSColin Finck                     num_errors++;
1131c2c66affSColin Finck                     error_stripe = k;
1132c2c66affSColin Finck 
1133c2c66affSColin Finck                     if (num_errors > 1) {
1134318da0c1SPierre Schweitzer                         failed = true;
1135c2c66affSColin Finck                         break;
1136c2c66affSColin Finck                     }
1137c2c66affSColin Finck                 }
1138c2c66affSColin Finck             }
1139c2c66affSColin Finck 
1140c2c66affSColin Finck             j = (j + 1) % ci->num_stripes;
1141c2c66affSColin Finck         }
1142c2c66affSColin Finck 
1143c2c66affSColin Finck         if (!failed) {
1144c2c66affSColin Finck             if (num_errors == 0) {
1145c2c66affSColin Finck                 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1146c2c66affSColin Finck 
1147c2c66affSColin Finck                 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1148c2c66affSColin Finck                               Vcb->superblock.node_size);
1149c2c66affSColin Finck 
1150c2c66affSColin Finck                 for (j = 0; j < ci->num_stripes - 2; j++) {
1151c2c66affSColin Finck                     if (j != stripe)
1152c2c66affSColin Finck                         do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1153c2c66affSColin Finck                 }
1154c2c66affSColin Finck 
1155194ea909SVictor Perevertkin                 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1156c2c66affSColin Finck                     RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1157c2c66affSColin Finck 
1158c2c66affSColin Finck                     if (devices[physstripe] && devices[physstripe]->devobj)
1159318da0c1SPierre Schweitzer                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1160c2c66affSColin Finck 
1161318da0c1SPierre Schweitzer                     recovered = true;
1162c2c66affSColin Finck 
1163c2c66affSColin Finck                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1164318da0c1SPierre Schweitzer                         Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1165c2c66affSColin Finck                                                  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1166c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
1167194ea909SVictor Perevertkin                             WARN("write_data_phys returned %08lx\n", Status);
1168c2c66affSColin Finck                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1169c2c66affSColin Finck                         }
1170c2c66affSColin Finck                     }
1171c2c66affSColin Finck                 }
1172c2c66affSColin Finck             }
1173c2c66affSColin Finck 
1174c2c66affSColin Finck             if (!recovered) {
1175c2c66affSColin Finck                 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1176318da0c1SPierre Schweitzer                 bool read_q = false;
1177c2c66affSColin Finck 
1178c2c66affSColin Finck                 if (devices[parity2] && devices[parity2]->devobj) {
1179318da0c1SPierre Schweitzer                     Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1180318da0c1SPierre Schweitzer                                             Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1181c2c66affSColin Finck                     if (!NT_SUCCESS(Status)) {
1182194ea909SVictor Perevertkin                         ERR("sync_read_phys returned %08lx\n", Status);
1183c2c66affSColin Finck                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1184c2c66affSColin Finck                     } else
1185318da0c1SPierre Schweitzer                         read_q = true;
1186c2c66affSColin Finck                 }
1187c2c66affSColin Finck 
1188c2c66affSColin Finck                 if (read_q) {
1189c2c66affSColin Finck                     if (num_errors == 1) {
1190c2c66affSColin Finck                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1191c2c66affSColin Finck 
1192194ea909SVictor Perevertkin                         if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation))
1193318da0c1SPierre Schweitzer                             recovered = true;
1194c2c66affSColin Finck                     } else {
1195c2c66affSColin Finck                         for (j = 0; j < ci->num_stripes - 1; j++) {
1196c2c66affSColin Finck                             if (j != stripe) {
1197c2c66affSColin Finck                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1198c2c66affSColin Finck 
1199194ea909SVictor Perevertkin                                 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1200318da0c1SPierre Schweitzer                                     recovered = true;
1201c2c66affSColin Finck                                     error_stripe = j;
1202c2c66affSColin Finck                                     break;
1203c2c66affSColin Finck                                 }
1204c2c66affSColin Finck                             }
1205c2c66affSColin Finck                         }
1206c2c66affSColin Finck                     }
1207c2c66affSColin Finck                 }
1208c2c66affSColin Finck 
1209c2c66affSColin Finck                 if (recovered) {
1210318da0c1SPierre Schweitzer                     uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1211c2c66affSColin Finck 
1212c2c66affSColin Finck                     if (devices[physstripe] && devices[physstripe]->devobj)
1213318da0c1SPierre Schweitzer                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1214c2c66affSColin Finck 
1215c2c66affSColin Finck                     RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1216c2c66affSColin Finck 
1217c2c66affSColin Finck                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1218318da0c1SPierre Schweitzer                         Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1219c2c66affSColin Finck                                                  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1220c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
1221194ea909SVictor Perevertkin                             WARN("write_data_phys returned %08lx\n", Status);
1222c2c66affSColin Finck                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1223c2c66affSColin Finck                         }
1224c2c66affSColin Finck                     }
1225c2c66affSColin Finck 
1226c2c66affSColin Finck                     if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1227c2c66affSColin Finck                         if (error_stripe == ci->num_stripes - 2) {
1228318da0c1SPierre Schweitzer                             ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1229c2c66affSColin Finck 
1230c2c66affSColin Finck                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1231c2c66affSColin Finck 
1232c2c66affSColin Finck                             RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1233c2c66affSColin Finck 
1234c2c66affSColin Finck                             for (j = 0; j < ci->num_stripes - 2; j++) {
1235c2c66affSColin Finck                                 if (j == stripe) {
1236c2c66affSColin Finck                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1237c2c66affSColin Finck                                            Vcb->superblock.node_size);
1238c2c66affSColin Finck                                 } else {
1239c2c66affSColin Finck                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1240c2c66affSColin Finck                                             Vcb->superblock.node_size);
1241c2c66affSColin Finck                                 }
1242c2c66affSColin Finck                             }
1243c2c66affSColin Finck                         } else {
1244318da0c1SPierre Schweitzer                             ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1245c2c66affSColin Finck                                 devices[error_stripe_phys]->devitem.dev_id);
1246c2c66affSColin Finck 
1247c2c66affSColin Finck                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1248c2c66affSColin Finck 
1249c2c66affSColin Finck                             RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1250c2c66affSColin Finck                                           sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1251c2c66affSColin Finck                         }
1252c2c66affSColin Finck                     }
1253c2c66affSColin Finck 
1254c2c66affSColin Finck                     if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1255318da0c1SPierre Schweitzer                         Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1256c2c66affSColin Finck                                                  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1257c2c66affSColin Finck                         if (!NT_SUCCESS(Status)) {
1258194ea909SVictor Perevertkin                             WARN("write_data_phys returned %08lx\n", Status);
1259c2c66affSColin Finck                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1260c2c66affSColin Finck                         }
1261c2c66affSColin Finck                     }
1262c2c66affSColin Finck                 }
1263c2c66affSColin Finck             }
1264c2c66affSColin Finck         }
1265c2c66affSColin Finck 
1266c2c66affSColin Finck         if (!recovered) {
1267318da0c1SPierre Schweitzer             ERR("unrecoverable checksum error at %I64x\n", addr);
1268c2c66affSColin Finck             ExFreePool(sector);
1269c2c66affSColin Finck             return STATUS_CRC_ERROR;
1270c2c66affSColin Finck         }
1271c2c66affSColin Finck 
1272c2c66affSColin Finck         ExFreePool(sector);
1273c2c66affSColin Finck     } else {
1274174dfab6SVincent Franchomme         ULONG sectors = length >> Vcb->sector_shift;
1275318da0c1SPierre Schweitzer         uint8_t* sector;
1276194ea909SVictor Perevertkin         void* ptr = context->csum;
1277c2c66affSColin Finck 
1278174dfab6SVincent Franchomme         sector = ExAllocatePoolWithTag(NonPagedPool, (ci->num_stripes + 2) << Vcb->sector_shift, ALLOC_TAG);
1279c2c66affSColin Finck         if (!sector) {
1280c2c66affSColin Finck             ERR("out of memory\n");
1281c2c66affSColin Finck             return STATUS_INSUFFICIENT_RESOURCES;
1282c2c66affSColin Finck         }
1283c2c66affSColin Finck 
1284174dfab6SVincent Franchomme         for (ULONG i = 0; i < sectors; i++) {
1285318da0c1SPierre Schweitzer             uint64_t off;
1286318da0c1SPierre Schweitzer             uint16_t physstripe, parity1, parity2;
1287c2c66affSColin Finck 
1288174dfab6SVincent Franchomme             get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
1289c2c66affSColin Finck                              ci->num_stripes - 2, &off, &stripe);
1290c2c66affSColin Finck 
1291174dfab6SVincent Franchomme             parity1 = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1292c2c66affSColin Finck             parity2 = (parity1 + 1) % ci->num_stripes;
1293c2c66affSColin Finck 
1294c2c66affSColin Finck             physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1295c2c66affSColin Finck 
1296174dfab6SVincent Franchomme             if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
129706042735SVincent Franchomme                 uint16_t error_stripe = 0;
1298318da0c1SPierre Schweitzer                 bool recovered = false, failed = false;
1299c2c66affSColin Finck                 ULONG num_errors = 0;
1300c2c66affSColin Finck 
1301c2c66affSColin Finck                 if (devices[physstripe] && devices[physstripe]->devobj)
1302c2c66affSColin Finck                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1303c2c66affSColin Finck 
1304c2c66affSColin Finck                 j = (parity2 + 1) % ci->num_stripes;
1305c2c66affSColin Finck 
1306174dfab6SVincent Franchomme                 for (uint16_t k = 0; k < ci->num_stripes - 1; k++) {
1307c2c66affSColin Finck                     if (j != physstripe) {
1308c2c66affSColin Finck                         if (devices[j] && devices[j]->devobj) {
1309318da0c1SPierre Schweitzer                             Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1310174dfab6SVincent Franchomme                                                     sector + ((ULONG)k << Vcb->sector_shift), false);
1311c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
1312194ea909SVictor Perevertkin                                 ERR("sync_read_phys returned %08lx\n", Status);
1313c2c66affSColin Finck                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1314c2c66affSColin Finck                                 num_errors++;
1315c2c66affSColin Finck                                 error_stripe = k;
1316c2c66affSColin Finck 
1317c2c66affSColin Finck                                 if (num_errors > 1) {
1318318da0c1SPierre Schweitzer                                     failed = true;
1319c2c66affSColin Finck                                     break;
1320c2c66affSColin Finck                                 }
1321c2c66affSColin Finck                             }
1322c2c66affSColin Finck                         } else {
1323c2c66affSColin Finck                             num_errors++;
1324c2c66affSColin Finck                             error_stripe = k;
1325c2c66affSColin Finck 
1326c2c66affSColin Finck                             if (num_errors > 1) {
1327318da0c1SPierre Schweitzer                                 failed = true;
1328c2c66affSColin Finck                                 break;
1329c2c66affSColin Finck                             }
1330c2c66affSColin Finck                         }
1331c2c66affSColin Finck                     }
1332c2c66affSColin Finck 
1333c2c66affSColin Finck                     j = (j + 1) % ci->num_stripes;
1334c2c66affSColin Finck                 }
1335c2c66affSColin Finck 
1336c2c66affSColin Finck                 if (!failed) {
1337c2c66affSColin Finck                     if (num_errors == 0) {
1338174dfab6SVincent Franchomme                         RtlCopyMemory(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
1339c2c66affSColin Finck 
1340c2c66affSColin Finck                         for (j = 0; j < ci->num_stripes - 2; j++) {
1341c2c66affSColin Finck                             if (j != stripe)
1342174dfab6SVincent Franchomme                                 do_xor(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift), Vcb->superblock.sector_size);
1343c2c66affSColin Finck                         }
1344c2c66affSColin Finck 
1345174dfab6SVincent Franchomme                         if (!ptr || check_sector_csum(Vcb, sector + ((unsigned int)stripe << Vcb->sector_shift), ptr)) {
1346174dfab6SVincent Franchomme                             RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1347c2c66affSColin Finck 
1348c2c66affSColin Finck                             if (devices[physstripe] && devices[physstripe]->devobj)
1349174dfab6SVincent Franchomme                                 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
1350c2c66affSColin Finck                                     devices[physstripe]->devitem.dev_id);
1351c2c66affSColin Finck 
1352318da0c1SPierre Schweitzer                             recovered = true;
1353c2c66affSColin Finck 
1354c2c66affSColin Finck                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1355318da0c1SPierre Schweitzer                                 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1356174dfab6SVincent Franchomme                                                          sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1357c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
1358194ea909SVictor Perevertkin                                     WARN("write_data_phys returned %08lx\n", Status);
1359c2c66affSColin Finck                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1360c2c66affSColin Finck                                 }
1361c2c66affSColin Finck                             }
1362c2c66affSColin Finck                         }
1363c2c66affSColin Finck                     }
1364c2c66affSColin Finck 
1365c2c66affSColin Finck                     if (!recovered) {
1366318da0c1SPierre Schweitzer                         bool read_q = false;
1367c2c66affSColin Finck 
1368c2c66affSColin Finck                         if (devices[parity2] && devices[parity2]->devobj) {
1369318da0c1SPierre Schweitzer                             Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1370174dfab6SVincent Franchomme                                                     Vcb->superblock.sector_size, sector + ((unsigned int)(ci->num_stripes - 1) << Vcb->sector_shift), false);
1371c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
1372194ea909SVictor Perevertkin                                 ERR("sync_read_phys returned %08lx\n", Status);
1373c2c66affSColin Finck                                 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1374c2c66affSColin Finck                             } else
1375318da0c1SPierre Schweitzer                                 read_q = true;
1376c2c66affSColin Finck                         }
1377c2c66affSColin Finck 
1378c2c66affSColin Finck                         if (read_q) {
1379c2c66affSColin Finck                             if (num_errors == 1) {
1380174dfab6SVincent Franchomme                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
1381c2c66affSColin Finck 
1382c2c66affSColin Finck                                 if (!devices[physstripe] || !devices[physstripe]->devobj)
1383318da0c1SPierre Schweitzer                                     recovered = true;
1384194ea909SVictor Perevertkin                                 else
1385174dfab6SVincent Franchomme                                     recovered = check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr);
1386c2c66affSColin Finck                             } else {
1387c2c66affSColin Finck                                 for (j = 0; j < ci->num_stripes - 1; j++) {
1388c2c66affSColin Finck                                     if (j != stripe) {
1389174dfab6SVincent Franchomme                                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
1390c2c66affSColin Finck 
1391174dfab6SVincent Franchomme                                         if (check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr)) {
1392318da0c1SPierre Schweitzer                                             recovered = true;
1393c2c66affSColin Finck                                             error_stripe = j;
1394c2c66affSColin Finck                                             break;
1395c2c66affSColin Finck                                         }
1396c2c66affSColin Finck                                     }
1397c2c66affSColin Finck                                 }
1398c2c66affSColin Finck                             }
1399c2c66affSColin Finck                         }
1400c2c66affSColin Finck 
1401c2c66affSColin Finck                         if (recovered) {
1402318da0c1SPierre Schweitzer                             uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1403c2c66affSColin Finck 
1404c2c66affSColin Finck                             if (devices[physstripe] && devices[physstripe]->devobj)
1405318da0c1SPierre Schweitzer                                 ERR("recovering from checksum error at %I64x, device %I64x\n",
1406174dfab6SVincent Franchomme                                     addr + ((uint64_t)i << Vcb->sector_shift), devices[physstripe]->devitem.dev_id);
1407c2c66affSColin Finck 
1408174dfab6SVincent Franchomme                             RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
1409c2c66affSColin Finck 
1410c2c66affSColin Finck                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1411318da0c1SPierre Schweitzer                                 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1412174dfab6SVincent Franchomme                                                          sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
1413c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
1414194ea909SVictor Perevertkin                                     WARN("write_data_phys returned %08lx\n", Status);
1415c2c66affSColin Finck                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1416c2c66affSColin Finck                                 }
1417c2c66affSColin Finck                             }
1418c2c66affSColin Finck 
1419c2c66affSColin Finck                             if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1420c2c66affSColin Finck                                 if (error_stripe == ci->num_stripes - 2) {
1421174dfab6SVincent Franchomme                                     ERR("recovering from parity error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
1422c2c66affSColin Finck                                         devices[error_stripe_phys]->devitem.dev_id);
1423c2c66affSColin Finck 
1424c2c66affSColin Finck                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1425c2c66affSColin Finck 
1426174dfab6SVincent Franchomme                                     RtlZeroMemory(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
1427c2c66affSColin Finck 
1428c2c66affSColin Finck                                     for (j = 0; j < ci->num_stripes - 2; j++) {
1429c2c66affSColin Finck                                         if (j == stripe) {
1430174dfab6SVincent Franchomme                                             do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift),
1431c2c66affSColin Finck                                                    Vcb->superblock.sector_size);
1432c2c66affSColin Finck                                         } else {
1433174dfab6SVincent Franchomme                                             do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift),
1434c2c66affSColin Finck                                                    Vcb->superblock.sector_size);
1435c2c66affSColin Finck                                         }
1436c2c66affSColin Finck                                     }
1437c2c66affSColin Finck                                 } else {
1438318da0c1SPierre Schweitzer                                     ERR("recovering from checksum error at %I64x, device %I64x\n",
1439174dfab6SVincent Franchomme                                         addr + ((uint64_t)i << Vcb->sector_shift) + ((error_stripe - stripe) * ci->stripe_length),
1440c2c66affSColin Finck                                         devices[error_stripe_phys]->devitem.dev_id);
1441c2c66affSColin Finck 
1442c2c66affSColin Finck                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1443c2c66affSColin Finck 
1444174dfab6SVincent Franchomme                                     RtlCopyMemory(sector + ((unsigned int)error_stripe << Vcb->sector_shift),
1445174dfab6SVincent Franchomme                                                   sector + ((unsigned int)(ci->num_stripes + 1) << Vcb->sector_shift), Vcb->superblock.sector_size);
1446c2c66affSColin Finck                                 }
1447c2c66affSColin Finck                             }
1448c2c66affSColin Finck 
1449c2c66affSColin Finck                             if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1450318da0c1SPierre Schweitzer                                 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1451174dfab6SVincent Franchomme                                                          sector + ((unsigned int)error_stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1452c2c66affSColin Finck                                 if (!NT_SUCCESS(Status)) {
1453194ea909SVictor Perevertkin                                     WARN("write_data_phys returned %08lx\n", Status);
1454c2c66affSColin Finck                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1455c2c66affSColin Finck                                 }
1456c2c66affSColin Finck                             }
1457c2c66affSColin Finck                         }
1458c2c66affSColin Finck                     }
1459c2c66affSColin Finck                 }
1460c2c66affSColin Finck 
1461c2c66affSColin Finck                 if (!recovered) {
1462174dfab6SVincent Franchomme                     ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
1463c2c66affSColin Finck                     ExFreePool(sector);
1464c2c66affSColin Finck                     return STATUS_CRC_ERROR;
1465c2c66affSColin Finck                 }
1466c2c66affSColin Finck             }
1467194ea909SVictor Perevertkin 
1468194ea909SVictor Perevertkin             if (ptr)
1469194ea909SVictor Perevertkin                 ptr = (uint8_t*)ptr + Vcb->csum_size;
1470c2c66affSColin Finck         }
1471c2c66affSColin Finck 
1472c2c66affSColin Finck         ExFreePool(sector);
1473c2c66affSColin Finck     }
1474c2c66affSColin Finck 
1475c2c66affSColin Finck     return STATUS_SUCCESS;
1476c2c66affSColin Finck }
1477c2c66affSColin Finck 
1478194ea909SVictor Perevertkin NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) void* csum,
1479318da0c1SPierre Schweitzer                    _In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read,
1480c2c66affSColin Finck                    _In_ ULONG priority) {
1481c2c66affSColin Finck     CHUNK_ITEM* ci;
1482c2c66affSColin Finck     CHUNK_ITEM_STRIPE* cis;
1483c2c66affSColin Finck     read_data_context context;
1484318da0c1SPierre Schweitzer     uint64_t type, offset, total_reading = 0;
1485c2c66affSColin Finck     NTSTATUS Status;
1486c2c66affSColin Finck     device** devices = NULL;
1487318da0c1SPierre Schweitzer     uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1488318da0c1SPierre Schweitzer     uint8_t* dummypage = NULL;
1489c2c66affSColin Finck     PMDL dummy_mdl = NULL;
1490318da0c1SPierre Schweitzer     bool need_to_wait;
1491318da0c1SPierre Schweitzer     uint64_t lockaddr, locklen;
1492c2c66affSColin Finck 
1493c2c66affSColin Finck     if (Vcb->log_to_phys_loaded) {
1494c2c66affSColin Finck         if (!c) {
1495c2c66affSColin Finck             c = get_chunk_from_address(Vcb, addr);
1496c2c66affSColin Finck 
1497c2c66affSColin Finck             if (!c) {
1498c2c66affSColin Finck                 ERR("get_chunk_from_address failed\n");
1499c2c66affSColin Finck                 return STATUS_INTERNAL_ERROR;
1500c2c66affSColin Finck             }
1501c2c66affSColin Finck         }
1502c2c66affSColin Finck 
1503c2c66affSColin Finck         ci = c->chunk_item;
1504c2c66affSColin Finck         offset = c->offset;
1505c2c66affSColin Finck         devices = c->devices;
1506c2c66affSColin Finck 
1507c2c66affSColin Finck         if (pc)
1508c2c66affSColin Finck             *pc = c;
1509c2c66affSColin Finck     } else {
1510c2c66affSColin Finck         LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1511c2c66affSColin Finck 
1512c2c66affSColin Finck         ci = NULL;
1513c2c66affSColin Finck 
1514c2c66affSColin Finck         c = NULL;
1515c2c66affSColin Finck         while (le != &Vcb->sys_chunks) {
1516c2c66affSColin Finck             sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1517c2c66affSColin Finck 
1518c2c66affSColin Finck             if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1519c2c66affSColin Finck                 CHUNK_ITEM* chunk_item = sc->data;
1520c2c66affSColin Finck 
1521c2c66affSColin Finck                 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1522c2c66affSColin Finck                     ci = chunk_item;
1523c2c66affSColin Finck                     offset = sc->key.offset;
1524c2c66affSColin Finck                     cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1525c2c66affSColin Finck 
1526318da0c1SPierre Schweitzer                     devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1527c2c66affSColin Finck                     if (!devices) {
1528c2c66affSColin Finck                         ERR("out of memory\n");
1529c2c66affSColin Finck                         return STATUS_INSUFFICIENT_RESOURCES;
1530c2c66affSColin Finck                     }
1531c2c66affSColin Finck 
1532c2c66affSColin Finck                     for (i = 0; i < ci->num_stripes; i++) {
1533c2c66affSColin Finck                         devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1534c2c66affSColin Finck                     }
1535c2c66affSColin Finck 
1536c2c66affSColin Finck                     break;
1537c2c66affSColin Finck                 }
1538c2c66affSColin Finck             }
1539c2c66affSColin Finck 
1540c2c66affSColin Finck             le = le->Flink;
1541c2c66affSColin Finck         }
1542c2c66affSColin Finck 
1543c2c66affSColin Finck         if (!ci) {
1544318da0c1SPierre Schweitzer             ERR("could not find chunk for %I64x in bootstrap\n", addr);
1545c2c66affSColin Finck             return STATUS_INTERNAL_ERROR;
1546c2c66affSColin Finck         }
1547c2c66affSColin Finck 
1548c2c66affSColin Finck         if (pc)
1549c2c66affSColin Finck             *pc = NULL;
1550c2c66affSColin Finck     }
1551c2c66affSColin Finck 
1552c2c66affSColin Finck     if (ci->type & BLOCK_FLAG_DUPLICATE) {
1553c2c66affSColin Finck         type = BLOCK_FLAG_DUPLICATE;
1554c2c66affSColin Finck         allowed_missing = ci->num_stripes - 1;
1555c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID0) {
1556c2c66affSColin Finck         type = BLOCK_FLAG_RAID0;
1557c2c66affSColin Finck         allowed_missing = 0;
1558c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID1) {
1559c2c66affSColin Finck         type = BLOCK_FLAG_DUPLICATE;
1560c2c66affSColin Finck         allowed_missing = 1;
1561c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID10) {
1562c2c66affSColin Finck         type = BLOCK_FLAG_RAID10;
1563c2c66affSColin Finck         allowed_missing = 1;
1564c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID5) {
1565c2c66affSColin Finck         type = BLOCK_FLAG_RAID5;
1566c2c66affSColin Finck         allowed_missing = 1;
1567c2c66affSColin Finck     } else if (ci->type & BLOCK_FLAG_RAID6) {
1568c2c66affSColin Finck         type = BLOCK_FLAG_RAID6;
1569c2c66affSColin Finck         allowed_missing = 2;
1570194ea909SVictor Perevertkin     } else if (ci->type & BLOCK_FLAG_RAID1C3) {
1571194ea909SVictor Perevertkin         type = BLOCK_FLAG_DUPLICATE;
1572194ea909SVictor Perevertkin         allowed_missing = 2;
1573194ea909SVictor Perevertkin     } else if (ci->type & BLOCK_FLAG_RAID1C4) {
1574194ea909SVictor Perevertkin         type = BLOCK_FLAG_DUPLICATE;
1575194ea909SVictor Perevertkin         allowed_missing = 3;
1576c2c66affSColin Finck     } else { // SINGLE
1577c2c66affSColin Finck         type = BLOCK_FLAG_DUPLICATE;
1578c2c66affSColin Finck         allowed_missing = 0;
1579c2c66affSColin Finck     }
1580c2c66affSColin Finck 
1581c2c66affSColin Finck     cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1582c2c66affSColin Finck 
1583c2c66affSColin Finck     RtlZeroMemory(&context, sizeof(read_data_context));
1584318da0c1SPierre Schweitzer     KeInitializeEvent(&context.Event, NotificationEvent, false);
1585c2c66affSColin Finck 
1586c2c66affSColin Finck     context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1587c2c66affSColin Finck     if (!context.stripes) {
1588c2c66affSColin Finck         ERR("out of memory\n");
1589c2c66affSColin Finck         return STATUS_INSUFFICIENT_RESOURCES;
1590c2c66affSColin Finck     }
1591c2c66affSColin Finck 
1592c2c66affSColin Finck     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1593c2c66affSColin Finck         get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1594c2c66affSColin Finck         chunk_lock_range(Vcb, c, lockaddr, locklen);
1595c2c66affSColin Finck     }
1596c2c66affSColin Finck 
1597c2c66affSColin Finck     RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1598c2c66affSColin Finck 
1599c2c66affSColin Finck     context.buflen = length;
1600c2c66affSColin Finck     context.num_stripes = ci->num_stripes;
1601c2c66affSColin Finck     context.stripes_left = context.num_stripes;
1602c2c66affSColin Finck     context.sector_size = Vcb->superblock.sector_size;
1603c2c66affSColin Finck     context.csum = csum;
1604c2c66affSColin Finck     context.tree = is_tree;
1605c2c66affSColin Finck     context.type = type;
1606c2c66affSColin Finck 
1607c2c66affSColin Finck     if (type == BLOCK_FLAG_RAID0) {
1608318da0c1SPierre Schweitzer         uint64_t startoff, endoff;
1609318da0c1SPierre Schweitzer         uint16_t endoffstripe, stripe;
1610318da0c1SPierre Schweitzer         uint32_t *stripeoff, pos;
1611c2c66affSColin Finck         PMDL master_mdl;
1612c2c66affSColin Finck         PFN_NUMBER* pfns;
1613c2c66affSColin Finck 
1614c2c66affSColin Finck         // FIXME - test this still works if page size isn't the same as sector size
1615c2c66affSColin Finck 
1616c2c66affSColin Finck         // This relies on the fact that MDLs are followed in memory by the page frame numbers,
1617c2c66affSColin Finck         // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1618c2c66affSColin Finck         // data for you without doing a memcpy yourself.
1619c2c66affSColin Finck         // MDLs are officially opaque, so this might very well break in future versions of Windows.
1620c2c66affSColin Finck 
1621c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1622c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1623c2c66affSColin Finck 
1624c2c66affSColin Finck         if (file_read) {
1625c2c66affSColin Finck             // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1626c2c66affSColin Finck             // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1627c2c66affSColin Finck             // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1628c2c66affSColin Finck 
1629c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1630c2c66affSColin Finck 
1631c2c66affSColin Finck             if (!context.va) {
1632c2c66affSColin Finck                 ERR("out of memory\n");
1633c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1634c2c66affSColin Finck                 goto exit;
1635c2c66affSColin Finck             }
1636c2c66affSColin Finck         } else
1637c2c66affSColin Finck             context.va = buf;
1638c2c66affSColin Finck 
1639318da0c1SPierre Schweitzer         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1640c2c66affSColin Finck         if (!master_mdl) {
1641c2c66affSColin Finck             ERR("out of memory\n");
1642c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1643c2c66affSColin Finck             goto exit;
1644c2c66affSColin Finck         }
1645c2c66affSColin Finck 
1646c2c66affSColin Finck         Status = STATUS_SUCCESS;
1647c2c66affSColin Finck 
1648c2c66affSColin Finck         _SEH2_TRY {
1649c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1650c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1651c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
1652c2c66affSColin Finck         } _SEH2_END;
1653c2c66affSColin Finck 
1654c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
1655194ea909SVictor Perevertkin             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1656c2c66affSColin Finck             IoFreeMdl(master_mdl);
1657c2c66affSColin Finck             goto exit;
1658c2c66affSColin Finck         }
1659c2c66affSColin Finck 
1660c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
1661c2c66affSColin Finck 
1662c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i++) {
1663c2c66affSColin Finck             if (startoffstripe > i)
1664c2c66affSColin Finck                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1665c2c66affSColin Finck             else if (startoffstripe == i)
1666c2c66affSColin Finck                 context.stripes[i].stripestart = startoff;
1667c2c66affSColin Finck             else
1668c2c66affSColin Finck                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1669c2c66affSColin Finck 
1670c2c66affSColin Finck             if (endoffstripe > i)
1671c2c66affSColin Finck                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1672c2c66affSColin Finck             else if (endoffstripe == i)
1673c2c66affSColin Finck                 context.stripes[i].stripeend = endoff + 1;
1674c2c66affSColin Finck             else
1675c2c66affSColin Finck                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1676c2c66affSColin Finck 
1677c2c66affSColin Finck             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1678318da0c1SPierre Schweitzer                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1679c2c66affSColin Finck 
1680c2c66affSColin Finck                 if (!context.stripes[i].mdl) {
1681c2c66affSColin Finck                     ERR("IoAllocateMdl failed\n");
1682eb7fbc25SPierre Schweitzer                     MmUnlockPages(master_mdl);
1683eb7fbc25SPierre Schweitzer                     IoFreeMdl(master_mdl);
1684c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
1685c2c66affSColin Finck                     goto exit;
1686c2c66affSColin Finck                 }
1687c2c66affSColin Finck             }
1688c2c66affSColin Finck         }
1689c2c66affSColin Finck 
1690318da0c1SPierre Schweitzer         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1691c2c66affSColin Finck         if (!stripeoff) {
1692c2c66affSColin Finck             ERR("out of memory\n");
1693eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
1694eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
1695c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1696c2c66affSColin Finck             goto exit;
1697c2c66affSColin Finck         }
1698c2c66affSColin Finck 
1699318da0c1SPierre Schweitzer         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1700c2c66affSColin Finck 
1701c2c66affSColin Finck         pos = 0;
1702c2c66affSColin Finck         stripe = startoffstripe;
1703c2c66affSColin Finck         while (pos < length) {
1704c2c66affSColin Finck             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1705c2c66affSColin Finck 
1706c2c66affSColin Finck             if (pos == 0) {
1707318da0c1SPierre Schweitzer                 uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1708c2c66affSColin Finck 
1709c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1710c2c66affSColin Finck 
1711c2c66affSColin Finck                 stripeoff[stripe] += readlen;
1712c2c66affSColin Finck                 pos += readlen;
1713c2c66affSColin Finck             } else if (length - pos < ci->stripe_length) {
1714c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1715c2c66affSColin Finck 
1716c2c66affSColin Finck                 pos = length;
1717c2c66affSColin Finck             } else {
1718c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1719c2c66affSColin Finck 
1720318da0c1SPierre Schweitzer                 stripeoff[stripe] += (uint32_t)ci->stripe_length;
1721318da0c1SPierre Schweitzer                 pos += (uint32_t)ci->stripe_length;
1722c2c66affSColin Finck             }
1723c2c66affSColin Finck 
1724c2c66affSColin Finck             stripe = (stripe + 1) % ci->num_stripes;
1725c2c66affSColin Finck         }
1726c2c66affSColin Finck 
1727c2c66affSColin Finck         MmUnlockPages(master_mdl);
1728c2c66affSColin Finck         IoFreeMdl(master_mdl);
1729c2c66affSColin Finck 
1730c2c66affSColin Finck         ExFreePool(stripeoff);
1731c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID10) {
1732318da0c1SPierre Schweitzer         uint64_t startoff, endoff;
1733318da0c1SPierre Schweitzer         uint16_t endoffstripe, j, stripe;
1734c2c66affSColin Finck         ULONG orig_ls;
1735c2c66affSColin Finck         PMDL master_mdl;
1736c2c66affSColin Finck         PFN_NUMBER* pfns;
1737318da0c1SPierre Schweitzer         uint32_t* stripeoff, pos;
1738c2c66affSColin Finck         read_data_stripe** stripes;
1739c2c66affSColin Finck 
1740c2c66affSColin Finck         if (c)
1741c2c66affSColin Finck             orig_ls = c->last_stripe;
1742c2c66affSColin Finck         else
1743c2c66affSColin Finck             orig_ls = 0;
1744c2c66affSColin Finck 
1745c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1746c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1747c2c66affSColin Finck 
1748c2c66affSColin Finck         if ((ci->num_stripes % ci->sub_stripes) != 0) {
1749318da0c1SPierre Schweitzer             ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1750c2c66affSColin Finck             Status = STATUS_INTERNAL_ERROR;
1751c2c66affSColin Finck             goto exit;
1752c2c66affSColin Finck         }
1753c2c66affSColin Finck 
1754c2c66affSColin Finck         if (file_read) {
1755c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1756c2c66affSColin Finck 
1757c2c66affSColin Finck             if (!context.va) {
1758c2c66affSColin Finck                 ERR("out of memory\n");
1759c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1760c2c66affSColin Finck                 goto exit;
1761c2c66affSColin Finck             }
1762c2c66affSColin Finck         } else
1763c2c66affSColin Finck             context.va = buf;
1764c2c66affSColin Finck 
1765174dfab6SVincent Franchomme         context.firstoff = (uint16_t)((startoff % ci->stripe_length) >> Vcb->sector_shift);
1766c2c66affSColin Finck         context.startoffstripe = startoffstripe;
1767174dfab6SVincent Franchomme         context.sectors_per_stripe = (uint16_t)(ci->stripe_length >> Vcb->sector_shift);
1768c2c66affSColin Finck 
1769c2c66affSColin Finck         startoffstripe *= ci->sub_stripes;
1770c2c66affSColin Finck         endoffstripe *= ci->sub_stripes;
1771c2c66affSColin Finck 
1772c2c66affSColin Finck         if (c)
1773c2c66affSColin Finck             c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1774c2c66affSColin Finck 
1775318da0c1SPierre Schweitzer         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1776c2c66affSColin Finck         if (!master_mdl) {
1777c2c66affSColin Finck             ERR("out of memory\n");
1778c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1779c2c66affSColin Finck             goto exit;
1780c2c66affSColin Finck         }
1781c2c66affSColin Finck 
1782c2c66affSColin Finck         Status = STATUS_SUCCESS;
1783c2c66affSColin Finck 
1784c2c66affSColin Finck         _SEH2_TRY {
1785c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1786c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1787c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
1788c2c66affSColin Finck         } _SEH2_END;
1789c2c66affSColin Finck 
1790c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
1791194ea909SVictor Perevertkin             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1792c2c66affSColin Finck             IoFreeMdl(master_mdl);
1793c2c66affSColin Finck             goto exit;
1794c2c66affSColin Finck         }
1795c2c66affSColin Finck 
1796c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
1797c2c66affSColin Finck 
1798c2c66affSColin Finck         stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1799c2c66affSColin Finck         if (!stripes) {
1800c2c66affSColin Finck             ERR("out of memory\n");
1801eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
1802eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
1803c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1804c2c66affSColin Finck             goto exit;
1805c2c66affSColin Finck         }
1806c2c66affSColin Finck 
1807c2c66affSColin Finck         RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1808c2c66affSColin Finck 
1809c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1810318da0c1SPierre Schweitzer             uint64_t sstart, send;
1811318da0c1SPierre Schweitzer             bool stripeset = false;
1812c2c66affSColin Finck 
1813c2c66affSColin Finck             if (startoffstripe > i)
1814c2c66affSColin Finck                 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1815c2c66affSColin Finck             else if (startoffstripe == i)
1816c2c66affSColin Finck                 sstart = startoff;
1817c2c66affSColin Finck             else
1818c2c66affSColin Finck                 sstart = startoff - (startoff % ci->stripe_length);
1819c2c66affSColin Finck 
1820c2c66affSColin Finck             if (endoffstripe > i)
1821c2c66affSColin Finck                 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1822c2c66affSColin Finck             else if (endoffstripe == i)
1823c2c66affSColin Finck                 send = endoff + 1;
1824c2c66affSColin Finck             else
1825c2c66affSColin Finck                 send = endoff - (endoff % ci->stripe_length);
1826c2c66affSColin Finck 
1827c2c66affSColin Finck             for (j = 0; j < ci->sub_stripes; j++) {
1828c2c66affSColin Finck                 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1829c2c66affSColin Finck                     context.stripes[i+j].stripestart = sstart;
1830c2c66affSColin Finck                     context.stripes[i+j].stripeend = send;
1831c2c66affSColin Finck                     stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1832c2c66affSColin Finck 
1833c2c66affSColin Finck                     if (sstart != send) {
1834318da0c1SPierre Schweitzer                         context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1835c2c66affSColin Finck 
1836c2c66affSColin Finck                         if (!context.stripes[i+j].mdl) {
1837c2c66affSColin Finck                             ERR("IoAllocateMdl failed\n");
1838eb7fbc25SPierre Schweitzer                             MmUnlockPages(master_mdl);
1839eb7fbc25SPierre Schweitzer                             IoFreeMdl(master_mdl);
1840c2c66affSColin Finck                             Status = STATUS_INSUFFICIENT_RESOURCES;
1841c2c66affSColin Finck                             goto exit;
1842c2c66affSColin Finck                         }
1843c2c66affSColin Finck                     }
1844c2c66affSColin Finck 
1845318da0c1SPierre Schweitzer                     stripeset = true;
1846c2c66affSColin Finck                 } else
1847c2c66affSColin Finck                     context.stripes[i+j].status = ReadDataStatus_Skip;
1848c2c66affSColin Finck             }
1849c2c66affSColin Finck 
1850c2c66affSColin Finck             if (!stripeset) {
1851c2c66affSColin Finck                 for (j = 0; j < ci->sub_stripes; j++) {
1852c2c66affSColin Finck                     if (devices[i+j] && devices[i+j]->devobj) {
1853c2c66affSColin Finck                         context.stripes[i+j].stripestart = sstart;
1854c2c66affSColin Finck                         context.stripes[i+j].stripeend = send;
1855c2c66affSColin Finck                         context.stripes[i+j].status = ReadDataStatus_Pending;
1856c2c66affSColin Finck                         stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1857c2c66affSColin Finck 
1858c2c66affSColin Finck                         if (sstart != send) {
1859318da0c1SPierre Schweitzer                             context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1860c2c66affSColin Finck 
1861c2c66affSColin Finck                             if (!context.stripes[i+j].mdl) {
1862c2c66affSColin Finck                                 ERR("IoAllocateMdl failed\n");
1863eb7fbc25SPierre Schweitzer                                 MmUnlockPages(master_mdl);
1864eb7fbc25SPierre Schweitzer                                 IoFreeMdl(master_mdl);
1865c2c66affSColin Finck                                 Status = STATUS_INSUFFICIENT_RESOURCES;
1866c2c66affSColin Finck                                 goto exit;
1867c2c66affSColin Finck                             }
1868c2c66affSColin Finck                         }
1869c2c66affSColin Finck 
1870318da0c1SPierre Schweitzer                         stripeset = true;
1871c2c66affSColin Finck                         break;
1872c2c66affSColin Finck                     }
1873c2c66affSColin Finck                 }
1874c2c66affSColin Finck 
1875c2c66affSColin Finck                 if (!stripeset) {
1876c2c66affSColin Finck                     ERR("could not find stripe to read\n");
1877c2c66affSColin Finck                     Status = STATUS_DEVICE_NOT_READY;
1878c2c66affSColin Finck                     goto exit;
1879c2c66affSColin Finck                 }
1880c2c66affSColin Finck             }
1881c2c66affSColin Finck         }
1882c2c66affSColin Finck 
1883318da0c1SPierre Schweitzer         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1884c2c66affSColin Finck         if (!stripeoff) {
1885c2c66affSColin Finck             ERR("out of memory\n");
1886eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
1887eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
1888c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
1889c2c66affSColin Finck             goto exit;
1890c2c66affSColin Finck         }
1891c2c66affSColin Finck 
1892318da0c1SPierre Schweitzer         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1893c2c66affSColin Finck 
1894c2c66affSColin Finck         pos = 0;
1895c2c66affSColin Finck         stripe = startoffstripe / ci->sub_stripes;
1896c2c66affSColin Finck         while (pos < length) {
1897c2c66affSColin Finck             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1898c2c66affSColin Finck 
1899c2c66affSColin Finck             if (pos == 0) {
1900318da0c1SPierre Schweitzer                 uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1901c2c66affSColin Finck                                              ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1902c2c66affSColin Finck 
1903c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1904c2c66affSColin Finck 
1905c2c66affSColin Finck                 stripeoff[stripe] += readlen;
1906c2c66affSColin Finck                 pos += readlen;
1907c2c66affSColin Finck             } else if (length - pos < ci->stripe_length) {
1908c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1909c2c66affSColin Finck 
1910c2c66affSColin Finck                 pos = length;
1911c2c66affSColin Finck             } else {
1912c2c66affSColin Finck                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1913c2c66affSColin Finck 
1914c2c66affSColin Finck                 stripeoff[stripe] += (ULONG)ci->stripe_length;
1915c2c66affSColin Finck                 pos += (ULONG)ci->stripe_length;
1916c2c66affSColin Finck             }
1917c2c66affSColin Finck 
1918c2c66affSColin Finck             stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1919c2c66affSColin Finck         }
1920c2c66affSColin Finck 
1921c2c66affSColin Finck         MmUnlockPages(master_mdl);
1922c2c66affSColin Finck         IoFreeMdl(master_mdl);
1923c2c66affSColin Finck 
1924c2c66affSColin Finck         ExFreePool(stripeoff);
1925c2c66affSColin Finck         ExFreePool(stripes);
1926c2c66affSColin Finck     } else if (type == BLOCK_FLAG_DUPLICATE) {
1927318da0c1SPierre Schweitzer         uint64_t orig_ls;
1928c2c66affSColin Finck 
1929c2c66affSColin Finck         if (c)
1930c2c66affSColin Finck             orig_ls = i = c->last_stripe;
1931c2c66affSColin Finck         else
1932c2c66affSColin Finck             orig_ls = i = 0;
1933c2c66affSColin Finck 
1934c2c66affSColin Finck         while (!devices[i] || !devices[i]->devobj) {
1935c2c66affSColin Finck             i = (i + 1) % ci->num_stripes;
1936c2c66affSColin Finck 
1937c2c66affSColin Finck             if (i == orig_ls) {
1938c2c66affSColin Finck                 ERR("no devices available to service request\n");
1939c2c66affSColin Finck                 Status = STATUS_DEVICE_NOT_READY;
1940c2c66affSColin Finck                 goto exit;
1941c2c66affSColin Finck             }
1942c2c66affSColin Finck         }
1943c2c66affSColin Finck 
1944c2c66affSColin Finck         if (c)
1945c2c66affSColin Finck             c->last_stripe = (i + 1) % ci->num_stripes;
1946c2c66affSColin Finck 
1947c2c66affSColin Finck         context.stripes[i].stripestart = addr - offset;
1948c2c66affSColin Finck         context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1949c2c66affSColin Finck 
1950c2c66affSColin Finck         if (file_read) {
1951c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1952c2c66affSColin Finck 
1953c2c66affSColin Finck             if (!context.va) {
1954c2c66affSColin Finck                 ERR("out of memory\n");
1955c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1956c2c66affSColin Finck                 goto exit;
1957c2c66affSColin Finck             }
1958c2c66affSColin Finck 
1959318da0c1SPierre Schweitzer             context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1960c2c66affSColin Finck             if (!context.stripes[i].mdl) {
1961c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
1962c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1963c2c66affSColin Finck                 goto exit;
1964c2c66affSColin Finck             }
1965c2c66affSColin Finck 
1966c2c66affSColin Finck             MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1967c2c66affSColin Finck         } else {
1968318da0c1SPierre Schweitzer             context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
1969c2c66affSColin Finck 
1970c2c66affSColin Finck             if (!context.stripes[i].mdl) {
1971c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
1972c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
1973c2c66affSColin Finck                 goto exit;
1974c2c66affSColin Finck             }
1975c2c66affSColin Finck 
1976c2c66affSColin Finck             Status = STATUS_SUCCESS;
1977c2c66affSColin Finck 
1978c2c66affSColin Finck             _SEH2_TRY {
1979c2c66affSColin Finck                 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)1980c2c66affSColin Finck             } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1981c2c66affSColin Finck                 Status = _SEH2_GetExceptionCode();
1982c2c66affSColin Finck             } _SEH2_END;
1983c2c66affSColin Finck 
1984c2c66affSColin Finck             if (!NT_SUCCESS(Status)) {
1985194ea909SVictor Perevertkin                 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1986c2c66affSColin Finck                 goto exit;
1987c2c66affSColin Finck             }
1988c2c66affSColin Finck         }
1989c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID5) {
1990318da0c1SPierre Schweitzer         uint64_t startoff, endoff;
1991318da0c1SPierre Schweitzer         uint16_t endoffstripe, parity;
1992318da0c1SPierre Schweitzer         uint32_t *stripeoff, pos;
1993c2c66affSColin Finck         PMDL master_mdl;
199406042735SVincent Franchomme         PFN_NUMBER *pfns, dummy = 0;
1995318da0c1SPierre Schweitzer         bool need_dummy = false;
1996c2c66affSColin Finck 
1997c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1998c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1999c2c66affSColin Finck 
2000c2c66affSColin Finck         if (file_read) {
2001c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2002c2c66affSColin Finck 
2003c2c66affSColin Finck             if (!context.va) {
2004c2c66affSColin Finck                 ERR("out of memory\n");
2005c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2006c2c66affSColin Finck                 goto exit;
2007c2c66affSColin Finck             }
2008c2c66affSColin Finck         } else
2009c2c66affSColin Finck             context.va = buf;
2010c2c66affSColin Finck 
2011318da0c1SPierre Schweitzer         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2012c2c66affSColin Finck         if (!master_mdl) {
2013c2c66affSColin Finck             ERR("out of memory\n");
2014c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
2015c2c66affSColin Finck             goto exit;
2016c2c66affSColin Finck         }
2017c2c66affSColin Finck 
2018c2c66affSColin Finck         Status = STATUS_SUCCESS;
2019c2c66affSColin Finck 
2020c2c66affSColin Finck         _SEH2_TRY {
2021c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)2022c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2023c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
2024c2c66affSColin Finck         } _SEH2_END;
2025c2c66affSColin Finck 
2026c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2027194ea909SVictor Perevertkin             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2028c2c66affSColin Finck             IoFreeMdl(master_mdl);
2029c2c66affSColin Finck             goto exit;
2030c2c66affSColin Finck         }
2031c2c66affSColin Finck 
2032c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
2033c2c66affSColin Finck 
2034c2c66affSColin Finck         pos = 0;
2035c2c66affSColin Finck         while (pos < length) {
2036c2c66affSColin Finck             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2037c2c66affSColin Finck 
2038c2c66affSColin Finck             if (pos == 0) {
2039318da0c1SPierre Schweitzer                 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2040c2c66affSColin Finck                 ULONG skip, readlen;
2041c2c66affSColin Finck 
2042c2c66affSColin Finck                 i = startoffstripe;
2043c2c66affSColin Finck                 while (stripe != parity) {
2044c2c66affSColin Finck                     if (i == startoffstripe) {
2045c2c66affSColin Finck                         readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2046c2c66affSColin Finck 
2047c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff;
2048c2c66affSColin Finck                         context.stripes[stripe].stripeend = startoff + readlen;
2049c2c66affSColin Finck 
2050c2c66affSColin Finck                         pos += readlen;
2051c2c66affSColin Finck 
2052c2c66affSColin Finck                         if (pos == length)
2053c2c66affSColin Finck                             break;
2054c2c66affSColin Finck                     } else {
2055c2c66affSColin Finck                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2056c2c66affSColin Finck 
2057c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2058c2c66affSColin Finck                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2059c2c66affSColin Finck 
2060c2c66affSColin Finck                         pos += readlen;
2061c2c66affSColin Finck 
2062c2c66affSColin Finck                         if (pos == length)
2063c2c66affSColin Finck                             break;
2064c2c66affSColin Finck                     }
2065c2c66affSColin Finck 
2066c2c66affSColin Finck                     i++;
2067c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2068c2c66affSColin Finck                 }
2069c2c66affSColin Finck 
2070c2c66affSColin Finck                 if (pos == length)
2071c2c66affSColin Finck                     break;
2072c2c66affSColin Finck 
2073c2c66affSColin Finck                 for (i = 0; i < startoffstripe; i++) {
2074318da0c1SPierre Schweitzer                     uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2075c2c66affSColin Finck 
2076c2c66affSColin Finck                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2077c2c66affSColin Finck                 }
2078c2c66affSColin Finck 
2079c2c66affSColin Finck                 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2080c2c66affSColin Finck 
2081c2c66affSColin Finck                 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2082c2c66affSColin Finck                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2083c2c66affSColin Finck 
2084c2c66affSColin Finck                     for (i = 0; i < ci->num_stripes; i++) {
2085c2c66affSColin Finck                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2086c2c66affSColin Finck                     }
2087c2c66affSColin Finck 
2088318da0c1SPierre Schweitzer                     pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2089318da0c1SPierre Schweitzer                     need_dummy = true;
2090c2c66affSColin Finck                 }
2091c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2092c2c66affSColin Finck                 for (i = 0; i < ci->num_stripes; i++) {
2093c2c66affSColin Finck                     context.stripes[i].stripeend += ci->stripe_length;
2094c2c66affSColin Finck                 }
2095c2c66affSColin Finck 
2096318da0c1SPierre Schweitzer                 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2097318da0c1SPierre Schweitzer                 need_dummy = true;
2098c2c66affSColin Finck             } else {
2099318da0c1SPierre Schweitzer                 uint16_t stripe = (parity + 1) % ci->num_stripes;
2100c2c66affSColin Finck 
2101c2c66affSColin Finck                 i = 0;
2102c2c66affSColin Finck                 while (stripe != parity) {
2103c2c66affSColin Finck                     if (endoffstripe == i) {
2104c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff + 1;
2105c2c66affSColin Finck                         break;
2106c2c66affSColin Finck                     } else if (endoffstripe > i)
2107c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2108c2c66affSColin Finck 
2109c2c66affSColin Finck                     i++;
2110c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2111c2c66affSColin Finck                 }
2112c2c66affSColin Finck 
2113c2c66affSColin Finck                 break;
2114c2c66affSColin Finck             }
2115c2c66affSColin Finck         }
2116c2c66affSColin Finck 
2117c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i++) {
2118c2c66affSColin Finck             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2119c2c66affSColin Finck                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2120318da0c1SPierre Schweitzer                                                        false, false, NULL);
2121c2c66affSColin Finck 
2122c2c66affSColin Finck                 if (!context.stripes[i].mdl) {
2123c2c66affSColin Finck                     ERR("IoAllocateMdl failed\n");
2124eb7fbc25SPierre Schweitzer                     MmUnlockPages(master_mdl);
2125eb7fbc25SPierre Schweitzer                     IoFreeMdl(master_mdl);
2126c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2127c2c66affSColin Finck                     goto exit;
2128c2c66affSColin Finck                 }
2129c2c66affSColin Finck             }
2130c2c66affSColin Finck         }
2131c2c66affSColin Finck 
2132c2c66affSColin Finck         if (need_dummy) {
2133c2c66affSColin Finck             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2134c2c66affSColin Finck             if (!dummypage) {
2135c2c66affSColin Finck                 ERR("out of memory\n");
2136eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2137eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2138c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2139c2c66affSColin Finck                 goto exit;
2140c2c66affSColin Finck             }
2141c2c66affSColin Finck 
2142318da0c1SPierre Schweitzer             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2143c2c66affSColin Finck             if (!dummy_mdl) {
2144c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
2145eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2146eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2147c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2148c2c66affSColin Finck                 goto exit;
2149c2c66affSColin Finck             }
2150c2c66affSColin Finck 
2151c2c66affSColin Finck             MmBuildMdlForNonPagedPool(dummy_mdl);
2152c2c66affSColin Finck 
2153c2c66affSColin Finck             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2154c2c66affSColin Finck         }
2155c2c66affSColin Finck 
2156318da0c1SPierre Schweitzer         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2157c2c66affSColin Finck         if (!stripeoff) {
2158c2c66affSColin Finck             ERR("out of memory\n");
2159eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
2160eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
2161c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
2162c2c66affSColin Finck             goto exit;
2163c2c66affSColin Finck         }
2164c2c66affSColin Finck 
2165318da0c1SPierre Schweitzer         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2166c2c66affSColin Finck 
2167c2c66affSColin Finck         pos = 0;
2168c2c66affSColin Finck 
2169c2c66affSColin Finck         while (pos < length) {
2170c2c66affSColin Finck             PFN_NUMBER* stripe_pfns;
2171c2c66affSColin Finck 
2172c2c66affSColin Finck             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2173c2c66affSColin Finck 
2174c2c66affSColin Finck             if (pos == 0) {
2175318da0c1SPierre Schweitzer                 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2176318da0c1SPierre Schweitzer                 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2177c2c66affSColin Finck                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2178c2c66affSColin Finck 
2179c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2180c2c66affSColin Finck 
2181c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2182c2c66affSColin Finck 
2183c2c66affSColin Finck                 stripeoff[stripe] = readlen;
2184c2c66affSColin Finck                 pos += readlen;
2185c2c66affSColin Finck 
2186c2c66affSColin Finck                 stripe = (stripe + 1) % ci->num_stripes;
2187c2c66affSColin Finck 
2188c2c66affSColin Finck                 while (stripe != parity) {
2189c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2190318da0c1SPierre Schweitzer                     readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2191c2c66affSColin Finck 
2192c2c66affSColin Finck                     if (readlen == 0)
2193c2c66affSColin Finck                         break;
2194c2c66affSColin Finck 
2195c2c66affSColin Finck                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2196c2c66affSColin Finck 
2197c2c66affSColin Finck                     stripeoff[stripe] = readlen;
2198c2c66affSColin Finck                     pos += readlen;
2199c2c66affSColin Finck 
2200c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2201c2c66affSColin Finck                 }
2202c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2203318da0c1SPierre Schweitzer                 uint16_t stripe = (parity + 1) % ci->num_stripes;
2204c2c66affSColin Finck                 ULONG k;
2205c2c66affSColin Finck 
2206c2c66affSColin Finck                 while (stripe != parity) {
2207c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2208c2c66affSColin Finck 
2209c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2210c2c66affSColin Finck 
2211318da0c1SPierre Schweitzer                     stripeoff[stripe] += (uint32_t)ci->stripe_length;
2212318da0c1SPierre Schweitzer                     pos += (uint32_t)ci->stripe_length;
2213c2c66affSColin Finck 
2214c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2215c2c66affSColin Finck                 }
2216c2c66affSColin Finck 
2217c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2218c2c66affSColin Finck 
2219c2c66affSColin Finck                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2220c2c66affSColin Finck                     stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2221c2c66affSColin Finck                     stripeoff[parity] += PAGE_SIZE;
2222c2c66affSColin Finck                 }
2223c2c66affSColin Finck             } else {
2224318da0c1SPierre Schweitzer                 uint16_t stripe = (parity + 1) % ci->num_stripes;
2225318da0c1SPierre Schweitzer                 uint32_t readlen;
2226c2c66affSColin Finck 
2227c2c66affSColin Finck                 while (pos < length) {
2228c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2229c2c66affSColin Finck                     readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2230c2c66affSColin Finck 
2231c2c66affSColin Finck                     if (readlen == 0)
2232c2c66affSColin Finck                         break;
2233c2c66affSColin Finck 
2234c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2235c2c66affSColin Finck 
2236c2c66affSColin Finck                     stripeoff[stripe] += readlen;
2237c2c66affSColin Finck                     pos += readlen;
2238c2c66affSColin Finck 
2239c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2240c2c66affSColin Finck                 }
2241c2c66affSColin Finck             }
2242c2c66affSColin Finck         }
2243c2c66affSColin Finck 
2244c2c66affSColin Finck         MmUnlockPages(master_mdl);
2245c2c66affSColin Finck         IoFreeMdl(master_mdl);
2246c2c66affSColin Finck 
2247c2c66affSColin Finck         ExFreePool(stripeoff);
2248c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID6) {
2249318da0c1SPierre Schweitzer         uint64_t startoff, endoff;
2250318da0c1SPierre Schweitzer         uint16_t endoffstripe, parity1;
2251318da0c1SPierre Schweitzer         uint32_t *stripeoff, pos;
2252c2c66affSColin Finck         PMDL master_mdl;
225306042735SVincent Franchomme         PFN_NUMBER *pfns, dummy = 0;
2254318da0c1SPierre Schweitzer         bool need_dummy = false;
2255c2c66affSColin Finck 
2256c2c66affSColin Finck         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2257c2c66affSColin Finck         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2258c2c66affSColin Finck 
2259c2c66affSColin Finck         if (file_read) {
2260c2c66affSColin Finck             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2261c2c66affSColin Finck 
2262c2c66affSColin Finck             if (!context.va) {
2263c2c66affSColin Finck                 ERR("out of memory\n");
2264c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2265c2c66affSColin Finck                 goto exit;
2266c2c66affSColin Finck             }
2267c2c66affSColin Finck         } else
2268c2c66affSColin Finck             context.va = buf;
2269c2c66affSColin Finck 
2270318da0c1SPierre Schweitzer         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2271c2c66affSColin Finck         if (!master_mdl) {
2272c2c66affSColin Finck             ERR("out of memory\n");
2273c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
2274c2c66affSColin Finck             goto exit;
2275c2c66affSColin Finck         }
2276c2c66affSColin Finck 
2277c2c66affSColin Finck         Status = STATUS_SUCCESS;
2278c2c66affSColin Finck 
2279c2c66affSColin Finck         _SEH2_TRY {
2280c2c66affSColin Finck             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)2281c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2282c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
2283c2c66affSColin Finck         } _SEH2_END;
2284c2c66affSColin Finck 
2285c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2286194ea909SVictor Perevertkin             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2287c2c66affSColin Finck             IoFreeMdl(master_mdl);
2288c2c66affSColin Finck             goto exit;
2289c2c66affSColin Finck         }
2290c2c66affSColin Finck 
2291c2c66affSColin Finck         pfns = (PFN_NUMBER*)(master_mdl + 1);
2292c2c66affSColin Finck 
2293c2c66affSColin Finck         pos = 0;
2294c2c66affSColin Finck         while (pos < length) {
2295c2c66affSColin Finck             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2296c2c66affSColin Finck 
2297c2c66affSColin Finck             if (pos == 0) {
2298318da0c1SPierre Schweitzer                 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2299c2c66affSColin Finck                 ULONG skip, readlen;
2300c2c66affSColin Finck 
2301c2c66affSColin Finck                 i = startoffstripe;
2302c2c66affSColin Finck                 while (stripe != parity1) {
2303c2c66affSColin Finck                     if (i == startoffstripe) {
2304c2c66affSColin Finck                         readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2305c2c66affSColin Finck 
2306c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff;
2307c2c66affSColin Finck                         context.stripes[stripe].stripeend = startoff + readlen;
2308c2c66affSColin Finck 
2309c2c66affSColin Finck                         pos += readlen;
2310c2c66affSColin Finck 
2311c2c66affSColin Finck                         if (pos == length)
2312c2c66affSColin Finck                             break;
2313c2c66affSColin Finck                     } else {
2314c2c66affSColin Finck                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2315c2c66affSColin Finck 
2316c2c66affSColin Finck                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2317c2c66affSColin Finck                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2318c2c66affSColin Finck 
2319c2c66affSColin Finck                         pos += readlen;
2320c2c66affSColin Finck 
2321c2c66affSColin Finck                         if (pos == length)
2322c2c66affSColin Finck                             break;
2323c2c66affSColin Finck                     }
2324c2c66affSColin Finck 
2325c2c66affSColin Finck                     i++;
2326c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2327c2c66affSColin Finck                 }
2328c2c66affSColin Finck 
2329c2c66affSColin Finck                 if (pos == length)
2330c2c66affSColin Finck                     break;
2331c2c66affSColin Finck 
2332c2c66affSColin Finck                 for (i = 0; i < startoffstripe; i++) {
2333318da0c1SPierre Schweitzer                     uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2334c2c66affSColin Finck 
2335c2c66affSColin Finck                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2336c2c66affSColin Finck                 }
2337c2c66affSColin Finck 
2338c2c66affSColin Finck                 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2339c2c66affSColin Finck 
2340c2c66affSColin Finck                 parity2 = (parity1 + 1) % ci->num_stripes;
2341c2c66affSColin Finck                 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2342c2c66affSColin Finck 
2343c2c66affSColin Finck                 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2344c2c66affSColin Finck                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2345c2c66affSColin Finck 
2346c2c66affSColin Finck                     for (i = 0; i < ci->num_stripes; i++) {
2347c2c66affSColin Finck                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2348c2c66affSColin Finck                     }
2349c2c66affSColin Finck 
2350318da0c1SPierre Schweitzer                     pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2351318da0c1SPierre Schweitzer                     need_dummy = true;
2352c2c66affSColin Finck                 }
2353c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2354c2c66affSColin Finck                 for (i = 0; i < ci->num_stripes; i++) {
2355c2c66affSColin Finck                     context.stripes[i].stripeend += ci->stripe_length;
2356c2c66affSColin Finck                 }
2357c2c66affSColin Finck 
2358318da0c1SPierre Schweitzer                 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2359318da0c1SPierre Schweitzer                 need_dummy = true;
2360c2c66affSColin Finck             } else {
2361318da0c1SPierre Schweitzer                 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2362c2c66affSColin Finck 
2363c2c66affSColin Finck                 i = 0;
2364c2c66affSColin Finck                 while (stripe != parity1) {
2365c2c66affSColin Finck                     if (endoffstripe == i) {
2366c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff + 1;
2367c2c66affSColin Finck                         break;
2368c2c66affSColin Finck                     } else if (endoffstripe > i)
2369c2c66affSColin Finck                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2370c2c66affSColin Finck 
2371c2c66affSColin Finck                     i++;
2372c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2373c2c66affSColin Finck                 }
2374c2c66affSColin Finck 
2375c2c66affSColin Finck                 break;
2376c2c66affSColin Finck             }
2377c2c66affSColin Finck         }
2378c2c66affSColin Finck 
2379c2c66affSColin Finck         for (i = 0; i < ci->num_stripes; i++) {
2380c2c66affSColin Finck             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2381318da0c1SPierre Schweitzer                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2382c2c66affSColin Finck 
2383c2c66affSColin Finck                 if (!context.stripes[i].mdl) {
2384c2c66affSColin Finck                     ERR("IoAllocateMdl failed\n");
2385eb7fbc25SPierre Schweitzer                     MmUnlockPages(master_mdl);
2386eb7fbc25SPierre Schweitzer                     IoFreeMdl(master_mdl);
2387c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2388c2c66affSColin Finck                     goto exit;
2389c2c66affSColin Finck                 }
2390c2c66affSColin Finck             }
2391c2c66affSColin Finck         }
2392c2c66affSColin Finck 
2393c2c66affSColin Finck         if (need_dummy) {
2394c2c66affSColin Finck             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2395c2c66affSColin Finck             if (!dummypage) {
2396c2c66affSColin Finck                 ERR("out of memory\n");
2397eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2398eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2399c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2400c2c66affSColin Finck                 goto exit;
2401c2c66affSColin Finck             }
2402c2c66affSColin Finck 
2403318da0c1SPierre Schweitzer             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2404c2c66affSColin Finck             if (!dummy_mdl) {
2405c2c66affSColin Finck                 ERR("IoAllocateMdl failed\n");
2406eb7fbc25SPierre Schweitzer                 MmUnlockPages(master_mdl);
2407eb7fbc25SPierre Schweitzer                 IoFreeMdl(master_mdl);
2408c2c66affSColin Finck                 Status = STATUS_INSUFFICIENT_RESOURCES;
2409c2c66affSColin Finck                 goto exit;
2410c2c66affSColin Finck             }
2411c2c66affSColin Finck 
2412c2c66affSColin Finck             MmBuildMdlForNonPagedPool(dummy_mdl);
2413c2c66affSColin Finck 
2414c2c66affSColin Finck             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2415c2c66affSColin Finck         }
2416c2c66affSColin Finck 
2417318da0c1SPierre Schweitzer         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2418c2c66affSColin Finck         if (!stripeoff) {
2419c2c66affSColin Finck             ERR("out of memory\n");
2420eb7fbc25SPierre Schweitzer             MmUnlockPages(master_mdl);
2421eb7fbc25SPierre Schweitzer             IoFreeMdl(master_mdl);
2422c2c66affSColin Finck             Status = STATUS_INSUFFICIENT_RESOURCES;
2423c2c66affSColin Finck             goto exit;
2424c2c66affSColin Finck         }
2425c2c66affSColin Finck 
2426318da0c1SPierre Schweitzer         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2427c2c66affSColin Finck 
2428c2c66affSColin Finck         pos = 0;
2429c2c66affSColin Finck 
2430c2c66affSColin Finck         while (pos < length) {
2431c2c66affSColin Finck             PFN_NUMBER* stripe_pfns;
2432c2c66affSColin Finck 
2433c2c66affSColin Finck             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2434c2c66affSColin Finck 
2435c2c66affSColin Finck             if (pos == 0) {
2436318da0c1SPierre Schweitzer                 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2437318da0c1SPierre Schweitzer                 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2438c2c66affSColin Finck                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2439c2c66affSColin Finck 
2440c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2441c2c66affSColin Finck 
2442c2c66affSColin Finck                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2443c2c66affSColin Finck 
2444c2c66affSColin Finck                 stripeoff[stripe] = readlen;
2445c2c66affSColin Finck                 pos += readlen;
2446c2c66affSColin Finck 
2447c2c66affSColin Finck                 stripe = (stripe + 1) % ci->num_stripes;
2448c2c66affSColin Finck 
2449c2c66affSColin Finck                 while (stripe != parity1) {
2450c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2451318da0c1SPierre Schweitzer                     readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2452c2c66affSColin Finck 
2453c2c66affSColin Finck                     if (readlen == 0)
2454c2c66affSColin Finck                         break;
2455c2c66affSColin Finck 
2456c2c66affSColin Finck                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2457c2c66affSColin Finck 
2458c2c66affSColin Finck                     stripeoff[stripe] = readlen;
2459c2c66affSColin Finck                     pos += readlen;
2460c2c66affSColin Finck 
2461c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2462c2c66affSColin Finck                 }
2463c2c66affSColin Finck             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2464318da0c1SPierre Schweitzer                 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2465318da0c1SPierre Schweitzer                 uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2466c2c66affSColin Finck                 ULONG k;
2467c2c66affSColin Finck 
2468c2c66affSColin Finck                 while (stripe != parity1) {
2469c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2470c2c66affSColin Finck 
2471c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2472c2c66affSColin Finck 
2473318da0c1SPierre Schweitzer                     stripeoff[stripe] += (uint32_t)ci->stripe_length;
2474318da0c1SPierre Schweitzer                     pos += (uint32_t)ci->stripe_length;
2475c2c66affSColin Finck 
2476c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2477c2c66affSColin Finck                 }
2478c2c66affSColin Finck 
2479c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2480c2c66affSColin Finck 
2481c2c66affSColin Finck                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2482c2c66affSColin Finck                     stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2483c2c66affSColin Finck                     stripeoff[parity1] += PAGE_SIZE;
2484c2c66affSColin Finck                 }
2485c2c66affSColin Finck 
2486c2c66affSColin Finck                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2487c2c66affSColin Finck 
2488c2c66affSColin Finck                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2489c2c66affSColin Finck                     stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2490c2c66affSColin Finck                     stripeoff[parity2] += PAGE_SIZE;
2491c2c66affSColin Finck                 }
2492c2c66affSColin Finck             } else {
2493318da0c1SPierre Schweitzer                 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2494318da0c1SPierre Schweitzer                 uint32_t readlen;
2495c2c66affSColin Finck 
2496c2c66affSColin Finck                 while (pos < length) {
2497c2c66affSColin Finck                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2498318da0c1SPierre Schweitzer                     readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2499c2c66affSColin Finck 
2500c2c66affSColin Finck                     if (readlen == 0)
2501c2c66affSColin Finck                         break;
2502c2c66affSColin Finck 
2503c2c66affSColin Finck                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2504c2c66affSColin Finck 
2505c2c66affSColin Finck                     stripeoff[stripe] += readlen;
2506c2c66affSColin Finck                     pos += readlen;
2507c2c66affSColin Finck 
2508c2c66affSColin Finck                     stripe = (stripe + 1) % ci->num_stripes;
2509c2c66affSColin Finck                 }
2510c2c66affSColin Finck             }
2511c2c66affSColin Finck         }
2512c2c66affSColin Finck 
2513c2c66affSColin Finck         MmUnlockPages(master_mdl);
2514c2c66affSColin Finck         IoFreeMdl(master_mdl);
2515c2c66affSColin Finck 
2516c2c66affSColin Finck         ExFreePool(stripeoff);
2517c2c66affSColin Finck     }
2518c2c66affSColin Finck 
2519c2c66affSColin Finck     context.address = addr;
2520c2c66affSColin Finck 
2521c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2522c2c66affSColin Finck         if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2523c2c66affSColin Finck             context.stripes[i].status = ReadDataStatus_MissingDevice;
2524c2c66affSColin Finck             context.stripes_left--;
2525c2c66affSColin Finck 
2526c2c66affSColin Finck             if (!devices[i] || !devices[i]->devobj)
2527c2c66affSColin Finck                 missing_devices++;
2528c2c66affSColin Finck         }
2529c2c66affSColin Finck     }
2530c2c66affSColin Finck 
2531c2c66affSColin Finck     if (missing_devices > allowed_missing) {
2532c2c66affSColin Finck         ERR("not enough devices to service request (%u missing)\n", missing_devices);
2533c2c66affSColin Finck         Status = STATUS_UNEXPECTED_IO_ERROR;
2534c2c66affSColin Finck         goto exit;
2535c2c66affSColin Finck     }
2536c2c66affSColin Finck 
2537c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2538c2c66affSColin Finck         PIO_STACK_LOCATION IrpSp;
2539c2c66affSColin Finck 
2540c2c66affSColin Finck         if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2541c2c66affSColin Finck             context.stripes[i].context = (struct read_data_context*)&context;
2542c2c66affSColin Finck 
2543c2c66affSColin Finck             if (type == BLOCK_FLAG_RAID10) {
2544c2c66affSColin Finck                 context.stripes[i].stripenum = i / ci->sub_stripes;
2545c2c66affSColin Finck             }
2546c2c66affSColin Finck 
2547c2c66affSColin Finck             if (!Irp) {
2548318da0c1SPierre Schweitzer                 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2549c2c66affSColin Finck 
2550c2c66affSColin Finck                 if (!context.stripes[i].Irp) {
2551c2c66affSColin Finck                     ERR("IoAllocateIrp failed\n");
2552c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2553c2c66affSColin Finck                     goto exit;
2554c2c66affSColin Finck                 }
2555c2c66affSColin Finck             } else {
2556c2c66affSColin Finck                 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2557c2c66affSColin Finck 
2558c2c66affSColin Finck                 if (!context.stripes[i].Irp) {
2559c2c66affSColin Finck                     ERR("IoMakeAssociatedIrp failed\n");
2560c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2561c2c66affSColin Finck                     goto exit;
2562c2c66affSColin Finck                 }
2563c2c66affSColin Finck             }
2564c2c66affSColin Finck 
2565c2c66affSColin Finck             IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2566c2c66affSColin Finck             IrpSp->MajorFunction = IRP_MJ_READ;
2567318da0c1SPierre Schweitzer             IrpSp->MinorFunction = IRP_MN_NORMAL;
2568318da0c1SPierre Schweitzer             IrpSp->FileObject = devices[i]->fileobj;
2569c2c66affSColin Finck 
2570c2c66affSColin Finck             if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2571c2c66affSColin Finck                 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2572c2c66affSColin Finck                 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2573c2c66affSColin Finck                     ERR("out of memory\n");
2574c2c66affSColin Finck                     Status = STATUS_INSUFFICIENT_RESOURCES;
2575c2c66affSColin Finck                     goto exit;
2576c2c66affSColin Finck                 }
2577c2c66affSColin Finck 
2578c2c66affSColin Finck                 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2579c2c66affSColin Finck 
2580c2c66affSColin Finck                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2581c2c66affSColin Finck             } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2582c2c66affSColin Finck                 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2583c2c66affSColin Finck             else
2584c2c66affSColin Finck                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2585c2c66affSColin Finck 
2586c2c66affSColin Finck             IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2587c2c66affSColin Finck             IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2588c2c66affSColin Finck 
2589c2c66affSColin Finck             total_reading += IrpSp->Parameters.Read.Length;
2590c2c66affSColin Finck 
2591c2c66affSColin Finck             context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2592c2c66affSColin Finck 
2593318da0c1SPierre Schweitzer             IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2594c2c66affSColin Finck 
2595c2c66affSColin Finck             context.stripes[i].status = ReadDataStatus_Pending;
2596c2c66affSColin Finck         }
2597c2c66affSColin Finck     }
2598c2c66affSColin Finck 
2599318da0c1SPierre Schweitzer     need_to_wait = false;
2600c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2601c2c66affSColin Finck         if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2602c2c66affSColin Finck             IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2603318da0c1SPierre Schweitzer             need_to_wait = true;
2604c2c66affSColin Finck         }
2605c2c66affSColin Finck     }
2606c2c66affSColin Finck 
2607c2c66affSColin Finck     if (need_to_wait)
2608318da0c1SPierre Schweitzer         KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2609c2c66affSColin Finck 
2610c2c66affSColin Finck     if (diskacc)
2611c2c66affSColin Finck         fFsRtlUpdateDiskCounters(total_reading, 0);
2612c2c66affSColin Finck 
2613c2c66affSColin Finck     // check if any of the devices return a "user-induced" error
2614c2c66affSColin Finck 
2615c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2616c2c66affSColin Finck         if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2617c2c66affSColin Finck             Status = context.stripes[i].iosb.Status;
2618c2c66affSColin Finck             goto exit;
2619c2c66affSColin Finck         }
2620c2c66affSColin Finck     }
2621c2c66affSColin Finck 
2622c2c66affSColin Finck     if (type == BLOCK_FLAG_RAID0) {
2623c2c66affSColin Finck         Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2624c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2625194ea909SVictor Perevertkin             ERR("read_data_raid0 returned %08lx\n", Status);
2626c2c66affSColin Finck 
2627c2c66affSColin Finck             if (file_read)
2628c2c66affSColin Finck                 ExFreePool(context.va);
2629c2c66affSColin Finck 
2630c2c66affSColin Finck             goto exit;
2631c2c66affSColin Finck         }
2632c2c66affSColin Finck 
2633c2c66affSColin Finck         if (file_read) {
2634c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2635c2c66affSColin Finck             ExFreePool(context.va);
2636c2c66affSColin Finck         }
2637c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID10) {
2638c2c66affSColin Finck         Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2639c2c66affSColin Finck 
2640c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2641194ea909SVictor Perevertkin             ERR("read_data_raid10 returned %08lx\n", Status);
2642c2c66affSColin Finck 
2643c2c66affSColin Finck             if (file_read)
2644c2c66affSColin Finck                 ExFreePool(context.va);
2645c2c66affSColin Finck 
2646c2c66affSColin Finck             goto exit;
2647c2c66affSColin Finck         }
2648c2c66affSColin Finck 
2649c2c66affSColin Finck         if (file_read) {
2650c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2651c2c66affSColin Finck             ExFreePool(context.va);
2652c2c66affSColin Finck         }
2653c2c66affSColin Finck     } else if (type == BLOCK_FLAG_DUPLICATE) {
2654c2c66affSColin Finck         Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2655c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2656194ea909SVictor Perevertkin             ERR("read_data_dup returned %08lx\n", Status);
2657c2c66affSColin Finck 
2658c2c66affSColin Finck             if (file_read)
2659c2c66affSColin Finck                 ExFreePool(context.va);
2660c2c66affSColin Finck 
2661c2c66affSColin Finck             goto exit;
2662c2c66affSColin Finck         }
2663c2c66affSColin Finck 
2664c2c66affSColin Finck         if (file_read) {
2665c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2666c2c66affSColin Finck             ExFreePool(context.va);
2667c2c66affSColin Finck         }
2668c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID5) {
2669318da0c1SPierre Schweitzer         Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2670c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2671194ea909SVictor Perevertkin             ERR("read_data_raid5 returned %08lx\n", Status);
2672c2c66affSColin Finck 
2673c2c66affSColin Finck             if (file_read)
2674c2c66affSColin Finck                 ExFreePool(context.va);
2675c2c66affSColin Finck 
2676c2c66affSColin Finck             goto exit;
2677c2c66affSColin Finck         }
2678c2c66affSColin Finck 
2679c2c66affSColin Finck         if (file_read) {
2680c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2681c2c66affSColin Finck             ExFreePool(context.va);
2682c2c66affSColin Finck         }
2683c2c66affSColin Finck     } else if (type == BLOCK_FLAG_RAID6) {
2684318da0c1SPierre Schweitzer         Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2685c2c66affSColin Finck         if (!NT_SUCCESS(Status)) {
2686194ea909SVictor Perevertkin             ERR("read_data_raid6 returned %08lx\n", Status);
2687c2c66affSColin Finck 
2688c2c66affSColin Finck             if (file_read)
2689c2c66affSColin Finck                 ExFreePool(context.va);
2690c2c66affSColin Finck 
2691c2c66affSColin Finck             goto exit;
2692c2c66affSColin Finck         }
2693c2c66affSColin Finck 
2694c2c66affSColin Finck         if (file_read) {
2695c2c66affSColin Finck             RtlCopyMemory(buf, context.va, length);
2696c2c66affSColin Finck             ExFreePool(context.va);
2697c2c66affSColin Finck         }
2698c2c66affSColin Finck     }
2699c2c66affSColin Finck 
2700c2c66affSColin Finck exit:
2701c2c66affSColin Finck     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2702c2c66affSColin Finck         chunk_unlock_range(Vcb, c, lockaddr, locklen);
2703c2c66affSColin Finck 
2704c2c66affSColin Finck     if (dummy_mdl)
2705c2c66affSColin Finck         IoFreeMdl(dummy_mdl);
2706c2c66affSColin Finck 
2707c2c66affSColin Finck     if (dummypage)
2708c2c66affSColin Finck         ExFreePool(dummypage);
2709c2c66affSColin Finck 
2710c2c66affSColin Finck     for (i = 0; i < ci->num_stripes; i++) {
2711c2c66affSColin Finck         if (context.stripes[i].mdl) {
2712c2c66affSColin Finck             if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2713c2c66affSColin Finck                 MmUnlockPages(context.stripes[i].mdl);
2714c2c66affSColin Finck 
2715c2c66affSColin Finck             IoFreeMdl(context.stripes[i].mdl);
2716c2c66affSColin Finck         }
2717c2c66affSColin Finck 
2718c2c66affSColin Finck         if (context.stripes[i].Irp)
2719c2c66affSColin Finck             IoFreeIrp(context.stripes[i].Irp);
2720c2c66affSColin Finck     }
2721c2c66affSColin Finck 
2722c2c66affSColin Finck     ExFreePool(context.stripes);
2723c2c66affSColin Finck 
2724c2c66affSColin Finck     if (!Vcb->log_to_phys_loaded)
2725c2c66affSColin Finck         ExFreePool(devices);
2726c2c66affSColin Finck 
2727c2c66affSColin Finck     return Status;
2728c2c66affSColin Finck }
2729c2c66affSColin Finck 
2730174dfab6SVincent Franchomme __attribute__((nonnull(1, 2)))
read_stream(fcb * fcb,uint8_t * data,uint64_t start,ULONG length,ULONG * pbr)2731318da0c1SPierre Schweitzer NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
2732c2c66affSColin Finck     ULONG readlen;
2733c2c66affSColin Finck 
2734194ea909SVictor Perevertkin     TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr);
2735c2c66affSColin Finck 
2736c2c66affSColin Finck     if (pbr) *pbr = 0;
2737c2c66affSColin Finck 
2738c2c66affSColin Finck     if (start >= fcb->adsdata.Length) {
2739c2c66affSColin Finck         TRACE("tried to read beyond end of stream\n");
2740c2c66affSColin Finck         return STATUS_END_OF_FILE;
2741c2c66affSColin Finck     }
2742c2c66affSColin Finck 
2743c2c66affSColin Finck     if (length == 0) {
2744c2c66affSColin Finck         WARN("tried to read zero bytes\n");
2745c2c66affSColin Finck         return STATUS_SUCCESS;
2746c2c66affSColin Finck     }
2747c2c66affSColin Finck 
2748c2c66affSColin Finck     if (start + length < fcb->adsdata.Length)
2749c2c66affSColin Finck         readlen = length;
2750c2c66affSColin Finck     else
2751c2c66affSColin Finck         readlen = fcb->adsdata.Length - (ULONG)start;
2752c2c66affSColin Finck 
2753c2c66affSColin Finck     if (readlen > 0)
2754194ea909SVictor Perevertkin         RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);
2755c2c66affSColin Finck 
2756c2c66affSColin Finck     if (pbr) *pbr = readlen;
2757c2c66affSColin Finck 
2758c2c66affSColin Finck     return STATUS_SUCCESS;
2759c2c66affSColin Finck }
2760c2c66affSColin Finck 
// Per-extent record stored in the extents[] array of a read_part.
// NOTE(review): the ed_ prefix suggests ed_size / ed_offset / ed_num_bytes are
// copied from an extent-data item, and off is an offset associated with the
// extent - confirm against the code that fills these in.
typedef struct {
    uint64_t off, ed_size, ed_offset, ed_num_bytes;
} read_part_extent;
2767194ea909SVictor Perevertkin 
2768194ea909SVictor Perevertkin typedef struct {
2769194ea909SVictor Perevertkin     LIST_ENTRY list_entry;
2770194ea909SVictor Perevertkin     uint64_t addr;
2771194ea909SVictor Perevertkin     chunk* c;
2772194ea909SVictor Perevertkin     uint32_t read;
2773194ea909SVictor Perevertkin     uint32_t to_read;
2774194ea909SVictor Perevertkin     void* csum;
2775194ea909SVictor Perevertkin     bool csum_free;
2776194ea909SVictor Perevertkin     uint8_t* buf;
2777194ea909SVictor Perevertkin     bool buf_free;
2778194ea909SVictor Perevertkin     uint32_t bumpoff;
2779194ea909SVictor Perevertkin     bool mdl;
2780194ea909SVictor Perevertkin     void* data;
2781194ea909SVictor Perevertkin     uint8_t compression;
2782194ea909SVictor Perevertkin     unsigned int num_extents;
2783194ea909SVictor Perevertkin     read_part_extent extents[1];
2784194ea909SVictor Perevertkin } read_part;
2785194ea909SVictor Perevertkin 
2786194ea909SVictor Perevertkin typedef struct {
2787194ea909SVictor Perevertkin     LIST_ENTRY list_entry;
2788194ea909SVictor Perevertkin     calc_job* cj;
2789194ea909SVictor Perevertkin     void* decomp;
2790194ea909SVictor Perevertkin     void* data;
2791194ea909SVictor Perevertkin     unsigned int offset;
2792194ea909SVictor Perevertkin     size_t length;
2793194ea909SVictor Perevertkin } comp_calc_job;
2794194ea909SVictor Perevertkin 
2795174dfab6SVincent Franchomme __attribute__((nonnull(1, 2)))
read_file(fcb * fcb,uint8_t * data,uint64_t start,uint64_t length,ULONG * pbr,PIRP Irp)2796318da0c1SPierre Schweitzer NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
2797c2c66affSColin Finck     NTSTATUS Status;
2798318da0c1SPierre Schweitzer     uint32_t bytes_read = 0;
2799318da0c1SPierre Schweitzer     uint64_t last_end;
2800c2c66affSColin Finck     LIST_ENTRY* le;
2801318da0c1SPierre Schweitzer     POOL_TYPE pool_type;
2802194ea909SVictor Perevertkin     LIST_ENTRY read_parts, calc_jobs;
2803c2c66affSColin Finck 
2804318da0c1SPierre Schweitzer     TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2805c2c66affSColin Finck 
2806c2c66affSColin Finck     if (pbr)
2807c2c66affSColin Finck         *pbr = 0;
2808c2c66affSColin Finck 
2809c2c66affSColin Finck     if (start >= fcb->inode_item.st_size) {
2810c2c66affSColin Finck         WARN("Tried to read beyond end of file\n");
2811194ea909SVictor Perevertkin         return STATUS_END_OF_FILE;
2812c2c66affSColin Finck     }
2813c2c66affSColin Finck 
2814194ea909SVictor Perevertkin     InitializeListHead(&read_parts);
2815194ea909SVictor Perevertkin     InitializeListHead(&calc_jobs);
2816194ea909SVictor Perevertkin 
2817318da0c1SPierre Schweitzer     pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2818c2c66affSColin Finck 
2819c2c66affSColin Finck     le = fcb->extents.Flink;
2820c2c66affSColin Finck 
2821c2c66affSColin Finck     last_end = start;
2822c2c66affSColin Finck 
2823c2c66affSColin Finck     while (le != &fcb->extents) {
2824c2c66affSColin Finck         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2825c2c66affSColin Finck 
2826c2c66affSColin Finck         if (!ext->ignore) {
2827194ea909SVictor Perevertkin             EXTENT_DATA* ed = &ext->extent_data;
2828174dfab6SVincent Franchomme             uint64_t len;
2829c2c66affSColin Finck 
2830174dfab6SVincent Franchomme             if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC)
2831174dfab6SVincent Franchomme                 len = ((EXTENT_DATA2*)ed->data)->num_bytes;
2832174dfab6SVincent Franchomme             else
2833174dfab6SVincent Franchomme                 len = ed->decoded_size;
2834c2c66affSColin Finck 
2835c2c66affSColin Finck             if (ext->offset + len <= start) {
2836c2c66affSColin Finck                 last_end = ext->offset + len;
2837c2c66affSColin Finck                 goto nextitem;
2838c2c66affSColin Finck             }
2839c2c66affSColin Finck 
2840c2c66affSColin Finck             if (ext->offset > last_end && ext->offset > start + bytes_read) {
2841318da0c1SPierre Schweitzer                 uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2842c2c66affSColin Finck 
2843c2c66affSColin Finck                 RtlZeroMemory(data + bytes_read, read);
2844c2c66affSColin Finck                 bytes_read += read;
2845c2c66affSColin Finck                 length -= read;
2846c2c66affSColin Finck             }
2847c2c66affSColin Finck 
2848c2c66affSColin Finck             if (length == 0 || ext->offset > start + bytes_read + length)
2849c2c66affSColin Finck                 break;
2850c2c66affSColin Finck 
2851c2c66affSColin Finck             if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2852c2c66affSColin Finck                 WARN("Encryption not supported\n");
2853c2c66affSColin Finck                 Status = STATUS_NOT_IMPLEMENTED;
2854c2c66affSColin Finck                 goto exit;
2855c2c66affSColin Finck             }
2856c2c66affSColin Finck 
2857c2c66affSColin Finck             if (ed->encoding != BTRFS_ENCODING_NONE) {
2858c2c66affSColin Finck                 WARN("Other encodings not supported\n");
2859c2c66affSColin Finck                 Status = STATUS_NOT_IMPLEMENTED;
2860c2c66affSColin Finck                 goto exit;
2861c2c66affSColin Finck             }
2862c2c66affSColin Finck 
2863c2c66affSColin Finck             switch (ed->type) {
2864c2c66affSColin Finck                 case EXTENT_TYPE_INLINE:
2865c2c66affSColin Finck                 {
2866318da0c1SPierre Schweitzer                     uint64_t off = start + bytes_read - ext->offset;
2867318da0c1SPierre Schweitzer                     uint32_t read;
2868c2c66affSColin Finck 
2869c2c66affSColin Finck                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2870318da0c1SPierre Schweitzer                         read = (uint32_t)min(min(len, ext->datalen) - off, length);
2871c2c66affSColin Finck 
2872c2c66affSColin Finck                         RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2873eb7fbc25SPierre Schweitzer                     } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
2874318da0c1SPierre Schweitzer                         uint8_t* decomp;
2875318da0c1SPierre Schweitzer                         bool decomp_alloc;
2876318da0c1SPierre Schweitzer                         uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2877c2c66affSColin Finck 
2878c2c66affSColin Finck                         if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2879318da0c1SPierre Schweitzer                             ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2880c2c66affSColin Finck                             Status = STATUS_INTERNAL_ERROR;
2881c2c66affSColin Finck                             goto exit;
2882c2c66affSColin Finck                         }
2883c2c66affSColin Finck 
2884318da0c1SPierre Schweitzer                         read = (uint32_t)min(ed->decoded_size - off, length);
2885c2c66affSColin Finck 
2886c2c66affSColin Finck                         if (off > 0) {
2887318da0c1SPierre Schweitzer                             decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG);
2888c2c66affSColin Finck                             if (!decomp) {
2889c2c66affSColin Finck                                 ERR("out of memory\n");
2890c2c66affSColin Finck                                 Status = STATUS_INSUFFICIENT_RESOURCES;
2891c2c66affSColin Finck                                 goto exit;
2892c2c66affSColin Finck                             }
2893c2c66affSColin Finck 
2894318da0c1SPierre Schweitzer                             decomp_alloc = true;
2895c2c66affSColin Finck                         } else {
2896c2c66affSColin Finck                             decomp = data + bytes_read;
2897318da0c1SPierre Schweitzer                             decomp_alloc = false;
2898c2c66affSColin Finck                         }
2899c2c66affSColin Finck 
2900c2c66affSColin Finck                         if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2901318da0c1SPierre Schweitzer                             Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2902c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
2903194ea909SVictor Perevertkin                                 ERR("zlib_decompress returned %08lx\n", Status);
2904c2c66affSColin Finck                                 if (decomp_alloc) ExFreePool(decomp);
2905c2c66affSColin Finck                                 goto exit;
2906c2c66affSColin Finck                             }
2907c2c66affSColin Finck                         } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2908318da0c1SPierre Schweitzer                             if (inlen < sizeof(uint32_t)) {
2909c2c66affSColin Finck                                 ERR("extent data was truncated\n");
2910c2c66affSColin Finck                                 Status = STATUS_INTERNAL_ERROR;
2911c2c66affSColin Finck                                 if (decomp_alloc) ExFreePool(decomp);
2912c2c66affSColin Finck                                 goto exit;
2913c2c66affSColin Finck                             } else
2914318da0c1SPierre Schweitzer                                 inlen -= sizeof(uint32_t);
2915c2c66affSColin Finck 
2916318da0c1SPierre Schweitzer                             Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2917c2c66affSColin Finck                             if (!NT_SUCCESS(Status)) {
2918194ea909SVictor Perevertkin                                 ERR("lzo_decompress returned %08lx\n", Status);
2919c2c66affSColin Finck                                 if (decomp_alloc) ExFreePool(decomp);
2920c2c66affSColin Finck                                 goto exit;
2921c2c66affSColin Finck                             }
2922eb7fbc25SPierre Schweitzer                         } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2923318da0c1SPierre Schweitzer                             Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2924eb7fbc25SPierre Schweitzer                             if (!NT_SUCCESS(Status)) {
2925194ea909SVictor Perevertkin                                 ERR("zstd_decompress returned %08lx\n", Status);
2926eb7fbc25SPierre Schweitzer                                 if (decomp_alloc) ExFreePool(decomp);
2927eb7fbc25SPierre Schweitzer                                 goto exit;
2928eb7fbc25SPierre Schweitzer                             }
2929c2c66affSColin Finck                         }
2930c2c66affSColin Finck 
2931c2c66affSColin Finck                         if (decomp_alloc) {
2932c2c66affSColin Finck                             RtlCopyMemory(data + bytes_read, decomp + off, read);
2933c2c66affSColin Finck                             ExFreePool(decomp);
2934c2c66affSColin Finck                         }
2935c2c66affSColin Finck                     } else {
2936c2c66affSColin Finck                         ERR("unhandled compression type %x\n", ed->compression);
2937c2c66affSColin Finck                         Status = STATUS_NOT_IMPLEMENTED;
2938c2c66affSColin Finck                         goto exit;
2939c2c66affSColin Finck                     }
2940c2c66affSColin Finck 
2941c2c66affSColin Finck                     bytes_read += read;
2942c2c66affSColin Finck                     length -= read;
2943c2c66affSColin Finck 
2944c2c66affSColin Finck                     break;
2945c2c66affSColin Finck                 }
2946c2c66affSColin Finck 
2947c2c66affSColin Finck                 case EXTENT_TYPE_REGULAR:
2948c2c66affSColin Finck                 {
2949174dfab6SVincent Franchomme                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
2950194ea909SVictor Perevertkin                     read_part* rp;
2951c2c66affSColin Finck 
2952194ea909SVictor Perevertkin                     rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG);
2953194ea909SVictor Perevertkin                     if (!rp) {
2954c2c66affSColin Finck                         ERR("out of memory\n");
2955c2c66affSColin Finck                         Status = STATUS_INSUFFICIENT_RESOURCES;
2956c2c66affSColin Finck                         goto exit;
2957c2c66affSColin Finck                     }
2958c2c66affSColin Finck 
2959194ea909SVictor Perevertkin                     rp->mdl = (Irp && Irp->MdlAddress) ? true : false;
2960194ea909SVictor Perevertkin                     rp->extents[0].off = start + bytes_read - ext->offset;
2961194ea909SVictor Perevertkin                     rp->bumpoff = 0;
2962194ea909SVictor Perevertkin                     rp->num_extents = 1;
2963194ea909SVictor Perevertkin                     rp->csum_free = false;
2964194ea909SVictor Perevertkin 
2965194ea909SVictor Perevertkin                     rp->read = (uint32_t)(len - rp->extents[0].off);
2966194ea909SVictor Perevertkin                     if (rp->read > length) rp->read = (uint32_t)length;
2967194ea909SVictor Perevertkin 
2968194ea909SVictor Perevertkin                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2969194ea909SVictor Perevertkin                         rp->addr = ed2->address + ed2->offset + rp->extents[0].off;
2970194ea909SVictor Perevertkin                         rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size);
2971194ea909SVictor Perevertkin 
2972174dfab6SVincent Franchomme                         if (rp->addr & (fcb->Vcb->superblock.sector_size - 1)) {
2973174dfab6SVincent Franchomme                             rp->bumpoff = rp->addr & (fcb->Vcb->superblock.sector_size - 1);
2974194ea909SVictor Perevertkin                             rp->addr -= rp->bumpoff;
2975194ea909SVictor Perevertkin                             rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size);
2976194ea909SVictor Perevertkin                         }
2977194ea909SVictor Perevertkin                     } else {
2978194ea909SVictor Perevertkin                         rp->addr = ed2->address;
2979194ea909SVictor Perevertkin                         rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2980c2c66affSColin Finck                     }
2981c2c66affSColin Finck 
2982174dfab6SVincent Franchomme                     if (ed->compression == BTRFS_COMPRESSION_NONE && (start & (fcb->Vcb->superblock.sector_size - 1)) == 0 &&
2983174dfab6SVincent Franchomme                         (length & (fcb->Vcb->superblock.sector_size - 1)) == 0) {
2984194ea909SVictor Perevertkin                         rp->buf = data + bytes_read;
2985194ea909SVictor Perevertkin                         rp->buf_free = false;
2986194ea909SVictor Perevertkin                     } else {
2987194ea909SVictor Perevertkin                         rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG);
2988194ea909SVictor Perevertkin                         rp->buf_free = true;
2989c2c66affSColin Finck 
2990194ea909SVictor Perevertkin                         if (!rp->buf) {
2991194ea909SVictor Perevertkin                             ERR("out of memory\n");
2992194ea909SVictor Perevertkin                             Status = STATUS_INSUFFICIENT_RESOURCES;
2993194ea909SVictor Perevertkin                             ExFreePool(rp);
2994194ea909SVictor Perevertkin                             goto exit;
2995194ea909SVictor Perevertkin                         }
2996c2c66affSColin Finck 
2997194ea909SVictor Perevertkin                         rp->mdl = false;
2998194ea909SVictor Perevertkin                     }
2999194ea909SVictor Perevertkin 
3000194ea909SVictor Perevertkin                     rp->c = get_chunk_from_address(fcb->Vcb, rp->addr);
3001194ea909SVictor Perevertkin 
3002194ea909SVictor Perevertkin                     if (!rp->c) {
3003194ea909SVictor Perevertkin                         ERR("get_chunk_from_address(%I64x) failed\n", rp->addr);
3004194ea909SVictor Perevertkin 
3005194ea909SVictor Perevertkin                         if (rp->buf_free)
3006194ea909SVictor Perevertkin                             ExFreePool(rp->buf);
3007194ea909SVictor Perevertkin 
3008194ea909SVictor Perevertkin                         ExFreePool(rp);
3009c2c66affSColin Finck 
3010174dfab6SVincent Franchomme                         Status = STATUS_INTERNAL_ERROR;
3011c2c66affSColin Finck                         goto exit;
3012c2c66affSColin Finck                     }
3013c2c66affSColin Finck 
3014c2c66affSColin Finck                     if (ext->csum) {
3015c2c66affSColin Finck                         if (ed->compression == BTRFS_COMPRESSION_NONE) {
3016174dfab6SVincent Franchomme                             rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off >> fcb->Vcb->sector_shift));
3017c2c66affSColin Finck                         } else
3018194ea909SVictor Perevertkin                             rp->csum = ext->csum;
3019c2c66affSColin Finck                     } else
3020194ea909SVictor Perevertkin                         rp->csum = NULL;
3021c2c66affSColin Finck 
3022194ea909SVictor Perevertkin                     rp->data = data + bytes_read;
3023194ea909SVictor Perevertkin                     rp->compression = ed->compression;
3024194ea909SVictor Perevertkin                     rp->extents[0].ed_offset = ed2->offset;
3025194ea909SVictor Perevertkin                     rp->extents[0].ed_size = ed2->size;
3026194ea909SVictor Perevertkin                     rp->extents[0].ed_num_bytes = ed2->num_bytes;
3027c2c66affSColin Finck 
3028194ea909SVictor Perevertkin                     InsertTailList(&read_parts, &rp->list_entry);
3029c2c66affSColin Finck 
3030194ea909SVictor Perevertkin                     bytes_read += rp->read;
3031194ea909SVictor Perevertkin                     length -= rp->read;
3032c2c66affSColin Finck 
3033c2c66affSColin Finck                     break;
3034c2c66affSColin Finck                 }
3035c2c66affSColin Finck 
3036c2c66affSColin Finck                 case EXTENT_TYPE_PREALLOC:
3037c2c66affSColin Finck                 {
3038318da0c1SPierre Schweitzer                     uint64_t off = start + bytes_read - ext->offset;
3039318da0c1SPierre Schweitzer                     uint32_t read = (uint32_t)(len - off);
3040c2c66affSColin Finck 
3041318da0c1SPierre Schweitzer                     if (read > length) read = (uint32_t)length;
3042c2c66affSColin Finck 
3043c2c66affSColin Finck                     RtlZeroMemory(data + bytes_read, read);
3044c2c66affSColin Finck 
3045c2c66affSColin Finck                     bytes_read += read;
3046c2c66affSColin Finck                     length -= read;
3047c2c66affSColin Finck 
3048c2c66affSColin Finck                     break;
3049c2c66affSColin Finck                 }
3050c2c66affSColin Finck 
3051c2c66affSColin Finck                 default:
3052c2c66affSColin Finck                     WARN("Unsupported extent data type %u\n", ed->type);
3053c2c66affSColin Finck                     Status = STATUS_NOT_IMPLEMENTED;
3054c2c66affSColin Finck                     goto exit;
3055c2c66affSColin Finck             }
3056c2c66affSColin Finck 
3057c2c66affSColin Finck             last_end = ext->offset + len;
3058c2c66affSColin Finck 
3059c2c66affSColin Finck             if (length == 0)
3060c2c66affSColin Finck                 break;
3061c2c66affSColin Finck         }
3062c2c66affSColin Finck 
3063c2c66affSColin Finck nextitem:
3064c2c66affSColin Finck         le = le->Flink;
3065c2c66affSColin Finck     }
3066c2c66affSColin Finck 
3067194ea909SVictor Perevertkin     if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list
3068194ea909SVictor Perevertkin         read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry);
3069194ea909SVictor Perevertkin 
3070194ea909SVictor Perevertkin         le = read_parts.Flink->Flink;
3071194ea909SVictor Perevertkin         while (le != &read_parts) {
3072194ea909SVictor Perevertkin             LIST_ENTRY* le2 = le->Flink;
3073194ea909SVictor Perevertkin             read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3074194ea909SVictor Perevertkin 
3075194ea909SVictor Perevertkin             // merge together runs
3076194ea909SVictor Perevertkin             if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read &&
3077194ea909SVictor Perevertkin                 rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) {
3078194ea909SVictor Perevertkin                 read_part* rp2;
3079194ea909SVictor Perevertkin 
3080194ea909SVictor Perevertkin                 rp2 = ExAllocatePoolWithTag(pool_type, offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG);
3081194ea909SVictor Perevertkin 
3082194ea909SVictor Perevertkin                 rp2->addr = last_rp->addr;
3083194ea909SVictor Perevertkin                 rp2->c = last_rp->c;
3084194ea909SVictor Perevertkin                 rp2->read = last_rp->read + rp->read;
3085194ea909SVictor Perevertkin                 rp2->to_read = last_rp->to_read + rp->to_read;
3086194ea909SVictor Perevertkin                 rp2->csum_free = false;
3087194ea909SVictor Perevertkin 
3088194ea909SVictor Perevertkin                 if (last_rp->csum) {
3089174dfab6SVincent Franchomme                     uint32_t sectors = (last_rp->to_read + rp->to_read) >> fcb->Vcb->sector_shift;
3090194ea909SVictor Perevertkin 
3091194ea909SVictor Perevertkin                     rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG);
3092194ea909SVictor Perevertkin                     if (!rp2->csum) {
3093194ea909SVictor Perevertkin                         ERR("out of memory\n");
3094194ea909SVictor Perevertkin                         ExFreePool(rp2);
3095194ea909SVictor Perevertkin                         Status = STATUS_INSUFFICIENT_RESOURCES;
3096194ea909SVictor Perevertkin                         goto exit;
3097194ea909SVictor Perevertkin                     }
3098194ea909SVictor Perevertkin 
3099174dfab6SVincent Franchomme                     RtlCopyMemory(rp2->csum, last_rp->csum, (last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
3100174dfab6SVincent Franchomme                     RtlCopyMemory((uint8_t*)rp2->csum + ((last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), rp->csum,
3101174dfab6SVincent Franchomme                                   (rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
3102194ea909SVictor Perevertkin 
3103194ea909SVictor Perevertkin                     rp2->csum_free = true;
3104194ea909SVictor Perevertkin                 } else
3105194ea909SVictor Perevertkin                     rp2->csum = NULL;
3106194ea909SVictor Perevertkin 
3107194ea909SVictor Perevertkin                 rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG);
3108194ea909SVictor Perevertkin                 if (!rp2->buf) {
3109194ea909SVictor Perevertkin                     ERR("out of memory\n");
3110194ea909SVictor Perevertkin 
3111194ea909SVictor Perevertkin                     if (rp2->csum)
3112194ea909SVictor Perevertkin                         ExFreePool(rp2->csum);
3113194ea909SVictor Perevertkin 
3114194ea909SVictor Perevertkin                     ExFreePool(rp2);
3115194ea909SVictor Perevertkin                     Status = STATUS_INSUFFICIENT_RESOURCES;
3116194ea909SVictor Perevertkin                     goto exit;
3117194ea909SVictor Perevertkin                 }
3118194ea909SVictor Perevertkin 
3119194ea909SVictor Perevertkin                 rp2->buf_free = true;
3120194ea909SVictor Perevertkin                 rp2->bumpoff = 0;
3121194ea909SVictor Perevertkin                 rp2->mdl = false;
3122194ea909SVictor Perevertkin                 rp2->data = last_rp->data;
3123194ea909SVictor Perevertkin                 rp2->compression = last_rp->compression;
3124194ea909SVictor Perevertkin                 rp2->num_extents = last_rp->num_extents + 1;
3125194ea909SVictor Perevertkin 
3126194ea909SVictor Perevertkin                 RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent));
3127194ea909SVictor Perevertkin                 RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent));
3128194ea909SVictor Perevertkin 
3129194ea909SVictor Perevertkin                 InsertHeadList(le->Blink, &rp2->list_entry);
3130194ea909SVictor Perevertkin 
3131194ea909SVictor Perevertkin                 if (rp->buf_free)
3132194ea909SVictor Perevertkin                     ExFreePool(rp->buf);
3133194ea909SVictor Perevertkin 
3134194ea909SVictor Perevertkin                 if (rp->csum_free)
3135194ea909SVictor Perevertkin                     ExFreePool(rp->csum);
3136194ea909SVictor Perevertkin 
3137194ea909SVictor Perevertkin                 RemoveEntryList(&rp->list_entry);
3138194ea909SVictor Perevertkin 
3139194ea909SVictor Perevertkin                 ExFreePool(rp);
3140194ea909SVictor Perevertkin 
3141194ea909SVictor Perevertkin                 if (last_rp->buf_free)
3142194ea909SVictor Perevertkin                     ExFreePool(last_rp->buf);
3143194ea909SVictor Perevertkin 
3144194ea909SVictor Perevertkin                 if (last_rp->csum_free)
3145194ea909SVictor Perevertkin                     ExFreePool(last_rp->csum);
3146194ea909SVictor Perevertkin 
3147194ea909SVictor Perevertkin                 RemoveEntryList(&last_rp->list_entry);
3148194ea909SVictor Perevertkin 
3149194ea909SVictor Perevertkin                 ExFreePool(last_rp);
3150194ea909SVictor Perevertkin 
3151194ea909SVictor Perevertkin                 last_rp = rp2;
3152194ea909SVictor Perevertkin             } else
3153194ea909SVictor Perevertkin                 last_rp = rp;
3154194ea909SVictor Perevertkin 
3155194ea909SVictor Perevertkin             le = le2;
3156194ea909SVictor Perevertkin         }
3157194ea909SVictor Perevertkin     }
3158194ea909SVictor Perevertkin 
3159194ea909SVictor Perevertkin     le = read_parts.Flink;
3160194ea909SVictor Perevertkin     while (le != &read_parts) {
3161194ea909SVictor Perevertkin         read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3162194ea909SVictor Perevertkin 
3163194ea909SVictor Perevertkin         Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl,
3164174dfab6SVincent Franchomme                            fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3165194ea909SVictor Perevertkin         if (!NT_SUCCESS(Status)) {
3166194ea909SVictor Perevertkin             ERR("read_data returned %08lx\n", Status);
3167194ea909SVictor Perevertkin             goto exit;
3168194ea909SVictor Perevertkin         }
3169194ea909SVictor Perevertkin 
3170194ea909SVictor Perevertkin         if (rp->compression == BTRFS_COMPRESSION_NONE) {
3171194ea909SVictor Perevertkin             if (rp->buf_free)
3172194ea909SVictor Perevertkin                 RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read);
3173194ea909SVictor Perevertkin         } else {
3174194ea909SVictor Perevertkin             uint8_t* buf = rp->buf;
3175*6e0cf03dSVincent Franchomme 
3176194ea909SVictor Perevertkin             for (unsigned int i = 0; i < rp->num_extents; i++) {
3177194ea909SVictor Perevertkin                 uint8_t *decomp = NULL, *buf2;
3178194ea909SVictor Perevertkin                 ULONG outlen, inlen, off2;
3179194ea909SVictor Perevertkin                 uint32_t inpageoff = 0;
3180194ea909SVictor Perevertkin                 comp_calc_job* ccj;
3181194ea909SVictor Perevertkin 
3182194ea909SVictor Perevertkin                 off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off);
3183194ea909SVictor Perevertkin                 buf2 = buf;
3184194ea909SVictor Perevertkin                 inlen = (ULONG)rp->extents[i].ed_size;
3185194ea909SVictor Perevertkin 
3186194ea909SVictor Perevertkin                 if (rp->compression == BTRFS_COMPRESSION_LZO) {
3187194ea909SVictor Perevertkin                     ULONG inoff = sizeof(uint32_t);
3188194ea909SVictor Perevertkin 
3189194ea909SVictor Perevertkin                     inlen -= sizeof(uint32_t);
3190194ea909SVictor Perevertkin 
3191194ea909SVictor Perevertkin                     // If reading a few sectors in, skip to the interesting bit
3192194ea909SVictor Perevertkin                     while (off2 > LZO_PAGE_SIZE) {
3193194ea909SVictor Perevertkin                         uint32_t partlen;
3194194ea909SVictor Perevertkin 
3195194ea909SVictor Perevertkin                         if (inlen < sizeof(uint32_t))
3196194ea909SVictor Perevertkin                             break;
3197194ea909SVictor Perevertkin 
3198194ea909SVictor Perevertkin                         partlen = *(uint32_t*)(buf2 + inoff);
3199194ea909SVictor Perevertkin 
3200194ea909SVictor Perevertkin                         if (partlen < inlen) {
3201194ea909SVictor Perevertkin                             off2 -= LZO_PAGE_SIZE;
3202194ea909SVictor Perevertkin                             inoff += partlen + sizeof(uint32_t);
3203194ea909SVictor Perevertkin                             inlen -= partlen + sizeof(uint32_t);
3204194ea909SVictor Perevertkin 
3205194ea909SVictor Perevertkin                             if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
3206194ea909SVictor Perevertkin                                 inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
3207194ea909SVictor Perevertkin                         } else
3208194ea909SVictor Perevertkin                             break;
3209194ea909SVictor Perevertkin                     }
3210194ea909SVictor Perevertkin 
3211194ea909SVictor Perevertkin                     buf2 = &buf2[inoff];
3212194ea909SVictor Perevertkin                     inpageoff = inoff % LZO_PAGE_SIZE;
3213194ea909SVictor Perevertkin                 }
3214194ea909SVictor Perevertkin 
321506042735SVincent Franchomme                 /* Previous versions of this code decompressed directly into the destination buffer,
321606042735SVincent Franchomme                  * but unfortunately that can't be relied on - Windows likes to use dummy pages sometimes
321706042735SVincent Franchomme                  * when mmap-ing, which breaks the backtracking used by e.g. zstd. */
321806042735SVincent Franchomme 
321906042735SVincent Franchomme                 if (off2 != 0)
3220194ea909SVictor Perevertkin                     outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
322106042735SVincent Franchomme                 else
322206042735SVincent Franchomme                     outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3223194ea909SVictor Perevertkin 
3224194ea909SVictor Perevertkin                 decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
3225194ea909SVictor Perevertkin                 if (!decomp) {
3226194ea909SVictor Perevertkin                     ERR("out of memory\n");
3227194ea909SVictor Perevertkin                     Status = STATUS_INSUFFICIENT_RESOURCES;
3228194ea909SVictor Perevertkin                     goto exit;
3229194ea909SVictor Perevertkin                 }
3230194ea909SVictor Perevertkin 
3231194ea909SVictor Perevertkin                 ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG);
3232194ea909SVictor Perevertkin                 if (!ccj) {
3233194ea909SVictor Perevertkin                     ERR("out of memory\n");
3234194ea909SVictor Perevertkin 
3235194ea909SVictor Perevertkin                     ExFreePool(decomp);
3236194ea909SVictor Perevertkin 
3237194ea909SVictor Perevertkin                     Status = STATUS_INSUFFICIENT_RESOURCES;
3238194ea909SVictor Perevertkin                     goto exit;
3239194ea909SVictor Perevertkin                 }
3240194ea909SVictor Perevertkin 
3241194ea909SVictor Perevertkin                 ccj->data = rp->data;
3242194ea909SVictor Perevertkin                 ccj->decomp = decomp;
3243194ea909SVictor Perevertkin 
3244194ea909SVictor Perevertkin                 ccj->offset = off2;
3245194ea909SVictor Perevertkin                 ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off);
3246194ea909SVictor Perevertkin 
324706042735SVincent Franchomme                 Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp, outlen,
324806042735SVincent Franchomme                                              inpageoff, &ccj->cj);
324906042735SVincent Franchomme                 if (!NT_SUCCESS(Status)) {
325006042735SVincent Franchomme                     ERR("add_calc_job_decomp returned %08lx\n", Status);
325106042735SVincent Franchomme 
325206042735SVincent Franchomme                     ExFreePool(decomp);
325306042735SVincent Franchomme                     ExFreePool(ccj);
325406042735SVincent Franchomme 
325506042735SVincent Franchomme                     goto exit;
325606042735SVincent Franchomme                 }
325706042735SVincent Franchomme 
3258194ea909SVictor Perevertkin                 InsertTailList(&calc_jobs, &ccj->list_entry);
3259194ea909SVictor Perevertkin 
3260194ea909SVictor Perevertkin                 buf += rp->extents[i].ed_size;
3261194ea909SVictor Perevertkin                 rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off;
3262194ea909SVictor Perevertkin                 rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off);
3263194ea909SVictor Perevertkin             }
3264194ea909SVictor Perevertkin         }
3265194ea909SVictor Perevertkin 
3266194ea909SVictor Perevertkin         le = le->Flink;
3267194ea909SVictor Perevertkin     }
3268194ea909SVictor Perevertkin 
3269c2c66affSColin Finck     if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3270318da0c1SPierre Schweitzer         uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3271c2c66affSColin Finck 
3272c2c66affSColin Finck         RtlZeroMemory(data + bytes_read, read);
3273c2c66affSColin Finck 
3274c2c66affSColin Finck         bytes_read += read;
3275c2c66affSColin Finck         length -= read;
3276c2c66affSColin Finck     }
3277c2c66affSColin Finck 
3278c2c66affSColin Finck     Status = STATUS_SUCCESS;
3279194ea909SVictor Perevertkin 
3280194ea909SVictor Perevertkin     while (!IsListEmpty(&calc_jobs)) {
3281194ea909SVictor Perevertkin         comp_calc_job* ccj = CONTAINING_RECORD(RemoveTailList(&calc_jobs), comp_calc_job, list_entry);
3282194ea909SVictor Perevertkin 
3283194ea909SVictor Perevertkin         calc_thread_main(fcb->Vcb, ccj->cj);
3284194ea909SVictor Perevertkin 
3285194ea909SVictor Perevertkin         KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
3286194ea909SVictor Perevertkin 
3287194ea909SVictor Perevertkin         if (!NT_SUCCESS(ccj->cj->Status))
3288194ea909SVictor Perevertkin             Status = ccj->cj->Status;
3289194ea909SVictor Perevertkin 
3290194ea909SVictor Perevertkin         RtlCopyMemory(ccj->data, (uint8_t*)ccj->decomp + ccj->offset, ccj->length);
3291194ea909SVictor Perevertkin         ExFreePool(ccj->decomp);
3292194ea909SVictor Perevertkin 
3293194ea909SVictor Perevertkin         ExFreePool(ccj);
3294194ea909SVictor Perevertkin     }
3295194ea909SVictor Perevertkin 
3296c2c66affSColin Finck     if (pbr)
3297c2c66affSColin Finck         *pbr = bytes_read;
3298c2c66affSColin Finck 
3299c2c66affSColin Finck exit:
3300194ea909SVictor Perevertkin     while (!IsListEmpty(&read_parts)) {
3301194ea909SVictor Perevertkin         read_part* rp = CONTAINING_RECORD(RemoveHeadList(&read_parts), read_part, list_entry);
3302194ea909SVictor Perevertkin 
3303194ea909SVictor Perevertkin         if (rp->buf_free)
3304194ea909SVictor Perevertkin             ExFreePool(rp->buf);
3305194ea909SVictor Perevertkin 
3306194ea909SVictor Perevertkin         if (rp->csum_free)
3307194ea909SVictor Perevertkin             ExFreePool(rp->csum);
3308194ea909SVictor Perevertkin 
3309194ea909SVictor Perevertkin         ExFreePool(rp);
3310194ea909SVictor Perevertkin     }
3311194ea909SVictor Perevertkin 
3312194ea909SVictor Perevertkin     while (!IsListEmpty(&calc_jobs)) {
3313194ea909SVictor Perevertkin         comp_calc_job* ccj = CONTAINING_RECORD(RemoveHeadList(&calc_jobs), comp_calc_job, list_entry);
3314194ea909SVictor Perevertkin 
3315194ea909SVictor Perevertkin         KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
3316194ea909SVictor Perevertkin 
3317194ea909SVictor Perevertkin         if (ccj->decomp)
3318194ea909SVictor Perevertkin             ExFreePool(ccj->decomp);
3319194ea909SVictor Perevertkin 
3320194ea909SVictor Perevertkin         ExFreePool(ccj->cj);
3321194ea909SVictor Perevertkin 
3322194ea909SVictor Perevertkin         ExFreePool(ccj);
3323194ea909SVictor Perevertkin     }
3324194ea909SVictor Perevertkin 
3325c2c66affSColin Finck     return Status;
3326c2c66affSColin Finck }
3327c2c66affSColin Finck 
do_read(PIRP Irp,bool wait,ULONG * bytes_read)3328318da0c1SPierre Schweitzer NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
3329c2c66affSColin Finck     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3330c2c66affSColin Finck     PFILE_OBJECT FileObject = IrpSp->FileObject;
3331c2c66affSColin Finck     fcb* fcb = FileObject->FsContext;
3332318da0c1SPierre Schweitzer     uint8_t* data = NULL;
3333c2c66affSColin Finck     ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3334318da0c1SPierre Schweitzer     uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3335c2c66affSColin Finck 
3336c2c66affSColin Finck     *bytes_read = 0;
3337c2c66affSColin Finck 
3338c2c66affSColin Finck     if (!fcb || !fcb->Vcb || !fcb->subvol)
3339c2c66affSColin Finck         return STATUS_INTERNAL_ERROR;
3340c2c66affSColin Finck 
334162e630deSPierre Schweitzer     TRACE("fcb = %p\n", fcb);
3342194ea909SVictor Perevertkin     TRACE("offset = %I64x, length = %lx\n", start, length);
3343318da0c1SPierre Schweitzer     TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");
3344c2c66affSColin Finck 
3345c2c66affSColin Finck     if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3346c2c66affSColin Finck         return STATUS_INVALID_DEVICE_REQUEST;
3347c2c66affSColin Finck 
3348c2c66affSColin Finck     if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3349c2c66affSColin Finck         WARN("tried to read locked region\n");
3350c2c66affSColin Finck         return STATUS_FILE_LOCK_CONFLICT;
3351c2c66affSColin Finck     }
3352c2c66affSColin Finck 
3353c2c66affSColin Finck     if (length == 0) {
3354c2c66affSColin Finck         TRACE("tried to read zero bytes\n");
3355c2c66affSColin Finck         return STATUS_SUCCESS;
3356c2c66affSColin Finck     }
3357c2c66affSColin Finck 
3358318da0c1SPierre Schweitzer     if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
3359318da0c1SPierre Schweitzer         TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
3360c2c66affSColin Finck         return STATUS_END_OF_FILE;
3361c2c66affSColin Finck     }
3362c2c66affSColin Finck 
3363318da0c1SPierre Schweitzer     TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3364c2c66affSColin Finck 
3365174dfab6SVincent Franchomme     if (!(Irp->Flags & IRP_NOCACHE) && IrpSp->MinorFunction & IRP_MN_MDL) {
3366174dfab6SVincent Franchomme         NTSTATUS Status = STATUS_SUCCESS;
3367174dfab6SVincent Franchomme 
3368174dfab6SVincent Franchomme         _SEH2_TRY {
3369174dfab6SVincent Franchomme             if (!FileObject->PrivateCacheMap) {
3370174dfab6SVincent Franchomme                 CC_FILE_SIZES ccfs;
3371174dfab6SVincent Franchomme 
3372174dfab6SVincent Franchomme                 ccfs.AllocationSize = fcb->Header.AllocationSize;
3373174dfab6SVincent Franchomme                 ccfs.FileSize = fcb->Header.FileSize;
3374174dfab6SVincent Franchomme                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3375174dfab6SVincent Franchomme 
3376174dfab6SVincent Franchomme                 init_file_cache(FileObject, &ccfs);
3377174dfab6SVincent Franchomme             }
3378174dfab6SVincent Franchomme 
3379174dfab6SVincent Franchomme             CcMdlRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3380174dfab6SVincent Franchomme         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3381174dfab6SVincent Franchomme         Status = _SEH2_GetExceptionCode();
3382174dfab6SVincent Franchomme     } _SEH2_END;
3383174dfab6SVincent Franchomme 
3384174dfab6SVincent Franchomme         if (NT_SUCCESS(Status)) {
3385174dfab6SVincent Franchomme             Status = Irp->IoStatus.Status;
3386174dfab6SVincent Franchomme             Irp->IoStatus.Information += addon;
3387174dfab6SVincent Franchomme             *bytes_read = (ULONG)Irp->IoStatus.Information;
3388174dfab6SVincent Franchomme         } else
3389174dfab6SVincent Franchomme             ERR("EXCEPTION - %08lx\n", Status);
3390174dfab6SVincent Franchomme 
3391174dfab6SVincent Franchomme         return Status;
3392174dfab6SVincent Franchomme     }
3393174dfab6SVincent Franchomme 
3394c2c66affSColin Finck     data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3395c2c66affSColin Finck 
3396c2c66affSColin Finck     if (Irp->MdlAddress && !data) {
3397c2c66affSColin Finck         ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3398c2c66affSColin Finck         return STATUS_INSUFFICIENT_RESOURCES;
3399c2c66affSColin Finck     }
3400c2c66affSColin Finck 
3401318da0c1SPierre Schweitzer     if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3402318da0c1SPierre Schweitzer         length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3403c2c66affSColin Finck         RtlZeroMemory(data, length);
3404c2c66affSColin Finck         Irp->IoStatus.Information = *bytes_read = length;
3405c2c66affSColin Finck         return STATUS_SUCCESS;
3406c2c66affSColin Finck     }
3407c2c66affSColin Finck 
3408318da0c1SPierre Schweitzer     if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3409318da0c1SPierre Schweitzer         addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3410c2c66affSColin Finck         RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3411c2c66affSColin Finck         length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3412c2c66affSColin Finck     }
3413c2c66affSColin Finck 
3414c2c66affSColin Finck     if (!(Irp->Flags & IRP_NOCACHE)) {
3415c2c66affSColin Finck         NTSTATUS Status = STATUS_SUCCESS;
3416c2c66affSColin Finck 
3417c2c66affSColin Finck         _SEH2_TRY {
3418c2c66affSColin Finck             if (!FileObject->PrivateCacheMap) {
3419c2c66affSColin Finck                 CC_FILE_SIZES ccfs;
3420c2c66affSColin Finck 
3421c2c66affSColin Finck                 ccfs.AllocationSize = fcb->Header.AllocationSize;
3422c2c66affSColin Finck                 ccfs.FileSize = fcb->Header.FileSize;
3423c2c66affSColin Finck                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3424c2c66affSColin Finck 
3425c2c66affSColin Finck                 init_file_cache(FileObject, &ccfs);
3426c2c66affSColin Finck             }
3427c2c66affSColin Finck 
3428c2c66affSColin Finck             if (fCcCopyReadEx) {
3429194ea909SVictor Perevertkin                 TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3430c2c66affSColin Finck                         length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3431194ea909SVictor Perevertkin                 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3432c2c66affSColin Finck                 if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3433c2c66affSColin Finck                     TRACE("CcCopyReadEx could not wait\n");
3434c2c66affSColin Finck 
3435c2c66affSColin Finck                     IoMarkIrpPending(Irp);
3436c2c66affSColin Finck                     return STATUS_PENDING;
3437c2c66affSColin Finck                 }
3438c2c66affSColin Finck                 TRACE("CcCopyReadEx finished\n");
3439c2c66affSColin Finck             } else {
3440194ea909SVictor Perevertkin                 TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3441194ea909SVictor Perevertkin                 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3442c2c66affSColin Finck                 if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3443c2c66affSColin Finck                     TRACE("CcCopyRead could not wait\n");
3444c2c66affSColin Finck 
3445c2c66affSColin Finck                     IoMarkIrpPending(Irp);
3446c2c66affSColin Finck                     return STATUS_PENDING;
3447c2c66affSColin Finck                 }
3448c2c66affSColin Finck                 TRACE("CcCopyRead finished\n");
3449c2c66affSColin Finck             }
3450c2c66affSColin Finck         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3451c2c66affSColin Finck             Status = _SEH2_GetExceptionCode();
3452c2c66affSColin Finck         } _SEH2_END;
3453c2c66affSColin Finck 
3454c2c66affSColin Finck         if (NT_SUCCESS(Status)) {
3455c2c66affSColin Finck             Status = Irp->IoStatus.Status;
3456c2c66affSColin Finck             Irp->IoStatus.Information += addon;
3457c2c66affSColin Finck             *bytes_read = (ULONG)Irp->IoStatus.Information;
3458c2c66affSColin Finck         } else
3459194ea909SVictor Perevertkin             ERR("EXCEPTION - %08lx\n", Status);
3460c2c66affSColin Finck 
3461c2c66affSColin Finck         return Status;
3462c2c66affSColin Finck     } else {
3463c2c66affSColin Finck         NTSTATUS Status;
3464c2c66affSColin Finck 
3465c2c66affSColin Finck         if (!wait) {
3466c2c66affSColin Finck             IoMarkIrpPending(Irp);
3467c2c66affSColin Finck             return STATUS_PENDING;
3468c2c66affSColin Finck         }
3469c2c66affSColin Finck 
3470194ea909SVictor Perevertkin         if (fcb->ads) {
3471c2c66affSColin Finck             Status = read_stream(fcb, data, start, length, bytes_read);
3472194ea909SVictor Perevertkin 
3473194ea909SVictor Perevertkin             if (!NT_SUCCESS(Status))
3474194ea909SVictor Perevertkin                 ERR("read_stream returned %08lx\n", Status);
3475194ea909SVictor Perevertkin         } else {
3476c2c66affSColin Finck             Status = read_file(fcb, data, start, length, bytes_read, Irp);
3477c2c66affSColin Finck 
3478194ea909SVictor Perevertkin             if (!NT_SUCCESS(Status))
3479194ea909SVictor Perevertkin                 ERR("read_file returned %08lx\n", Status);
3480194ea909SVictor Perevertkin         }
3481194ea909SVictor Perevertkin 
3482c2c66affSColin Finck         *bytes_read += addon;
3483194ea909SVictor Perevertkin         TRACE("read %lu bytes\n", *bytes_read);
3484c2c66affSColin Finck 
3485c2c66affSColin Finck         Irp->IoStatus.Information = *bytes_read;
3486c2c66affSColin Finck 
3487c2c66affSColin Finck         if (diskacc && Status != STATUS_PENDING) {
3488c2c66affSColin Finck             PETHREAD thread = NULL;
3489c2c66affSColin Finck 
3490c2c66affSColin Finck             if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3491c2c66affSColin Finck                 thread = Irp->Tail.Overlay.Thread;
3492c2c66affSColin Finck             else if (!IoIsSystemThread(PsGetCurrentThread()))
3493c2c66affSColin Finck                 thread = PsGetCurrentThread();
3494c2c66affSColin Finck             else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
3495c2c66affSColin Finck                 thread = PsGetCurrentThread();
3496c2c66affSColin Finck 
3497c2c66affSColin Finck             if (thread)
3498c2c66affSColin Finck                 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3499c2c66affSColin Finck         }
3500c2c66affSColin Finck 
3501c2c66affSColin Finck         return Status;
3502c2c66affSColin Finck     }
3503c2c66affSColin Finck }
3504c2c66affSColin Finck 
3505c2c66affSColin Finck _Dispatch_type_(IRP_MJ_READ)
_Function_class_(DRIVER_DISPATCH)3506c2c66affSColin Finck _Function_class_(DRIVER_DISPATCH)
3507318da0c1SPierre Schweitzer NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3508c2c66affSColin Finck     device_extension* Vcb = DeviceObject->DeviceExtension;
3509c2c66affSColin Finck     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3510c2c66affSColin Finck     PFILE_OBJECT FileObject = IrpSp->FileObject;
3511c2c66affSColin Finck     ULONG bytes_read = 0;
3512c2c66affSColin Finck     NTSTATUS Status;
3513318da0c1SPierre Schweitzer     bool top_level;
3514c2c66affSColin Finck     fcb* fcb;
3515c2c66affSColin Finck     ccb* ccb;
3516318da0c1SPierre Schweitzer     bool acquired_fcb_lock = false, wait;
3517c2c66affSColin Finck 
3518c2c66affSColin Finck     FsRtlEnterFileSystem();
3519c2c66affSColin Finck 
3520c2c66affSColin Finck     top_level = is_top_level(Irp);
3521c2c66affSColin Finck 
3522c2c66affSColin Finck     TRACE("read\n");
3523c2c66affSColin Finck 
3524c2c66affSColin Finck     if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3525c2c66affSColin Finck         Status = vol_read(DeviceObject, Irp);
3526c2c66affSColin Finck         goto exit2;
3527c2c66affSColin Finck     } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3528c2c66affSColin Finck         Status = STATUS_INVALID_PARAMETER;
3529c2c66affSColin Finck         goto end;
3530c2c66affSColin Finck     }
3531c2c66affSColin Finck 
3532c2c66affSColin Finck     Irp->IoStatus.Information = 0;
3533c2c66affSColin Finck 
3534c2c66affSColin Finck     if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3535c2c66affSColin Finck         CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3536c2c66affSColin Finck 
3537c2c66affSColin Finck         Irp->MdlAddress = NULL;
3538c2c66affSColin Finck         Status = STATUS_SUCCESS;
3539c2c66affSColin Finck 
3540c2c66affSColin Finck         goto exit;
3541c2c66affSColin Finck     }
3542c2c66affSColin Finck 
3543c2c66affSColin Finck     fcb = FileObject->FsContext;
3544c2c66affSColin Finck 
3545c2c66affSColin Finck     if (!fcb) {
3546c2c66affSColin Finck         ERR("fcb was NULL\n");
3547c2c66affSColin Finck         Status = STATUS_INVALID_PARAMETER;
3548c2c66affSColin Finck         goto exit;
3549c2c66affSColin Finck     }
3550c2c66affSColin Finck 
3551c2c66affSColin Finck     ccb = FileObject->FsContext2;
3552c2c66affSColin Finck 
3553c2c66affSColin Finck     if (!ccb) {
3554c2c66affSColin Finck         ERR("ccb was NULL\n");
3555c2c66affSColin Finck         Status = STATUS_INVALID_PARAMETER;
3556c2c66affSColin Finck         goto exit;
3557c2c66affSColin Finck     }
3558c2c66affSColin Finck 
3559c2c66affSColin Finck     if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3560c2c66affSColin Finck         WARN("insufficient privileges\n");
3561c2c66affSColin Finck         Status = STATUS_ACCESS_DENIED;
3562c2c66affSColin Finck         goto exit;
3563c2c66affSColin Finck     }
3564c2c66affSColin Finck 
3565c2c66affSColin Finck     if (fcb == Vcb->volume_fcb) {
3566c2c66affSColin Finck         TRACE("reading volume FCB\n");
3567c2c66affSColin Finck 
3568c2c66affSColin Finck         IoSkipCurrentIrpStackLocation(Irp);
3569c2c66affSColin Finck 
3570c2c66affSColin Finck         Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3571c2c66affSColin Finck 
3572c2c66affSColin Finck         goto exit2;
3573c2c66affSColin Finck     }
3574c2c66affSColin Finck 
357562e630deSPierre Schweitzer     if (!(Irp->Flags & IRP_PAGING_IO))
357662e630deSPierre Schweitzer         FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
357762e630deSPierre Schweitzer 
3578c2c66affSColin Finck     wait = IoIsOperationSynchronous(Irp);
3579c2c66affSColin Finck 
3580c2c66affSColin Finck     // Don't offload jobs when doing paging IO - otherwise this can lead to
3581c2c66affSColin Finck     // deadlocks in CcCopyRead.
3582c2c66affSColin Finck     if (Irp->Flags & IRP_PAGING_IO)
3583318da0c1SPierre Schweitzer         wait = true;
3584c2c66affSColin Finck 
3585883b1f31SPierre Schweitzer     if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) {
35864672b2baSPierre Schweitzer         IO_STATUS_BLOCK iosb;
35874672b2baSPierre Schweitzer 
35884672b2baSPierre Schweitzer         CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
35894672b2baSPierre Schweitzer         if (!NT_SUCCESS(iosb.Status)) {
3590194ea909SVictor Perevertkin             ERR("CcFlushCache returned %08lx\n", iosb.Status);
35914672b2baSPierre Schweitzer             return iosb.Status;
35924672b2baSPierre Schweitzer         }
35934672b2baSPierre Schweitzer     }
35944672b2baSPierre Schweitzer 
3595c2c66affSColin Finck     if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3596c2c66affSColin Finck         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3597c2c66affSColin Finck             Status = STATUS_PENDING;
3598c2c66affSColin Finck             IoMarkIrpPending(Irp);
3599c2c66affSColin Finck             goto exit;
3600c2c66affSColin Finck         }
3601c2c66affSColin Finck 
3602318da0c1SPierre Schweitzer         acquired_fcb_lock = true;
3603c2c66affSColin Finck     }
3604c2c66affSColin Finck 
3605c2c66affSColin Finck     Status = do_read(Irp, wait, &bytes_read);
3606c2c66affSColin Finck 
3607318da0c1SPierre Schweitzer     if (acquired_fcb_lock)
3608c2c66affSColin Finck         ExReleaseResourceLite(fcb->Header.Resource);
3609c2c66affSColin Finck 
3610c2c66affSColin Finck exit:
3611c2c66affSColin Finck     if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3612c2c66affSColin Finck         FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3613c2c66affSColin Finck 
3614c2c66affSColin Finck end:
3615c2c66affSColin Finck     Irp->IoStatus.Status = Status;
3616c2c66affSColin Finck 
3617194ea909SVictor Perevertkin     TRACE("Irp->IoStatus.Status = %08lx\n", Irp->IoStatus.Status);
3618194ea909SVictor Perevertkin     TRACE("Irp->IoStatus.Information = %Iu\n", Irp->IoStatus.Information);
3619194ea909SVictor Perevertkin     TRACE("returning %08lx\n", Status);
3620c2c66affSColin Finck 
3621c2c66affSColin Finck     if (Status != STATUS_PENDING)
3622c2c66affSColin Finck         IoCompleteRequest(Irp, IO_NO_INCREMENT);
3623c2c66affSColin Finck     else {
3624c2c66affSColin Finck         if (!add_thread_job(Vcb, Irp))
3625318da0c1SPierre Schweitzer             Status = do_read_job(Irp);
3626c2c66affSColin Finck     }
3627c2c66affSColin Finck 
3628c2c66affSColin Finck exit2:
3629c2c66affSColin Finck     if (top_level)
3630c2c66affSColin Finck         IoSetTopLevelIrp(NULL);
3631c2c66affSColin Finck 
3632c2c66affSColin Finck     FsRtlExitFileSystem();
3633c2c66affSColin Finck 
3634c2c66affSColin Finck     return Status;
3635c2c66affSColin Finck }
3636