xref: /reactos/drivers/filesystems/btrfs/read.c (revision bd712186)
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
/*
 * State of the read issued to a single stripe.
 *
 * read_data_completion() moves a stripe to Success or Error according to the
 * final status of its IRP. The remaining values are assigned outside the part
 * of the file visible here; judging by their names they mark stripes that
 * are still in flight, have no backing device, or were deliberately not
 * read -- TODO confirm against the code that issues the reads.
 */
enum read_data_status {
    ReadDataStatus_Pending       = 0,
    ReadDataStatus_Success       = 1,   // IRP completed with a success status
    ReadDataStatus_Error         = 2,   // IRP completed with a failure status
    ReadDataStatus_MissingDevice = 3,
    ReadDataStatus_Skip          = 4
};
27 
28 struct read_data_context;
29 
30 typedef struct {
31     struct read_data_context* context;
32     uint16_t stripenum;
33     bool rewrite;
34     PIRP Irp;
35     IO_STATUS_BLOCK iosb;
36     enum read_data_status status;
37     PMDL mdl;
38     uint64_t stripestart;
39     uint64_t stripeend;
40 } read_data_stripe;
41 
42 typedef struct {
43     KEVENT Event;
44     NTSTATUS Status;
45     chunk* c;
46     uint64_t address;
47     uint32_t buflen;
48     LONG num_stripes, stripes_left;
49     uint64_t type;
50     uint32_t sector_size;
51     uint16_t firstoff, startoffstripe, sectors_per_stripe;
52     uint32_t* csum;
53     bool tree;
54     read_data_stripe* stripes;
55     uint8_t* va;
56 } read_data_context;
57 
58 extern bool diskacc;
59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
60 extern tCcCopyReadEx fCcCopyReadEx;
61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
62 
63 #define LZO_PAGE_SIZE 4096
64 
65 _Function_class_(IO_COMPLETION_ROUTINE)
66 static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
67     read_data_stripe* stripe = conptr;
68     read_data_context* context = (read_data_context*)stripe->context;
69 
70     UNUSED(DeviceObject);
71 
72     stripe->iosb = Irp->IoStatus;
73 
74     if (NT_SUCCESS(Irp->IoStatus.Status))
75         stripe->status = ReadDataStatus_Success;
76     else
77         stripe->status = ReadDataStatus_Error;
78 
79     if (InterlockedDecrement(&context->stripes_left) == 0)
80         KeSetEvent(&context->Event, 0, false);
81 
82     return STATUS_MORE_PROCESSING_REQUIRED;
83 }
84 
85 NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, uint32_t* csum) {
86     NTSTATUS Status;
87     calc_job* cj;
88     uint32_t* csum2;
89 
90     // From experimenting, it seems that 40 sectors is roughly the crossover
91     // point where offloading the crc32 calculation becomes worth it.
92 
93     if (sectors < 40 || get_num_of_processors() < 2) {
94         ULONG j;
95 
96         for (j = 0; j < sectors; j++) {
97             uint32_t crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
98 
99             if (crc32 != csum[j]) {
100                 return STATUS_CRC_ERROR;
101             }
102         }
103 
104         return STATUS_SUCCESS;
105     }
106 
107     csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * sectors, ALLOC_TAG);
108     if (!csum2) {
109         ERR("out of memory\n");
110         return STATUS_INSUFFICIENT_RESOURCES;
111     }
112 
113     Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
114     if (!NT_SUCCESS(Status)) {
115         ERR("add_calc_job returned %08x\n", Status);
116         ExFreePool(csum2);
117         return Status;
118     }
119 
120     KeWaitForSingleObject(&cj->event, Executive, KernelMode, false, NULL);
121 
122     if (RtlCompareMemory(csum2, csum, sectors * sizeof(uint32_t)) != sectors * sizeof(uint32_t)) {
123         free_calc_job(cj);
124         ExFreePool(csum2);
125         return STATUS_CRC_ERROR;
126     }
127 
128     free_calc_job(cj);
129     ExFreePool(csum2);
130 
131     return STATUS_SUCCESS;
132 }
133 
134 static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
135                               device** devices, uint64_t generation) {
136     ULONG i;
137     bool checksum_error = false;
138     uint16_t j, stripe = 0;
139     NTSTATUS Status;
140     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
141 
142     for (j = 0; j < ci->num_stripes; j++) {
143         if (context->stripes[j].status == ReadDataStatus_Error) {
144             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
145             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
146             return context->stripes[j].iosb.Status;
147         } else if (context->stripes[j].status == ReadDataStatus_Success) {
148             stripe = j;
149             break;
150         }
151     }
152 
153     if (context->stripes[stripe].status != ReadDataStatus_Success)
154         return STATUS_INTERNAL_ERROR;
155 
156     if (context->tree) {
157         tree_header* th = (tree_header*)buf;
158         uint32_t crc32;
159 
160         crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, context->buflen - sizeof(th->csum));
161 
162         if (th->address != context->address || crc32 != *((uint32_t*)th->csum)) {
163             checksum_error = true;
164             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
165         } else if (generation != 0 && th->generation != generation) {
166             checksum_error = true;
167             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
168         }
169     } else if (context->csum) {
170         Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
171 
172         if (Status == STATUS_CRC_ERROR) {
173             checksum_error = true;
174             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
175         } else if (!NT_SUCCESS(Status)) {
176             ERR("check_csum returned %08x\n", Status);
177             return Status;
178         }
179     }
180 
181     if (!checksum_error)
182         return STATUS_SUCCESS;
183 
184     if (ci->num_stripes == 1)
185         return STATUS_CRC_ERROR;
186 
187     if (context->tree) {
188         tree_header* t2;
189         bool recovered = false;
190 
191         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
192         if (!t2) {
193             ERR("out of memory\n");
194             return STATUS_INSUFFICIENT_RESOURCES;
195         }
196 
197         for (j = 0; j < ci->num_stripes; j++) {
198             if (j != stripe && devices[j] && devices[j]->devobj) {
199                 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
200                                         Vcb->superblock.node_size, (uint8_t*)t2, false);
201                 if (!NT_SUCCESS(Status)) {
202                     WARN("sync_read_phys returned %08x\n", Status);
203                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
204                 } else {
205                     uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
206 
207                     if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) {
208                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
209                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
210                         recovered = true;
211 
212                         if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
213                             Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
214                                                      t2, Vcb->superblock.node_size);
215                             if (!NT_SUCCESS(Status)) {
216                                 WARN("write_data_phys returned %08x\n", Status);
217                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
218                             }
219                         }
220 
221                         break;
222                     } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum))
223                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
224                     else
225                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
226                 }
227             }
228         }
229 
230         if (!recovered) {
231             ERR("unrecoverable checksum error at %I64x\n", addr);
232             ExFreePool(t2);
233             return STATUS_CRC_ERROR;
234         }
235 
236         ExFreePool(t2);
237     } else {
238         ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
239         uint8_t* sector;
240 
241         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
242         if (!sector) {
243             ERR("out of memory\n");
244             return STATUS_INSUFFICIENT_RESOURCES;
245         }
246 
247         for (i = 0; i < sectors; i++) {
248             uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
249 
250             if (context->csum[i] != crc32) {
251                 bool recovered = false;
252 
253                 for (j = 0; j < ci->num_stripes; j++) {
254                     if (j != stripe && devices[j] && devices[j]->devobj) {
255                         Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
256                                                 cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
257                                                 Vcb->superblock.sector_size, sector, false);
258                         if (!NT_SUCCESS(Status)) {
259                             WARN("sync_read_phys returned %08x\n", Status);
260                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
261                         } else {
262                             uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
263 
264                             if (crc32b == context->csum[i]) {
265                                 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
266                                 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
267                                 recovered = true;
268 
269                                 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
270                                     Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
271                                                              cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
272                                                              sector, Vcb->superblock.sector_size);
273                                     if (!NT_SUCCESS(Status)) {
274                                         WARN("write_data_phys returned %08x\n", Status);
275                                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
276                                     }
277                                 }
278 
279                                 break;
280                             } else
281                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
282                         }
283                     }
284                 }
285 
286                 if (!recovered) {
287                     ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
288                     ExFreePool(sector);
289                     return STATUS_CRC_ERROR;
290                 }
291             }
292         }
293 
294         ExFreePool(sector);
295     }
296 
297     return STATUS_SUCCESS;
298 }
299 
300 static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
301                                 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
302     uint64_t i;
303 
304     for (i = 0; i < ci->num_stripes; i++) {
305         if (context->stripes[i].status == ReadDataStatus_Error) {
306             WARN("stripe %I64u returned error %08x\n", i, context->stripes[i].iosb.Status);
307             log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
308             return context->stripes[i].iosb.Status;
309         }
310     }
311 
312     if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
313         tree_header* th = (tree_header*)buf;
314         uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
315 
316         if (crc32 != *((uint32_t*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
317             uint64_t off;
318             uint16_t stripe;
319 
320             get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
321 
322             ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
323 
324             if (crc32 != *((uint32_t*)th->csum)) {
325                 WARN("crc32 was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
326                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
327                 return STATUS_CRC_ERROR;
328             } else if (addr != th->address) {
329                 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
330                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
331                 return STATUS_CRC_ERROR;
332             } else if (generation != 0 && generation != th->generation) {
333                 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
334                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
335                 return STATUS_CRC_ERROR;
336             }
337         }
338     } else if (context->csum) {
339         NTSTATUS Status;
340 
341         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
342 
343         if (Status == STATUS_CRC_ERROR) {
344             for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
345                 uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
346 
347                 if (context->csum[i] != crc32) {
348                     uint64_t off;
349                     uint16_t stripe;
350 
351                     get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);
352 
353                     ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
354 
355                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
356 
357                     return Status;
358                 }
359             }
360 
361             return Status;
362         } else if (!NT_SUCCESS(Status)) {
363             ERR("check_csum returned %08x\n", Status);
364             return Status;
365         }
366     }
367 
368     return STATUS_SUCCESS;
369 }
370 
371 static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
372                                  CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
373     uint64_t i;
374     uint16_t j, stripe;
375     NTSTATUS Status;
376     bool checksum_error = false;
377     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
378 
379     for (j = 0; j < ci->num_stripes; j++) {
380         if (context->stripes[j].status == ReadDataStatus_Error) {
381             WARN("stripe %I64u returned error %08x\n", j, context->stripes[j].iosb.Status);
382             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
383             return context->stripes[j].iosb.Status;
384         } else if (context->stripes[j].status == ReadDataStatus_Success)
385             stripe = j;
386     }
387 
388     if (context->tree) {
389         tree_header* th = (tree_header*)buf;
390         uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
391 
392         if (crc32 != *((uint32_t*)th->csum)) {
393             WARN("crc32 was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
394             checksum_error = true;
395             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
396         } else if (addr != th->address) {
397             WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
398             checksum_error = true;
399             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
400         } else if (generation != 0 && generation != th->generation) {
401             WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
402             checksum_error = true;
403             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
404         }
405     } else if (context->csum) {
406         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
407 
408         if (Status == STATUS_CRC_ERROR)
409             checksum_error = true;
410         else if (!NT_SUCCESS(Status)) {
411             ERR("check_csum returned %08x\n", Status);
412             return Status;
413         }
414     }
415 
416     if (!checksum_error)
417         return STATUS_SUCCESS;
418 
419     if (context->tree) {
420         tree_header* t2;
421         uint64_t off;
422         uint16_t badsubstripe = 0;
423         bool recovered = false;
424 
425         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
426         if (!t2) {
427             ERR("out of memory\n");
428             return STATUS_INSUFFICIENT_RESOURCES;
429         }
430 
431         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
432 
433         stripe *= ci->sub_stripes;
434 
435         for (j = 0; j < ci->sub_stripes; j++) {
436             if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
437                 badsubstripe = j;
438                 break;
439             }
440         }
441 
442         for (j = 0; j < ci->sub_stripes; j++) {
443             if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
444                 Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
445                                         Vcb->superblock.node_size, (uint8_t*)t2, false);
446                 if (!NT_SUCCESS(Status)) {
447                     WARN("sync_read_phys returned %08x\n", Status);
448                     log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
449                 } else {
450                     uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
451 
452                     if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) {
453                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
454                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
455                         recovered = true;
456 
457                         if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
458                             Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
459                                                      cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
460                             if (!NT_SUCCESS(Status)) {
461                                 WARN("write_data_phys returned %08x\n", Status);
462                                 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
463                             }
464                         }
465 
466                         break;
467                     } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum))
468                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
469                     else
470                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
471                 }
472             }
473         }
474 
475         if (!recovered) {
476             ERR("unrecoverable checksum error at %I64x\n", addr);
477             ExFreePool(t2);
478             return STATUS_CRC_ERROR;
479         }
480 
481         ExFreePool(t2);
482     } else {
483         ULONG sectors = length / Vcb->superblock.sector_size;
484         uint8_t* sector;
485 
486         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
487         if (!sector) {
488             ERR("out of memory\n");
489             return STATUS_INSUFFICIENT_RESOURCES;
490         }
491 
492         for (i = 0; i < sectors; i++) {
493             uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
494 
495             if (context->csum[i] != crc32) {
496                 uint64_t off;
497                 uint16_t stripe2, badsubstripe = 0;
498                 bool recovered = false;
499 
500                 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
501                                  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
502 
503                 stripe2 *= ci->sub_stripes;
504 
505                 for (j = 0; j < ci->sub_stripes; j++) {
506                     if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
507                         badsubstripe = j;
508                         break;
509                     }
510                 }
511 
512                 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
513 
514                 for (j = 0; j < ci->sub_stripes; j++) {
515                     if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
516                         Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
517                                                 Vcb->superblock.sector_size, sector, false);
518                         if (!NT_SUCCESS(Status)) {
519                             WARN("sync_read_phys returned %08x\n", Status);
520                             log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
521                         } else {
522                             uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
523 
524                             if (crc32b == context->csum[i]) {
525                                 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
526                                 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
527                                 recovered = true;
528 
529                                 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
530                                     Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
531                                                              cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
532                                     if (!NT_SUCCESS(Status)) {
533                                         WARN("write_data_phys returned %08x\n", Status);
534                                         log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
535                                     }
536                                 }
537 
538                                 break;
539                             } else
540                                 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
541                         }
542                     }
543                 }
544 
545                 if (!recovered) {
546                     ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
547                     ExFreePool(sector);
548                     return STATUS_CRC_ERROR;
549                 }
550             }
551         }
552 
553         ExFreePool(sector);
554     }
555 
556     return STATUS_SUCCESS;
557 }
558 
559 static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
560                                 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
561     ULONG i;
562     NTSTATUS Status;
563     bool checksum_error = false;
564     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
565     uint16_t j, stripe;
566     bool no_success = true;
567 
568     for (j = 0; j < ci->num_stripes; j++) {
569         if (context->stripes[j].status == ReadDataStatus_Error) {
570             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
571             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
572             return context->stripes[j].iosb.Status;
573         } else if (context->stripes[j].status == ReadDataStatus_Success) {
574             stripe = j;
575             no_success = false;
576         }
577     }
578 
579     if (c) {    // check partial stripes
580         LIST_ENTRY* le;
581         uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;
582 
583         ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
584 
585         le = c->partial_stripes.Flink;
586         while (le != &c->partial_stripes) {
587             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
588 
589             if (ps->address + ps_length > addr && ps->address < addr + length) {
590                 ULONG runlength, index;
591 
592                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
593 
594                 while (runlength != 0) {
595 #ifdef __REACTOS__
596                     uint64_t runstart, runend, start, end;
597 #endif
598                     if (index >= ps->bmplen)
599                         break;
600 
601                     if (index + runlength >= ps->bmplen) {
602                         runlength = ps->bmplen - index;
603 
604                         if (runlength == 0)
605                             break;
606                     }
607 
608 #ifndef __REACTOS__
609                     uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
610                     uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
611                     uint64_t start = max(runstart, addr);
612                     uint64_t end = min(runend, addr + length);
613 #else
614                     runstart = ps->address + (index * Vcb->superblock.sector_size);
615                     runend = runstart + (runlength * Vcb->superblock.sector_size);
616                     start = max(runstart, addr);
617                     end = min(runend, addr + length);
618 #endif
619 
620                     if (end > start)
621                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
622 
623                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
624                 }
625             } else if (ps->address >= addr + length)
626                 break;
627 
628             le = le->Flink;
629         }
630 
631         ExReleaseResourceLite(&c->partial_stripes_lock);
632     }
633 
634     if (context->tree) {
635         tree_header* th = (tree_header*)buf;
636         uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
637 
638         if (addr != th->address || crc32 != *((uint32_t*)th->csum)) {
639             checksum_error = true;
640             if (!no_success && !degraded)
641                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
642         } else if (generation != 0 && generation != th->generation) {
643             checksum_error = true;
644             if (!no_success && !degraded)
645                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
646         }
647     } else if (context->csum) {
648         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
649 
650         if (Status == STATUS_CRC_ERROR) {
651             if (!degraded)
652                 WARN("checksum error\n");
653             checksum_error = true;
654         } else if (!NT_SUCCESS(Status)) {
655             ERR("check_csum returned %08x\n", Status);
656             return Status;
657         }
658     } else if (degraded)
659         checksum_error = true;
660 
661     if (!checksum_error)
662         return STATUS_SUCCESS;
663 
664     if (context->tree) {
665         uint16_t parity;
666         uint64_t off;
667         bool recovered = false, first = true, failed = false;
668         uint8_t* t2;
669 
670         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
671         if (!t2) {
672             ERR("out of memory\n");
673             return STATUS_INSUFFICIENT_RESOURCES;
674         }
675 
676         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
677 
678         parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
679 
680         stripe = (parity + stripe + 1) % ci->num_stripes;
681 
682         for (j = 0; j < ci->num_stripes; j++) {
683             if (j != stripe) {
684                 if (devices[j] && devices[j]->devobj) {
685                     if (first) {
686                         Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
687                         if (!NT_SUCCESS(Status)) {
688                             ERR("sync_read_phys returned %08x\n", Status);
689                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
690                             failed = true;
691                             break;
692                         }
693 
694                         first = false;
695                     } else {
696                         Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
697                         if (!NT_SUCCESS(Status)) {
698                             ERR("sync_read_phys returned %08x\n", Status);
699                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
700                             failed = true;
701                             break;
702                         }
703 
704                         do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
705                     }
706                 } else {
707                     failed = true;
708                     break;
709                 }
710             }
711         }
712 
713         if (!failed) {
714             tree_header* t3 = (tree_header*)t2;
715             uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));
716 
717             if (t3->address == addr && crc32 == *((uint32_t*)t3->csum) && (generation == 0 || t3->generation == generation)) {
718                 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
719 
720                 if (!degraded)
721                     ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
722 
723                 recovered = true;
724 
725                 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
726                     Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
727                     if (!NT_SUCCESS(Status)) {
728                         WARN("write_data_phys returned %08x\n", Status);
729                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
730                     }
731                 }
732             }
733         }
734 
735         if (!recovered) {
736             ERR("unrecoverable checksum error at %I64x\n", addr);
737             ExFreePool(t2);
738             return STATUS_CRC_ERROR;
739         }
740 
741         ExFreePool(t2);
742     } else {
743         ULONG sectors = length / Vcb->superblock.sector_size;
744         uint8_t* sector;
745 
746         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
747         if (!sector) {
748             ERR("out of memory\n");
749             return STATUS_INSUFFICIENT_RESOURCES;
750         }
751 
752         for (i = 0; i < sectors; i++) {
753             uint16_t parity;
754             uint64_t off;
755             uint32_t crc32;
756 
757             if (context->csum)
758                 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
759 
760             get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
761                              ci->num_stripes - 1, &off, &stripe);
762 
763             parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
764 
765             stripe = (parity + stripe + 1) % ci->num_stripes;
766 
767             if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
768                 bool recovered = false, first = true, failed = false;
769 
770                 if (devices[stripe] && devices[stripe]->devobj)
771                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);
772 
773                 for (j = 0; j < ci->num_stripes; j++) {
774                     if (j != stripe) {
775                         if (devices[j] && devices[j]->devobj) {
776                             if (first) {
777                                 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
778                                 if (!NT_SUCCESS(Status)) {
779                                     ERR("sync_read_phys returned %08x\n", Status);
780                                     failed = true;
781                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
782                                     break;
783                                 }
784 
785                                 first = false;
786                             } else {
787                                 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
788                                                         sector + Vcb->superblock.sector_size, false);
789                                 if (!NT_SUCCESS(Status)) {
790                                     ERR("sync_read_phys returned %08x\n", Status);
791                                     failed = true;
792                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
793                                     break;
794                                 }
795 
796                                 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
797                             }
798                         } else {
799                             failed = true;
800                             break;
801                         }
802                     }
803                 }
804 
805                 if (!failed) {
806                     if (context->csum)
807                         crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
808 
809                     if (!context->csum || crc32 == context->csum[i]) {
810                         RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
811 
812                         if (!degraded)
813                             ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
814 
815                         recovered = true;
816 
817                         if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
818                             Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
819                                                      sector, Vcb->superblock.sector_size);
820                             if (!NT_SUCCESS(Status)) {
821                                 WARN("write_data_phys returned %08x\n", Status);
822                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
823                             }
824                         }
825                     }
826                 }
827 
828                 if (!recovered) {
829                     ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
830                     ExFreePool(sector);
831                     return STATUS_CRC_ERROR;
832                 }
833             }
834         }
835 
836         ExFreePool(sector);
837     }
838 
839     return STATUS_SUCCESS;
840 }
841 
842 void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
843     if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
844         uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
845         uint16_t stripe;
846 
847         stripe = num_stripes - 3;
848 
849         if (stripe == missing)
850             RtlZeroMemory(out, sector_size);
851         else
852             RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);
853 
854         do {
855             stripe--;
856 
857             galois_double(out, sector_size);
858 
859             if (stripe != missing)
860                 do_xor(out, sectors + (stripe * sector_size), sector_size);
861         } while (stripe > 0);
862 
863         do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
864 
865         if (missing != 0)
866             galois_divpower(out, (uint8_t)missing, sector_size);
867     } else { // reconstruct from p and q
868         uint16_t x, y, stripe;
869         uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
870         uint32_t j;
871 
872         stripe = num_stripes - 3;
873 
874         pxy = out + sector_size;
875         qxy = out;
876 
877         if (stripe == missing1 || stripe == missing2) {
878             RtlZeroMemory(qxy, sector_size);
879             RtlZeroMemory(pxy, sector_size);
880 
881             if (stripe == missing1)
882                 x = stripe;
883             else
884                 y = stripe;
885         } else {
886             RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
887             RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
888         }
889 
890         do {
891             stripe--;
892 
893             galois_double(qxy, sector_size);
894 
895             if (stripe != missing1 && stripe != missing2) {
896                 do_xor(qxy, sectors + (stripe * sector_size), sector_size);
897                 do_xor(pxy, sectors + (stripe * sector_size), sector_size);
898             } else if (stripe == missing1)
899                 x = stripe;
900             else if (stripe == missing2)
901                 y = stripe;
902         } while (stripe > 0);
903 
904         gyx = gpow2(y > x ? (y-x) : (255-x+y));
905         gx = gpow2(255-x);
906 
907         denom = gdiv(1, gyx ^ 1);
908         a = gmul(gyx, denom);
909         b = gmul(gx, denom);
910 
911         p = sectors + ((num_stripes - 2) * sector_size);
912         q = sectors + ((num_stripes - 1) * sector_size);
913 
914         for (j = 0; j < sector_size; j++) {
915             *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
916 
917             p++;
918             q++;
919             pxy++;
920             qxy++;
921         }
922 
923         do_xor(out + sector_size, out, sector_size);
924         do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
925     }
926 }
927 
928 static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
929                                 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
930     NTSTATUS Status;
931     ULONG i;
932     bool checksum_error = false;
933     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
934     uint16_t stripe, j;
935     bool no_success = true;
936 
937     for (j = 0; j < ci->num_stripes; j++) {
938         if (context->stripes[j].status == ReadDataStatus_Error) {
939             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
940 
941             if (devices[j])
942                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
943             return context->stripes[j].iosb.Status;
944         } else if (context->stripes[j].status == ReadDataStatus_Success) {
945             stripe = j;
946             no_success = false;
947         }
948     }
949 
950     if (c) {    // check partial stripes
951         LIST_ENTRY* le;
952         uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
953 
954         ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
955 
956         le = c->partial_stripes.Flink;
957         while (le != &c->partial_stripes) {
958             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
959 
960             if (ps->address + ps_length > addr && ps->address < addr + length) {
961                 ULONG runlength, index;
962 
963                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
964 
965                 while (runlength != 0) {
966 #ifdef __REACTOS__
967                     uint64_t runstart, runend, start, end;
968 #endif
969                     if (index >= ps->bmplen)
970                         break;
971 
972                     if (index + runlength >= ps->bmplen) {
973                         runlength = ps->bmplen - index;
974 
975                         if (runlength == 0)
976                             break;
977                     }
978 
979 #ifndef __REACTOS__
980                     uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
981                     uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
982                     uint64_t start = max(runstart, addr);
983                     uint64_t end = min(runend, addr + length);
984 #else
985                     runstart = ps->address + (index * Vcb->superblock.sector_size);
986                     runend = runstart + (runlength * Vcb->superblock.sector_size);
987                     start = max(runstart, addr);
988                     end = min(runend, addr + length);
989 #endif
990 
991                     if (end > start)
992                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993 
994                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995                 }
996             } else if (ps->address >= addr + length)
997                 break;
998 
999             le = le->Flink;
1000         }
1001 
1002         ExReleaseResourceLite(&c->partial_stripes_lock);
1003     }
1004 
1005     if (context->tree) {
1006         tree_header* th = (tree_header*)buf;
1007         uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008 
1009         if (addr != th->address || crc32 != *((uint32_t*)th->csum)) {
1010             checksum_error = true;
1011             if (!no_success && !degraded && devices[stripe])
1012                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1013         } else if (generation != 0 && generation != th->generation) {
1014             checksum_error = true;
1015             if (!no_success && !degraded && devices[stripe])
1016                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1017         }
1018     } else if (context->csum) {
1019         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1020 
1021         if (Status == STATUS_CRC_ERROR) {
1022             if (!degraded)
1023                 WARN("checksum error\n");
1024             checksum_error = true;
1025         } else if (!NT_SUCCESS(Status)) {
1026             ERR("check_csum returned %08x\n", Status);
1027             return Status;
1028         }
1029     } else if (degraded)
1030         checksum_error = true;
1031 
1032     if (!checksum_error)
1033         return STATUS_SUCCESS;
1034 
1035     if (context->tree) {
1036         uint8_t* sector;
1037         uint16_t k, physstripe, parity1, parity2, error_stripe;
1038         uint64_t off;
1039         bool recovered = false, failed = false;
1040         ULONG num_errors = 0;
1041 
1042         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1043         if (!sector) {
1044             ERR("out of memory\n");
1045             return STATUS_INSUFFICIENT_RESOURCES;
1046         }
1047 
1048         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1049 
1050         parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1051         parity2 = (parity1 + 1) % ci->num_stripes;
1052 
1053         physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1054 
1055         j = (parity2 + 1) % ci->num_stripes;
1056 
1057         for (k = 0; k < ci->num_stripes - 1; k++) {
1058             if (j != physstripe) {
1059                 if (devices[j] && devices[j]->devobj) {
1060                     Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1061                                             sector + (k * Vcb->superblock.node_size), false);
1062                     if (!NT_SUCCESS(Status)) {
1063                         ERR("sync_read_phys returned %08x\n", Status);
1064                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1065                         num_errors++;
1066                         error_stripe = k;
1067 
1068                         if (num_errors > 1) {
1069                             failed = true;
1070                             break;
1071                         }
1072                     }
1073                 } else {
1074                     num_errors++;
1075                     error_stripe = k;
1076 
1077                     if (num_errors > 1) {
1078                         failed = true;
1079                         break;
1080                     }
1081                 }
1082             }
1083 
1084             j = (j + 1) % ci->num_stripes;
1085         }
1086 
1087         if (!failed) {
1088             if (num_errors == 0) {
1089                 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1090                 uint32_t crc32;
1091 
1092                 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1093                               Vcb->superblock.node_size);
1094 
1095                 for (j = 0; j < ci->num_stripes - 2; j++) {
1096                     if (j != stripe)
1097                         do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1098                 }
1099 
1100                 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1101 
1102                 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) {
1103                     RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1104 
1105                     if (devices[physstripe] && devices[physstripe]->devobj)
1106                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1107 
1108                     recovered = true;
1109 
1110                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1111                         Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1112                                                  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113                         if (!NT_SUCCESS(Status)) {
1114                             WARN("write_data_phys returned %08x\n", Status);
1115                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1116                         }
1117                     }
1118                 }
1119             }
1120 
1121             if (!recovered) {
1122                 uint32_t crc32;
1123                 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1124                 bool read_q = false;
1125 
1126                 if (devices[parity2] && devices[parity2]->devobj) {
1127                     Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1128                                             Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1129                     if (!NT_SUCCESS(Status)) {
1130                         ERR("sync_read_phys returned %08x\n", Status);
1131                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1132                     } else
1133                         read_q = true;
1134                 }
1135 
1136                 if (read_q) {
1137                     if (num_errors == 1) {
1138                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1139 
1140                         crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1141 
1142                         if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation))
1143                             recovered = true;
1144                     } else {
1145                         for (j = 0; j < ci->num_stripes - 1; j++) {
1146                             if (j != stripe) {
1147                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148 
1149                                 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150 
1151                                 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) {
1152                                     recovered = true;
1153                                     error_stripe = j;
1154                                     break;
1155                                 }
1156                             }
1157                         }
1158                     }
1159                 }
1160 
1161                 if (recovered) {
1162                     uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1163 
1164                     if (devices[physstripe] && devices[physstripe]->devobj)
1165                         ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1166 
1167                     RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1168 
1169                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1170                         Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1171                                                  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1172                         if (!NT_SUCCESS(Status)) {
1173                             WARN("write_data_phys returned %08x\n", Status);
1174                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1175                         }
1176                     }
1177 
1178                     if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1179                         if (error_stripe == ci->num_stripes - 2) {
1180                             ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1181 
1182                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1183 
1184                             RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1185 
1186                             for (j = 0; j < ci->num_stripes - 2; j++) {
1187                                 if (j == stripe) {
1188                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1189                                            Vcb->superblock.node_size);
1190                                 } else {
1191                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1192                                             Vcb->superblock.node_size);
1193                                 }
1194                             }
1195                         } else {
1196                             ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1197                                 devices[error_stripe_phys]->devitem.dev_id);
1198 
1199                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1200 
1201                             RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1202                                           sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1203                         }
1204                     }
1205 
1206                     if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1207                         Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1208                                                  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1209                         if (!NT_SUCCESS(Status)) {
1210                             WARN("write_data_phys returned %08x\n", Status);
1211                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1212                         }
1213                     }
1214                 }
1215             }
1216         }
1217 
1218         if (!recovered) {
1219             ERR("unrecoverable checksum error at %I64x\n", addr);
1220             ExFreePool(sector);
1221             return STATUS_CRC_ERROR;
1222         }
1223 
1224         ExFreePool(sector);
1225     } else {
1226         ULONG sectors = length / Vcb->superblock.sector_size;
1227         uint8_t* sector;
1228 
1229         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1230         if (!sector) {
1231             ERR("out of memory\n");
1232             return STATUS_INSUFFICIENT_RESOURCES;
1233         }
1234 
1235         for (i = 0; i < sectors; i++) {
1236             uint64_t off;
1237             uint16_t physstripe, parity1, parity2;
1238             uint32_t crc32;
1239 
1240             if (context->csum)
1241                 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1242 
1243             get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1244                              ci->num_stripes - 2, &off, &stripe);
1245 
1246             parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1247             parity2 = (parity1 + 1) % ci->num_stripes;
1248 
1249             physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1250 
1251             if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1252                 uint16_t k, error_stripe;
1253                 bool recovered = false, failed = false;
1254                 ULONG num_errors = 0;
1255 
1256                 if (devices[physstripe] && devices[physstripe]->devobj)
1257                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1258 
1259                 j = (parity2 + 1) % ci->num_stripes;
1260 
1261                 for (k = 0; k < ci->num_stripes - 1; k++) {
1262                     if (j != physstripe) {
1263                         if (devices[j] && devices[j]->devobj) {
1264                             Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1265                                                     sector + (k * Vcb->superblock.sector_size), false);
1266                             if (!NT_SUCCESS(Status)) {
1267                                 ERR("sync_read_phys returned %08x\n", Status);
1268                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1269                                 num_errors++;
1270                                 error_stripe = k;
1271 
1272                                 if (num_errors > 1) {
1273                                     failed = true;
1274                                     break;
1275                                 }
1276                             }
1277                         } else {
1278                             num_errors++;
1279                             error_stripe = k;
1280 
1281                             if (num_errors > 1) {
1282                                 failed = true;
1283                                 break;
1284                             }
1285                         }
1286                     }
1287 
1288                     j = (j + 1) % ci->num_stripes;
1289                 }
1290 
1291                 if (!failed) {
1292                     if (num_errors == 0) {
1293                         RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1294 
1295                         for (j = 0; j < ci->num_stripes - 2; j++) {
1296                             if (j != stripe)
1297                                 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1298                         }
1299 
1300                         if (context->csum)
1301                             crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302 
1303                         if (!context->csum || crc32 == context->csum[i]) {
1304                             RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1305 
1306                             if (devices[physstripe] && devices[physstripe]->devobj)
1307                                 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1308                                     devices[physstripe]->devitem.dev_id);
1309 
1310                             recovered = true;
1311 
1312                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1313                                 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1314                                                          sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1315                                 if (!NT_SUCCESS(Status)) {
1316                                     WARN("write_data_phys returned %08x\n", Status);
1317                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1318                                 }
1319                             }
1320                         }
1321                     }
1322 
1323                     if (!recovered) {
1324                         bool read_q = false;
1325 
1326                         if (devices[parity2] && devices[parity2]->devobj) {
1327                             Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1328                                                     Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), false);
1329                             if (!NT_SUCCESS(Status)) {
1330                                 ERR("sync_read_phys returned %08x\n", Status);
1331                                 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1332                             } else
1333                                 read_q = true;
1334                         }
1335 
1336                         if (read_q) {
1337                             if (num_errors == 1) {
1338                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1339 
1340                                 if (!devices[physstripe] || !devices[physstripe]->devobj)
1341                                     recovered = true;
1342                                 else {
1343                                     crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1344 
1345                                     if (crc32 == context->csum[i])
1346                                         recovered = true;
1347                                 }
1348                             } else {
1349                                 for (j = 0; j < ci->num_stripes - 1; j++) {
1350                                     if (j != stripe) {
1351                                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1352 
1353                                         crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1354 
1355                                         if (crc32 == context->csum[i]) {
1356                                             recovered = true;
1357                                             error_stripe = j;
1358                                             break;
1359                                         }
1360                                     }
1361                                 }
1362                             }
1363                         }
1364 
1365                         if (recovered) {
1366                             uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1367 
1368                             if (devices[physstripe] && devices[physstripe]->devobj)
1369                                 ERR("recovering from checksum error at %I64x, device %I64x\n",
1370                                     addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1371 
1372                             RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1373 
1374                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1375                                 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1376                                                          sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1377                                 if (!NT_SUCCESS(Status)) {
1378                                     WARN("write_data_phys returned %08x\n", Status);
1379                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1380                                 }
1381                             }
1382 
1383                             if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1384                                 if (error_stripe == ci->num_stripes - 2) {
1385                                     ERR("recovering from parity error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1386                                         devices[error_stripe_phys]->devitem.dev_id);
1387 
1388                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1389 
1390                                     RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1391 
1392                                     for (j = 0; j < ci->num_stripes - 2; j++) {
1393                                         if (j == stripe) {
1394                                             do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1395                                                    Vcb->superblock.sector_size);
1396                                         } else {
1397                                             do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1398                                                    Vcb->superblock.sector_size);
1399                                         }
1400                                     }
1401                                 } else {
1402                                     ERR("recovering from checksum error at %I64x, device %I64x\n",
1403                                         addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1404                                         devices[error_stripe_phys]->devitem.dev_id);
1405 
1406                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1407 
1408                                     RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1409                                                   sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1410                                 }
1411                             }
1412 
1413                             if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1414                                 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1415                                                          sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1416                                 if (!NT_SUCCESS(Status)) {
1417                                     WARN("write_data_phys returned %08x\n", Status);
1418                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1419                                 }
1420                             }
1421                         }
1422                     }
1423                 }
1424 
1425                 if (!recovered) {
1426                     ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1427                     ExFreePool(sector);
1428                     return STATUS_CRC_ERROR;
1429                 }
1430             }
1431         }
1432 
1433         ExFreePool(sector);
1434     }
1435 
1436     return STATUS_SUCCESS;
1437 }
1438 
1439 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) uint32_t* csum,
1440                    _In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read,
1441                    _In_ ULONG priority) {
1442     CHUNK_ITEM* ci;
1443     CHUNK_ITEM_STRIPE* cis;
1444     read_data_context context;
1445     uint64_t type, offset, total_reading = 0;
1446     NTSTATUS Status;
1447     device** devices = NULL;
1448     uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1449     uint8_t* dummypage = NULL;
1450     PMDL dummy_mdl = NULL;
1451     bool need_to_wait;
1452     uint64_t lockaddr, locklen;
1453 
1454     if (Vcb->log_to_phys_loaded) {
1455         if (!c) {
1456             c = get_chunk_from_address(Vcb, addr);
1457 
1458             if (!c) {
1459                 ERR("get_chunk_from_address failed\n");
1460                 return STATUS_INTERNAL_ERROR;
1461             }
1462         }
1463 
1464         ci = c->chunk_item;
1465         offset = c->offset;
1466         devices = c->devices;
1467 
1468         if (pc)
1469             *pc = c;
1470     } else {
1471         LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1472 
1473         ci = NULL;
1474 
1475         c = NULL;
1476         while (le != &Vcb->sys_chunks) {
1477             sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1478 
1479             if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1480                 CHUNK_ITEM* chunk_item = sc->data;
1481 
1482                 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1483                     ci = chunk_item;
1484                     offset = sc->key.offset;
1485                     cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1486 
1487                     devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1488                     if (!devices) {
1489                         ERR("out of memory\n");
1490                         return STATUS_INSUFFICIENT_RESOURCES;
1491                     }
1492 
1493                     for (i = 0; i < ci->num_stripes; i++) {
1494                         devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1495                     }
1496 
1497                     break;
1498                 }
1499             }
1500 
1501             le = le->Flink;
1502         }
1503 
1504         if (!ci) {
1505             ERR("could not find chunk for %I64x in bootstrap\n", addr);
1506             return STATUS_INTERNAL_ERROR;
1507         }
1508 
1509         if (pc)
1510             *pc = NULL;
1511     }
1512 
1513     if (ci->type & BLOCK_FLAG_DUPLICATE) {
1514         type = BLOCK_FLAG_DUPLICATE;
1515         allowed_missing = ci->num_stripes - 1;
1516     } else if (ci->type & BLOCK_FLAG_RAID0) {
1517         type = BLOCK_FLAG_RAID0;
1518         allowed_missing = 0;
1519     } else if (ci->type & BLOCK_FLAG_RAID1) {
1520         type = BLOCK_FLAG_DUPLICATE;
1521         allowed_missing = 1;
1522     } else if (ci->type & BLOCK_FLAG_RAID10) {
1523         type = BLOCK_FLAG_RAID10;
1524         allowed_missing = 1;
1525     } else if (ci->type & BLOCK_FLAG_RAID5) {
1526         type = BLOCK_FLAG_RAID5;
1527         allowed_missing = 1;
1528     } else if (ci->type & BLOCK_FLAG_RAID6) {
1529         type = BLOCK_FLAG_RAID6;
1530         allowed_missing = 2;
1531     } else { // SINGLE
1532         type = BLOCK_FLAG_DUPLICATE;
1533         allowed_missing = 0;
1534     }
1535 
1536     cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1537 
1538     RtlZeroMemory(&context, sizeof(read_data_context));
1539     KeInitializeEvent(&context.Event, NotificationEvent, false);
1540 
1541     context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1542     if (!context.stripes) {
1543         ERR("out of memory\n");
1544         return STATUS_INSUFFICIENT_RESOURCES;
1545     }
1546 
1547     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1548         get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1549         chunk_lock_range(Vcb, c, lockaddr, locklen);
1550     }
1551 
1552     RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1553 
1554     context.buflen = length;
1555     context.num_stripes = ci->num_stripes;
1556     context.stripes_left = context.num_stripes;
1557     context.sector_size = Vcb->superblock.sector_size;
1558     context.csum = csum;
1559     context.tree = is_tree;
1560     context.type = type;
1561 
1562     if (type == BLOCK_FLAG_RAID0) {
1563         uint64_t startoff, endoff;
1564         uint16_t endoffstripe, stripe;
1565         uint32_t *stripeoff, pos;
1566         PMDL master_mdl;
1567         PFN_NUMBER* pfns;
1568 
1569         // FIXME - test this still works if page size isn't the same as sector size
1570 
1571         // This relies on the fact that MDLs are followed in memory by the page frame numbers,
1572         // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1573         // data for you without doing a memcpy yourself.
1574         // MDLs are officially opaque, so this might very well break in future versions of Windows.
1575 
1576         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1577         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1578 
1579         if (file_read) {
1580             // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1581             // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1582             // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1583 
1584             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1585 
1586             if (!context.va) {
1587                 ERR("out of memory\n");
1588                 Status = STATUS_INSUFFICIENT_RESOURCES;
1589                 goto exit;
1590             }
1591         } else
1592             context.va = buf;
1593 
1594         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1595         if (!master_mdl) {
1596             ERR("out of memory\n");
1597             Status = STATUS_INSUFFICIENT_RESOURCES;
1598             goto exit;
1599         }
1600 
1601         Status = STATUS_SUCCESS;
1602 
1603         _SEH2_TRY {
1604             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1605         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1606             Status = _SEH2_GetExceptionCode();
1607         } _SEH2_END;
1608 
1609         if (!NT_SUCCESS(Status)) {
1610             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1611             IoFreeMdl(master_mdl);
1612             goto exit;
1613         }
1614 
1615         pfns = (PFN_NUMBER*)(master_mdl + 1);
1616 
1617         for (i = 0; i < ci->num_stripes; i++) {
1618             if (startoffstripe > i)
1619                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1620             else if (startoffstripe == i)
1621                 context.stripes[i].stripestart = startoff;
1622             else
1623                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1624 
1625             if (endoffstripe > i)
1626                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1627             else if (endoffstripe == i)
1628                 context.stripes[i].stripeend = endoff + 1;
1629             else
1630                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1631 
1632             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1633                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1634 
1635                 if (!context.stripes[i].mdl) {
1636                     ERR("IoAllocateMdl failed\n");
1637                     MmUnlockPages(master_mdl);
1638                     IoFreeMdl(master_mdl);
1639                     Status = STATUS_INSUFFICIENT_RESOURCES;
1640                     goto exit;
1641                 }
1642             }
1643         }
1644 
1645         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1646         if (!stripeoff) {
1647             ERR("out of memory\n");
1648             MmUnlockPages(master_mdl);
1649             IoFreeMdl(master_mdl);
1650             Status = STATUS_INSUFFICIENT_RESOURCES;
1651             goto exit;
1652         }
1653 
1654         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1655 
1656         pos = 0;
1657         stripe = startoffstripe;
1658         while (pos < length) {
1659             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1660 
1661             if (pos == 0) {
1662                 uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1663 
1664                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1665 
1666                 stripeoff[stripe] += readlen;
1667                 pos += readlen;
1668             } else if (length - pos < ci->stripe_length) {
1669                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1670 
1671                 pos = length;
1672             } else {
1673                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1674 
1675                 stripeoff[stripe] += (uint32_t)ci->stripe_length;
1676                 pos += (uint32_t)ci->stripe_length;
1677             }
1678 
1679             stripe = (stripe + 1) % ci->num_stripes;
1680         }
1681 
1682         MmUnlockPages(master_mdl);
1683         IoFreeMdl(master_mdl);
1684 
1685         ExFreePool(stripeoff);
1686     } else if (type == BLOCK_FLAG_RAID10) {
1687         uint64_t startoff, endoff;
1688         uint16_t endoffstripe, j, stripe;
1689         ULONG orig_ls;
1690         PMDL master_mdl;
1691         PFN_NUMBER* pfns;
1692         uint32_t* stripeoff, pos;
1693         read_data_stripe** stripes;
1694 
1695         if (c)
1696             orig_ls = c->last_stripe;
1697         else
1698             orig_ls = 0;
1699 
1700         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1701         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1702 
1703         if ((ci->num_stripes % ci->sub_stripes) != 0) {
1704             ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1705             Status = STATUS_INTERNAL_ERROR;
1706             goto exit;
1707         }
1708 
1709         if (file_read) {
1710             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1711 
1712             if (!context.va) {
1713                 ERR("out of memory\n");
1714                 Status = STATUS_INSUFFICIENT_RESOURCES;
1715                 goto exit;
1716             }
1717         } else
1718             context.va = buf;
1719 
1720         context.firstoff = (uint16_t)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1721         context.startoffstripe = startoffstripe;
1722         context.sectors_per_stripe = (uint16_t)(ci->stripe_length / Vcb->superblock.sector_size);
1723 
1724         startoffstripe *= ci->sub_stripes;
1725         endoffstripe *= ci->sub_stripes;
1726 
1727         if (c)
1728             c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1729 
1730         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1731         if (!master_mdl) {
1732             ERR("out of memory\n");
1733             Status = STATUS_INSUFFICIENT_RESOURCES;
1734             goto exit;
1735         }
1736 
1737         Status = STATUS_SUCCESS;
1738 
1739         _SEH2_TRY {
1740             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1741         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1742             Status = _SEH2_GetExceptionCode();
1743         } _SEH2_END;
1744 
1745         if (!NT_SUCCESS(Status)) {
1746             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1747             IoFreeMdl(master_mdl);
1748             goto exit;
1749         }
1750 
1751         pfns = (PFN_NUMBER*)(master_mdl + 1);
1752 
1753         stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1754         if (!stripes) {
1755             ERR("out of memory\n");
1756             MmUnlockPages(master_mdl);
1757             IoFreeMdl(master_mdl);
1758             Status = STATUS_INSUFFICIENT_RESOURCES;
1759             goto exit;
1760         }
1761 
1762         RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1763 
1764         for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1765             uint64_t sstart, send;
1766             bool stripeset = false;
1767 
1768             if (startoffstripe > i)
1769                 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1770             else if (startoffstripe == i)
1771                 sstart = startoff;
1772             else
1773                 sstart = startoff - (startoff % ci->stripe_length);
1774 
1775             if (endoffstripe > i)
1776                 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1777             else if (endoffstripe == i)
1778                 send = endoff + 1;
1779             else
1780                 send = endoff - (endoff % ci->stripe_length);
1781 
1782             for (j = 0; j < ci->sub_stripes; j++) {
1783                 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1784                     context.stripes[i+j].stripestart = sstart;
1785                     context.stripes[i+j].stripeend = send;
1786                     stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1787 
1788                     if (sstart != send) {
1789                         context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1790 
1791                         if (!context.stripes[i+j].mdl) {
1792                             ERR("IoAllocateMdl failed\n");
1793                             MmUnlockPages(master_mdl);
1794                             IoFreeMdl(master_mdl);
1795                             Status = STATUS_INSUFFICIENT_RESOURCES;
1796                             goto exit;
1797                         }
1798                     }
1799 
1800                     stripeset = true;
1801                 } else
1802                     context.stripes[i+j].status = ReadDataStatus_Skip;
1803             }
1804 
1805             if (!stripeset) {
1806                 for (j = 0; j < ci->sub_stripes; j++) {
1807                     if (devices[i+j] && devices[i+j]->devobj) {
1808                         context.stripes[i+j].stripestart = sstart;
1809                         context.stripes[i+j].stripeend = send;
1810                         context.stripes[i+j].status = ReadDataStatus_Pending;
1811                         stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1812 
1813                         if (sstart != send) {
1814                             context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1815 
1816                             if (!context.stripes[i+j].mdl) {
1817                                 ERR("IoAllocateMdl failed\n");
1818                                 MmUnlockPages(master_mdl);
1819                                 IoFreeMdl(master_mdl);
1820                                 Status = STATUS_INSUFFICIENT_RESOURCES;
1821                                 goto exit;
1822                             }
1823                         }
1824 
1825                         stripeset = true;
1826                         break;
1827                     }
1828                 }
1829 
1830                 if (!stripeset) {
1831                     ERR("could not find stripe to read\n");
1832                     Status = STATUS_DEVICE_NOT_READY;
1833                     goto exit;
1834                 }
1835             }
1836         }
1837 
1838         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1839         if (!stripeoff) {
1840             ERR("out of memory\n");
1841             MmUnlockPages(master_mdl);
1842             IoFreeMdl(master_mdl);
1843             Status = STATUS_INSUFFICIENT_RESOURCES;
1844             goto exit;
1845         }
1846 
1847         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1848 
1849         pos = 0;
1850         stripe = startoffstripe / ci->sub_stripes;
1851         while (pos < length) {
1852             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1853 
1854             if (pos == 0) {
1855                 uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1856                                              ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1857 
1858                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1859 
1860                 stripeoff[stripe] += readlen;
1861                 pos += readlen;
1862             } else if (length - pos < ci->stripe_length) {
1863                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1864 
1865                 pos = length;
1866             } else {
1867                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1868 
1869                 stripeoff[stripe] += (ULONG)ci->stripe_length;
1870                 pos += (ULONG)ci->stripe_length;
1871             }
1872 
1873             stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1874         }
1875 
1876         MmUnlockPages(master_mdl);
1877         IoFreeMdl(master_mdl);
1878 
1879         ExFreePool(stripeoff);
1880         ExFreePool(stripes);
1881     } else if (type == BLOCK_FLAG_DUPLICATE) {
1882         uint64_t orig_ls;
1883 
1884         if (c)
1885             orig_ls = i = c->last_stripe;
1886         else
1887             orig_ls = i = 0;
1888 
1889         while (!devices[i] || !devices[i]->devobj) {
1890             i = (i + 1) % ci->num_stripes;
1891 
1892             if (i == orig_ls) {
1893                 ERR("no devices available to service request\n");
1894                 Status = STATUS_DEVICE_NOT_READY;
1895                 goto exit;
1896             }
1897         }
1898 
1899         if (c)
1900             c->last_stripe = (i + 1) % ci->num_stripes;
1901 
1902         context.stripes[i].stripestart = addr - offset;
1903         context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1904 
1905         if (file_read) {
1906             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1907 
1908             if (!context.va) {
1909                 ERR("out of memory\n");
1910                 Status = STATUS_INSUFFICIENT_RESOURCES;
1911                 goto exit;
1912             }
1913 
1914             context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1915             if (!context.stripes[i].mdl) {
1916                 ERR("IoAllocateMdl failed\n");
1917                 Status = STATUS_INSUFFICIENT_RESOURCES;
1918                 goto exit;
1919             }
1920 
1921             MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1922         } else {
1923             context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
1924 
1925             if (!context.stripes[i].mdl) {
1926                 ERR("IoAllocateMdl failed\n");
1927                 Status = STATUS_INSUFFICIENT_RESOURCES;
1928                 goto exit;
1929             }
1930 
1931             Status = STATUS_SUCCESS;
1932 
1933             _SEH2_TRY {
1934                 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1935             } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1936                 Status = _SEH2_GetExceptionCode();
1937             } _SEH2_END;
1938 
1939             if (!NT_SUCCESS(Status)) {
1940                 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1941                 goto exit;
1942             }
1943         }
1944     } else if (type == BLOCK_FLAG_RAID5) {
1945         uint64_t startoff, endoff;
1946         uint16_t endoffstripe, parity;
1947         uint32_t *stripeoff, pos;
1948         PMDL master_mdl;
1949         PFN_NUMBER *pfns, dummy;
1950         bool need_dummy = false;
1951 
1952         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1953         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1954 
1955         if (file_read) {
1956             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1957 
1958             if (!context.va) {
1959                 ERR("out of memory\n");
1960                 Status = STATUS_INSUFFICIENT_RESOURCES;
1961                 goto exit;
1962             }
1963         } else
1964             context.va = buf;
1965 
1966         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1967         if (!master_mdl) {
1968             ERR("out of memory\n");
1969             Status = STATUS_INSUFFICIENT_RESOURCES;
1970             goto exit;
1971         }
1972 
1973         Status = STATUS_SUCCESS;
1974 
1975         _SEH2_TRY {
1976             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1977         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1978             Status = _SEH2_GetExceptionCode();
1979         } _SEH2_END;
1980 
1981         if (!NT_SUCCESS(Status)) {
1982             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1983             IoFreeMdl(master_mdl);
1984             goto exit;
1985         }
1986 
1987         pfns = (PFN_NUMBER*)(master_mdl + 1);
1988 
1989         pos = 0;
1990         while (pos < length) {
1991             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
1992 
1993             if (pos == 0) {
1994                 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
1995                 ULONG skip, readlen;
1996 
1997                 i = startoffstripe;
1998                 while (stripe != parity) {
1999                     if (i == startoffstripe) {
2000                         readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2001 
2002                         context.stripes[stripe].stripestart = startoff;
2003                         context.stripes[stripe].stripeend = startoff + readlen;
2004 
2005                         pos += readlen;
2006 
2007                         if (pos == length)
2008                             break;
2009                     } else {
2010                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2011 
2012                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2013                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2014 
2015                         pos += readlen;
2016 
2017                         if (pos == length)
2018                             break;
2019                     }
2020 
2021                     i++;
2022                     stripe = (stripe + 1) % ci->num_stripes;
2023                 }
2024 
2025                 if (pos == length)
2026                     break;
2027 
2028                 for (i = 0; i < startoffstripe; i++) {
2029                     uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2030 
2031                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2032                 }
2033 
2034                 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2035 
2036                 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2037                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2038 
2039                     for (i = 0; i < ci->num_stripes; i++) {
2040                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2041                     }
2042 
2043                     pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2044                     need_dummy = true;
2045                 }
2046             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2047                 for (i = 0; i < ci->num_stripes; i++) {
2048                     context.stripes[i].stripeend += ci->stripe_length;
2049                 }
2050 
2051                 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2052                 need_dummy = true;
2053             } else {
2054                 uint16_t stripe = (parity + 1) % ci->num_stripes;
2055 
2056                 i = 0;
2057                 while (stripe != parity) {
2058                     if (endoffstripe == i) {
2059                         context.stripes[stripe].stripeend = endoff + 1;
2060                         break;
2061                     } else if (endoffstripe > i)
2062                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2063 
2064                     i++;
2065                     stripe = (stripe + 1) % ci->num_stripes;
2066                 }
2067 
2068                 break;
2069             }
2070         }
2071 
2072         for (i = 0; i < ci->num_stripes; i++) {
2073             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2074                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2075                                                        false, false, NULL);
2076 
2077                 if (!context.stripes[i].mdl) {
2078                     ERR("IoAllocateMdl failed\n");
2079                     MmUnlockPages(master_mdl);
2080                     IoFreeMdl(master_mdl);
2081                     Status = STATUS_INSUFFICIENT_RESOURCES;
2082                     goto exit;
2083                 }
2084             }
2085         }
2086 
2087         if (need_dummy) {
2088             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2089             if (!dummypage) {
2090                 ERR("out of memory\n");
2091                 MmUnlockPages(master_mdl);
2092                 IoFreeMdl(master_mdl);
2093                 Status = STATUS_INSUFFICIENT_RESOURCES;
2094                 goto exit;
2095             }
2096 
2097             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2098             if (!dummy_mdl) {
2099                 ERR("IoAllocateMdl failed\n");
2100                 MmUnlockPages(master_mdl);
2101                 IoFreeMdl(master_mdl);
2102                 Status = STATUS_INSUFFICIENT_RESOURCES;
2103                 goto exit;
2104             }
2105 
2106             MmBuildMdlForNonPagedPool(dummy_mdl);
2107 
2108             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2109         }
2110 
2111         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2112         if (!stripeoff) {
2113             ERR("out of memory\n");
2114             MmUnlockPages(master_mdl);
2115             IoFreeMdl(master_mdl);
2116             Status = STATUS_INSUFFICIENT_RESOURCES;
2117             goto exit;
2118         }
2119 
2120         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2121 
2122         pos = 0;
2123 
2124         while (pos < length) {
2125             PFN_NUMBER* stripe_pfns;
2126 
2127             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2128 
2129             if (pos == 0) {
2130                 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2131                 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2132                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2133 
2134                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2135 
2136                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2137 
2138                 stripeoff[stripe] = readlen;
2139                 pos += readlen;
2140 
2141                 stripe = (stripe + 1) % ci->num_stripes;
2142 
2143                 while (stripe != parity) {
2144                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2145                     readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2146 
2147                     if (readlen == 0)
2148                         break;
2149 
2150                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2151 
2152                     stripeoff[stripe] = readlen;
2153                     pos += readlen;
2154 
2155                     stripe = (stripe + 1) % ci->num_stripes;
2156                 }
2157             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2158                 uint16_t stripe = (parity + 1) % ci->num_stripes;
2159                 ULONG k;
2160 
2161                 while (stripe != parity) {
2162                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2163 
2164                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2165 
2166                     stripeoff[stripe] += (uint32_t)ci->stripe_length;
2167                     pos += (uint32_t)ci->stripe_length;
2168 
2169                     stripe = (stripe + 1) % ci->num_stripes;
2170                 }
2171 
2172                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2173 
2174                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2175                     stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2176                     stripeoff[parity] += PAGE_SIZE;
2177                 }
2178             } else {
2179                 uint16_t stripe = (parity + 1) % ci->num_stripes;
2180                 uint32_t readlen;
2181 
2182                 while (pos < length) {
2183                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2184                     readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2185 
2186                     if (readlen == 0)
2187                         break;
2188 
2189                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2190 
2191                     stripeoff[stripe] += readlen;
2192                     pos += readlen;
2193 
2194                     stripe = (stripe + 1) % ci->num_stripes;
2195                 }
2196             }
2197         }
2198 
2199         MmUnlockPages(master_mdl);
2200         IoFreeMdl(master_mdl);
2201 
2202         ExFreePool(stripeoff);
2203     } else if (type == BLOCK_FLAG_RAID6) {
2204         uint64_t startoff, endoff;
2205         uint16_t endoffstripe, parity1;
2206         uint32_t *stripeoff, pos;
2207         PMDL master_mdl;
2208         PFN_NUMBER *pfns, dummy;
2209         bool need_dummy = false;
2210 
2211         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2212         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2213 
2214         if (file_read) {
2215             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2216 
2217             if (!context.va) {
2218                 ERR("out of memory\n");
2219                 Status = STATUS_INSUFFICIENT_RESOURCES;
2220                 goto exit;
2221             }
2222         } else
2223             context.va = buf;
2224 
2225         master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2226         if (!master_mdl) {
2227             ERR("out of memory\n");
2228             Status = STATUS_INSUFFICIENT_RESOURCES;
2229             goto exit;
2230         }
2231 
2232         Status = STATUS_SUCCESS;
2233 
2234         _SEH2_TRY {
2235             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2236         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2237             Status = _SEH2_GetExceptionCode();
2238         } _SEH2_END;
2239 
2240         if (!NT_SUCCESS(Status)) {
2241             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2242             IoFreeMdl(master_mdl);
2243             goto exit;
2244         }
2245 
2246         pfns = (PFN_NUMBER*)(master_mdl + 1);
2247 
2248         pos = 0;
2249         while (pos < length) {
2250             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2251 
2252             if (pos == 0) {
2253                 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2254                 ULONG skip, readlen;
2255 
2256                 i = startoffstripe;
2257                 while (stripe != parity1) {
2258                     if (i == startoffstripe) {
2259                         readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2260 
2261                         context.stripes[stripe].stripestart = startoff;
2262                         context.stripes[stripe].stripeend = startoff + readlen;
2263 
2264                         pos += readlen;
2265 
2266                         if (pos == length)
2267                             break;
2268                     } else {
2269                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2270 
2271                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2272                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2273 
2274                         pos += readlen;
2275 
2276                         if (pos == length)
2277                             break;
2278                     }
2279 
2280                     i++;
2281                     stripe = (stripe + 1) % ci->num_stripes;
2282                 }
2283 
2284                 if (pos == length)
2285                     break;
2286 
2287                 for (i = 0; i < startoffstripe; i++) {
2288                     uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2289 
2290                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2291                 }
2292 
2293                 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2294 
2295                 parity2 = (parity1 + 1) % ci->num_stripes;
2296                 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2297 
2298                 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2299                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2300 
2301                     for (i = 0; i < ci->num_stripes; i++) {
2302                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2303                     }
2304 
2305                     pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2306                     need_dummy = true;
2307                 }
2308             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2309                 for (i = 0; i < ci->num_stripes; i++) {
2310                     context.stripes[i].stripeend += ci->stripe_length;
2311                 }
2312 
2313                 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2314                 need_dummy = true;
2315             } else {
2316                 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2317 
2318                 i = 0;
2319                 while (stripe != parity1) {
2320                     if (endoffstripe == i) {
2321                         context.stripes[stripe].stripeend = endoff + 1;
2322                         break;
2323                     } else if (endoffstripe > i)
2324                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2325 
2326                     i++;
2327                     stripe = (stripe + 1) % ci->num_stripes;
2328                 }
2329 
2330                 break;
2331             }
2332         }
2333 
2334         for (i = 0; i < ci->num_stripes; i++) {
2335             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2336                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2337 
2338                 if (!context.stripes[i].mdl) {
2339                     ERR("IoAllocateMdl failed\n");
2340                     MmUnlockPages(master_mdl);
2341                     IoFreeMdl(master_mdl);
2342                     Status = STATUS_INSUFFICIENT_RESOURCES;
2343                     goto exit;
2344                 }
2345             }
2346         }
2347 
2348         if (need_dummy) {
2349             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2350             if (!dummypage) {
2351                 ERR("out of memory\n");
2352                 MmUnlockPages(master_mdl);
2353                 IoFreeMdl(master_mdl);
2354                 Status = STATUS_INSUFFICIENT_RESOURCES;
2355                 goto exit;
2356             }
2357 
2358             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2359             if (!dummy_mdl) {
2360                 ERR("IoAllocateMdl failed\n");
2361                 MmUnlockPages(master_mdl);
2362                 IoFreeMdl(master_mdl);
2363                 Status = STATUS_INSUFFICIENT_RESOURCES;
2364                 goto exit;
2365             }
2366 
2367             MmBuildMdlForNonPagedPool(dummy_mdl);
2368 
2369             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2370         }
2371 
2372         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2373         if (!stripeoff) {
2374             ERR("out of memory\n");
2375             MmUnlockPages(master_mdl);
2376             IoFreeMdl(master_mdl);
2377             Status = STATUS_INSUFFICIENT_RESOURCES;
2378             goto exit;
2379         }
2380 
2381         RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2382 
2383         pos = 0;
2384 
2385         while (pos < length) {
2386             PFN_NUMBER* stripe_pfns;
2387 
2388             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2389 
2390             if (pos == 0) {
2391                 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2392                 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2393                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2394 
2395                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2396 
2397                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2398 
2399                 stripeoff[stripe] = readlen;
2400                 pos += readlen;
2401 
2402                 stripe = (stripe + 1) % ci->num_stripes;
2403 
2404                 while (stripe != parity1) {
2405                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2406                     readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2407 
2408                     if (readlen == 0)
2409                         break;
2410 
2411                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2412 
2413                     stripeoff[stripe] = readlen;
2414                     pos += readlen;
2415 
2416                     stripe = (stripe + 1) % ci->num_stripes;
2417                 }
2418             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2419                 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2420                 uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2421                 ULONG k;
2422 
2423                 while (stripe != parity1) {
2424                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2425 
2426                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2427 
2428                     stripeoff[stripe] += (uint32_t)ci->stripe_length;
2429                     pos += (uint32_t)ci->stripe_length;
2430 
2431                     stripe = (stripe + 1) % ci->num_stripes;
2432                 }
2433 
2434                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2435 
2436                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2437                     stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2438                     stripeoff[parity1] += PAGE_SIZE;
2439                 }
2440 
2441                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2442 
2443                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2444                     stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2445                     stripeoff[parity2] += PAGE_SIZE;
2446                 }
2447             } else {
2448                 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2449                 uint32_t readlen;
2450 
2451                 while (pos < length) {
2452                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2453                     readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2454 
2455                     if (readlen == 0)
2456                         break;
2457 
2458                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2459 
2460                     stripeoff[stripe] += readlen;
2461                     pos += readlen;
2462 
2463                     stripe = (stripe + 1) % ci->num_stripes;
2464                 }
2465             }
2466         }
2467 
2468         MmUnlockPages(master_mdl);
2469         IoFreeMdl(master_mdl);
2470 
2471         ExFreePool(stripeoff);
2472     }
2473 
2474     context.address = addr;
2475 
2476     for (i = 0; i < ci->num_stripes; i++) {
2477         if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2478             context.stripes[i].status = ReadDataStatus_MissingDevice;
2479             context.stripes_left--;
2480 
2481             if (!devices[i] || !devices[i]->devobj)
2482                 missing_devices++;
2483         }
2484     }
2485 
2486     if (missing_devices > allowed_missing) {
2487         ERR("not enough devices to service request (%u missing)\n", missing_devices);
2488         Status = STATUS_UNEXPECTED_IO_ERROR;
2489         goto exit;
2490     }
2491 
2492     for (i = 0; i < ci->num_stripes; i++) {
2493         PIO_STACK_LOCATION IrpSp;
2494 
2495         if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2496             context.stripes[i].context = (struct read_data_context*)&context;
2497 
2498             if (type == BLOCK_FLAG_RAID10) {
2499                 context.stripes[i].stripenum = i / ci->sub_stripes;
2500             }
2501 
2502             if (!Irp) {
2503                 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2504 
2505                 if (!context.stripes[i].Irp) {
2506                     ERR("IoAllocateIrp failed\n");
2507                     Status = STATUS_INSUFFICIENT_RESOURCES;
2508                     goto exit;
2509                 }
2510             } else {
2511                 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2512 
2513                 if (!context.stripes[i].Irp) {
2514                     ERR("IoMakeAssociatedIrp failed\n");
2515                     Status = STATUS_INSUFFICIENT_RESOURCES;
2516                     goto exit;
2517                 }
2518             }
2519 
2520             IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2521             IrpSp->MajorFunction = IRP_MJ_READ;
2522             IrpSp->MinorFunction = IRP_MN_NORMAL;
2523             IrpSp->FileObject = devices[i]->fileobj;
2524 
2525             if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2526                 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2527                 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2528                     ERR("out of memory\n");
2529                     Status = STATUS_INSUFFICIENT_RESOURCES;
2530                     goto exit;
2531                 }
2532 
2533                 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2534 
2535                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2536             } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2537                 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2538             else
2539                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2540 
2541             IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2542             IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2543 
2544             total_reading += IrpSp->Parameters.Read.Length;
2545 
2546             context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2547 
2548             IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2549 
2550             context.stripes[i].status = ReadDataStatus_Pending;
2551         }
2552     }
2553 
2554     need_to_wait = false;
2555     for (i = 0; i < ci->num_stripes; i++) {
2556         if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2557             IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2558             need_to_wait = true;
2559         }
2560     }
2561 
2562     if (need_to_wait)
2563         KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2564 
2565     if (diskacc)
2566         fFsRtlUpdateDiskCounters(total_reading, 0);
2567 
2568     // check if any of the devices return a "user-induced" error
2569 
2570     for (i = 0; i < ci->num_stripes; i++) {
2571         if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2572             Status = context.stripes[i].iosb.Status;
2573             goto exit;
2574         }
2575     }
2576 
2577     if (type == BLOCK_FLAG_RAID0) {
2578         Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2579         if (!NT_SUCCESS(Status)) {
2580             ERR("read_data_raid0 returned %08x\n", Status);
2581 
2582             if (file_read)
2583                 ExFreePool(context.va);
2584 
2585             goto exit;
2586         }
2587 
2588         if (file_read) {
2589             RtlCopyMemory(buf, context.va, length);
2590             ExFreePool(context.va);
2591         }
2592     } else if (type == BLOCK_FLAG_RAID10) {
2593         Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2594 
2595         if (!NT_SUCCESS(Status)) {
2596             ERR("read_data_raid10 returned %08x\n", Status);
2597 
2598             if (file_read)
2599                 ExFreePool(context.va);
2600 
2601             goto exit;
2602         }
2603 
2604         if (file_read) {
2605             RtlCopyMemory(buf, context.va, length);
2606             ExFreePool(context.va);
2607         }
2608     } else if (type == BLOCK_FLAG_DUPLICATE) {
2609         Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2610         if (!NT_SUCCESS(Status)) {
2611             ERR("read_data_dup returned %08x\n", Status);
2612 
2613             if (file_read)
2614                 ExFreePool(context.va);
2615 
2616             goto exit;
2617         }
2618 
2619         if (file_read) {
2620             RtlCopyMemory(buf, context.va, length);
2621             ExFreePool(context.va);
2622         }
2623     } else if (type == BLOCK_FLAG_RAID5) {
2624         Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2625         if (!NT_SUCCESS(Status)) {
2626             ERR("read_data_raid5 returned %08x\n", Status);
2627 
2628             if (file_read)
2629                 ExFreePool(context.va);
2630 
2631             goto exit;
2632         }
2633 
2634         if (file_read) {
2635             RtlCopyMemory(buf, context.va, length);
2636             ExFreePool(context.va);
2637         }
2638     } else if (type == BLOCK_FLAG_RAID6) {
2639         Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2640         if (!NT_SUCCESS(Status)) {
2641             ERR("read_data_raid6 returned %08x\n", Status);
2642 
2643             if (file_read)
2644                 ExFreePool(context.va);
2645 
2646             goto exit;
2647         }
2648 
2649         if (file_read) {
2650             RtlCopyMemory(buf, context.va, length);
2651             ExFreePool(context.va);
2652         }
2653     }
2654 
2655 exit:
2656     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2657         chunk_unlock_range(Vcb, c, lockaddr, locklen);
2658 
2659     if (dummy_mdl)
2660         IoFreeMdl(dummy_mdl);
2661 
2662     if (dummypage)
2663         ExFreePool(dummypage);
2664 
2665     for (i = 0; i < ci->num_stripes; i++) {
2666         if (context.stripes[i].mdl) {
2667             if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2668                 MmUnlockPages(context.stripes[i].mdl);
2669 
2670             IoFreeMdl(context.stripes[i].mdl);
2671         }
2672 
2673         if (context.stripes[i].Irp)
2674             IoFreeIrp(context.stripes[i].Irp);
2675     }
2676 
2677     ExFreePool(context.stripes);
2678 
2679     if (!Vcb->log_to_phys_loaded)
2680         ExFreePool(devices);
2681 
2682     return Status;
2683 }
2684 
2685 NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
2686     ULONG readlen;
2687 
2688     TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2689 
2690     if (pbr) *pbr = 0;
2691 
2692     if (start >= fcb->adsdata.Length) {
2693         TRACE("tried to read beyond end of stream\n");
2694         return STATUS_END_OF_FILE;
2695     }
2696 
2697     if (length == 0) {
2698         WARN("tried to read zero bytes\n");
2699         return STATUS_SUCCESS;
2700     }
2701 
2702     if (start + length < fcb->adsdata.Length)
2703         readlen = length;
2704     else
2705         readlen = fcb->adsdata.Length - (ULONG)start;
2706 
2707     if (readlen > 0)
2708         RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen);
2709 
2710     if (pbr) *pbr = readlen;
2711 
2712     return STATUS_SUCCESS;
2713 }
2714 
2715 NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
2716     NTSTATUS Status;
2717     EXTENT_DATA* ed;
2718     uint32_t bytes_read = 0;
2719     uint64_t last_end;
2720     LIST_ENTRY* le;
2721     POOL_TYPE pool_type;
2722 
2723     TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2724 
2725     if (pbr)
2726         *pbr = 0;
2727 
2728     if (start >= fcb->inode_item.st_size) {
2729         WARN("Tried to read beyond end of file\n");
2730         Status = STATUS_END_OF_FILE;
2731         goto exit;
2732     }
2733 
2734     pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2735 
2736     le = fcb->extents.Flink;
2737 
2738     last_end = start;
2739 
2740     while (le != &fcb->extents) {
2741         uint64_t len;
2742         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2743         EXTENT_DATA2* ed2;
2744 
2745         if (!ext->ignore) {
2746             ed = &ext->extent_data;
2747 
2748             ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2749 
2750             len = ed2 ? ed2->num_bytes : ed->decoded_size;
2751 
2752             if (ext->offset + len <= start) {
2753                 last_end = ext->offset + len;
2754                 goto nextitem;
2755             }
2756 
2757             if (ext->offset > last_end && ext->offset > start + bytes_read) {
2758                 uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2759 
2760                 RtlZeroMemory(data + bytes_read, read);
2761                 bytes_read += read;
2762                 length -= read;
2763             }
2764 
2765             if (length == 0 || ext->offset > start + bytes_read + length)
2766                 break;
2767 
2768             if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2769                 WARN("Encryption not supported\n");
2770                 Status = STATUS_NOT_IMPLEMENTED;
2771                 goto exit;
2772             }
2773 
2774             if (ed->encoding != BTRFS_ENCODING_NONE) {
2775                 WARN("Other encodings not supported\n");
2776                 Status = STATUS_NOT_IMPLEMENTED;
2777                 goto exit;
2778             }
2779 
2780             switch (ed->type) {
2781                 case EXTENT_TYPE_INLINE:
2782                 {
2783                     uint64_t off = start + bytes_read - ext->offset;
2784                     uint32_t read;
2785 
2786                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2787                         read = (uint32_t)min(min(len, ext->datalen) - off, length);
2788 
2789                         RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2790                     } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
2791                         uint8_t* decomp;
2792                         bool decomp_alloc;
2793                         uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2794 
2795                         if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2796                             ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2797                             Status = STATUS_INTERNAL_ERROR;
2798                             goto exit;
2799                         }
2800 
2801                         read = (uint32_t)min(ed->decoded_size - off, length);
2802 
2803                         if (off > 0) {
2804                             decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG);
2805                             if (!decomp) {
2806                                 ERR("out of memory\n");
2807                                 Status = STATUS_INSUFFICIENT_RESOURCES;
2808                                 goto exit;
2809                             }
2810 
2811                             decomp_alloc = true;
2812                         } else {
2813                             decomp = data + bytes_read;
2814                             decomp_alloc = false;
2815                         }
2816 
2817                         if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2818                             Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2819                             if (!NT_SUCCESS(Status)) {
2820                                 ERR("zlib_decompress returned %08x\n", Status);
2821                                 if (decomp_alloc) ExFreePool(decomp);
2822                                 goto exit;
2823                             }
2824                         } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2825                             if (inlen < sizeof(uint32_t)) {
2826                                 ERR("extent data was truncated\n");
2827                                 Status = STATUS_INTERNAL_ERROR;
2828                                 if (decomp_alloc) ExFreePool(decomp);
2829                                 goto exit;
2830                             } else
2831                                 inlen -= sizeof(uint32_t);
2832 
2833                             Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2834                             if (!NT_SUCCESS(Status)) {
2835                                 ERR("lzo_decompress returned %08x\n", Status);
2836                                 if (decomp_alloc) ExFreePool(decomp);
2837                                 goto exit;
2838                             }
2839                         } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2840                             Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2841                             if (!NT_SUCCESS(Status)) {
2842                                 ERR("zstd_decompress returned %08x\n", Status);
2843                                 if (decomp_alloc) ExFreePool(decomp);
2844                                 goto exit;
2845                             }
2846                         }
2847 
2848                         if (decomp_alloc) {
2849                             RtlCopyMemory(data + bytes_read, decomp + off, read);
2850                             ExFreePool(decomp);
2851                         }
2852                     } else {
2853                         ERR("unhandled compression type %x\n", ed->compression);
2854                         Status = STATUS_NOT_IMPLEMENTED;
2855                         goto exit;
2856                     }
2857 
2858                     bytes_read += read;
2859                     length -= read;
2860 
2861                     break;
2862                 }
2863 
2864                 case EXTENT_TYPE_REGULAR:
2865                 {
2866                     uint64_t off = start + bytes_read - ext->offset;
2867                     uint32_t to_read, read;
2868                     uint8_t* buf;
2869                     bool mdl = (Irp && Irp->MdlAddress) ? true : false;
2870                     bool buf_free;
2871                     uint32_t bumpoff = 0, *csum;
2872                     uint64_t addr;
2873                     chunk* c;
2874 
2875                     read = (uint32_t)(len - off);
2876                     if (read > length) read = (uint32_t)length;
2877 
2878                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2879                         addr = ed2->address + ed2->offset + off;
2880                         to_read = (uint32_t)sector_align(read, fcb->Vcb->superblock.sector_size);
2881 
2882                         if (addr % fcb->Vcb->superblock.sector_size > 0) {
2883                             bumpoff = addr % fcb->Vcb->superblock.sector_size;
2884                             addr -= bumpoff;
2885                             to_read = (uint32_t)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2886                         }
2887                     } else {
2888                         addr = ed2->address;
2889                         to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2890                     }
2891 
2892                     if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2893                         length % fcb->Vcb->superblock.sector_size == 0) {
2894                         buf = data + bytes_read;
2895                         buf_free = false;
2896                     } else {
2897                         buf = ExAllocatePoolWithTag(pool_type, to_read, ALLOC_TAG);
2898                         buf_free = true;
2899 
2900                         if (!buf) {
2901                             ERR("out of memory\n");
2902                             Status = STATUS_INSUFFICIENT_RESOURCES;
2903                             goto exit;
2904                         }
2905 
2906                         mdl = false;
2907                     }
2908 
2909                     c = get_chunk_from_address(fcb->Vcb, addr);
2910 
2911                     if (!c) {
2912                         ERR("get_chunk_from_address(%I64x) failed\n", addr);
2913 
2914                         if (buf_free)
2915                             ExFreePool(buf);
2916 
2917                         goto exit;
2918                     }
2919 
2920                     if (ext->csum) {
2921                         if (ed->compression == BTRFS_COMPRESSION_NONE)
2922                             csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2923                         else
2924                             csum = ext->csum;
2925                     } else
2926                         csum = NULL;
2927 
2928                     Status = read_data(fcb->Vcb, addr, to_read, csum, false, buf, c, NULL, Irp, 0, mdl,
2929                                        fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2930                     if (!NT_SUCCESS(Status)) {
2931                         ERR("read_data returned %08x\n", Status);
2932 
2933                         if (buf_free)
2934                             ExFreePool(buf);
2935 
2936                         goto exit;
2937                     }
2938 
2939                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2940                         if (buf_free)
2941                             RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2942                     } else {
2943                         uint8_t *decomp = NULL, *buf2;
2944                         ULONG outlen, inlen, off2;
2945                         uint32_t inpageoff = 0;
2946 
2947                         off2 = (ULONG)(ed2->offset + off);
2948                         buf2 = buf;
2949                         inlen = (ULONG)ed2->size;
2950 
2951                         if (ed->compression == BTRFS_COMPRESSION_LZO) {
2952                             ULONG inoff = sizeof(uint32_t);
2953 
2954                             inlen -= sizeof(uint32_t);
2955 
2956                             // If reading a few sectors in, skip to the interesting bit
2957                             while (off2 > LZO_PAGE_SIZE) {
2958                                 uint32_t partlen;
2959 
2960                                 if (inlen < sizeof(uint32_t))
2961                                     break;
2962 
2963                                 partlen = *(uint32_t*)(buf2 + inoff);
2964 
2965                                 if (partlen < inlen) {
2966                                     off2 -= LZO_PAGE_SIZE;
2967                                     inoff += partlen + sizeof(uint32_t);
2968                                     inlen -= partlen + sizeof(uint32_t);
2969 
2970                                     if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
2971                                         inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
2972                                 } else
2973                                     break;
2974                             }
2975 
2976                             buf2 = &buf2[inoff];
2977                             inpageoff = inoff % LZO_PAGE_SIZE;
2978                         }
2979 
2980                         if (off2 != 0) {
2981                             outlen = off2 + min(read, (uint32_t)(ed2->num_bytes - off));
2982 
2983                             decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
2984                             if (!decomp) {
2985                                 ERR("out of memory\n");
2986                                 ExFreePool(buf);
2987                                 Status = STATUS_INSUFFICIENT_RESOURCES;
2988                                 goto exit;
2989                             }
2990                         } else
2991                             outlen = min(read, (uint32_t)(ed2->num_bytes - off));
2992 
2993                         if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2994                             Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
2995 
2996                             if (!NT_SUCCESS(Status)) {
2997                                 ERR("zlib_decompress returned %08x\n", Status);
2998                                 ExFreePool(buf);
2999 
3000                                 if (decomp)
3001                                     ExFreePool(decomp);
3002 
3003                                 goto exit;
3004                             }
3005                         } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
3006                             Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
3007 
3008                             if (!NT_SUCCESS(Status)) {
3009                                 ERR("lzo_decompress returned %08x\n", Status);
3010                                 ExFreePool(buf);
3011 
3012                                 if (decomp)
3013                                     ExFreePool(decomp);
3014 
3015                                 goto exit;
3016                             }
3017                         } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
3018                             Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
3019 
3020                             if (!NT_SUCCESS(Status)) {
3021                                 ERR("zstd_decompress returned %08x\n", Status);
3022                                 ExFreePool(buf);
3023 
3024                                 if (decomp)
3025                                     ExFreePool(decomp);
3026 
3027                                 goto exit;
3028                             }
3029                         } else {
3030                             ERR("unsupported compression type %x\n", ed->compression);
3031                             Status = STATUS_NOT_SUPPORTED;
3032 
3033                             ExFreePool(buf);
3034 
3035                             if (decomp)
3036                                 ExFreePool(decomp);
3037 
3038                             goto exit;
3039                         }
3040 
3041                         if (decomp) {
3042                             RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3043                             ExFreePool(decomp);
3044                         }
3045                     }
3046 
3047                     if (buf_free)
3048                         ExFreePool(buf);
3049 
3050                     bytes_read += read;
3051                     length -= read;
3052 
3053                     break;
3054                 }
3055 
3056                 case EXTENT_TYPE_PREALLOC:
3057                 {
3058                     uint64_t off = start + bytes_read - ext->offset;
3059                     uint32_t read = (uint32_t)(len - off);
3060 
3061                     if (read > length) read = (uint32_t)length;
3062 
3063                     RtlZeroMemory(data + bytes_read, read);
3064 
3065                     bytes_read += read;
3066                     length -= read;
3067 
3068                     break;
3069                 }
3070 
3071                 default:
3072                     WARN("Unsupported extent data type %u\n", ed->type);
3073                     Status = STATUS_NOT_IMPLEMENTED;
3074                     goto exit;
3075             }
3076 
3077             last_end = ext->offset + len;
3078 
3079             if (length == 0)
3080                 break;
3081         }
3082 
3083 nextitem:
3084         le = le->Flink;
3085     }
3086 
3087     if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3088         uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3089 
3090         RtlZeroMemory(data + bytes_read, read);
3091 
3092         bytes_read += read;
3093         length -= read;
3094     }
3095 
3096     Status = STATUS_SUCCESS;
3097     if (pbr)
3098         *pbr = bytes_read;
3099 
3100 exit:
3101     return Status;
3102 }
3103 
3104 NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
3105     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3106     PFILE_OBJECT FileObject = IrpSp->FileObject;
3107     fcb* fcb = FileObject->FsContext;
3108     uint8_t* data = NULL;
3109     ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3110     uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3111 
3112     *bytes_read = 0;
3113 
3114     if (!fcb || !fcb->Vcb || !fcb->subvol)
3115         return STATUS_INTERNAL_ERROR;
3116 
3117     TRACE("fcb = %p\n", fcb);
3118     TRACE("offset = %I64x, length = %x\n", start, length);
3119     TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");
3120 
3121     if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3122         return STATUS_INVALID_DEVICE_REQUEST;
3123 
3124     if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3125         WARN("tried to read locked region\n");
3126         return STATUS_FILE_LOCK_CONFLICT;
3127     }
3128 
3129     if (length == 0) {
3130         TRACE("tried to read zero bytes\n");
3131         return STATUS_SUCCESS;
3132     }
3133 
3134     if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
3135         TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
3136         return STATUS_END_OF_FILE;
3137     }
3138 
3139     TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3140 
3141     if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
3142         data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3143 
3144         if (Irp->MdlAddress && !data) {
3145             ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3146             return STATUS_INSUFFICIENT_RESOURCES;
3147         }
3148 
3149         if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3150             length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3151             RtlZeroMemory(data, length);
3152             Irp->IoStatus.Information = *bytes_read = length;
3153             return STATUS_SUCCESS;
3154         }
3155 
3156         if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3157             addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3158             RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3159             length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3160         }
3161     }
3162 
3163     if (!(Irp->Flags & IRP_NOCACHE)) {
3164         NTSTATUS Status = STATUS_SUCCESS;
3165 
3166         _SEH2_TRY {
3167             if (!FileObject->PrivateCacheMap) {
3168                 CC_FILE_SIZES ccfs;
3169 
3170                 ccfs.AllocationSize = fcb->Header.AllocationSize;
3171                 ccfs.FileSize = fcb->Header.FileSize;
3172                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3173 
3174                 init_file_cache(FileObject, &ccfs);
3175             }
3176 
3177             if (IrpSp->MinorFunction & IRP_MN_MDL) {
3178                 CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3179             } else {
3180                 if (fCcCopyReadEx) {
3181                     TRACE("CcCopyReadEx(%p, %I64x, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3182                           length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3183                     TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3184                     if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3185                         TRACE("CcCopyReadEx could not wait\n");
3186 
3187                         IoMarkIrpPending(Irp);
3188                         return STATUS_PENDING;
3189                     }
3190                     TRACE("CcCopyReadEx finished\n");
3191                 } else {
3192                     TRACE("CcCopyRead(%p, %I64x, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3193                     TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3194                     if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3195                         TRACE("CcCopyRead could not wait\n");
3196 
3197                         IoMarkIrpPending(Irp);
3198                         return STATUS_PENDING;
3199                     }
3200                     TRACE("CcCopyRead finished\n");
3201                 }
3202             }
3203         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3204             Status = _SEH2_GetExceptionCode();
3205         } _SEH2_END;
3206 
3207         if (NT_SUCCESS(Status)) {
3208             Status = Irp->IoStatus.Status;
3209             Irp->IoStatus.Information += addon;
3210             *bytes_read = (ULONG)Irp->IoStatus.Information;
3211         } else
3212             ERR("EXCEPTION - %08x\n", Status);
3213 
3214         return Status;
3215     } else {
3216         NTSTATUS Status;
3217 
3218         if (!wait) {
3219             IoMarkIrpPending(Irp);
3220             return STATUS_PENDING;
3221         }
3222 
3223         if (fcb->ads)
3224             Status = read_stream(fcb, data, start, length, bytes_read);
3225         else
3226             Status = read_file(fcb, data, start, length, bytes_read, Irp);
3227 
3228         *bytes_read += addon;
3229         TRACE("read %u bytes\n", *bytes_read);
3230 
3231         Irp->IoStatus.Information = *bytes_read;
3232 
3233         if (diskacc && Status != STATUS_PENDING) {
3234             PETHREAD thread = NULL;
3235 
3236             if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3237                 thread = Irp->Tail.Overlay.Thread;
3238             else if (!IoIsSystemThread(PsGetCurrentThread()))
3239                 thread = PsGetCurrentThread();
3240             else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
3241                 thread = PsGetCurrentThread();
3242 
3243             if (thread)
3244                 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3245         }
3246 
3247         return Status;
3248     }
3249 }
3250 
3251 _Dispatch_type_(IRP_MJ_READ)
3252 _Function_class_(DRIVER_DISPATCH)
3253 NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3254     device_extension* Vcb = DeviceObject->DeviceExtension;
3255     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3256     PFILE_OBJECT FileObject = IrpSp->FileObject;
3257     ULONG bytes_read = 0;
3258     NTSTATUS Status;
3259     bool top_level;
3260     fcb* fcb;
3261     ccb* ccb;
3262     bool acquired_fcb_lock = false, wait;
3263 
3264     FsRtlEnterFileSystem();
3265 
3266     top_level = is_top_level(Irp);
3267 
3268     TRACE("read\n");
3269 
3270     if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3271         Status = vol_read(DeviceObject, Irp);
3272         goto exit2;
3273     } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3274         Status = STATUS_INVALID_PARAMETER;
3275         goto end;
3276     }
3277 
3278     Irp->IoStatus.Information = 0;
3279 
3280     if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3281         CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3282 
3283         Irp->MdlAddress = NULL;
3284         Status = STATUS_SUCCESS;
3285 
3286         goto exit;
3287     }
3288 
3289     fcb = FileObject->FsContext;
3290 
3291     if (!fcb) {
3292         ERR("fcb was NULL\n");
3293         Status = STATUS_INVALID_PARAMETER;
3294         goto exit;
3295     }
3296 
3297     ccb = FileObject->FsContext2;
3298 
3299     if (!ccb) {
3300         ERR("ccb was NULL\n");
3301         Status = STATUS_INVALID_PARAMETER;
3302         goto exit;
3303     }
3304 
3305     if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3306         WARN("insufficient privileges\n");
3307         Status = STATUS_ACCESS_DENIED;
3308         goto exit;
3309     }
3310 
3311     if (fcb == Vcb->volume_fcb) {
3312         TRACE("reading volume FCB\n");
3313 
3314         IoSkipCurrentIrpStackLocation(Irp);
3315 
3316         Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3317 
3318         goto exit2;
3319     }
3320 
3321     if (!(Irp->Flags & IRP_PAGING_IO))
3322         FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
3323 
3324     wait = IoIsOperationSynchronous(Irp);
3325 
3326     // Don't offload jobs when doing paging IO - otherwise this can lead to
3327     // deadlocks in CcCopyRead.
3328     if (Irp->Flags & IRP_PAGING_IO)
3329         wait = true;
3330 
3331     if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) {
3332         IO_STATUS_BLOCK iosb;
3333 
3334         CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
3335         if (!NT_SUCCESS(iosb.Status)) {
3336             ERR("CcFlushCache returned %08x\n", iosb.Status);
3337             return iosb.Status;
3338         }
3339     }
3340 
3341     if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3342         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3343             Status = STATUS_PENDING;
3344             IoMarkIrpPending(Irp);
3345             goto exit;
3346         }
3347 
3348         acquired_fcb_lock = true;
3349     }
3350 
3351     Status = do_read(Irp, wait, &bytes_read);
3352 
3353     if (acquired_fcb_lock)
3354         ExReleaseResourceLite(fcb->Header.Resource);
3355 
3356 exit:
3357     if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3358         FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3359 
3360 end:
3361     Irp->IoStatus.Status = Status;
3362 
3363     TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
3364     TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
3365     TRACE("returning %08x\n", Status);
3366 
3367     if (Status != STATUS_PENDING)
3368         IoCompleteRequest(Irp, IO_NO_INCREMENT);
3369     else {
3370         if (!add_thread_job(Vcb, Irp))
3371             Status = do_read_job(Irp);
3372     }
3373 
3374 exit2:
3375     if (top_level)
3376         IoSetTopLevelIrp(NULL);
3377 
3378     FsRtlExitFileSystem();
3379 
3380     return Status;
3381 }
3382