xref: /reactos/drivers/filesystems/btrfs/read.c (revision 84ccccab)
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
/* Per-stripe outcome of a read. read_data_completion moves a stripe to
 * Success or Error depending on the IRP's final status; the remaining
 * values are assigned outside this part of the file. */
enum read_data_status {
    ReadDataStatus_Pending       = 0,
    ReadDataStatus_Success       = 1,
    ReadDataStatus_Error         = 2,
    ReadDataStatus_MissingDevice = 3,
    ReadDataStatus_Skip          = 4
};
27 
28 struct read_data_context;
29 
30 typedef struct {
31     struct read_data_context* context;
32     UINT16 stripenum;
33     BOOL rewrite;
34     PIRP Irp;
35     IO_STATUS_BLOCK iosb;
36     enum read_data_status status;
37     PMDL mdl;
38     UINT64 stripestart;
39     UINT64 stripeend;
40 } read_data_stripe;
41 
42 typedef struct {
43     KEVENT Event;
44     NTSTATUS Status;
45     chunk* c;
46     UINT64 address;
47     UINT32 buflen;
48     LONG num_stripes, stripes_left;
49     UINT64 type;
50     UINT32 sector_size;
51     UINT16 firstoff, startoffstripe, sectors_per_stripe;
52     UINT32* csum;
53     BOOL tree;
54     read_data_stripe* stripes;
55     UINT8* va;
56 } read_data_context;
57 
58 extern BOOL diskacc;
59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
60 extern tCcCopyReadEx fCcCopyReadEx;
61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
62 
63 #define LINUX_PAGE_SIZE 4096
64 
65 _Function_class_(IO_COMPLETION_ROUTINE)
66 #ifdef __REACTOS__
67 static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
68 #else
69 static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
70 #endif
71     read_data_stripe* stripe = conptr;
72     read_data_context* context = (read_data_context*)stripe->context;
73 
74     UNUSED(DeviceObject);
75 
76     stripe->iosb = Irp->IoStatus;
77 
78     if (NT_SUCCESS(Irp->IoStatus.Status))
79         stripe->status = ReadDataStatus_Success;
80     else
81         stripe->status = ReadDataStatus_Error;
82 
83     if (InterlockedDecrement(&context->stripes_left) == 0)
84         KeSetEvent(&context->Event, 0, FALSE);
85 
86     return STATUS_MORE_PROCESSING_REQUIRED;
87 }
88 
89 NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) {
90     NTSTATUS Status;
91     calc_job* cj;
92     UINT32* csum2;
93 
94     // From experimenting, it seems that 40 sectors is roughly the crossover
95     // point where offloading the crc32 calculation becomes worth it.
96 
97     if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) {
98         ULONG j;
99 
100         for (j = 0; j < sectors; j++) {
101             UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
102 
103             if (crc32 != csum[j]) {
104                 return STATUS_CRC_ERROR;
105             }
106         }
107 
108         return STATUS_SUCCESS;
109     }
110 
111     csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG);
112     if (!csum2) {
113         ERR("out of memory\n");
114         return STATUS_INSUFFICIENT_RESOURCES;
115     }
116 
117     Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
118     if (!NT_SUCCESS(Status)) {
119         ERR("add_calc_job returned %08x\n", Status);
120         ExFreePool(csum2);
121         return Status;
122     }
123 
124     KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
125 
126     if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) {
127         free_calc_job(cj);
128         ExFreePool(csum2);
129         return STATUS_CRC_ERROR;
130     }
131 
132     free_calc_job(cj);
133     ExFreePool(csum2);
134 
135     return STATUS_SUCCESS;
136 }
137 
138 static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, read_data_context* context, CHUNK_ITEM* ci,
139                               device** devices, UINT64 generation) {
140     ULONG i;
141     BOOL checksum_error = FALSE;
142     UINT16 j, stripe = 0;
143     NTSTATUS Status;
144     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
145 
146     for (j = 0; j < ci->num_stripes; j++) {
147         if (context->stripes[j].status == ReadDataStatus_Error) {
148             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
149             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
150             return context->stripes[j].iosb.Status;
151         } else if (context->stripes[j].status == ReadDataStatus_Success) {
152             stripe = j;
153             break;
154         }
155     }
156 
157     if (context->stripes[stripe].status != ReadDataStatus_Success)
158         return STATUS_INTERNAL_ERROR;
159 
160     if (context->tree) {
161         tree_header* th = (tree_header*)buf;
162         UINT32 crc32;
163 
164         crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
165 
166         if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
167             checksum_error = TRUE;
168             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
169         } else if (generation != 0 && th->generation != generation) {
170             checksum_error = TRUE;
171             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
172         }
173     } else if (context->csum) {
174 #ifdef DEBUG_STATS
175         LARGE_INTEGER time1, time2;
176 
177         time1 = KeQueryPerformanceCounter(NULL);
178 #endif
179         Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
180 
181         if (Status == STATUS_CRC_ERROR) {
182             checksum_error = TRUE;
183             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
184         } else if (!NT_SUCCESS(Status)) {
185             ERR("check_csum returned %08x\n", Status);
186             return Status;
187         }
188 #ifdef DEBUG_STATS
189         time2 = KeQueryPerformanceCounter(NULL);
190 
191         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
192 #endif
193     }
194 
195     if (!checksum_error)
196         return STATUS_SUCCESS;
197 
198     if (ci->num_stripes == 1)
199         return STATUS_CRC_ERROR;
200 
201     if (context->tree) {
202         tree_header* t2;
203         BOOL recovered = FALSE;
204 
205         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
206         if (!t2) {
207             ERR("out of memory\n");
208             return STATUS_INSUFFICIENT_RESOURCES;
209         }
210 
211         for (j = 0; j < ci->num_stripes; j++) {
212             if (j != stripe && devices[j] && devices[j]->devobj) {
213                 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE);
214                 if (!NT_SUCCESS(Status)) {
215                     WARN("sync_read_phys returned %08x\n", Status);
216                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
217                 } else {
218                     UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
219 
220                     if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
221                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
222                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
223                         recovered = TRUE;
224 
225                         if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
226                             Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart,
227                                                      t2, Vcb->superblock.node_size);
228                             if (!NT_SUCCESS(Status)) {
229                                 WARN("write_data_phys returned %08x\n", Status);
230                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
231                             }
232                         }
233 
234                         break;
235                     } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
236                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
237                     else
238                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
239                 }
240             }
241         }
242 
243         if (!recovered) {
244             ERR("unrecoverable checksum error at %llx\n", addr);
245             ExFreePool(t2);
246             return STATUS_CRC_ERROR;
247         }
248 
249         ExFreePool(t2);
250     } else {
251         ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
252         UINT8* sector;
253 
254         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
255         if (!sector) {
256             ERR("out of memory\n");
257             return STATUS_INSUFFICIENT_RESOURCES;
258         }
259 
260         for (i = 0; i < sectors; i++) {
261             UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
262 
263             if (context->csum[i] != crc32) {
264                 BOOL recovered = FALSE;
265 
266                 for (j = 0; j < ci->num_stripes; j++) {
267                     if (j != stripe && devices[j] && devices[j]->devobj) {
268                         Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
269                                                 Vcb->superblock.sector_size, sector, FALSE);
270                         if (!NT_SUCCESS(Status)) {
271                             WARN("sync_read_phys returned %08x\n", Status);
272                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
273                         } else {
274                             UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
275 
276                             if (crc32b == context->csum[i]) {
277                                 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
278                                 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
279                                 recovered = TRUE;
280 
281                                 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
282                                     Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
283                                                              sector, Vcb->superblock.sector_size);
284                                     if (!NT_SUCCESS(Status)) {
285                                         WARN("write_data_phys returned %08x\n", Status);
286                                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
287                                     }
288                                 }
289 
290                                 break;
291                             } else
292                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
293                         }
294                     }
295                 }
296 
297                 if (!recovered) {
298                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
299                     ExFreePool(sector);
300                     return STATUS_CRC_ERROR;
301                 }
302             }
303         }
304 
305         ExFreePool(sector);
306     }
307 
308     return STATUS_SUCCESS;
309 }
310 
311 static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
312                                 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
313     UINT64 i;
314 
315     for (i = 0; i < ci->num_stripes; i++) {
316         if (context->stripes[i].status == ReadDataStatus_Error) {
317             WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
318             log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
319             return context->stripes[i].iosb.Status;
320         }
321     }
322 
323     if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
324         tree_header* th = (tree_header*)buf;
325         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
326 
327         if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
328             UINT64 off;
329             UINT16 stripe;
330 
331             get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
332 
333             ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
334 
335             if (crc32 != *((UINT32*)th->csum)) {
336                 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
337                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
338                 return STATUS_CRC_ERROR;
339             } else if (addr != th->address) {
340                 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
341                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
342                 return STATUS_CRC_ERROR;
343             } else if (generation != 0 && generation != th->generation) {
344                 WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
345                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
346                 return STATUS_CRC_ERROR;
347             }
348         }
349     } else if (context->csum) {
350         NTSTATUS Status;
351 #ifdef DEBUG_STATS
352         LARGE_INTEGER time1, time2;
353 
354         time1 = KeQueryPerformanceCounter(NULL);
355 #endif
356         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
357 
358         if (Status == STATUS_CRC_ERROR) {
359             for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
360                 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
361 
362                 if (context->csum[i] != crc32) {
363                     UINT64 off;
364                     UINT16 stripe;
365 
366                     get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);
367 
368                     ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
369 
370                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
371 
372                     return Status;
373                 }
374             }
375 
376             return Status;
377         } else if (!NT_SUCCESS(Status)) {
378             ERR("check_csum returned %08x\n", Status);
379             return Status;
380         }
381 #ifdef DEBUG_STATS
382         time2 = KeQueryPerformanceCounter(NULL);
383 
384         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
385 #endif
386     }
387 
388     return STATUS_SUCCESS;
389 }
390 
391 static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
392                                  CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
393     UINT64 i;
394     UINT16 j, stripe;
395     NTSTATUS Status;
396     BOOL checksum_error = FALSE;
397     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
398 
399     for (j = 0; j < ci->num_stripes; j++) {
400         if (context->stripes[j].status == ReadDataStatus_Error) {
401             WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status);
402             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
403             return context->stripes[j].iosb.Status;
404         } else if (context->stripes[j].status == ReadDataStatus_Success)
405             stripe = j;
406     }
407 
408     if (context->tree) {
409         tree_header* th = (tree_header*)buf;
410         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
411 
412         if (crc32 != *((UINT32*)th->csum)) {
413             WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
414             checksum_error = TRUE;
415             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
416         } else if (addr != th->address) {
417             WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
418             checksum_error = TRUE;
419             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
420         } else if (generation != 0 && generation != th->generation) {
421             WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
422             checksum_error = TRUE;
423             log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
424         }
425     } else if (context->csum) {
426 #ifdef DEBUG_STATS
427         LARGE_INTEGER time1, time2;
428 
429         time1 = KeQueryPerformanceCounter(NULL);
430 #endif
431         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
432 
433         if (Status == STATUS_CRC_ERROR)
434             checksum_error = TRUE;
435         else if (!NT_SUCCESS(Status)) {
436             ERR("check_csum returned %08x\n", Status);
437             return Status;
438         }
439 #ifdef DEBUG_STATS
440         time2 = KeQueryPerformanceCounter(NULL);
441 
442         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
443 #endif
444     }
445 
446     if (!checksum_error)
447         return STATUS_SUCCESS;
448 
449     if (context->tree) {
450         tree_header* t2;
451         UINT64 off;
452         UINT16 badsubstripe = 0;
453         BOOL recovered = FALSE;
454 
455         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
456         if (!t2) {
457             ERR("out of memory\n");
458             return STATUS_INSUFFICIENT_RESOURCES;
459         }
460 
461         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
462 
463         stripe *= ci->sub_stripes;
464 
465         for (j = 0; j < ci->sub_stripes; j++) {
466             if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
467                 badsubstripe = j;
468                 break;
469             }
470         }
471 
472         for (j = 0; j < ci->sub_stripes; j++) {
473             if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
474                 Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off,
475                                         Vcb->superblock.node_size, (UINT8*)t2, FALSE);
476                 if (!NT_SUCCESS(Status)) {
477                     WARN("sync_read_phys returned %08x\n", Status);
478                     log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
479                 } else {
480                     UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
481 
482                     if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
483                         RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
484                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id);
485                         recovered = TRUE;
486 
487                         if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
488                             Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off,
489                                                      t2, Vcb->superblock.node_size);
490                             if (!NT_SUCCESS(Status)) {
491                                 WARN("write_data_phys returned %08x\n", Status);
492                                 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
493                             }
494                         }
495 
496                         break;
497                     } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
498                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
499                     else
500                         log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
501                 }
502             }
503         }
504 
505         if (!recovered) {
506             ERR("unrecoverable checksum error at %llx\n", addr);
507             ExFreePool(t2);
508             return STATUS_CRC_ERROR;
509         }
510 
511         ExFreePool(t2);
512     } else {
513         ULONG sectors = length / Vcb->superblock.sector_size;
514         UINT8* sector;
515 
516         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
517         if (!sector) {
518             ERR("out of memory\n");
519             return STATUS_INSUFFICIENT_RESOURCES;
520         }
521 
522         for (i = 0; i < sectors; i++) {
523             UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
524 
525             if (context->csum[i] != crc32) {
526                 UINT64 off;
527                 UINT16 stripe2, badsubstripe = 0;
528                 BOOL recovered = FALSE;
529 
530                 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
531                                  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
532 
533                 stripe2 *= ci->sub_stripes;
534 
535                 for (j = 0; j < ci->sub_stripes; j++) {
536                     if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
537                         badsubstripe = j;
538                         break;
539                     }
540                 }
541 
542                 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
543 
544                 for (j = 0; j < ci->sub_stripes; j++) {
545                     if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
546                         Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off,
547                                                 Vcb->superblock.sector_size, sector, FALSE);
548                         if (!NT_SUCCESS(Status)) {
549                             WARN("sync_read_phys returned %08x\n", Status);
550                             log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
551                         } else {
552                             UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
553 
554                             if (crc32b == context->csum[i]) {
555                                 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
556                                 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
557                                 recovered = TRUE;
558 
559                                 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
560                                     Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off,
561                                                              sector, Vcb->superblock.sector_size);
562                                     if (!NT_SUCCESS(Status)) {
563                                         WARN("write_data_phys returned %08x\n", Status);
564                                         log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
565                                     }
566                                 }
567 
568                                 break;
569                             } else
570                                 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
571                         }
572                     }
573                 }
574 
575                 if (!recovered) {
576                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
577                     ExFreePool(sector);
578                     return STATUS_CRC_ERROR;
579                 }
580             }
581         }
582 
583         ExFreePool(sector);
584     }
585 
586     return STATUS_SUCCESS;
587 }
588 
589 static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
590                                 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
591     ULONG i;
592     NTSTATUS Status;
593     BOOL checksum_error = FALSE;
594     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
595     UINT16 j, stripe;
596     BOOL no_success = TRUE;
597 
598     for (j = 0; j < ci->num_stripes; j++) {
599         if (context->stripes[j].status == ReadDataStatus_Error) {
600             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
601             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
602             return context->stripes[j].iosb.Status;
603         } else if (context->stripes[j].status == ReadDataStatus_Success) {
604             stripe = j;
605             no_success = FALSE;
606         }
607     }
608 
609     if (c) {    // check partial stripes
610         LIST_ENTRY* le;
611         UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length;
612 
613         ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
614 
615         le = c->partial_stripes.Flink;
616         while (le != &c->partial_stripes) {
617             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
618 
619             if (ps->address + ps_length > addr && ps->address < addr + length) {
620                 ULONG runlength, index;
621 
622                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
623 
624                 while (runlength != 0) {
625                     UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
626                     UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
627                     UINT64 start = max(runstart, addr);
628                     UINT64 end = min(runend, addr + length);
629 
630                     if (end > start)
631                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
632 
633                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
634                 }
635             } else if (ps->address >= addr + length)
636                 break;
637 
638             le = le->Flink;
639         }
640 
641         ExReleaseResourceLite(&c->partial_stripes_lock);
642     }
643 
644     if (context->tree) {
645         tree_header* th = (tree_header*)buf;
646         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
647 
648         if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
649             checksum_error = TRUE;
650             if (!no_success && !degraded)
651                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
652         } else if (generation != 0 && generation != th->generation) {
653             checksum_error = TRUE;
654             if (!no_success && !degraded)
655                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
656         }
657     } else if (context->csum) {
658 #ifdef DEBUG_STATS
659         LARGE_INTEGER time1, time2;
660 
661         time1 = KeQueryPerformanceCounter(NULL);
662 #endif
663         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
664 
665         if (Status == STATUS_CRC_ERROR) {
666             if (!degraded)
667                 WARN("checksum error\n");
668             checksum_error = TRUE;
669         } else if (!NT_SUCCESS(Status)) {
670             ERR("check_csum returned %08x\n", Status);
671             return Status;
672         }
673 
674 #ifdef DEBUG_STATS
675         time2 = KeQueryPerformanceCounter(NULL);
676 
677         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
678 #endif
679     } else if (degraded)
680         checksum_error = TRUE;
681 
682     if (!checksum_error)
683         return STATUS_SUCCESS;
684 
685     if (context->tree) {
686         UINT16 parity;
687         UINT64 off;
688         BOOL recovered = FALSE, first = TRUE, failed = FALSE;
689         UINT8* t2;
690 
691         t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
692         if (!t2) {
693             ERR("out of memory\n");
694             return STATUS_INSUFFICIENT_RESOURCES;
695         }
696 
697         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
698 
699         parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
700 
701         stripe = (parity + stripe + 1) % ci->num_stripes;
702 
703         for (j = 0; j < ci->num_stripes; j++) {
704             if (j != stripe) {
705                 if (devices[j] && devices[j]->devobj) {
706                     if (first) {
707                         Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE);
708                         if (!NT_SUCCESS(Status)) {
709                             ERR("sync_read_phys returned %08x\n", Status);
710                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
711                             failed = TRUE;
712                             break;
713                         }
714 
715                         first = FALSE;
716                     } else {
717                         Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE);
718                         if (!NT_SUCCESS(Status)) {
719                             ERR("sync_read_phys returned %08x\n", Status);
720                             log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
721                             failed = TRUE;
722                             break;
723                         }
724 
725                         do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
726                     }
727                 } else {
728                     failed = TRUE;
729                     break;
730                 }
731             }
732         }
733 
734         if (!failed) {
735             tree_header* t3 = (tree_header*)t2;
736             UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));
737 
738             if (t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) {
739                 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
740 
741                 if (!degraded)
742                     ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
743 
744                 recovered = TRUE;
745 
746                 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
747                     Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
748                     if (!NT_SUCCESS(Status)) {
749                         WARN("write_data_phys returned %08x\n", Status);
750                         log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
751                     }
752                 }
753             }
754         }
755 
756         if (!recovered) {
757             ERR("unrecoverable checksum error at %llx\n", addr);
758             ExFreePool(t2);
759             return STATUS_CRC_ERROR;
760         }
761 
762         ExFreePool(t2);
763     } else {
764         ULONG sectors = length / Vcb->superblock.sector_size;
765         UINT8* sector;
766 
767         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
768         if (!sector) {
769             ERR("out of memory\n");
770             return STATUS_INSUFFICIENT_RESOURCES;
771         }
772 
773         for (i = 0; i < sectors; i++) {
774             UINT16 parity;
775             UINT64 off;
776             UINT32 crc32;
777 
778             if (context->csum)
779                 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
780 
781             get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
782                              ci->num_stripes - 1, &off, &stripe);
783 
784             parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
785 
786             stripe = (parity + stripe + 1) % ci->num_stripes;
787 
788             if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
789                 BOOL recovered = FALSE, first = TRUE, failed = FALSE;
790 
791                 if (devices[stripe] && devices[stripe]->devobj)
792                     log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);
793 
794                 for (j = 0; j < ci->num_stripes; j++) {
795                     if (j != stripe) {
796                         if (devices[j] && devices[j]->devobj) {
797                             if (first) {
798                                 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE);
799                                 if (!NT_SUCCESS(Status)) {
800                                     ERR("sync_read_phys returned %08x\n", Status);
801                                     failed = TRUE;
802                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
803                                     break;
804                                 }
805 
806                                 first = FALSE;
807                             } else {
808                                 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE);
809                                 if (!NT_SUCCESS(Status)) {
810                                     ERR("sync_read_phys returned %08x\n", Status);
811                                     failed = TRUE;
812                                     log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
813                                     break;
814                                 }
815 
816                                 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
817                             }
818                         } else {
819                             failed = TRUE;
820                             break;
821                         }
822                     }
823                 }
824 
825                 if (!failed) {
826                     if (context->csum)
827                         crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
828 
829                     if (!context->csum || crc32 == context->csum[i]) {
830                         RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
831 
832                         if (!degraded)
833                             ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
834 
835                         recovered = TRUE;
836 
837                         if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
838                             Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off,
839                                                      sector, Vcb->superblock.sector_size);
840                             if (!NT_SUCCESS(Status)) {
841                                 WARN("write_data_phys returned %08x\n", Status);
842                                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
843                             }
844                         }
845                     }
846                 }
847 
848                 if (!recovered) {
849                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
850                     ExFreePool(sector);
851                     return STATUS_CRC_ERROR;
852                 }
853             }
854         }
855 
856         ExFreePool(sector);
857     }
858 
859     return STATUS_SUCCESS;
860 }
861 
862 void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) {
863     if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
864         UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
865         UINT16 stripe;
866 
867         stripe = num_stripes - 3;
868 
869         if (stripe == missing)
870             RtlZeroMemory(out, sector_size);
871         else
872             RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);
873 
874         do {
875             stripe--;
876 
877             galois_double(out, sector_size);
878 
879             if (stripe != missing)
880                 do_xor(out, sectors + (stripe * sector_size), sector_size);
881         } while (stripe > 0);
882 
883         do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
884 
885         if (missing != 0)
886             galois_divpower(out, (UINT8)missing, sector_size);
887     } else { // reconstruct from p and q
888         UINT16 x, y, stripe;
889         UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
890         UINT32 j;
891 
892         stripe = num_stripes - 3;
893 
894         pxy = out + sector_size;
895         qxy = out;
896 
897         if (stripe == missing1 || stripe == missing2) {
898             RtlZeroMemory(qxy, sector_size);
899             RtlZeroMemory(pxy, sector_size);
900 
901             if (stripe == missing1)
902                 x = stripe;
903             else
904                 y = stripe;
905         } else {
906             RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
907             RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
908         }
909 
910         do {
911             stripe--;
912 
913             galois_double(qxy, sector_size);
914 
915             if (stripe != missing1 && stripe != missing2) {
916                 do_xor(qxy, sectors + (stripe * sector_size), sector_size);
917                 do_xor(pxy, sectors + (stripe * sector_size), sector_size);
918             } else if (stripe == missing1)
919                 x = stripe;
920             else if (stripe == missing2)
921                 y = stripe;
922         } while (stripe > 0);
923 
924         gyx = gpow2(y > x ? (y-x) : (255-x+y));
925         gx = gpow2(255-x);
926 
927         denom = gdiv(1, gyx ^ 1);
928         a = gmul(gyx, denom);
929         b = gmul(gx, denom);
930 
931         p = sectors + ((num_stripes - 2) * sector_size);
932         q = sectors + ((num_stripes - 1) * sector_size);
933 
934         for (j = 0; j < sector_size; j++) {
935             *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
936 
937             p++;
938             q++;
939             pxy++;
940             qxy++;
941         }
942 
943         do_xor(out + sector_size, out, sector_size);
944         do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
945     }
946 }
947 
948 static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
949                                 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
950     NTSTATUS Status;
951     ULONG i;
952     BOOL checksum_error = FALSE;
953     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
954     UINT16 stripe, j;
955     BOOL no_success = TRUE;
956 
957     for (j = 0; j < ci->num_stripes; j++) {
958         if (context->stripes[j].status == ReadDataStatus_Error) {
959             WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
960 
961             if (devices[j])
962                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
963             return context->stripes[j].iosb.Status;
964         } else if (context->stripes[j].status == ReadDataStatus_Success) {
965             stripe = j;
966             no_success = FALSE;
967         }
968     }
969 
970     if (c) {    // check partial stripes
971         LIST_ENTRY* le;
972         UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length;
973 
974         ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
975 
976         le = c->partial_stripes.Flink;
977         while (le != &c->partial_stripes) {
978             partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
979 
980             if (ps->address + ps_length > addr && ps->address < addr + length) {
981                 ULONG runlength, index;
982 
983                 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
984 
985                 while (runlength != 0) {
986                     UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
987                     UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
988                     UINT64 start = max(runstart, addr);
989                     UINT64 end = min(runend, addr + length);
990 
991                     if (end > start)
992                         RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993 
994                     runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995                 }
996             } else if (ps->address >= addr + length)
997                 break;
998 
999             le = le->Flink;
1000         }
1001 
1002         ExReleaseResourceLite(&c->partial_stripes_lock);
1003     }
1004 
1005     if (context->tree) {
1006         tree_header* th = (tree_header*)buf;
1007         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008 
1009         if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
1010             checksum_error = TRUE;
1011             if (!no_success && !degraded && devices[stripe])
1012                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1013         } else if (generation != 0 && generation != th->generation) {
1014             checksum_error = TRUE;
1015             if (!no_success && !degraded && devices[stripe])
1016                 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1017         }
1018     } else if (context->csum) {
1019 #ifdef DEBUG_STATS
1020         LARGE_INTEGER time1, time2;
1021 
1022         time1 = KeQueryPerformanceCounter(NULL);
1023 #endif
1024         Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1025 
1026         if (Status == STATUS_CRC_ERROR) {
1027             if (!degraded)
1028                 WARN("checksum error\n");
1029             checksum_error = TRUE;
1030         } else if (!NT_SUCCESS(Status)) {
1031             ERR("check_csum returned %08x\n", Status);
1032             return Status;
1033         }
1034 #ifdef DEBUG_STATS
1035         time2 = KeQueryPerformanceCounter(NULL);
1036 
1037         Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
1038 #endif
1039     } else if (degraded)
1040         checksum_error = TRUE;
1041 
1042     if (!checksum_error)
1043         return STATUS_SUCCESS;
1044 
1045     if (context->tree) {
1046         UINT8* sector;
1047         UINT16 k, physstripe, parity1, parity2, error_stripe;
1048         UINT64 off;
1049         BOOL recovered = FALSE, failed = FALSE;
1050         ULONG num_errors = 0;
1051 
1052         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1053         if (!sector) {
1054             ERR("out of memory\n");
1055             return STATUS_INSUFFICIENT_RESOURCES;
1056         }
1057 
1058         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1059 
1060         parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1061         parity2 = (parity1 + 1) % ci->num_stripes;
1062 
1063         physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1064 
1065         j = (parity2 + 1) % ci->num_stripes;
1066 
1067         for (k = 0; k < ci->num_stripes - 1; k++) {
1068             if (j != physstripe) {
1069                 if (devices[j] && devices[j]->devobj) {
1070                     Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE);
1071                     if (!NT_SUCCESS(Status)) {
1072                         ERR("sync_read_phys returned %08x\n", Status);
1073                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1074                         num_errors++;
1075                         error_stripe = k;
1076 
1077                         if (num_errors > 1) {
1078                             failed = TRUE;
1079                             break;
1080                         }
1081                     }
1082                 } else {
1083                     num_errors++;
1084                     error_stripe = k;
1085 
1086                     if (num_errors > 1) {
1087                         failed = TRUE;
1088                         break;
1089                     }
1090                 }
1091             }
1092 
1093             j = (j + 1) % ci->num_stripes;
1094         }
1095 
1096         if (!failed) {
1097             if (num_errors == 0) {
1098                 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1099                 UINT32 crc32;
1100 
1101                 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1102                               Vcb->superblock.node_size);
1103 
1104                 for (j = 0; j < ci->num_stripes - 2; j++) {
1105                     if (j != stripe)
1106                         do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1107                 }
1108 
1109                 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1110 
1111                 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1112                     RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113 
1114                     if (devices[physstripe] && devices[physstripe]->devobj)
1115                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1116 
1117                     recovered = TRUE;
1118 
1119                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1120                         Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1121                                                  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1122                         if (!NT_SUCCESS(Status)) {
1123                             WARN("write_data_phys returned %08x\n", Status);
1124                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1125                         }
1126                     }
1127                 }
1128             }
1129 
1130             if (!recovered) {
1131                 UINT32 crc32;
1132                 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1133                 BOOL read_q = FALSE;
1134 
1135                 if (devices[parity2] && devices[parity2]->devobj) {
1136                     Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1137                                             Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE);
1138                     if (!NT_SUCCESS(Status)) {
1139                         ERR("sync_read_phys returned %08x\n", Status);
1140                         log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1141                     } else
1142                         read_q = TRUE;
1143                 }
1144 
1145                 if (read_q) {
1146                     if (num_errors == 1) {
1147                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148 
1149                         crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150 
1151                         if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation))
1152                             recovered = TRUE;
1153                     } else {
1154                         for (j = 0; j < ci->num_stripes - 1; j++) {
1155                             if (j != stripe) {
1156                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1157 
1158                                 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1159 
1160                                 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1161                                     recovered = TRUE;
1162                                     error_stripe = j;
1163                                     break;
1164                                 }
1165                             }
1166                         }
1167                     }
1168                 }
1169 
1170                 if (recovered) {
1171                     UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1172 
1173                     if (devices[physstripe] && devices[physstripe]->devobj)
1174                         ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1175 
1176                     RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1177 
1178                     if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1179                         Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1180                                                  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1181                         if (!NT_SUCCESS(Status)) {
1182                             WARN("write_data_phys returned %08x\n", Status);
1183                             log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1184                         }
1185                     }
1186 
1187                     if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1188                         if (error_stripe == ci->num_stripes - 2) {
1189                             ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1190 
1191                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1192 
1193                             RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1194 
1195                             for (j = 0; j < ci->num_stripes - 2; j++) {
1196                                 if (j == stripe) {
1197                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1198                                            Vcb->superblock.node_size);
1199                                 } else {
1200                                     do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1201                                             Vcb->superblock.node_size);
1202                                 }
1203                             }
1204                         } else {
1205                             ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1206                                 devices[error_stripe_phys]->devitem.dev_id);
1207 
1208                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1209 
1210                             RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1211                                           sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1212                         }
1213                     }
1214 
1215                     if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1216                         Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1217                                                  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1218                         if (!NT_SUCCESS(Status)) {
1219                             WARN("write_data_phys returned %08x\n", Status);
1220                             log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1221                         }
1222                     }
1223                 }
1224             }
1225         }
1226 
1227         if (!recovered) {
1228             ERR("unrecoverable checksum error at %llx\n", addr);
1229             ExFreePool(sector);
1230             return STATUS_CRC_ERROR;
1231         }
1232 
1233         ExFreePool(sector);
1234     } else {
1235         ULONG sectors = length / Vcb->superblock.sector_size;
1236         UINT8* sector;
1237 
1238         sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1239         if (!sector) {
1240             ERR("out of memory\n");
1241             return STATUS_INSUFFICIENT_RESOURCES;
1242         }
1243 
1244         for (i = 0; i < sectors; i++) {
1245             UINT64 off;
1246             UINT16 physstripe, parity1, parity2;
1247             UINT32 crc32;
1248 
1249             if (context->csum)
1250                 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1251 
1252             get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1253                              ci->num_stripes - 2, &off, &stripe);
1254 
1255             parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1256             parity2 = (parity1 + 1) % ci->num_stripes;
1257 
1258             physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1259 
1260             if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1261                 UINT16 k, error_stripe;
1262                 BOOL recovered = FALSE, failed = FALSE;
1263                 ULONG num_errors = 0;
1264 
1265                 if (devices[physstripe] && devices[physstripe]->devobj)
1266                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1267 
1268                 j = (parity2 + 1) % ci->num_stripes;
1269 
1270                 for (k = 0; k < ci->num_stripes - 1; k++) {
1271                     if (j != physstripe) {
1272                         if (devices[j] && devices[j]->devobj) {
1273                             Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE);
1274                             if (!NT_SUCCESS(Status)) {
1275                                 ERR("sync_read_phys returned %08x\n", Status);
1276                                 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1277                                 num_errors++;
1278                                 error_stripe = k;
1279 
1280                                 if (num_errors > 1) {
1281                                     failed = TRUE;
1282                                     break;
1283                                 }
1284                             }
1285                         } else {
1286                             num_errors++;
1287                             error_stripe = k;
1288 
1289                             if (num_errors > 1) {
1290                                 failed = TRUE;
1291                                 break;
1292                             }
1293                         }
1294                     }
1295 
1296                     j = (j + 1) % ci->num_stripes;
1297                 }
1298 
1299                 if (!failed) {
1300                     if (num_errors == 0) {
1301                         RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302 
1303                         for (j = 0; j < ci->num_stripes - 2; j++) {
1304                             if (j != stripe)
1305                                 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1306                         }
1307 
1308                         if (context->csum)
1309                             crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1310 
1311                         if (!context->csum || crc32 == context->csum[i]) {
1312                             RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1313 
1314                             if (devices[physstripe] && devices[physstripe]->devobj)
1315                                 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1316                                     devices[physstripe]->devitem.dev_id);
1317 
1318                             recovered = TRUE;
1319 
1320                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1321                                 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1322                                                          sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1323                                 if (!NT_SUCCESS(Status)) {
1324                                     WARN("write_data_phys returned %08x\n", Status);
1325                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1326                                 }
1327                             }
1328                         }
1329                     }
1330 
1331                     if (!recovered) {
1332                         BOOL read_q = FALSE;
1333 
1334                         if (devices[parity2] && devices[parity2]->devobj) {
1335                             Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1336                                                     Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE);
1337                             if (!NT_SUCCESS(Status)) {
1338                                 ERR("sync_read_phys returned %08x\n", Status);
1339                                 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1340                             } else
1341                                 read_q = TRUE;
1342                         }
1343 
1344                         if (read_q) {
1345                             if (num_errors == 1) {
1346                                 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1347 
1348                                 if (!devices[physstripe] || !devices[physstripe]->devobj)
1349                                     recovered = TRUE;
1350                                 else {
1351                                     crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1352 
1353                                     if (crc32 == context->csum[i])
1354                                         recovered = TRUE;
1355                                 }
1356                             } else {
1357                                 for (j = 0; j < ci->num_stripes - 1; j++) {
1358                                     if (j != stripe) {
1359                                         raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1360 
1361                                         crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1362 
1363                                         if (crc32 == context->csum[i]) {
1364                                             recovered = TRUE;
1365                                             error_stripe = j;
1366                                             break;
1367                                         }
1368                                     }
1369                                 }
1370                             }
1371                         }
1372 
1373                         if (recovered) {
1374                             UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1375 
1376                             if (devices[physstripe] && devices[physstripe]->devobj)
1377                                 ERR("recovering from checksum error at %llx, device %llx\n",
1378                                     addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1379 
1380                             RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1381 
1382                             if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1383                                 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1384                                                          sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1385                                 if (!NT_SUCCESS(Status)) {
1386                                     WARN("write_data_phys returned %08x\n", Status);
1387                                     log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1388                                 }
1389                             }
1390 
1391                             if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1392                                 if (error_stripe == ci->num_stripes - 2) {
1393                                     ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1394                                         devices[error_stripe_phys]->devitem.dev_id);
1395 
1396                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1397 
1398                                     RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1399 
1400                                     for (j = 0; j < ci->num_stripes - 2; j++) {
1401                                         if (j == stripe) {
1402                                             do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1403                                                    Vcb->superblock.sector_size);
1404                                         } else {
1405                                             do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1406                                                    Vcb->superblock.sector_size);
1407                                         }
1408                                     }
1409                                 } else {
1410                                     ERR("recovering from checksum error at %llx, device %llx\n",
1411                                         addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1412                                         devices[error_stripe_phys]->devitem.dev_id);
1413 
1414                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1415 
1416                                     RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1417                                                   sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1418                                 }
1419                             }
1420 
1421                             if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1422                                 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1423                                                          sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1424                                 if (!NT_SUCCESS(Status)) {
1425                                     WARN("write_data_phys returned %08x\n", Status);
1426                                     log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1427                                 }
1428                             }
1429                         }
1430                     }
1431                 }
1432 
1433                 if (!recovered) {
1434                     ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1435                     ExFreePool(sector);
1436                     return STATUS_CRC_ERROR;
1437                 }
1438             }
1439         }
1440 
1441         ExFreePool(sector);
1442     }
1443 
1444     return STATUS_SUCCESS;
1445 }
1446 
1447 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum,
1448                    _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read,
1449                    _In_ ULONG priority) {
1450     CHUNK_ITEM* ci;
1451     CHUNK_ITEM_STRIPE* cis;
1452     read_data_context context;
1453     UINT64 type, offset, total_reading = 0;
1454     NTSTATUS Status;
1455     device** devices = NULL;
1456     UINT16 i, startoffstripe, allowed_missing, missing_devices = 0;
1457     UINT8* dummypage = NULL;
1458     PMDL dummy_mdl = NULL;
1459     BOOL need_to_wait;
1460     UINT64 lockaddr, locklen;
1461 #ifdef DEBUG_STATS
1462     LARGE_INTEGER time1, time2;
1463 #endif
1464 
1465     if (Vcb->log_to_phys_loaded) {
1466         if (!c) {
1467             c = get_chunk_from_address(Vcb, addr);
1468 
1469             if (!c) {
1470                 ERR("get_chunk_from_address failed\n");
1471                 return STATUS_INTERNAL_ERROR;
1472             }
1473         }
1474 
1475         ci = c->chunk_item;
1476         offset = c->offset;
1477         devices = c->devices;
1478 
1479         if (pc)
1480             *pc = c;
1481     } else {
1482         LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1483 
1484         ci = NULL;
1485 
1486         c = NULL;
1487         while (le != &Vcb->sys_chunks) {
1488             sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1489 
1490             if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1491                 CHUNK_ITEM* chunk_item = sc->data;
1492 
1493                 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1494                     ci = chunk_item;
1495                     offset = sc->key.offset;
1496                     cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1497 
1498                     devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1499                     if (!devices) {
1500                         ERR("out of memory\n");
1501                         return STATUS_INSUFFICIENT_RESOURCES;
1502                     }
1503 
1504                     for (i = 0; i < ci->num_stripes; i++) {
1505                         devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1506                     }
1507 
1508                     break;
1509                 }
1510             }
1511 
1512             le = le->Flink;
1513         }
1514 
1515         if (!ci) {
1516             ERR("could not find chunk for %llx in bootstrap\n", addr);
1517             return STATUS_INTERNAL_ERROR;
1518         }
1519 
1520         if (pc)
1521             *pc = NULL;
1522     }
1523 
1524     if (ci->type & BLOCK_FLAG_DUPLICATE) {
1525         type = BLOCK_FLAG_DUPLICATE;
1526         allowed_missing = ci->num_stripes - 1;
1527     } else if (ci->type & BLOCK_FLAG_RAID0) {
1528         type = BLOCK_FLAG_RAID0;
1529         allowed_missing = 0;
1530     } else if (ci->type & BLOCK_FLAG_RAID1) {
1531         type = BLOCK_FLAG_DUPLICATE;
1532         allowed_missing = 1;
1533     } else if (ci->type & BLOCK_FLAG_RAID10) {
1534         type = BLOCK_FLAG_RAID10;
1535         allowed_missing = 1;
1536     } else if (ci->type & BLOCK_FLAG_RAID5) {
1537         type = BLOCK_FLAG_RAID5;
1538         allowed_missing = 1;
1539     } else if (ci->type & BLOCK_FLAG_RAID6) {
1540         type = BLOCK_FLAG_RAID6;
1541         allowed_missing = 2;
1542     } else { // SINGLE
1543         type = BLOCK_FLAG_DUPLICATE;
1544         allowed_missing = 0;
1545     }
1546 
1547     cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1548 
1549     RtlZeroMemory(&context, sizeof(read_data_context));
1550     KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
1551 
1552     context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1553     if (!context.stripes) {
1554         ERR("out of memory\n");
1555         return STATUS_INSUFFICIENT_RESOURCES;
1556     }
1557 
1558     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1559         get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1560         chunk_lock_range(Vcb, c, lockaddr, locklen);
1561     }
1562 
1563     RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1564 
1565     context.buflen = length;
1566     context.num_stripes = ci->num_stripes;
1567     context.stripes_left = context.num_stripes;
1568     context.sector_size = Vcb->superblock.sector_size;
1569     context.csum = csum;
1570     context.tree = is_tree;
1571     context.type = type;
1572 
1573     if (type == BLOCK_FLAG_RAID0) {
1574         UINT64 startoff, endoff;
1575         UINT16 endoffstripe, stripe;
1576         UINT32 *stripeoff, pos;
1577         PMDL master_mdl;
1578         PFN_NUMBER* pfns;
1579 
1580         // FIXME - test this still works if page size isn't the same as sector size
1581 
1582         // This relies on the fact that MDLs are followed in memory by the page frame numbers (PFNs),
1583         // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1584         // data for you without doing a memcpy yourself.
1585         // MDLs are officially opaque, so this might very well break in future versions of Windows.
1586 
1587         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1588         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1589 
1590         if (file_read) {
1591             // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1592             // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1593             // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1594 
1595             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1596 
1597             if (!context.va) {
1598                 ERR("out of memory\n");
1599                 Status = STATUS_INSUFFICIENT_RESOURCES;
1600                 goto exit;
1601             }
1602         } else
1603             context.va = buf;
1604 
1605         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1606         if (!master_mdl) {
1607             ERR("out of memory\n");
1608             Status = STATUS_INSUFFICIENT_RESOURCES;
1609             goto exit;
1610         }
1611 
1612         Status = STATUS_SUCCESS;
1613 
1614         _SEH2_TRY {
1615             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1616         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1617             Status = _SEH2_GetExceptionCode();
1618         } _SEH2_END;
1619 
1620         if (!NT_SUCCESS(Status)) {
1621             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1622             IoFreeMdl(master_mdl);
1623             goto exit;
1624         }
1625 
1626         pfns = (PFN_NUMBER*)(master_mdl + 1);
1627 
1628         for (i = 0; i < ci->num_stripes; i++) {
1629             if (startoffstripe > i)
1630                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1631             else if (startoffstripe == i)
1632                 context.stripes[i].stripestart = startoff;
1633             else
1634                 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1635 
1636             if (endoffstripe > i)
1637                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1638             else if (endoffstripe == i)
1639                 context.stripes[i].stripeend = endoff + 1;
1640             else
1641                 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1642 
1643             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1644                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
1645 
1646                 if (!context.stripes[i].mdl) {
1647                     ERR("IoAllocateMdl failed\n");
1648                     Status = STATUS_INSUFFICIENT_RESOURCES;
1649                     goto exit;
1650                 }
1651             }
1652         }
1653 
1654         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
1655         if (!stripeoff) {
1656             ERR("out of memory\n");
1657             Status = STATUS_INSUFFICIENT_RESOURCES;
1658             goto exit;
1659         }
1660 
1661         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
1662 
1663         pos = 0;
1664         stripe = startoffstripe;
1665         while (pos < length) {
1666             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1667 
1668             if (pos == 0) {
1669                 UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1670 
1671                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1672 
1673                 stripeoff[stripe] += readlen;
1674                 pos += readlen;
1675             } else if (length - pos < ci->stripe_length) {
1676                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1677 
1678                 pos = length;
1679             } else {
1680                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1681 
1682                 stripeoff[stripe] += (UINT32)ci->stripe_length;
1683                 pos += (UINT32)ci->stripe_length;
1684             }
1685 
1686             stripe = (stripe + 1) % ci->num_stripes;
1687         }
1688 
1689         MmUnlockPages(master_mdl);
1690         IoFreeMdl(master_mdl);
1691 
1692         ExFreePool(stripeoff);
1693     } else if (type == BLOCK_FLAG_RAID10) {
1694         UINT64 startoff, endoff;
1695         UINT16 endoffstripe, j, stripe;
1696         ULONG orig_ls;
1697         PMDL master_mdl;
1698         PFN_NUMBER* pfns;
1699         UINT32* stripeoff, pos;
1700         read_data_stripe** stripes;
1701 
1702         if (c)
1703             orig_ls = c->last_stripe;
1704         else
1705             orig_ls = 0;
1706 
1707         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1708         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1709 
1710         if ((ci->num_stripes % ci->sub_stripes) != 0) {
1711             ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1712             Status = STATUS_INTERNAL_ERROR;
1713             goto exit;
1714         }
1715 
1716         if (file_read) {
1717             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1718 
1719             if (!context.va) {
1720                 ERR("out of memory\n");
1721                 Status = STATUS_INSUFFICIENT_RESOURCES;
1722                 goto exit;
1723             }
1724         } else
1725             context.va = buf;
1726 
1727         context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1728         context.startoffstripe = startoffstripe;
1729         context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size);
1730 
1731         startoffstripe *= ci->sub_stripes;
1732         endoffstripe *= ci->sub_stripes;
1733 
1734         if (c)
1735             c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1736 
1737         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1738         if (!master_mdl) {
1739             ERR("out of memory\n");
1740             Status = STATUS_INSUFFICIENT_RESOURCES;
1741             goto exit;
1742         }
1743 
1744         Status = STATUS_SUCCESS;
1745 
1746         _SEH2_TRY {
1747             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1748         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1749             Status = _SEH2_GetExceptionCode();
1750         } _SEH2_END;
1751 
1752         if (!NT_SUCCESS(Status)) {
1753             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1754             IoFreeMdl(master_mdl);
1755             goto exit;
1756         }
1757 
1758         pfns = (PFN_NUMBER*)(master_mdl + 1);
1759 
1760         stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1761         if (!stripes) {
1762             ERR("out of memory\n");
1763             Status = STATUS_INSUFFICIENT_RESOURCES;
1764             goto exit;
1765         }
1766 
1767         RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1768 
1769         for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1770             UINT64 sstart, send;
1771             BOOL stripeset = FALSE;
1772 
1773             if (startoffstripe > i)
1774                 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1775             else if (startoffstripe == i)
1776                 sstart = startoff;
1777             else
1778                 sstart = startoff - (startoff % ci->stripe_length);
1779 
1780             if (endoffstripe > i)
1781                 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1782             else if (endoffstripe == i)
1783                 send = endoff + 1;
1784             else
1785                 send = endoff - (endoff % ci->stripe_length);
1786 
1787             for (j = 0; j < ci->sub_stripes; j++) {
1788                 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1789                     context.stripes[i+j].stripestart = sstart;
1790                     context.stripes[i+j].stripeend = send;
1791                     stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1792 
1793                     if (sstart != send) {
1794                         context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1795 
1796                         if (!context.stripes[i+j].mdl) {
1797                             ERR("IoAllocateMdl failed\n");
1798                             Status = STATUS_INSUFFICIENT_RESOURCES;
1799                             goto exit;
1800                         }
1801                     }
1802 
1803                     stripeset = TRUE;
1804                 } else
1805                     context.stripes[i+j].status = ReadDataStatus_Skip;
1806             }
1807 
1808             if (!stripeset) {
1809                 for (j = 0; j < ci->sub_stripes; j++) {
1810                     if (devices[i+j] && devices[i+j]->devobj) {
1811                         context.stripes[i+j].stripestart = sstart;
1812                         context.stripes[i+j].stripeend = send;
1813                         context.stripes[i+j].status = ReadDataStatus_Pending;
1814                         stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1815 
1816                         if (sstart != send) {
1817                             context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1818 
1819                             if (!context.stripes[i+j].mdl) {
1820                                 ERR("IoAllocateMdl failed\n");
1821                                 Status = STATUS_INSUFFICIENT_RESOURCES;
1822                                 goto exit;
1823                             }
1824                         }
1825 
1826                         stripeset = TRUE;
1827                         break;
1828                     }
1829                 }
1830 
1831                 if (!stripeset) {
1832                     ERR("could not find stripe to read\n");
1833                     Status = STATUS_DEVICE_NOT_READY;
1834                     goto exit;
1835                 }
1836             }
1837         }
1838 
1839         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1840         if (!stripeoff) {
1841             ERR("out of memory\n");
1842             Status = STATUS_INSUFFICIENT_RESOURCES;
1843             goto exit;
1844         }
1845 
1846         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
1847 
1848         pos = 0;
1849         stripe = startoffstripe / ci->sub_stripes;
1850         while (pos < length) {
1851             PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1852 
1853             if (pos == 0) {
1854                 UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1855                                              ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1856 
1857                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1858 
1859                 stripeoff[stripe] += readlen;
1860                 pos += readlen;
1861             } else if (length - pos < ci->stripe_length) {
1862                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1863 
1864                 pos = length;
1865             } else {
1866                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1867 
1868                 stripeoff[stripe] += (ULONG)ci->stripe_length;
1869                 pos += (ULONG)ci->stripe_length;
1870             }
1871 
1872             stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1873         }
1874 
1875         MmUnlockPages(master_mdl);
1876         IoFreeMdl(master_mdl);
1877 
1878         ExFreePool(stripeoff);
1879         ExFreePool(stripes);
1880     } else if (type == BLOCK_FLAG_DUPLICATE) {
1881         UINT64 orig_ls;
1882 
1883         if (c)
1884             orig_ls = i = c->last_stripe;
1885         else
1886             orig_ls = i = 0;
1887 
1888         while (!devices[i] || !devices[i]->devobj) {
1889             i = (i + 1) % ci->num_stripes;
1890 
1891             if (i == orig_ls) {
1892                 ERR("no devices available to service request\n");
1893                 Status = STATUS_DEVICE_NOT_READY;
1894                 goto exit;
1895             }
1896         }
1897 
1898         if (c)
1899             c->last_stripe = (i + 1) % ci->num_stripes;
1900 
1901         context.stripes[i].stripestart = addr - offset;
1902         context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1903 
1904         if (file_read) {
1905             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1906 
1907             if (!context.va) {
1908                 ERR("out of memory\n");
1909                 Status = STATUS_INSUFFICIENT_RESOURCES;
1910                 goto exit;
1911             }
1912 
1913             context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1914             if (!context.stripes[i].mdl) {
1915                 ERR("IoAllocateMdl failed\n");
1916                 Status = STATUS_INSUFFICIENT_RESOURCES;
1917                 goto exit;
1918             }
1919 
1920             MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1921         } else {
1922             context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL);
1923 
1924             if (!context.stripes[i].mdl) {
1925                 ERR("IoAllocateMdl failed\n");
1926                 Status = STATUS_INSUFFICIENT_RESOURCES;
1927                 goto exit;
1928             }
1929 
1930             Status = STATUS_SUCCESS;
1931 
1932             _SEH2_TRY {
1933                 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1934             } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1935                 Status = _SEH2_GetExceptionCode();
1936             } _SEH2_END;
1937 
1938             if (!NT_SUCCESS(Status)) {
1939                 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1940                 goto exit;
1941             }
1942         }
1943     } else if (type == BLOCK_FLAG_RAID5) {
1944         UINT64 startoff, endoff;
1945         UINT16 endoffstripe, parity;
1946         UINT32 *stripeoff, pos;
1947         PMDL master_mdl;
1948         PFN_NUMBER *pfns, dummy;
1949         BOOL need_dummy = FALSE;
1950 
1951         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1952         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1953 
1954         if (file_read) {
1955             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1956 
1957             if (!context.va) {
1958                 ERR("out of memory\n");
1959                 Status = STATUS_INSUFFICIENT_RESOURCES;
1960                 goto exit;
1961             }
1962         } else
1963             context.va = buf;
1964 
1965         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1966         if (!master_mdl) {
1967             ERR("out of memory\n");
1968             Status = STATUS_INSUFFICIENT_RESOURCES;
1969             goto exit;
1970         }
1971 
1972         Status = STATUS_SUCCESS;
1973 
1974         _SEH2_TRY {
1975             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1976         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1977             Status = _SEH2_GetExceptionCode();
1978         } _SEH2_END;
1979 
1980         if (!NT_SUCCESS(Status)) {
1981             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1982             IoFreeMdl(master_mdl);
1983             goto exit;
1984         }
1985 
1986         pfns = (PFN_NUMBER*)(master_mdl + 1);
1987 
1988         pos = 0;
1989         while (pos < length) {
1990             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
1991 
1992             if (pos == 0) {
1993                 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
1994                 ULONG skip, readlen;
1995 
1996                 i = startoffstripe;
1997                 while (stripe != parity) {
1998                     if (i == startoffstripe) {
1999                         readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2000 
2001                         context.stripes[stripe].stripestart = startoff;
2002                         context.stripes[stripe].stripeend = startoff + readlen;
2003 
2004                         pos += readlen;
2005 
2006                         if (pos == length)
2007                             break;
2008                     } else {
2009                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2010 
2011                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2012                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2013 
2014                         pos += readlen;
2015 
2016                         if (pos == length)
2017                             break;
2018                     }
2019 
2020                     i++;
2021                     stripe = (stripe + 1) % ci->num_stripes;
2022                 }
2023 
2024                 if (pos == length)
2025                     break;
2026 
2027                 for (i = 0; i < startoffstripe; i++) {
2028                     UINT16 stripe2 = (parity + i + 1) % ci->num_stripes;
2029 
2030                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2031                 }
2032 
2033                 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2034 
2035                 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2036                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2037 
2038                     for (i = 0; i < ci->num_stripes; i++) {
2039                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2040                     }
2041 
2042                     pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2043                     need_dummy = TRUE;
2044                 }
2045             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2046                 for (i = 0; i < ci->num_stripes; i++) {
2047                     context.stripes[i].stripeend += ci->stripe_length;
2048                 }
2049 
2050                 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1));
2051                 need_dummy = TRUE;
2052             } else {
2053                 UINT16 stripe = (parity + 1) % ci->num_stripes;
2054 
2055                 i = 0;
2056                 while (stripe != parity) {
2057                     if (endoffstripe == i) {
2058                         context.stripes[stripe].stripeend = endoff + 1;
2059                         break;
2060                     } else if (endoffstripe > i)
2061                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2062 
2063                     i++;
2064                     stripe = (stripe + 1) % ci->num_stripes;
2065                 }
2066 
2067                 break;
2068             }
2069         }
2070 
2071         for (i = 0; i < ci->num_stripes; i++) {
2072             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2073                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2074                                                        FALSE, FALSE, NULL);
2075 
2076                 if (!context.stripes[i].mdl) {
2077                     ERR("IoAllocateMdl failed\n");
2078                     Status = STATUS_INSUFFICIENT_RESOURCES;
2079                     goto exit;
2080                 }
2081             }
2082         }
2083 
2084         if (need_dummy) {
2085             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2086             if (!dummypage) {
2087                 ERR("out of memory\n");
2088                 Status = STATUS_INSUFFICIENT_RESOURCES;
2089                 goto exit;
2090             }
2091 
2092             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2093             if (!dummy_mdl) {
2094                 ERR("IoAllocateMdl failed\n");
2095                 Status = STATUS_INSUFFICIENT_RESOURCES;
2096                 ExFreePool(dummypage);
2097                 goto exit;
2098             }
2099 
2100             MmBuildMdlForNonPagedPool(dummy_mdl);
2101 
2102             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2103         }
2104 
2105         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2106         if (!stripeoff) {
2107             ERR("out of memory\n");
2108             Status = STATUS_INSUFFICIENT_RESOURCES;
2109             goto exit;
2110         }
2111 
2112         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2113 
2114         pos = 0;
2115 
2116         while (pos < length) {
2117             PFN_NUMBER* stripe_pfns;
2118 
2119             parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2120 
2121             if (pos == 0) {
2122                 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2123                 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2124                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2125 
2126                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2127 
2128                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2129 
2130                 stripeoff[stripe] = readlen;
2131                 pos += readlen;
2132 
2133                 stripe = (stripe + 1) % ci->num_stripes;
2134 
2135                 while (stripe != parity) {
2136                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2137                     readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2138 
2139                     if (readlen == 0)
2140                         break;
2141 
2142                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2143 
2144                     stripeoff[stripe] = readlen;
2145                     pos += readlen;
2146 
2147                     stripe = (stripe + 1) % ci->num_stripes;
2148                 }
2149             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2150                 UINT16 stripe = (parity + 1) % ci->num_stripes;
2151                 ULONG k;
2152 
2153                 while (stripe != parity) {
2154                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2155 
2156                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2157 
2158                     stripeoff[stripe] += (UINT32)ci->stripe_length;
2159                     pos += (UINT32)ci->stripe_length;
2160 
2161                     stripe = (stripe + 1) % ci->num_stripes;
2162                 }
2163 
2164                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2165 
2166                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2167                     stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2168                     stripeoff[parity] += PAGE_SIZE;
2169                 }
2170             } else {
2171                 UINT16 stripe = (parity + 1) % ci->num_stripes;
2172                 UINT32 readlen;
2173 
2174                 while (pos < length) {
2175                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2176                     readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2177 
2178                     if (readlen == 0)
2179                         break;
2180 
2181                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2182 
2183                     stripeoff[stripe] += readlen;
2184                     pos += readlen;
2185 
2186                     stripe = (stripe + 1) % ci->num_stripes;
2187                 }
2188             }
2189         }
2190 
2191         MmUnlockPages(master_mdl);
2192         IoFreeMdl(master_mdl);
2193 
2194         ExFreePool(stripeoff);
2195     } else if (type == BLOCK_FLAG_RAID6) {
2196         UINT64 startoff, endoff;
2197         UINT16 endoffstripe, parity1;
2198         UINT32 *stripeoff, pos;
2199         PMDL master_mdl;
2200         PFN_NUMBER *pfns, dummy;
2201         BOOL need_dummy = FALSE;
2202 
2203         get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2204         get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2205 
2206         if (file_read) {
2207             context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2208 
2209             if (!context.va) {
2210                 ERR("out of memory\n");
2211                 Status = STATUS_INSUFFICIENT_RESOURCES;
2212                 goto exit;
2213             }
2214         } else
2215             context.va = buf;
2216 
2217         master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
2218         if (!master_mdl) {
2219             ERR("out of memory\n");
2220             Status = STATUS_INSUFFICIENT_RESOURCES;
2221             goto exit;
2222         }
2223 
2224         Status = STATUS_SUCCESS;
2225 
2226         _SEH2_TRY {
2227             MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2228         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2229             Status = _SEH2_GetExceptionCode();
2230         } _SEH2_END;
2231 
2232         if (!NT_SUCCESS(Status)) {
2233             ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2234             IoFreeMdl(master_mdl);
2235             goto exit;
2236         }
2237 
2238         pfns = (PFN_NUMBER*)(master_mdl + 1);
2239 
2240         pos = 0;
2241         while (pos < length) {
2242             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2243 
2244             if (pos == 0) {
2245                 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2246                 ULONG skip, readlen;
2247 
2248                 i = startoffstripe;
2249                 while (stripe != parity1) {
2250                     if (i == startoffstripe) {
2251                         readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2252 
2253                         context.stripes[stripe].stripestart = startoff;
2254                         context.stripes[stripe].stripeend = startoff + readlen;
2255 
2256                         pos += readlen;
2257 
2258                         if (pos == length)
2259                             break;
2260                     } else {
2261                         readlen = min(length - pos, (ULONG)ci->stripe_length);
2262 
2263                         context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2264                         context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2265 
2266                         pos += readlen;
2267 
2268                         if (pos == length)
2269                             break;
2270                     }
2271 
2272                     i++;
2273                     stripe = (stripe + 1) % ci->num_stripes;
2274                 }
2275 
2276                 if (pos == length)
2277                     break;
2278 
2279                 for (i = 0; i < startoffstripe; i++) {
2280                     UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes;
2281 
2282                     context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2283                 }
2284 
2285                 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2286 
2287                 parity2 = (parity1 + 1) % ci->num_stripes;
2288                 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2289 
2290                 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2291                     skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2292 
2293                     for (i = 0; i < ci->num_stripes; i++) {
2294                         context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2295                     }
2296 
2297                     pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2298                     need_dummy = TRUE;
2299                 }
2300             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2301                 for (i = 0; i < ci->num_stripes; i++) {
2302                     context.stripes[i].stripeend += ci->stripe_length;
2303                 }
2304 
2305                 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2));
2306                 need_dummy = TRUE;
2307             } else {
2308                 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2309 
2310                 i = 0;
2311                 while (stripe != parity1) {
2312                     if (endoffstripe == i) {
2313                         context.stripes[stripe].stripeend = endoff + 1;
2314                         break;
2315                     } else if (endoffstripe > i)
2316                         context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2317 
2318                     i++;
2319                     stripe = (stripe + 1) % ci->num_stripes;
2320                 }
2321 
2322                 break;
2323             }
2324         }
2325 
2326         for (i = 0; i < ci->num_stripes; i++) {
2327             if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2328                 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
2329 
2330                 if (!context.stripes[i].mdl) {
2331                     ERR("IoAllocateMdl failed\n");
2332                     Status = STATUS_INSUFFICIENT_RESOURCES;
2333                     goto exit;
2334                 }
2335             }
2336         }
2337 
2338         if (need_dummy) {
2339             dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2340             if (!dummypage) {
2341                 ERR("out of memory\n");
2342                 Status = STATUS_INSUFFICIENT_RESOURCES;
2343                 goto exit;
2344             }
2345 
2346             dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2347             if (!dummy_mdl) {
2348                 ERR("IoAllocateMdl failed\n");
2349                 Status = STATUS_INSUFFICIENT_RESOURCES;
2350                 ExFreePool(dummypage);
2351                 goto exit;
2352             }
2353 
2354             MmBuildMdlForNonPagedPool(dummy_mdl);
2355 
2356             dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2357         }
2358 
2359         stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2360         if (!stripeoff) {
2361             ERR("out of memory\n");
2362             Status = STATUS_INSUFFICIENT_RESOURCES;
2363             goto exit;
2364         }
2365 
2366         RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2367 
2368         pos = 0;
2369 
2370         while (pos < length) {
2371             PFN_NUMBER* stripe_pfns;
2372 
2373             parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2374 
2375             if (pos == 0) {
2376                 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2377                 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2378                                                        ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2379 
2380                 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2381 
2382                 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2383 
2384                 stripeoff[stripe] = readlen;
2385                 pos += readlen;
2386 
2387                 stripe = (stripe + 1) % ci->num_stripes;
2388 
2389                 while (stripe != parity1) {
2390                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2391                     readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2392 
2393                     if (readlen == 0)
2394                         break;
2395 
2396                     RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2397 
2398                     stripeoff[stripe] = readlen;
2399                     pos += readlen;
2400 
2401                     stripe = (stripe + 1) % ci->num_stripes;
2402                 }
2403             } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2404                 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2405                 UINT16 parity2 = (parity1 + 1) % ci->num_stripes;
2406                 ULONG k;
2407 
2408                 while (stripe != parity1) {
2409                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2410 
2411                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2412 
2413                     stripeoff[stripe] += (UINT32)ci->stripe_length;
2414                     pos += (UINT32)ci->stripe_length;
2415 
2416                     stripe = (stripe + 1) % ci->num_stripes;
2417                 }
2418 
2419                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2420 
2421                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2422                     stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2423                     stripeoff[parity1] += PAGE_SIZE;
2424                 }
2425 
2426                 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2427 
2428                 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2429                     stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2430                     stripeoff[parity2] += PAGE_SIZE;
2431                 }
2432             } else {
2433                 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2434                 UINT32 readlen;
2435 
2436                 while (pos < length) {
2437                     stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2438                     readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2439 
2440                     if (readlen == 0)
2441                         break;
2442 
2443                     RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2444 
2445                     stripeoff[stripe] += readlen;
2446                     pos += readlen;
2447 
2448                     stripe = (stripe + 1) % ci->num_stripes;
2449                 }
2450             }
2451         }
2452 
2453         MmUnlockPages(master_mdl);
2454         IoFreeMdl(master_mdl);
2455 
2456         ExFreePool(stripeoff);
2457     }
2458 
2459     context.address = addr;
2460 
2461     for (i = 0; i < ci->num_stripes; i++) {
2462         if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2463             context.stripes[i].status = ReadDataStatus_MissingDevice;
2464             context.stripes_left--;
2465 
2466             if (!devices[i] || !devices[i]->devobj)
2467                 missing_devices++;
2468         }
2469     }
2470 
2471     if (missing_devices > allowed_missing) {
2472         ERR("not enough devices to service request (%u missing)\n", missing_devices);
2473         Status = STATUS_UNEXPECTED_IO_ERROR;
2474         goto exit;
2475     }
2476 
2477     for (i = 0; i < ci->num_stripes; i++) {
2478         PIO_STACK_LOCATION IrpSp;
2479 
2480         if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2481             context.stripes[i].context = (struct read_data_context*)&context;
2482 
2483             if (type == BLOCK_FLAG_RAID10) {
2484                 context.stripes[i].stripenum = i / ci->sub_stripes;
2485             }
2486 
2487             if (!Irp) {
2488                 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
2489 
2490                 if (!context.stripes[i].Irp) {
2491                     ERR("IoAllocateIrp failed\n");
2492                     Status = STATUS_INSUFFICIENT_RESOURCES;
2493                     goto exit;
2494                 }
2495             } else {
2496                 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2497 
2498                 if (!context.stripes[i].Irp) {
2499                     ERR("IoMakeAssociatedIrp failed\n");
2500                     Status = STATUS_INSUFFICIENT_RESOURCES;
2501                     goto exit;
2502                 }
2503             }
2504 
2505             IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2506             IrpSp->MajorFunction = IRP_MJ_READ;
2507 
2508             if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2509                 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2510                 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2511                     ERR("out of memory\n");
2512                     Status = STATUS_INSUFFICIENT_RESOURCES;
2513                     goto exit;
2514                 }
2515 
2516                 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2517 
2518                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2519             } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2520                 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2521             else
2522                 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2523 
2524             IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2525             IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2526 
2527             total_reading += IrpSp->Parameters.Read.Length;
2528 
2529             context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2530 
2531             IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE);
2532 
2533             context.stripes[i].status = ReadDataStatus_Pending;
2534         }
2535     }
2536 
2537 #ifdef DEBUG_STATS
2538     if (!is_tree)
2539         time1 = KeQueryPerformanceCounter(NULL);
2540 #endif
2541 
2542     need_to_wait = FALSE;
2543     for (i = 0; i < ci->num_stripes; i++) {
2544         if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2545             IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2546             need_to_wait = TRUE;
2547         }
2548     }
2549 
2550     if (need_to_wait)
2551         KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2552 
2553 #ifdef DEBUG_STATS
2554     if (!is_tree) {
2555         time2 = KeQueryPerformanceCounter(NULL);
2556 
2557         Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
2558     }
2559 #endif
2560 
2561     if (diskacc)
2562         fFsRtlUpdateDiskCounters(total_reading, 0);
2563 
2564     // check if any of the devices return a "user-induced" error
2565 
2566     for (i = 0; i < ci->num_stripes; i++) {
2567         if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2568             Status = context.stripes[i].iosb.Status;
2569             goto exit;
2570         }
2571     }
2572 
2573     if (type == BLOCK_FLAG_RAID0) {
2574         Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2575         if (!NT_SUCCESS(Status)) {
2576             ERR("read_data_raid0 returned %08x\n", Status);
2577 
2578             if (file_read)
2579                 ExFreePool(context.va);
2580 
2581             goto exit;
2582         }
2583 
2584         if (file_read) {
2585             RtlCopyMemory(buf, context.va, length);
2586             ExFreePool(context.va);
2587         }
2588     } else if (type == BLOCK_FLAG_RAID10) {
2589         Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2590 
2591         if (!NT_SUCCESS(Status)) {
2592             ERR("read_data_raid10 returned %08x\n", Status);
2593 
2594             if (file_read)
2595                 ExFreePool(context.va);
2596 
2597             goto exit;
2598         }
2599 
2600         if (file_read) {
2601             RtlCopyMemory(buf, context.va, length);
2602             ExFreePool(context.va);
2603         }
2604     } else if (type == BLOCK_FLAG_DUPLICATE) {
2605         Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2606         if (!NT_SUCCESS(Status)) {
2607             ERR("read_data_dup returned %08x\n", Status);
2608 
2609             if (file_read)
2610                 ExFreePool(context.va);
2611 
2612             goto exit;
2613         }
2614 
2615         if (file_read) {
2616             RtlCopyMemory(buf, context.va, length);
2617             ExFreePool(context.va);
2618         }
2619     } else if (type == BLOCK_FLAG_RAID5) {
2620         Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2621         if (!NT_SUCCESS(Status)) {
2622             ERR("read_data_raid5 returned %08x\n", Status);
2623 
2624             if (file_read)
2625                 ExFreePool(context.va);
2626 
2627             goto exit;
2628         }
2629 
2630         if (file_read) {
2631             RtlCopyMemory(buf, context.va, length);
2632             ExFreePool(context.va);
2633         }
2634     } else if (type == BLOCK_FLAG_RAID6) {
2635         Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2636         if (!NT_SUCCESS(Status)) {
2637             ERR("read_data_raid6 returned %08x\n", Status);
2638 
2639             if (file_read)
2640                 ExFreePool(context.va);
2641 
2642             goto exit;
2643         }
2644 
2645         if (file_read) {
2646             RtlCopyMemory(buf, context.va, length);
2647             ExFreePool(context.va);
2648         }
2649     }
2650 
2651 exit:
2652     if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2653         chunk_unlock_range(Vcb, c, lockaddr, locklen);
2654 
2655     if (dummy_mdl)
2656         IoFreeMdl(dummy_mdl);
2657 
2658     if (dummypage)
2659         ExFreePool(dummypage);
2660 
2661     for (i = 0; i < ci->num_stripes; i++) {
2662         if (context.stripes[i].mdl) {
2663             if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2664                 MmUnlockPages(context.stripes[i].mdl);
2665 
2666             IoFreeMdl(context.stripes[i].mdl);
2667         }
2668 
2669         if (context.stripes[i].Irp)
2670             IoFreeIrp(context.stripes[i].Irp);
2671     }
2672 
2673     ExFreePool(context.stripes);
2674 
2675     if (!Vcb->log_to_phys_loaded)
2676         ExFreePool(devices);
2677 
2678     return Status;
2679 }
2680 
2681 NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) {
2682     ULONG readlen;
2683 
2684     TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2685 
2686     if (pbr) *pbr = 0;
2687 
2688     if (start >= fcb->adsdata.Length) {
2689         TRACE("tried to read beyond end of stream\n");
2690         return STATUS_END_OF_FILE;
2691     }
2692 
2693     if (length == 0) {
2694         WARN("tried to read zero bytes\n");
2695         return STATUS_SUCCESS;
2696     }
2697 
2698     if (start + length < fcb->adsdata.Length)
2699         readlen = length;
2700     else
2701         readlen = fcb->adsdata.Length - (ULONG)start;
2702 
2703     if (readlen > 0)
2704         RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen);
2705 
2706     if (pbr) *pbr = readlen;
2707 
2708     return STATUS_SUCCESS;
2709 }
2710 
2711 NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) {
2712     NTSTATUS Status;
2713     EXTENT_DATA* ed;
2714     UINT32 bytes_read = 0;
2715     UINT64 last_end;
2716     LIST_ENTRY* le;
2717 #ifdef DEBUG_STATS
2718     LARGE_INTEGER time1, time2;
2719 #endif
2720 
2721     TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2722 
2723     if (pbr)
2724         *pbr = 0;
2725 
2726     if (start >= fcb->inode_item.st_size) {
2727         WARN("Tried to read beyond end of file\n");
2728         Status = STATUS_END_OF_FILE;
2729         goto exit;
2730     }
2731 
2732 #ifdef DEBUG_STATS
2733     time1 = KeQueryPerformanceCounter(NULL);
2734 #endif
2735 
2736     le = fcb->extents.Flink;
2737 
2738     last_end = start;
2739 
2740     while (le != &fcb->extents) {
2741         UINT64 len;
2742         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2743         EXTENT_DATA2* ed2;
2744 
2745         if (!ext->ignore) {
2746             ed = &ext->extent_data;
2747 
2748             ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2749 
2750             len = ed2 ? ed2->num_bytes : ed->decoded_size;
2751 
2752             if (ext->offset + len <= start) {
2753                 last_end = ext->offset + len;
2754                 goto nextitem;
2755             }
2756 
2757             if (ext->offset > last_end && ext->offset > start + bytes_read) {
2758                 UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end));
2759 
2760                 RtlZeroMemory(data + bytes_read, read);
2761                 bytes_read += read;
2762                 length -= read;
2763             }
2764 
2765             if (length == 0 || ext->offset > start + bytes_read + length)
2766                 break;
2767 
2768             if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2769                 WARN("Encryption not supported\n");
2770                 Status = STATUS_NOT_IMPLEMENTED;
2771                 goto exit;
2772             }
2773 
2774             if (ed->encoding != BTRFS_ENCODING_NONE) {
2775                 WARN("Other encodings not supported\n");
2776                 Status = STATUS_NOT_IMPLEMENTED;
2777                 goto exit;
2778             }
2779 
2780             switch (ed->type) {
2781                 case EXTENT_TYPE_INLINE:
2782                 {
2783                     UINT64 off = start + bytes_read - ext->offset;
2784                     UINT32 read;
2785 
2786                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2787                         read = (UINT32)min(min(len, ext->datalen) - off, length);
2788 
2789                         RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2790                     } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO) {
2791                         UINT8* decomp;
2792                         BOOL decomp_alloc;
2793                         UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]);
2794 
2795                         if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2796                             ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size);
2797                             Status = STATUS_INTERNAL_ERROR;
2798                             goto exit;
2799                         }
2800 
2801                         read = (UINT32)min(ed->decoded_size - off, length);
2802 
2803                         if (off > 0) {
2804                             decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG);
2805                             if (!decomp) {
2806                                 ERR("out of memory\n");
2807                                 Status = STATUS_INSUFFICIENT_RESOURCES;
2808                                 goto exit;
2809                             }
2810 
2811                             decomp_alloc = TRUE;
2812                         } else {
2813                             decomp = data + bytes_read;
2814                             decomp_alloc = FALSE;
2815                         }
2816 
2817                         if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2818                             Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
2819                             if (!NT_SUCCESS(Status)) {
2820                                 ERR("zlib_decompress returned %08x\n", Status);
2821                                 if (decomp_alloc) ExFreePool(decomp);
2822                                 goto exit;
2823                             }
2824                         } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2825                             if (inlen < sizeof(UINT32)) {
2826                                 ERR("extent data was truncated\n");
2827                                 Status = STATUS_INTERNAL_ERROR;
2828                                 if (decomp_alloc) ExFreePool(decomp);
2829                                 goto exit;
2830                             } else
2831                                 inlen -= sizeof(UINT32);
2832 
2833                             Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32));
2834                             if (!NT_SUCCESS(Status)) {
2835                                 ERR("lzo_decompress returned %08x\n", Status);
2836                                 if (decomp_alloc) ExFreePool(decomp);
2837                                 goto exit;
2838                             }
2839                         }
2840 
2841                         if (decomp_alloc) {
2842                             RtlCopyMemory(data + bytes_read, decomp + off, read);
2843                             ExFreePool(decomp);
2844                         }
2845                     } else {
2846                         ERR("unhandled compression type %x\n", ed->compression);
2847                         Status = STATUS_NOT_IMPLEMENTED;
2848                         goto exit;
2849                     }
2850 
2851                     bytes_read += read;
2852                     length -= read;
2853 
2854                     break;
2855                 }
2856 
2857                 case EXTENT_TYPE_REGULAR:
2858                 {
2859                     UINT64 off = start + bytes_read - ext->offset;
2860                     UINT32 to_read, read;
2861                     UINT8* buf;
2862                     BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE;
2863                     BOOL buf_free;
2864                     UINT32 bumpoff = 0, *csum;
2865                     UINT64 addr;
2866                     chunk* c;
2867 
2868                     read = (UINT32)(len - off);
2869                     if (read > length) read = (UINT32)length;
2870 
2871                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2872                         addr = ed2->address + ed2->offset + off;
2873                         to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size);
2874 
2875                         if (addr % fcb->Vcb->superblock.sector_size > 0) {
2876                             bumpoff = addr % fcb->Vcb->superblock.sector_size;
2877                             addr -= bumpoff;
2878                             to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2879                         }
2880                     } else {
2881                         addr = ed2->address;
2882                         to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2883                     }
2884 
2885                     if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2886                         length % fcb->Vcb->superblock.sector_size == 0) {
2887                         buf = data + bytes_read;
2888                         buf_free = FALSE;
2889                     } else {
2890                         buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
2891                         buf_free = TRUE;
2892 
2893                         if (!buf) {
2894                             ERR("out of memory\n");
2895                             Status = STATUS_INSUFFICIENT_RESOURCES;
2896                             goto exit;
2897                         }
2898 
2899                         mdl = FALSE;
2900                     }
2901 
2902                     c = get_chunk_from_address(fcb->Vcb, addr);
2903 
2904                     if (!c) {
2905                         ERR("get_chunk_from_address(%llx) failed\n", addr);
2906 
2907                         if (buf_free)
2908                             ExFreePool(buf);
2909 
2910                         goto exit;
2911                     }
2912 
2913                     if (ext->csum) {
2914                         if (ed->compression == BTRFS_COMPRESSION_NONE)
2915                             csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2916                         else
2917                             csum = ext->csum;
2918                     } else
2919                         csum = NULL;
2920 
2921                     Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl,
2922                                        fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2923                     if (!NT_SUCCESS(Status)) {
2924                         ERR("read_data returned %08x\n", Status);
2925 
2926                         if (buf_free)
2927                             ExFreePool(buf);
2928 
2929                         goto exit;
2930                     }
2931 
2932                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
2933                         if (buf_free)
2934                             RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2935                     } else {
2936                         UINT8 *decomp = NULL, *buf2;
2937                         ULONG outlen, inlen, off2;
2938                         UINT32 inpageoff = 0;
2939 
2940                         off2 = (ULONG)(ed2->offset + off);
2941                         buf2 = buf;
2942                         inlen = (ULONG)ed2->size;
2943 
2944                         if (ed->compression == BTRFS_COMPRESSION_LZO) {
2945                             ULONG inoff = sizeof(UINT32);
2946 
2947                             inlen -= sizeof(UINT32);
2948 
2949                             // If reading a few sectors in, skip to the interesting bit
2950                             while (off2 > LINUX_PAGE_SIZE) {
2951                                 UINT32 partlen;
2952 
2953                                 if (inlen < sizeof(UINT32))
2954                                     break;
2955 
2956                                 partlen = *(UINT32*)(buf2 + inoff);
2957 
2958                                 if (partlen < inlen) {
2959                                     off2 -= LINUX_PAGE_SIZE;
2960                                     inoff += partlen + sizeof(UINT32);
2961                                     inlen -= partlen + sizeof(UINT32);
2962 
2963                                     if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32))
2964                                         inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE;
2965                                 } else
2966                                     break;
2967                             }
2968 
2969                             buf2 = &buf2[inoff];
2970                             inpageoff = inoff % LINUX_PAGE_SIZE;
2971                         }
2972 
2973                         if (off2 != 0) {
2974                             outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off));
2975 
2976                             decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG);
2977                             if (!decomp) {
2978                                 ERR("out of memory\n");
2979                                 ExFreePool(buf);
2980                                 Status = STATUS_INSUFFICIENT_RESOURCES;
2981                                 goto exit;
2982                             }
2983                         } else
2984                             outlen = min(read, (UINT32)(ed2->num_bytes - off));
2985 
2986                         if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2987                             Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
2988 
2989                             if (!NT_SUCCESS(Status)) {
2990                                 ERR("zlib_decompress returned %08x\n", Status);
2991                                 ExFreePool(buf);
2992 
2993                                 if (decomp)
2994                                     ExFreePool(decomp);
2995 
2996                                 goto exit;
2997                             }
2998                         } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2999                             Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
3000 
3001                             if (!NT_SUCCESS(Status)) {
3002                                 ERR("lzo_decompress returned %08x\n", Status);
3003                                 ExFreePool(buf);
3004 
3005                                 if (decomp)
3006                                     ExFreePool(decomp);
3007 
3008                                 goto exit;
3009                             }
3010                         } else {
3011                             ERR("unsupported compression type %x\n", ed->compression);
3012                             Status = STATUS_NOT_SUPPORTED;
3013 
3014                             ExFreePool(buf);
3015 
3016                             if (decomp)
3017                                 ExFreePool(decomp);
3018 
3019                             goto exit;
3020                         }
3021 
3022                         if (decomp) {
3023                             RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3024                             ExFreePool(decomp);
3025                         }
3026                     }
3027 
3028                     if (buf_free)
3029                         ExFreePool(buf);
3030 
3031                     bytes_read += read;
3032                     length -= read;
3033 
3034                     break;
3035                 }
3036 
3037                 case EXTENT_TYPE_PREALLOC:
3038                 {
3039                     UINT64 off = start + bytes_read - ext->offset;
3040                     UINT32 read = (UINT32)(len - off);
3041 
3042                     if (read > length) read = (UINT32)length;
3043 
3044                     RtlZeroMemory(data + bytes_read, read);
3045 
3046                     bytes_read += read;
3047                     length -= read;
3048 
3049                     break;
3050                 }
3051 
3052                 default:
3053                     WARN("Unsupported extent data type %u\n", ed->type);
3054                     Status = STATUS_NOT_IMPLEMENTED;
3055                     goto exit;
3056             }
3057 
3058             last_end = ext->offset + len;
3059 
3060             if (length == 0)
3061                 break;
3062         }
3063 
3064 nextitem:
3065         le = le->Flink;
3066     }
3067 
3068     if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3069         UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length);
3070 
3071         RtlZeroMemory(data + bytes_read, read);
3072 
3073         bytes_read += read;
3074         length -= read;
3075     }
3076 
3077     Status = STATUS_SUCCESS;
3078     if (pbr)
3079         *pbr = bytes_read;
3080 
3081 #ifdef DEBUG_STATS
3082     time2 = KeQueryPerformanceCounter(NULL);
3083 
3084     fcb->Vcb->stats.num_reads++;
3085     fcb->Vcb->stats.data_read += bytes_read;
3086     fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
3087 #endif
3088 
3089 exit:
3090     return Status;
3091 }
3092 
3093 NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) {
3094     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3095     PFILE_OBJECT FileObject = IrpSp->FileObject;
3096     fcb* fcb = FileObject->FsContext;
3097     UINT8* data = NULL;
3098     ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3099     UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3100 
3101     *bytes_read = 0;
3102 
3103     if (!fcb || !fcb->Vcb || !fcb->subvol)
3104         return STATUS_INTERNAL_ERROR;
3105 
3106     TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
3107     TRACE("offset = %llx, length = %x\n", start, length);
3108     TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");
3109 
3110     if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3111         return STATUS_INVALID_DEVICE_REQUEST;
3112 
3113     if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3114         WARN("tried to read locked region\n");
3115         return STATUS_FILE_LOCK_CONFLICT;
3116     }
3117 
3118     if (length == 0) {
3119         TRACE("tried to read zero bytes\n");
3120         return STATUS_SUCCESS;
3121     }
3122 
3123     if (start >= (UINT64)fcb->Header.FileSize.QuadPart) {
3124         TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart);
3125         return STATUS_END_OF_FILE;
3126     }
3127 
3128     TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3129 
3130     if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
3131         data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3132 
3133         if (Irp->MdlAddress && !data) {
3134             ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3135             return STATUS_INSUFFICIENT_RESOURCES;
3136         }
3137 
3138         if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3139             length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3140             RtlZeroMemory(data, length);
3141             Irp->IoStatus.Information = *bytes_read = length;
3142             return STATUS_SUCCESS;
3143         }
3144 
3145         if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3146             addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3147             RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3148             length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3149         }
3150     }
3151 
3152     if (!(Irp->Flags & IRP_NOCACHE)) {
3153         NTSTATUS Status = STATUS_SUCCESS;
3154 
3155         _SEH2_TRY {
3156             if (!FileObject->PrivateCacheMap) {
3157                 CC_FILE_SIZES ccfs;
3158 
3159                 ccfs.AllocationSize = fcb->Header.AllocationSize;
3160                 ccfs.FileSize = fcb->Header.FileSize;
3161                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3162 
3163                 init_file_cache(FileObject, &ccfs);
3164             }
3165 
3166             if (IrpSp->MinorFunction & IRP_MN_MDL) {
3167                 CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3168             } else {
3169                 if (fCcCopyReadEx) {
3170                     TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3171                           length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3172                     TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3173                     if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3174                         TRACE("CcCopyReadEx could not wait\n");
3175 
3176                         IoMarkIrpPending(Irp);
3177                         return STATUS_PENDING;
3178                     }
3179                     TRACE("CcCopyReadEx finished\n");
3180                 } else {
3181                     TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3182                     TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3183                     if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3184                         TRACE("CcCopyRead could not wait\n");
3185 
3186                         IoMarkIrpPending(Irp);
3187                         return STATUS_PENDING;
3188                     }
3189                     TRACE("CcCopyRead finished\n");
3190                 }
3191             }
3192         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3193             Status = _SEH2_GetExceptionCode();
3194         } _SEH2_END;
3195 
3196         if (NT_SUCCESS(Status)) {
3197             Status = Irp->IoStatus.Status;
3198             Irp->IoStatus.Information += addon;
3199             *bytes_read = (ULONG)Irp->IoStatus.Information;
3200         } else
3201             ERR("EXCEPTION - %08x\n", Status);
3202 
3203         return Status;
3204     } else {
3205         NTSTATUS Status;
3206 
3207         if (!wait) {
3208             IoMarkIrpPending(Irp);
3209             return STATUS_PENDING;
3210         }
3211 
3212         if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) {
3213             IO_STATUS_BLOCK iosb;
3214 
3215             CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, length, &iosb);
3216 
3217             if (!NT_SUCCESS(iosb.Status)) {
3218                 ERR("CcFlushCache returned %08x\n", iosb.Status);
3219                 return iosb.Status;
3220             }
3221         }
3222 
3223         if (fcb->ads)
3224             Status = read_stream(fcb, data, start, length, bytes_read);
3225         else
3226             Status = read_file(fcb, data, start, length, bytes_read, Irp);
3227 
3228         *bytes_read += addon;
3229         TRACE("read %u bytes\n", *bytes_read);
3230 
3231         Irp->IoStatus.Information = *bytes_read;
3232 
3233         if (diskacc && Status != STATUS_PENDING) {
3234             PETHREAD thread = NULL;
3235 
3236             if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3237                 thread = Irp->Tail.Overlay.Thread;
3238             else if (!IoIsSystemThread(PsGetCurrentThread()))
3239                 thread = PsGetCurrentThread();
3240             else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
3241                 thread = PsGetCurrentThread();
3242 
3243             if (thread)
3244                 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3245         }
3246 
3247         return Status;
3248     }
3249 }
3250 
3251 _Dispatch_type_(IRP_MJ_READ)
3252 _Function_class_(DRIVER_DISPATCH)
3253 NTSTATUS drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3254     device_extension* Vcb = DeviceObject->DeviceExtension;
3255     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3256     PFILE_OBJECT FileObject = IrpSp->FileObject;
3257     ULONG bytes_read = 0;
3258     NTSTATUS Status;
3259     BOOL top_level;
3260     fcb* fcb;
3261     ccb* ccb;
3262     BOOLEAN fcb_lock = FALSE, wait;
3263 
3264     FsRtlEnterFileSystem();
3265 
3266     top_level = is_top_level(Irp);
3267 
3268     TRACE("read\n");
3269 
3270     if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3271         Status = vol_read(DeviceObject, Irp);
3272         goto exit2;
3273     } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3274         Status = STATUS_INVALID_PARAMETER;
3275         goto end;
3276     }
3277 
3278     Irp->IoStatus.Information = 0;
3279 
3280     if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3281         CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3282 
3283         Irp->MdlAddress = NULL;
3284         Status = STATUS_SUCCESS;
3285 
3286         goto exit;
3287     }
3288 
3289     fcb = FileObject->FsContext;
3290 
3291     if (!fcb) {
3292         ERR("fcb was NULL\n");
3293         Status = STATUS_INVALID_PARAMETER;
3294         goto exit;
3295     }
3296 
3297     ccb = FileObject->FsContext2;
3298 
3299     if (!ccb) {
3300         ERR("ccb was NULL\n");
3301         Status = STATUS_INVALID_PARAMETER;
3302         goto exit;
3303     }
3304 
3305     if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3306         WARN("insufficient privileges\n");
3307         Status = STATUS_ACCESS_DENIED;
3308         goto exit;
3309     }
3310 
3311     if (fcb == Vcb->volume_fcb) {
3312         TRACE("reading volume FCB\n");
3313 
3314         IoSkipCurrentIrpStackLocation(Irp);
3315 
3316         Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3317 
3318         goto exit2;
3319     }
3320 
3321     wait = IoIsOperationSynchronous(Irp);
3322 
3323     // Don't offload jobs when doing paging IO - otherwise this can lead to
3324     // deadlocks in CcCopyRead.
3325     if (Irp->Flags & IRP_PAGING_IO)
3326         wait = TRUE;
3327 
3328     if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3329         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3330             Status = STATUS_PENDING;
3331             IoMarkIrpPending(Irp);
3332             goto exit;
3333         }
3334 
3335         fcb_lock = TRUE;
3336     }
3337 
3338     Status = do_read(Irp, wait, &bytes_read);
3339 
3340     if (fcb_lock)
3341         ExReleaseResourceLite(fcb->Header.Resource);
3342 
3343 exit:
3344     if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3345         FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3346 
3347 end:
3348     Irp->IoStatus.Status = Status;
3349 
3350     TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
3351     TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
3352     TRACE("returning %08x\n", Status);
3353 
3354     if (Status != STATUS_PENDING)
3355         IoCompleteRequest(Irp, IO_NO_INCREMENT);
3356     else {
3357         if (!add_thread_job(Vcb, Irp))
3358             do_read_job(Irp);
3359     }
3360 
3361 exit2:
3362     if (top_level)
3363         IoSetTopLevelIrp(NULL);
3364 
3365     FsRtlExitFileSystem();
3366 
3367     return Status;
3368 }
3369