/* Copyright (c) Mark Harmstone 2016-17
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17c2c66affSColin Finck
18c2c66affSColin Finck #include "btrfs_drv.h"
19194ea909SVictor Perevertkin #include "xxhash.h"
20194ea909SVictor Perevertkin #include "crc32c.h"
21c2c66affSColin Finck
/* Outcome of the read issued for one stripe of a chunk. */
enum read_data_status {
    ReadDataStatus_Pending       = 0, // presumably: read issued but not yet completed - confirm against the issuing code
    ReadDataStatus_Success       = 1, // set by the completion routine when the IRP finished with a success status
    ReadDataStatus_Error         = 2, // set by the completion routine when the IRP finished with a failure status
    ReadDataStatus_MissingDevice = 3, // presumably: no device available for this stripe - confirm against the issuing code
    ReadDataStatus_Skip          = 4  // presumably: stripe deliberately not read - confirm against the issuing code
};
29c2c66affSColin Finck
30c2c66affSColin Finck struct read_data_context;
31c2c66affSColin Finck
32c2c66affSColin Finck typedef struct {
33c2c66affSColin Finck struct read_data_context* context;
34318da0c1SPierre Schweitzer uint16_t stripenum;
35318da0c1SPierre Schweitzer bool rewrite;
36c2c66affSColin Finck PIRP Irp;
37c2c66affSColin Finck IO_STATUS_BLOCK iosb;
38c2c66affSColin Finck enum read_data_status status;
39c2c66affSColin Finck PMDL mdl;
40318da0c1SPierre Schweitzer uint64_t stripestart;
41318da0c1SPierre Schweitzer uint64_t stripeend;
42c2c66affSColin Finck } read_data_stripe;
43c2c66affSColin Finck
44c2c66affSColin Finck typedef struct {
45c2c66affSColin Finck KEVENT Event;
46c2c66affSColin Finck NTSTATUS Status;
47c2c66affSColin Finck chunk* c;
48318da0c1SPierre Schweitzer uint64_t address;
49318da0c1SPierre Schweitzer uint32_t buflen;
50c2c66affSColin Finck LONG num_stripes, stripes_left;
51318da0c1SPierre Schweitzer uint64_t type;
52318da0c1SPierre Schweitzer uint32_t sector_size;
53318da0c1SPierre Schweitzer uint16_t firstoff, startoffstripe, sectors_per_stripe;
54194ea909SVictor Perevertkin void* csum;
55318da0c1SPierre Schweitzer bool tree;
56c2c66affSColin Finck read_data_stripe* stripes;
57318da0c1SPierre Schweitzer uint8_t* va;
58c2c66affSColin Finck } read_data_context;
59c2c66affSColin Finck
60318da0c1SPierre Schweitzer extern bool diskacc;
61c2c66affSColin Finck extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
62c2c66affSColin Finck extern tCcCopyReadEx fCcCopyReadEx;
63c2c66affSColin Finck extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
64c2c66affSColin Finck
65318da0c1SPierre Schweitzer #define LZO_PAGE_SIZE 4096
66c2c66affSColin Finck
_Function_class_(IO_COMPLETION_ROUTINE)67c2c66affSColin Finck _Function_class_(IO_COMPLETION_ROUTINE)
68318da0c1SPierre Schweitzer static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
69c2c66affSColin Finck read_data_stripe* stripe = conptr;
70c2c66affSColin Finck read_data_context* context = (read_data_context*)stripe->context;
71c2c66affSColin Finck
72c2c66affSColin Finck UNUSED(DeviceObject);
73c2c66affSColin Finck
74c2c66affSColin Finck stripe->iosb = Irp->IoStatus;
75c2c66affSColin Finck
76c2c66affSColin Finck if (NT_SUCCESS(Irp->IoStatus.Status))
77c2c66affSColin Finck stripe->status = ReadDataStatus_Success;
78c2c66affSColin Finck else
79c2c66affSColin Finck stripe->status = ReadDataStatus_Error;
80c2c66affSColin Finck
81c2c66affSColin Finck if (InterlockedDecrement(&context->stripes_left) == 0)
82318da0c1SPierre Schweitzer KeSetEvent(&context->Event, 0, false);
83c2c66affSColin Finck
84c2c66affSColin Finck return STATUS_MORE_PROCESSING_REQUIRED;
85c2c66affSColin Finck }
86c2c66affSColin Finck
check_csum(device_extension * Vcb,uint8_t * data,uint32_t sectors,void * csum)87194ea909SVictor Perevertkin NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, void* csum) {
88194ea909SVictor Perevertkin void* csum2;
89c2c66affSColin Finck
90194ea909SVictor Perevertkin csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
91c2c66affSColin Finck if (!csum2) {
92c2c66affSColin Finck ERR("out of memory\n");
93c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
94c2c66affSColin Finck }
95c2c66affSColin Finck
96194ea909SVictor Perevertkin do_calc_job(Vcb, data, sectors, csum2);
97c2c66affSColin Finck
98194ea909SVictor Perevertkin if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
99c2c66affSColin Finck ExFreePool(csum2);
100c2c66affSColin Finck return STATUS_CRC_ERROR;
101c2c66affSColin Finck }
102c2c66affSColin Finck
103c2c66affSColin Finck ExFreePool(csum2);
104c2c66affSColin Finck
105c2c66affSColin Finck return STATUS_SUCCESS;
106c2c66affSColin Finck }
107c2c66affSColin Finck
/*
 * Calculates the checksum of a tree node and stores it in `csum`.
 *
 * The checksum covers the node from the fs_uuid field of the header onwards,
 * i.e. node_size bytes minus the size of the header's csum field. The
 * algorithm is selected by the superblock's csum_type; `csum` must have room
 * for that algorithm's output (4 bytes for CRC32C, 8 for xxHash64, and the
 * full digest for SHA256 / BLAKE2). Nothing is written for an unrecognised
 * checksum type.
 */
void get_tree_checksum(device_extension* Vcb, tree_header* th, void* csum) {
    uint8_t* payload = (uint8_t*)&th->fs_uuid;
    const size_t payload_len = Vcb->superblock.node_size - sizeof(th->csum);

    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C:
            *(uint32_t*)csum = ~calc_crc32c(0xffffffff, payload, payload_len);
            break;

        case CSUM_TYPE_XXHASH:
            *(uint64_t*)csum = XXH64(payload, payload_len, 0);
            break;

        case CSUM_TYPE_SHA256:
            calc_sha256(csum, payload, payload_len);
            break;

        case CSUM_TYPE_BLAKE2:
            blake2b(csum, BLAKE2_HASH_SIZE, payload, payload_len);
            break;
    }
}
127194ea909SVictor Perevertkin
/*
 * Checks the checksum stored in a tree node's header.
 *
 * The checksum is recalculated over the node from the fs_uuid field onwards
 * (node_size bytes minus the size of the csum field), using the algorithm
 * named by the superblock's csum_type, and compared with th->csum.
 *
 * Returns true if the stored checksum matches. Returns false, after logging
 * a warning, on a mismatch; also returns false for an unrecognised checksum
 * type.
 */
bool check_tree_checksum(device_extension* Vcb, tree_header* th) {
    uint8_t* payload = (uint8_t*)&th->fs_uuid;
    const size_t payload_len = Vcb->superblock.node_size - sizeof(th->csum);

    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, payload, payload_len);

            if (crc32 == *((uint32_t*)th->csum))
                return true;

            WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));

            break;
        }

        case CSUM_TYPE_XXHASH: {
            uint64_t hash = XXH64(payload, payload_len, 0);

            if (hash == *((uint64_t*)th->csum))
                return true;

            WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));

            break;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t hash[SHA256_HASH_SIZE];

            calc_sha256(hash, payload, payload_len);

            // compare against the csum field explicitly rather than relying on
            // it being the first member of the header
            if (RtlCompareMemory(hash, th->csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t hash[BLAKE2_HASH_SIZE];

            blake2b(hash, sizeof(hash), payload, payload_len);

            // see the SHA256 case: compare with the csum field, not the struct start
            if (RtlCompareMemory(hash, th->csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE)
                return true;

            WARN("hash was invalid\n");

            break;
        }
    }

    return false;
}
181194ea909SVictor Perevertkin
/*
 * Calculates the checksum of one sector (superblock.sector_size bytes at
 * `buf`) and stores it in `csum`, using the algorithm named by the
 * superblock's csum_type. Nothing is written for an unrecognised type.
 */
void get_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    if (Vcb->superblock.csum_type == CSUM_TYPE_CRC32C) {
        *(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
    } else if (Vcb->superblock.csum_type == CSUM_TYPE_XXHASH) {
        *(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
    } else if (Vcb->superblock.csum_type == CSUM_TYPE_SHA256) {
        calc_sha256(csum, buf, Vcb->superblock.sector_size);
    } else if (Vcb->superblock.csum_type == CSUM_TYPE_BLAKE2) {
        blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
    }
}
201194ea909SVictor Perevertkin
/*
 * Checks one sector (superblock.sector_size bytes at `buf`) against the
 * expected checksum at `csum`, using the algorithm named by the superblock's
 * csum_type.
 *
 * Returns true if the recalculated checksum equals the expected one, false
 * otherwise (including for an unrecognised checksum type).
 */
bool check_sector_csum(device_extension* Vcb, void* buf, void* csum) {
    bool matches = false;

    switch (Vcb->superblock.csum_type) {
        case CSUM_TYPE_CRC32C: {
            uint32_t calculated = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);

            matches = *(uint32_t*)csum == calculated;
            break;
        }

        case CSUM_TYPE_XXHASH: {
            uint64_t calculated = XXH64(buf, Vcb->superblock.sector_size, 0);

            matches = *(uint64_t*)csum == calculated;
            break;
        }

        case CSUM_TYPE_SHA256: {
            uint8_t digest[SHA256_HASH_SIZE];

            calc_sha256(digest, buf, Vcb->superblock.sector_size);

            matches = RtlCompareMemory(digest, csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE;
            break;
        }

        case CSUM_TYPE_BLAKE2: {
            uint8_t digest[BLAKE2_HASH_SIZE];

            blake2b(digest, sizeof(digest), buf, Vcb->superblock.sector_size);

            matches = RtlCompareMemory(digest, csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE;
            break;
        }
    }

    return matches;
}
235194ea909SVictor Perevertkin
read_data_dup(device_extension * Vcb,uint8_t * buf,uint64_t addr,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t generation)236318da0c1SPierre Schweitzer static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
237318da0c1SPierre Schweitzer device** devices, uint64_t generation) {
238318da0c1SPierre Schweitzer bool checksum_error = false;
239318da0c1SPierre Schweitzer uint16_t j, stripe = 0;
240c2c66affSColin Finck NTSTATUS Status;
241c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
242c2c66affSColin Finck
243c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) {
244c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) {
245194ea909SVictor Perevertkin WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
246c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
247c2c66affSColin Finck return context->stripes[j].iosb.Status;
248c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success) {
249c2c66affSColin Finck stripe = j;
250c2c66affSColin Finck break;
251c2c66affSColin Finck }
252c2c66affSColin Finck }
253c2c66affSColin Finck
254c2c66affSColin Finck if (context->stripes[stripe].status != ReadDataStatus_Success)
255c2c66affSColin Finck return STATUS_INTERNAL_ERROR;
256c2c66affSColin Finck
257c2c66affSColin Finck if (context->tree) {
258c2c66affSColin Finck tree_header* th = (tree_header*)buf;
259c2c66affSColin Finck
260194ea909SVictor Perevertkin if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
261318da0c1SPierre Schweitzer checksum_error = true;
262c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
263c2c66affSColin Finck } else if (generation != 0 && th->generation != generation) {
264318da0c1SPierre Schweitzer checksum_error = true;
265c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
266c2c66affSColin Finck }
267c2c66affSColin Finck } else if (context->csum) {
268c2c66affSColin Finck Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
269c2c66affSColin Finck
270c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) {
271318da0c1SPierre Schweitzer checksum_error = true;
272c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
273c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) {
274194ea909SVictor Perevertkin ERR("check_csum returned %08lx\n", Status);
275c2c66affSColin Finck return Status;
276c2c66affSColin Finck }
277c2c66affSColin Finck }
278c2c66affSColin Finck
279c2c66affSColin Finck if (!checksum_error)
280c2c66affSColin Finck return STATUS_SUCCESS;
281c2c66affSColin Finck
282c2c66affSColin Finck if (ci->num_stripes == 1)
283c2c66affSColin Finck return STATUS_CRC_ERROR;
284c2c66affSColin Finck
285c2c66affSColin Finck if (context->tree) {
286c2c66affSColin Finck tree_header* t2;
287318da0c1SPierre Schweitzer bool recovered = false;
288c2c66affSColin Finck
289c2c66affSColin Finck t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
290c2c66affSColin Finck if (!t2) {
291c2c66affSColin Finck ERR("out of memory\n");
292c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
293c2c66affSColin Finck }
294c2c66affSColin Finck
295c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) {
296c2c66affSColin Finck if (j != stripe && devices[j] && devices[j]->devobj) {
297318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
298318da0c1SPierre Schweitzer Vcb->superblock.node_size, (uint8_t*)t2, false);
299c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
300194ea909SVictor Perevertkin WARN("sync_read_phys returned %08lx\n", Status);
301c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
302c2c66affSColin Finck } else {
303194ea909SVictor Perevertkin bool checksum_error = !check_tree_checksum(Vcb, t2);
304c2c66affSColin Finck
305194ea909SVictor Perevertkin if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
306c2c66affSColin Finck RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
307318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
308318da0c1SPierre Schweitzer recovered = true;
309c2c66affSColin Finck
310c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
311318da0c1SPierre Schweitzer Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
312c2c66affSColin Finck t2, Vcb->superblock.node_size);
313c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
314194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
315c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
316c2c66affSColin Finck }
317c2c66affSColin Finck }
318c2c66affSColin Finck
319c2c66affSColin Finck break;
320194ea909SVictor Perevertkin } else if (t2->address != addr || checksum_error)
321c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
322c2c66affSColin Finck else
323c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
324c2c66affSColin Finck }
325c2c66affSColin Finck }
326c2c66affSColin Finck }
327c2c66affSColin Finck
328c2c66affSColin Finck if (!recovered) {
329318da0c1SPierre Schweitzer ERR("unrecoverable checksum error at %I64x\n", addr);
330c2c66affSColin Finck ExFreePool(t2);
331c2c66affSColin Finck return STATUS_CRC_ERROR;
332c2c66affSColin Finck }
333c2c66affSColin Finck
334c2c66affSColin Finck ExFreePool(t2);
335c2c66affSColin Finck } else {
336174dfab6SVincent Franchomme ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information >> Vcb->sector_shift;
337318da0c1SPierre Schweitzer uint8_t* sector;
338194ea909SVictor Perevertkin void* ptr = context->csum;
339c2c66affSColin Finck
340c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
341c2c66affSColin Finck if (!sector) {
342c2c66affSColin Finck ERR("out of memory\n");
343c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
344c2c66affSColin Finck }
345c2c66affSColin Finck
346174dfab6SVincent Franchomme for (ULONG i = 0; i < sectors; i++) {
347174dfab6SVincent Franchomme if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
348318da0c1SPierre Schweitzer bool recovered = false;
349c2c66affSColin Finck
350c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) {
351c2c66affSColin Finck if (j != stripe && devices[j] && devices[j]->devobj) {
352318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
353174dfab6SVincent Franchomme cis[j].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
354318da0c1SPierre Schweitzer Vcb->superblock.sector_size, sector, false);
355c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
356194ea909SVictor Perevertkin WARN("sync_read_phys returned %08lx\n", Status);
357c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
358c2c66affSColin Finck } else {
359194ea909SVictor Perevertkin if (check_sector_csum(Vcb, sector, ptr)) {
360174dfab6SVincent Franchomme RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
361174dfab6SVincent Franchomme ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
362318da0c1SPierre Schweitzer recovered = true;
363c2c66affSColin Finck
364c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
365318da0c1SPierre Schweitzer Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
366174dfab6SVincent Franchomme cis[stripe].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
367c2c66affSColin Finck sector, Vcb->superblock.sector_size);
368c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
369194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
370c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
371c2c66affSColin Finck }
372c2c66affSColin Finck }
373c2c66affSColin Finck
374c2c66affSColin Finck break;
375c2c66affSColin Finck } else
376c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
377c2c66affSColin Finck }
378c2c66affSColin Finck }
379c2c66affSColin Finck }
380c2c66affSColin Finck
381c2c66affSColin Finck if (!recovered) {
382174dfab6SVincent Franchomme ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
383c2c66affSColin Finck ExFreePool(sector);
384c2c66affSColin Finck return STATUS_CRC_ERROR;
385c2c66affSColin Finck }
386c2c66affSColin Finck }
387194ea909SVictor Perevertkin
388194ea909SVictor Perevertkin ptr = (uint8_t*)ptr + Vcb->csum_size;
389c2c66affSColin Finck }
390c2c66affSColin Finck
391c2c66affSColin Finck ExFreePool(sector);
392c2c66affSColin Finck }
393c2c66affSColin Finck
394c2c66affSColin Finck return STATUS_SUCCESS;
395c2c66affSColin Finck }
396c2c66affSColin Finck
read_data_raid0(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t generation,uint64_t offset)397318da0c1SPierre Schweitzer static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
398318da0c1SPierre Schweitzer CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
399174dfab6SVincent Franchomme for (uint16_t i = 0; i < ci->num_stripes; i++) {
400c2c66affSColin Finck if (context->stripes[i].status == ReadDataStatus_Error) {
401174dfab6SVincent Franchomme WARN("stripe %u returned error %08lx\n", i, context->stripes[i].iosb.Status);
402c2c66affSColin Finck log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
403c2c66affSColin Finck return context->stripes[i].iosb.Status;
404c2c66affSColin Finck }
405c2c66affSColin Finck }
406c2c66affSColin Finck
407c2c66affSColin Finck if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
408c2c66affSColin Finck tree_header* th = (tree_header*)buf;
409194ea909SVictor Perevertkin bool checksum_error = !check_tree_checksum(Vcb, th);
410c2c66affSColin Finck
411194ea909SVictor Perevertkin if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
412318da0c1SPierre Schweitzer uint64_t off;
413318da0c1SPierre Schweitzer uint16_t stripe;
414c2c66affSColin Finck
415c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
416c2c66affSColin Finck
417318da0c1SPierre Schweitzer ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
418c2c66affSColin Finck
419194ea909SVictor Perevertkin if (checksum_error) {
420c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
421c2c66affSColin Finck return STATUS_CRC_ERROR;
422c2c66affSColin Finck } else if (addr != th->address) {
423318da0c1SPierre Schweitzer WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
424c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
425c2c66affSColin Finck return STATUS_CRC_ERROR;
426c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) {
427318da0c1SPierre Schweitzer WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
428c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
429c2c66affSColin Finck return STATUS_CRC_ERROR;
430c2c66affSColin Finck }
431c2c66affSColin Finck }
432c2c66affSColin Finck } else if (context->csum) {
433c2c66affSColin Finck NTSTATUS Status;
434c2c66affSColin Finck
435174dfab6SVincent Franchomme Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
436c2c66affSColin Finck
437c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) {
438194ea909SVictor Perevertkin void* ptr = context->csum;
439c2c66affSColin Finck
440174dfab6SVincent Franchomme for (uint32_t i = 0; i < length >> Vcb->sector_shift; i++) {
441174dfab6SVincent Franchomme if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
442318da0c1SPierre Schweitzer uint64_t off;
443318da0c1SPierre Schweitzer uint16_t stripe;
444c2c66affSColin Finck
445174dfab6SVincent Franchomme get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length, ci->num_stripes, &off, &stripe);
446c2c66affSColin Finck
447318da0c1SPierre Schweitzer ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
448c2c66affSColin Finck
449c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
450c2c66affSColin Finck
451c2c66affSColin Finck return Status;
452c2c66affSColin Finck }
453194ea909SVictor Perevertkin
454194ea909SVictor Perevertkin ptr = (uint8_t*)ptr + Vcb->csum_size;
455c2c66affSColin Finck }
456c2c66affSColin Finck
457c2c66affSColin Finck return Status;
458c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) {
459194ea909SVictor Perevertkin ERR("check_csum returned %08lx\n", Status);
460c2c66affSColin Finck return Status;
461c2c66affSColin Finck }
462c2c66affSColin Finck }
463c2c66affSColin Finck
464c2c66affSColin Finck return STATUS_SUCCESS;
465c2c66affSColin Finck }
466c2c66affSColin Finck
read_data_raid10(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t generation,uint64_t offset)467318da0c1SPierre Schweitzer static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
468318da0c1SPierre Schweitzer CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
46906042735SVincent Franchomme uint16_t stripe = 0;
470c2c66affSColin Finck NTSTATUS Status;
471318da0c1SPierre Schweitzer bool checksum_error = false;
472c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
473c2c66affSColin Finck
474174dfab6SVincent Franchomme for (uint16_t j = 0; j < ci->num_stripes; j++) {
475c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) {
476194ea909SVictor Perevertkin WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
477c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
478c2c66affSColin Finck return context->stripes[j].iosb.Status;
479c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success)
480c2c66affSColin Finck stripe = j;
481c2c66affSColin Finck }
482c2c66affSColin Finck
483c2c66affSColin Finck if (context->tree) {
484c2c66affSColin Finck tree_header* th = (tree_header*)buf;
485c2c66affSColin Finck
486194ea909SVictor Perevertkin if (!check_tree_checksum(Vcb, th)) {
487318da0c1SPierre Schweitzer checksum_error = true;
488c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
489c2c66affSColin Finck } else if (addr != th->address) {
490318da0c1SPierre Schweitzer WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
491318da0c1SPierre Schweitzer checksum_error = true;
492c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
493c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) {
494318da0c1SPierre Schweitzer WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
495318da0c1SPierre Schweitzer checksum_error = true;
496c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
497c2c66affSColin Finck }
498c2c66affSColin Finck } else if (context->csum) {
499174dfab6SVincent Franchomme Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
500c2c66affSColin Finck
501c2c66affSColin Finck if (Status == STATUS_CRC_ERROR)
502318da0c1SPierre Schweitzer checksum_error = true;
503c2c66affSColin Finck else if (!NT_SUCCESS(Status)) {
504194ea909SVictor Perevertkin ERR("check_csum returned %08lx\n", Status);
505c2c66affSColin Finck return Status;
506c2c66affSColin Finck }
507c2c66affSColin Finck }
508c2c66affSColin Finck
509c2c66affSColin Finck if (!checksum_error)
510c2c66affSColin Finck return STATUS_SUCCESS;
511c2c66affSColin Finck
512c2c66affSColin Finck if (context->tree) {
513c2c66affSColin Finck tree_header* t2;
514318da0c1SPierre Schweitzer uint64_t off;
515318da0c1SPierre Schweitzer uint16_t badsubstripe = 0;
516318da0c1SPierre Schweitzer bool recovered = false;
517c2c66affSColin Finck
518c2c66affSColin Finck t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
519c2c66affSColin Finck if (!t2) {
520c2c66affSColin Finck ERR("out of memory\n");
521c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
522c2c66affSColin Finck }
523c2c66affSColin Finck
524c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
525c2c66affSColin Finck
526c2c66affSColin Finck stripe *= ci->sub_stripes;
527c2c66affSColin Finck
528174dfab6SVincent Franchomme for (uint16_t j = 0; j < ci->sub_stripes; j++) {
529c2c66affSColin Finck if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
530c2c66affSColin Finck badsubstripe = j;
531c2c66affSColin Finck break;
532c2c66affSColin Finck }
533c2c66affSColin Finck }
534c2c66affSColin Finck
535174dfab6SVincent Franchomme for (uint16_t j = 0; j < ci->sub_stripes; j++) {
536c2c66affSColin Finck if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
537318da0c1SPierre Schweitzer Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
538318da0c1SPierre Schweitzer Vcb->superblock.node_size, (uint8_t*)t2, false);
539c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
540194ea909SVictor Perevertkin WARN("sync_read_phys returned %08lx\n", Status);
541c2c66affSColin Finck log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
542c2c66affSColin Finck } else {
543194ea909SVictor Perevertkin bool checksum_error = !check_tree_checksum(Vcb, t2);
544c2c66affSColin Finck
545194ea909SVictor Perevertkin if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
546c2c66affSColin Finck RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
547318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
548318da0c1SPierre Schweitzer recovered = true;
549c2c66affSColin Finck
550c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
551318da0c1SPierre Schweitzer Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
552318da0c1SPierre Schweitzer cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
553c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
554194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
555c2c66affSColin Finck log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
556c2c66affSColin Finck }
557c2c66affSColin Finck }
558c2c66affSColin Finck
559c2c66affSColin Finck break;
560194ea909SVictor Perevertkin } else if (t2->address != addr || checksum_error)
561c2c66affSColin Finck log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
562c2c66affSColin Finck else
563c2c66affSColin Finck log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
564c2c66affSColin Finck }
565c2c66affSColin Finck }
566c2c66affSColin Finck }
567c2c66affSColin Finck
568c2c66affSColin Finck if (!recovered) {
569318da0c1SPierre Schweitzer ERR("unrecoverable checksum error at %I64x\n", addr);
570c2c66affSColin Finck ExFreePool(t2);
571c2c66affSColin Finck return STATUS_CRC_ERROR;
572c2c66affSColin Finck }
573c2c66affSColin Finck
574c2c66affSColin Finck ExFreePool(t2);
575c2c66affSColin Finck } else {
576174dfab6SVincent Franchomme ULONG sectors = length >> Vcb->sector_shift;
577318da0c1SPierre Schweitzer uint8_t* sector;
578194ea909SVictor Perevertkin void* ptr = context->csum;
579c2c66affSColin Finck
580c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
581c2c66affSColin Finck if (!sector) {
582c2c66affSColin Finck ERR("out of memory\n");
583c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
584c2c66affSColin Finck }
585c2c66affSColin Finck
586174dfab6SVincent Franchomme for (ULONG i = 0; i < sectors; i++) {
587174dfab6SVincent Franchomme if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
588318da0c1SPierre Schweitzer uint64_t off;
589318da0c1SPierre Schweitzer uint16_t stripe2, badsubstripe = 0;
590318da0c1SPierre Schweitzer bool recovered = false;
591c2c66affSColin Finck
592174dfab6SVincent Franchomme get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
593c2c66affSColin Finck ci->num_stripes / ci->sub_stripes, &off, &stripe2);
594c2c66affSColin Finck
595c2c66affSColin Finck stripe2 *= ci->sub_stripes;
596c2c66affSColin Finck
597174dfab6SVincent Franchomme for (uint16_t j = 0; j < ci->sub_stripes; j++) {
598c2c66affSColin Finck if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
599c2c66affSColin Finck badsubstripe = j;
600c2c66affSColin Finck break;
601c2c66affSColin Finck }
602c2c66affSColin Finck }
603c2c66affSColin Finck
604c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
605c2c66affSColin Finck
606174dfab6SVincent Franchomme for (uint16_t j = 0; j < ci->sub_stripes; j++) {
607c2c66affSColin Finck if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
608318da0c1SPierre Schweitzer Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
609318da0c1SPierre Schweitzer Vcb->superblock.sector_size, sector, false);
610c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
611194ea909SVictor Perevertkin WARN("sync_read_phys returned %08lx\n", Status);
612c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
613c2c66affSColin Finck } else {
614194ea909SVictor Perevertkin if (check_sector_csum(Vcb, sector, ptr)) {
615174dfab6SVincent Franchomme RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
616174dfab6SVincent Franchomme ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe2 + j]->devitem.dev_id);
617318da0c1SPierre Schweitzer recovered = true;
618c2c66affSColin Finck
619c2c66affSColin Finck if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
620318da0c1SPierre Schweitzer Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
621318da0c1SPierre Schweitzer cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
622c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
623194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
624c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
625c2c66affSColin Finck }
626c2c66affSColin Finck }
627c2c66affSColin Finck
628c2c66affSColin Finck break;
629c2c66affSColin Finck } else
630c2c66affSColin Finck log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
631c2c66affSColin Finck }
632c2c66affSColin Finck }
633c2c66affSColin Finck }
634c2c66affSColin Finck
635c2c66affSColin Finck if (!recovered) {
636174dfab6SVincent Franchomme ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
637c2c66affSColin Finck ExFreePool(sector);
638c2c66affSColin Finck return STATUS_CRC_ERROR;
639c2c66affSColin Finck }
640c2c66affSColin Finck }
641194ea909SVictor Perevertkin
642194ea909SVictor Perevertkin ptr = (uint8_t*)ptr + Vcb->csum_size;
643c2c66affSColin Finck }
644c2c66affSColin Finck
645c2c66affSColin Finck ExFreePool(sector);
646c2c66affSColin Finck }
647c2c66affSColin Finck
648c2c66affSColin Finck return STATUS_SUCCESS;
649c2c66affSColin Finck }
650c2c66affSColin Finck
read_data_raid5(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t offset,uint64_t generation,chunk * c,bool degraded)651318da0c1SPierre Schweitzer static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
652318da0c1SPierre Schweitzer device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
653c2c66affSColin Finck NTSTATUS Status;
654318da0c1SPierre Schweitzer bool checksum_error = false;
655c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
65606042735SVincent Franchomme uint16_t j, stripe = 0;
657318da0c1SPierre Schweitzer bool no_success = true;
658c2c66affSColin Finck
659c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) {
660c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) {
661194ea909SVictor Perevertkin WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
662c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
663c2c66affSColin Finck return context->stripes[j].iosb.Status;
664c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success) {
665c2c66affSColin Finck stripe = j;
666318da0c1SPierre Schweitzer no_success = false;
667c2c66affSColin Finck }
668c2c66affSColin Finck }
669c2c66affSColin Finck
670c2c66affSColin Finck if (c) { // check partial stripes
671c2c66affSColin Finck LIST_ENTRY* le;
672318da0c1SPierre Schweitzer uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;
673c2c66affSColin Finck
674318da0c1SPierre Schweitzer ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
675c2c66affSColin Finck
676c2c66affSColin Finck le = c->partial_stripes.Flink;
677c2c66affSColin Finck while (le != &c->partial_stripes) {
678c2c66affSColin Finck partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
679c2c66affSColin Finck
680c2c66affSColin Finck if (ps->address + ps_length > addr && ps->address < addr + length) {
681c2c66affSColin Finck ULONG runlength, index;
682c2c66affSColin Finck
683c2c66affSColin Finck runlength = RtlFindFirstRunClear(&ps->bmp, &index);
684c2c66affSColin Finck
685c2c66affSColin Finck while (runlength != 0) {
686318da0c1SPierre Schweitzer if (index >= ps->bmplen)
687318da0c1SPierre Schweitzer break;
688318da0c1SPierre Schweitzer
689318da0c1SPierre Schweitzer if (index + runlength >= ps->bmplen) {
690318da0c1SPierre Schweitzer runlength = ps->bmplen - index;
691318da0c1SPierre Schweitzer
692318da0c1SPierre Schweitzer if (runlength == 0)
693318da0c1SPierre Schweitzer break;
694318da0c1SPierre Schweitzer }
695*6e0cf03dSVincent Franchomme
696174dfab6SVincent Franchomme uint64_t runstart = ps->address + (index << Vcb->sector_shift);
697174dfab6SVincent Franchomme uint64_t runend = runstart + (runlength << Vcb->sector_shift);
698318da0c1SPierre Schweitzer uint64_t start = max(runstart, addr);
699318da0c1SPierre Schweitzer uint64_t end = min(runend, addr + length);
700c2c66affSColin Finck
701c2c66affSColin Finck if (end > start)
702c2c66affSColin Finck RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
703c2c66affSColin Finck
704c2c66affSColin Finck runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
705c2c66affSColin Finck }
706c2c66affSColin Finck } else if (ps->address >= addr + length)
707c2c66affSColin Finck break;
708c2c66affSColin Finck
709c2c66affSColin Finck le = le->Flink;
710c2c66affSColin Finck }
711c2c66affSColin Finck
712c2c66affSColin Finck ExReleaseResourceLite(&c->partial_stripes_lock);
713c2c66affSColin Finck }
714c2c66affSColin Finck
715c2c66affSColin Finck if (context->tree) {
716c2c66affSColin Finck tree_header* th = (tree_header*)buf;
717c2c66affSColin Finck
718194ea909SVictor Perevertkin if (addr != th->address || !check_tree_checksum(Vcb, th)) {
719318da0c1SPierre Schweitzer checksum_error = true;
720c2c66affSColin Finck if (!no_success && !degraded)
721c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
722c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) {
723318da0c1SPierre Schweitzer checksum_error = true;
724c2c66affSColin Finck if (!no_success && !degraded)
725c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
726c2c66affSColin Finck }
727c2c66affSColin Finck } else if (context->csum) {
728174dfab6SVincent Franchomme Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
729c2c66affSColin Finck
730c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) {
731c2c66affSColin Finck if (!degraded)
732c2c66affSColin Finck WARN("checksum error\n");
733318da0c1SPierre Schweitzer checksum_error = true;
734c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) {
735194ea909SVictor Perevertkin ERR("check_csum returned %08lx\n", Status);
736c2c66affSColin Finck return Status;
737c2c66affSColin Finck }
738c2c66affSColin Finck } else if (degraded)
739318da0c1SPierre Schweitzer checksum_error = true;
740c2c66affSColin Finck
741c2c66affSColin Finck if (!checksum_error)
742c2c66affSColin Finck return STATUS_SUCCESS;
743c2c66affSColin Finck
744c2c66affSColin Finck if (context->tree) {
745318da0c1SPierre Schweitzer uint16_t parity;
746318da0c1SPierre Schweitzer uint64_t off;
747318da0c1SPierre Schweitzer bool recovered = false, first = true, failed = false;
748318da0c1SPierre Schweitzer uint8_t* t2;
749c2c66affSColin Finck
750c2c66affSColin Finck t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
751c2c66affSColin Finck if (!t2) {
752c2c66affSColin Finck ERR("out of memory\n");
753c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
754c2c66affSColin Finck }
755c2c66affSColin Finck
756c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
757c2c66affSColin Finck
758c2c66affSColin Finck parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
759c2c66affSColin Finck
760c2c66affSColin Finck stripe = (parity + stripe + 1) % ci->num_stripes;
761c2c66affSColin Finck
762c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) {
763c2c66affSColin Finck if (j != stripe) {
764c2c66affSColin Finck if (devices[j] && devices[j]->devobj) {
765c2c66affSColin Finck if (first) {
766318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
767c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
768194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
769c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
770318da0c1SPierre Schweitzer failed = true;
771c2c66affSColin Finck break;
772c2c66affSColin Finck }
773c2c66affSColin Finck
774318da0c1SPierre Schweitzer first = false;
775c2c66affSColin Finck } else {
776318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
777c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
778194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
779c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
780318da0c1SPierre Schweitzer failed = true;
781c2c66affSColin Finck break;
782c2c66affSColin Finck }
783c2c66affSColin Finck
784c2c66affSColin Finck do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
785c2c66affSColin Finck }
786c2c66affSColin Finck } else {
787318da0c1SPierre Schweitzer failed = true;
788c2c66affSColin Finck break;
789c2c66affSColin Finck }
790c2c66affSColin Finck }
791c2c66affSColin Finck }
792c2c66affSColin Finck
793c2c66affSColin Finck if (!failed) {
794c2c66affSColin Finck tree_header* t3 = (tree_header*)t2;
795c2c66affSColin Finck
796194ea909SVictor Perevertkin if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
797c2c66affSColin Finck RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
798c2c66affSColin Finck
799c2c66affSColin Finck if (!degraded)
800318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
801c2c66affSColin Finck
802318da0c1SPierre Schweitzer recovered = true;
803c2c66affSColin Finck
804c2c66affSColin Finck if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
805318da0c1SPierre Schweitzer Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
806c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
807194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
808c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
809c2c66affSColin Finck }
810c2c66affSColin Finck }
811c2c66affSColin Finck }
812c2c66affSColin Finck }
813c2c66affSColin Finck
814c2c66affSColin Finck if (!recovered) {
815318da0c1SPierre Schweitzer ERR("unrecoverable checksum error at %I64x\n", addr);
816c2c66affSColin Finck ExFreePool(t2);
817c2c66affSColin Finck return STATUS_CRC_ERROR;
818c2c66affSColin Finck }
819c2c66affSColin Finck
820c2c66affSColin Finck ExFreePool(t2);
821c2c66affSColin Finck } else {
822174dfab6SVincent Franchomme ULONG sectors = length >> Vcb->sector_shift;
823318da0c1SPierre Schweitzer uint8_t* sector;
824194ea909SVictor Perevertkin void* ptr = context->csum;
825c2c66affSColin Finck
826c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
827c2c66affSColin Finck if (!sector) {
828c2c66affSColin Finck ERR("out of memory\n");
829c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
830c2c66affSColin Finck }
831c2c66affSColin Finck
832174dfab6SVincent Franchomme for (ULONG i = 0; i < sectors; i++) {
833318da0c1SPierre Schweitzer uint16_t parity;
834318da0c1SPierre Schweitzer uint64_t off;
835c2c66affSColin Finck
836174dfab6SVincent Franchomme get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
837c2c66affSColin Finck ci->num_stripes - 1, &off, &stripe);
838c2c66affSColin Finck
839174dfab6SVincent Franchomme parity = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
840c2c66affSColin Finck
841c2c66affSColin Finck stripe = (parity + stripe + 1) % ci->num_stripes;
842c2c66affSColin Finck
843174dfab6SVincent Franchomme if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
844318da0c1SPierre Schweitzer bool recovered = false, first = true, failed = false;
845c2c66affSColin Finck
846c2c66affSColin Finck if (devices[stripe] && devices[stripe]->devobj)
847c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);
848c2c66affSColin Finck
849c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) {
850c2c66affSColin Finck if (j != stripe) {
851c2c66affSColin Finck if (devices[j] && devices[j]->devobj) {
852c2c66affSColin Finck if (first) {
853318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
854c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
855194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
856318da0c1SPierre Schweitzer failed = true;
857c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
858c2c66affSColin Finck break;
859c2c66affSColin Finck }
860c2c66affSColin Finck
861318da0c1SPierre Schweitzer first = false;
862c2c66affSColin Finck } else {
863318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
864318da0c1SPierre Schweitzer sector + Vcb->superblock.sector_size, false);
865c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
866194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
867318da0c1SPierre Schweitzer failed = true;
868c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
869c2c66affSColin Finck break;
870c2c66affSColin Finck }
871c2c66affSColin Finck
872c2c66affSColin Finck do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
873c2c66affSColin Finck }
874c2c66affSColin Finck } else {
875318da0c1SPierre Schweitzer failed = true;
876c2c66affSColin Finck break;
877c2c66affSColin Finck }
878c2c66affSColin Finck }
879c2c66affSColin Finck }
880c2c66affSColin Finck
881c2c66affSColin Finck if (!failed) {
882194ea909SVictor Perevertkin if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
883174dfab6SVincent Franchomme RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
884c2c66affSColin Finck
885c2c66affSColin Finck if (!degraded)
886174dfab6SVincent Franchomme ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
887c2c66affSColin Finck
888318da0c1SPierre Schweitzer recovered = true;
889c2c66affSColin Finck
890c2c66affSColin Finck if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
891318da0c1SPierre Schweitzer Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
892c2c66affSColin Finck sector, Vcb->superblock.sector_size);
893c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
894194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
895c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
896c2c66affSColin Finck }
897c2c66affSColin Finck }
898c2c66affSColin Finck }
899c2c66affSColin Finck }
900c2c66affSColin Finck
901c2c66affSColin Finck if (!recovered) {
902174dfab6SVincent Franchomme ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
903c2c66affSColin Finck ExFreePool(sector);
904c2c66affSColin Finck return STATUS_CRC_ERROR;
905c2c66affSColin Finck }
906c2c66affSColin Finck }
907194ea909SVictor Perevertkin
908194ea909SVictor Perevertkin if (ptr)
909194ea909SVictor Perevertkin ptr = (uint8_t*)ptr + Vcb->csum_size;
910c2c66affSColin Finck }
911c2c66affSColin Finck
912c2c66affSColin Finck ExFreePool(sector);
913c2c66affSColin Finck }
914c2c66affSColin Finck
915c2c66affSColin Finck return STATUS_SUCCESS;
916c2c66affSColin Finck }
917c2c66affSColin Finck
/* Recovers RAID6 blocks when two of the stripes in a row are unavailable.
 *
 * sectors     - num_stripes consecutive blocks of sector_size bytes each:
 *               indices 0 .. num_stripes-3 are data, num_stripes-2 is P and
 *               num_stripes-1 is Q. The blocks at the missing indices are
 *               never read.
 * num_stripes - total number of stripes including P and Q; must be >= 3.
 * sector_size - size of each block in bytes.
 * missing1,
 * missing2    - indices of the two unavailable stripes.
 * out         - receives the result:
 *                 * if one of the missing stripes is P (num_stripes-2), the
 *                   other missing stripe is rebuilt from Q and the remaining
 *                   data into out[0 .. sector_size);
 *                 * otherwise both are rebuilt from P and Q: missing1 into
 *                   out[0 .. sector_size) and missing2 into
 *                   out[sector_size .. 2*sector_size), so out must then hold
 *                   2 * sector_size bytes.
 *
 * If num_stripes < 3 the function returns without touching out. */
void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
    // fewer than three stripes leaves no data stripe, and num_stripes - 3 would wrap
    if (num_stripes < 3)
        return;

    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
        uint16_t stripe = num_stripes - 3;

        // accumulate over the surviving data stripes from the highest index down,
        // treating the missing stripe as all-zero
        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        // A pre-tested loop is required here: with a single data stripe
        // (num_stripes == 3) stripe starts at 0 and must not be decremented.
        while (stripe > 0) {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        }

        // XOR in the stored Q, leaving only the missing stripe's contribution
        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        // remove the power factor attached to the missing stripe's index
        if (missing != 0)
            galois_divpower(out, (uint8_t)missing, sector_size);
    } else { // reconstruct from p and q
        uint16_t x = missing1, y = missing2;
        uint16_t stripe = num_stripes - 3;
        uint8_t gyx, gx, denom, a, b;
        uint8_t* p = sectors + ((num_stripes - 2) * sector_size);
        uint8_t* q = sectors + ((num_stripes - 1) * sector_size);
        uint8_t* qxy = out;               // Q-style accumulation over the surviving data
        uint8_t* pxy = out + sector_size; // plain XOR of the surviving data

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        // pre-tested for the same reason as above: never decrement stripe below 0
        while (stripe > 0) {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            }
        }

        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        // combine byte by byte; the result for stripe x overwrites the Q accumulation in place
        for (uint32_t j = 0; j < sector_size; j++)
            qxy[j] = gmul(a, p[j] ^ pxy[j]) ^ gmul(b, q[j] ^ qxy[j]);

        // second half: XOR accumulation ^ block for x ^ P gives the block for y
        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}
995c2c66affSColin Finck
read_data_raid6(device_extension * Vcb,uint8_t * buf,uint64_t addr,uint32_t length,read_data_context * context,CHUNK_ITEM * ci,device ** devices,uint64_t offset,uint64_t generation,chunk * c,bool degraded)996318da0c1SPierre Schweitzer static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
997318da0c1SPierre Schweitzer device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
998c2c66affSColin Finck NTSTATUS Status;
999318da0c1SPierre Schweitzer bool checksum_error = false;
1000c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
100106042735SVincent Franchomme uint16_t stripe = 0, j;
1002318da0c1SPierre Schweitzer bool no_success = true;
1003c2c66affSColin Finck
1004c2c66affSColin Finck for (j = 0; j < ci->num_stripes; j++) {
1005c2c66affSColin Finck if (context->stripes[j].status == ReadDataStatus_Error) {
1006194ea909SVictor Perevertkin WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
1007c2c66affSColin Finck
1008c2c66affSColin Finck if (devices[j])
1009c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1010c2c66affSColin Finck return context->stripes[j].iosb.Status;
1011c2c66affSColin Finck } else if (context->stripes[j].status == ReadDataStatus_Success) {
1012c2c66affSColin Finck stripe = j;
1013318da0c1SPierre Schweitzer no_success = false;
1014c2c66affSColin Finck }
1015c2c66affSColin Finck }
1016c2c66affSColin Finck
1017c2c66affSColin Finck if (c) { // check partial stripes
1018c2c66affSColin Finck LIST_ENTRY* le;
1019318da0c1SPierre Schweitzer uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
1020c2c66affSColin Finck
1021318da0c1SPierre Schweitzer ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
1022c2c66affSColin Finck
1023c2c66affSColin Finck le = c->partial_stripes.Flink;
1024c2c66affSColin Finck while (le != &c->partial_stripes) {
1025c2c66affSColin Finck partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
1026c2c66affSColin Finck
1027c2c66affSColin Finck if (ps->address + ps_length > addr && ps->address < addr + length) {
1028c2c66affSColin Finck ULONG runlength, index;
1029c2c66affSColin Finck
1030c2c66affSColin Finck runlength = RtlFindFirstRunClear(&ps->bmp, &index);
1031c2c66affSColin Finck
1032c2c66affSColin Finck while (runlength != 0) {
1033318da0c1SPierre Schweitzer if (index >= ps->bmplen)
1034318da0c1SPierre Schweitzer break;
1035318da0c1SPierre Schweitzer
1036318da0c1SPierre Schweitzer if (index + runlength >= ps->bmplen) {
1037318da0c1SPierre Schweitzer runlength = ps->bmplen - index;
1038318da0c1SPierre Schweitzer
1039318da0c1SPierre Schweitzer if (runlength == 0)
1040318da0c1SPierre Schweitzer break;
1041318da0c1SPierre Schweitzer }
1042318da0c1SPierre Schweitzer
1043174dfab6SVincent Franchomme uint64_t runstart = ps->address + (index << Vcb->sector_shift);
1044174dfab6SVincent Franchomme uint64_t runend = runstart + (runlength << Vcb->sector_shift);
1045318da0c1SPierre Schweitzer uint64_t start = max(runstart, addr);
1046318da0c1SPierre Schweitzer uint64_t end = min(runend, addr + length);
1047c2c66affSColin Finck
1048c2c66affSColin Finck if (end > start)
1049c2c66affSColin Finck RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
1050c2c66affSColin Finck
1051c2c66affSColin Finck runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
1052c2c66affSColin Finck }
1053c2c66affSColin Finck } else if (ps->address >= addr + length)
1054c2c66affSColin Finck break;
1055c2c66affSColin Finck
1056c2c66affSColin Finck le = le->Flink;
1057c2c66affSColin Finck }
1058c2c66affSColin Finck
1059c2c66affSColin Finck ExReleaseResourceLite(&c->partial_stripes_lock);
1060c2c66affSColin Finck }
1061c2c66affSColin Finck
1062c2c66affSColin Finck if (context->tree) {
1063c2c66affSColin Finck tree_header* th = (tree_header*)buf;
1064c2c66affSColin Finck
1065194ea909SVictor Perevertkin if (addr != th->address || !check_tree_checksum(Vcb, th)) {
1066318da0c1SPierre Schweitzer checksum_error = true;
1067c2c66affSColin Finck if (!no_success && !degraded && devices[stripe])
1068c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1069c2c66affSColin Finck } else if (generation != 0 && generation != th->generation) {
1070318da0c1SPierre Schweitzer checksum_error = true;
1071c2c66affSColin Finck if (!no_success && !degraded && devices[stripe])
1072c2c66affSColin Finck log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1073c2c66affSColin Finck }
1074c2c66affSColin Finck } else if (context->csum) {
1075174dfab6SVincent Franchomme Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
1076c2c66affSColin Finck
1077c2c66affSColin Finck if (Status == STATUS_CRC_ERROR) {
1078c2c66affSColin Finck if (!degraded)
1079c2c66affSColin Finck WARN("checksum error\n");
1080318da0c1SPierre Schweitzer checksum_error = true;
1081c2c66affSColin Finck } else if (!NT_SUCCESS(Status)) {
1082194ea909SVictor Perevertkin ERR("check_csum returned %08lx\n", Status);
1083c2c66affSColin Finck return Status;
1084c2c66affSColin Finck }
1085c2c66affSColin Finck } else if (degraded)
1086318da0c1SPierre Schweitzer checksum_error = true;
1087c2c66affSColin Finck
1088c2c66affSColin Finck if (!checksum_error)
1089c2c66affSColin Finck return STATUS_SUCCESS;
1090c2c66affSColin Finck
1091c2c66affSColin Finck if (context->tree) {
1092318da0c1SPierre Schweitzer uint8_t* sector;
109306042735SVincent Franchomme uint16_t k, physstripe, parity1, parity2, error_stripe = 0;
1094318da0c1SPierre Schweitzer uint64_t off;
1095318da0c1SPierre Schweitzer bool recovered = false, failed = false;
1096c2c66affSColin Finck ULONG num_errors = 0;
1097c2c66affSColin Finck
1098c2c66affSColin Finck sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1099c2c66affSColin Finck if (!sector) {
1100c2c66affSColin Finck ERR("out of memory\n");
1101c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
1102c2c66affSColin Finck }
1103c2c66affSColin Finck
1104c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1105c2c66affSColin Finck
1106c2c66affSColin Finck parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1107c2c66affSColin Finck parity2 = (parity1 + 1) % ci->num_stripes;
1108c2c66affSColin Finck
1109c2c66affSColin Finck physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1110c2c66affSColin Finck
1111c2c66affSColin Finck j = (parity2 + 1) % ci->num_stripes;
1112c2c66affSColin Finck
1113c2c66affSColin Finck for (k = 0; k < ci->num_stripes - 1; k++) {
1114c2c66affSColin Finck if (j != physstripe) {
1115c2c66affSColin Finck if (devices[j] && devices[j]->devobj) {
1116318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1117318da0c1SPierre Schweitzer sector + (k * Vcb->superblock.node_size), false);
1118c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1119194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
1120c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1121c2c66affSColin Finck num_errors++;
1122c2c66affSColin Finck error_stripe = k;
1123c2c66affSColin Finck
1124c2c66affSColin Finck if (num_errors > 1) {
1125318da0c1SPierre Schweitzer failed = true;
1126c2c66affSColin Finck break;
1127c2c66affSColin Finck }
1128c2c66affSColin Finck }
1129c2c66affSColin Finck } else {
1130c2c66affSColin Finck num_errors++;
1131c2c66affSColin Finck error_stripe = k;
1132c2c66affSColin Finck
1133c2c66affSColin Finck if (num_errors > 1) {
1134318da0c1SPierre Schweitzer failed = true;
1135c2c66affSColin Finck break;
1136c2c66affSColin Finck }
1137c2c66affSColin Finck }
1138c2c66affSColin Finck }
1139c2c66affSColin Finck
1140c2c66affSColin Finck j = (j + 1) % ci->num_stripes;
1141c2c66affSColin Finck }
1142c2c66affSColin Finck
1143c2c66affSColin Finck if (!failed) {
1144c2c66affSColin Finck if (num_errors == 0) {
1145c2c66affSColin Finck tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1146c2c66affSColin Finck
1147c2c66affSColin Finck RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1148c2c66affSColin Finck Vcb->superblock.node_size);
1149c2c66affSColin Finck
1150c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) {
1151c2c66affSColin Finck if (j != stripe)
1152c2c66affSColin Finck do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1153c2c66affSColin Finck }
1154c2c66affSColin Finck
1155194ea909SVictor Perevertkin if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1156c2c66affSColin Finck RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1157c2c66affSColin Finck
1158c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj)
1159318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1160c2c66affSColin Finck
1161318da0c1SPierre Schweitzer recovered = true;
1162c2c66affSColin Finck
1163c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1164318da0c1SPierre Schweitzer Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1165c2c66affSColin Finck sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1166c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1167194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
1168c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1169c2c66affSColin Finck }
1170c2c66affSColin Finck }
1171c2c66affSColin Finck }
1172c2c66affSColin Finck }
1173c2c66affSColin Finck
1174c2c66affSColin Finck if (!recovered) {
1175c2c66affSColin Finck tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1176318da0c1SPierre Schweitzer bool read_q = false;
1177c2c66affSColin Finck
1178c2c66affSColin Finck if (devices[parity2] && devices[parity2]->devobj) {
1179318da0c1SPierre Schweitzer Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1180318da0c1SPierre Schweitzer Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1181c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1182194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
1183c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1184c2c66affSColin Finck } else
1185318da0c1SPierre Schweitzer read_q = true;
1186c2c66affSColin Finck }
1187c2c66affSColin Finck
1188c2c66affSColin Finck if (read_q) {
1189c2c66affSColin Finck if (num_errors == 1) {
1190c2c66affSColin Finck raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1191c2c66affSColin Finck
1192194ea909SVictor Perevertkin if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation))
1193318da0c1SPierre Schweitzer recovered = true;
1194c2c66affSColin Finck } else {
1195c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 1; j++) {
1196c2c66affSColin Finck if (j != stripe) {
1197c2c66affSColin Finck raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1198c2c66affSColin Finck
1199194ea909SVictor Perevertkin if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1200318da0c1SPierre Schweitzer recovered = true;
1201c2c66affSColin Finck error_stripe = j;
1202c2c66affSColin Finck break;
1203c2c66affSColin Finck }
1204c2c66affSColin Finck }
1205c2c66affSColin Finck }
1206c2c66affSColin Finck }
1207c2c66affSColin Finck }
1208c2c66affSColin Finck
1209c2c66affSColin Finck if (recovered) {
1210318da0c1SPierre Schweitzer uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1211c2c66affSColin Finck
1212c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj)
1213318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1214c2c66affSColin Finck
1215c2c66affSColin Finck RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1216c2c66affSColin Finck
1217c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1218318da0c1SPierre Schweitzer Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1219c2c66affSColin Finck sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1220c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1221194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
1222c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1223c2c66affSColin Finck }
1224c2c66affSColin Finck }
1225c2c66affSColin Finck
1226c2c66affSColin Finck if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1227c2c66affSColin Finck if (error_stripe == ci->num_stripes - 2) {
1228318da0c1SPierre Schweitzer ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1229c2c66affSColin Finck
1230c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1231c2c66affSColin Finck
1232c2c66affSColin Finck RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1233c2c66affSColin Finck
1234c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) {
1235c2c66affSColin Finck if (j == stripe) {
1236c2c66affSColin Finck do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1237c2c66affSColin Finck Vcb->superblock.node_size);
1238c2c66affSColin Finck } else {
1239c2c66affSColin Finck do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1240c2c66affSColin Finck Vcb->superblock.node_size);
1241c2c66affSColin Finck }
1242c2c66affSColin Finck }
1243c2c66affSColin Finck } else {
1244318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1245c2c66affSColin Finck devices[error_stripe_phys]->devitem.dev_id);
1246c2c66affSColin Finck
1247c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1248c2c66affSColin Finck
1249c2c66affSColin Finck RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1250c2c66affSColin Finck sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1251c2c66affSColin Finck }
1252c2c66affSColin Finck }
1253c2c66affSColin Finck
1254c2c66affSColin Finck if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1255318da0c1SPierre Schweitzer Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1256c2c66affSColin Finck sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1257c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1258194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
1259c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1260c2c66affSColin Finck }
1261c2c66affSColin Finck }
1262c2c66affSColin Finck }
1263c2c66affSColin Finck }
1264c2c66affSColin Finck }
1265c2c66affSColin Finck
1266c2c66affSColin Finck if (!recovered) {
1267318da0c1SPierre Schweitzer ERR("unrecoverable checksum error at %I64x\n", addr);
1268c2c66affSColin Finck ExFreePool(sector);
1269c2c66affSColin Finck return STATUS_CRC_ERROR;
1270c2c66affSColin Finck }
1271c2c66affSColin Finck
1272c2c66affSColin Finck ExFreePool(sector);
1273c2c66affSColin Finck } else {
1274174dfab6SVincent Franchomme ULONG sectors = length >> Vcb->sector_shift;
1275318da0c1SPierre Schweitzer uint8_t* sector;
1276194ea909SVictor Perevertkin void* ptr = context->csum;
1277c2c66affSColin Finck
1278174dfab6SVincent Franchomme sector = ExAllocatePoolWithTag(NonPagedPool, (ci->num_stripes + 2) << Vcb->sector_shift, ALLOC_TAG);
1279c2c66affSColin Finck if (!sector) {
1280c2c66affSColin Finck ERR("out of memory\n");
1281c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
1282c2c66affSColin Finck }
1283c2c66affSColin Finck
1284174dfab6SVincent Franchomme for (ULONG i = 0; i < sectors; i++) {
1285318da0c1SPierre Schweitzer uint64_t off;
1286318da0c1SPierre Schweitzer uint16_t physstripe, parity1, parity2;
1287c2c66affSColin Finck
1288174dfab6SVincent Franchomme get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
1289c2c66affSColin Finck ci->num_stripes - 2, &off, &stripe);
1290c2c66affSColin Finck
1291174dfab6SVincent Franchomme parity1 = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1292c2c66affSColin Finck parity2 = (parity1 + 1) % ci->num_stripes;
1293c2c66affSColin Finck
1294c2c66affSColin Finck physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1295c2c66affSColin Finck
1296174dfab6SVincent Franchomme if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
129706042735SVincent Franchomme uint16_t error_stripe = 0;
1298318da0c1SPierre Schweitzer bool recovered = false, failed = false;
1299c2c66affSColin Finck ULONG num_errors = 0;
1300c2c66affSColin Finck
1301c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj)
1302c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1303c2c66affSColin Finck
1304c2c66affSColin Finck j = (parity2 + 1) % ci->num_stripes;
1305c2c66affSColin Finck
1306174dfab6SVincent Franchomme for (uint16_t k = 0; k < ci->num_stripes - 1; k++) {
1307c2c66affSColin Finck if (j != physstripe) {
1308c2c66affSColin Finck if (devices[j] && devices[j]->devobj) {
1309318da0c1SPierre Schweitzer Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1310174dfab6SVincent Franchomme sector + ((ULONG)k << Vcb->sector_shift), false);
1311c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1312194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
1313c2c66affSColin Finck log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1314c2c66affSColin Finck num_errors++;
1315c2c66affSColin Finck error_stripe = k;
1316c2c66affSColin Finck
1317c2c66affSColin Finck if (num_errors > 1) {
1318318da0c1SPierre Schweitzer failed = true;
1319c2c66affSColin Finck break;
1320c2c66affSColin Finck }
1321c2c66affSColin Finck }
1322c2c66affSColin Finck } else {
1323c2c66affSColin Finck num_errors++;
1324c2c66affSColin Finck error_stripe = k;
1325c2c66affSColin Finck
1326c2c66affSColin Finck if (num_errors > 1) {
1327318da0c1SPierre Schweitzer failed = true;
1328c2c66affSColin Finck break;
1329c2c66affSColin Finck }
1330c2c66affSColin Finck }
1331c2c66affSColin Finck }
1332c2c66affSColin Finck
1333c2c66affSColin Finck j = (j + 1) % ci->num_stripes;
1334c2c66affSColin Finck }
1335c2c66affSColin Finck
1336c2c66affSColin Finck if (!failed) {
1337c2c66affSColin Finck if (num_errors == 0) {
1338174dfab6SVincent Franchomme RtlCopyMemory(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
1339c2c66affSColin Finck
1340c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) {
1341c2c66affSColin Finck if (j != stripe)
1342174dfab6SVincent Franchomme do_xor(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift), Vcb->superblock.sector_size);
1343c2c66affSColin Finck }
1344c2c66affSColin Finck
1345174dfab6SVincent Franchomme if (!ptr || check_sector_csum(Vcb, sector + ((unsigned int)stripe << Vcb->sector_shift), ptr)) {
1346174dfab6SVincent Franchomme RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1347c2c66affSColin Finck
1348c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj)
1349174dfab6SVincent Franchomme ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
1350c2c66affSColin Finck devices[physstripe]->devitem.dev_id);
1351c2c66affSColin Finck
1352318da0c1SPierre Schweitzer recovered = true;
1353c2c66affSColin Finck
1354c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1355318da0c1SPierre Schweitzer Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1356174dfab6SVincent Franchomme sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1357c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1358194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
1359c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1360c2c66affSColin Finck }
1361c2c66affSColin Finck }
1362c2c66affSColin Finck }
1363c2c66affSColin Finck }
1364c2c66affSColin Finck
1365c2c66affSColin Finck if (!recovered) {
1366318da0c1SPierre Schweitzer bool read_q = false;
1367c2c66affSColin Finck
1368c2c66affSColin Finck if (devices[parity2] && devices[parity2]->devobj) {
1369318da0c1SPierre Schweitzer Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1370174dfab6SVincent Franchomme Vcb->superblock.sector_size, sector + ((unsigned int)(ci->num_stripes - 1) << Vcb->sector_shift), false);
1371c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1372194ea909SVictor Perevertkin ERR("sync_read_phys returned %08lx\n", Status);
1373c2c66affSColin Finck log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1374c2c66affSColin Finck } else
1375318da0c1SPierre Schweitzer read_q = true;
1376c2c66affSColin Finck }
1377c2c66affSColin Finck
1378c2c66affSColin Finck if (read_q) {
1379c2c66affSColin Finck if (num_errors == 1) {
1380174dfab6SVincent Franchomme raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
1381c2c66affSColin Finck
1382c2c66affSColin Finck if (!devices[physstripe] || !devices[physstripe]->devobj)
1383318da0c1SPierre Schweitzer recovered = true;
1384194ea909SVictor Perevertkin else
1385174dfab6SVincent Franchomme recovered = check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr);
1386c2c66affSColin Finck } else {
1387c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 1; j++) {
1388c2c66affSColin Finck if (j != stripe) {
1389174dfab6SVincent Franchomme raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
1390c2c66affSColin Finck
1391174dfab6SVincent Franchomme if (check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr)) {
1392318da0c1SPierre Schweitzer recovered = true;
1393c2c66affSColin Finck error_stripe = j;
1394c2c66affSColin Finck break;
1395c2c66affSColin Finck }
1396c2c66affSColin Finck }
1397c2c66affSColin Finck }
1398c2c66affSColin Finck }
1399c2c66affSColin Finck }
1400c2c66affSColin Finck
1401c2c66affSColin Finck if (recovered) {
1402318da0c1SPierre Schweitzer uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1403c2c66affSColin Finck
1404c2c66affSColin Finck if (devices[physstripe] && devices[physstripe]->devobj)
1405318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n",
1406174dfab6SVincent Franchomme addr + ((uint64_t)i << Vcb->sector_shift), devices[physstripe]->devitem.dev_id);
1407c2c66affSColin Finck
1408174dfab6SVincent Franchomme RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
1409c2c66affSColin Finck
1410c2c66affSColin Finck if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1411318da0c1SPierre Schweitzer Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1412174dfab6SVincent Franchomme sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
1413c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1414194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
1415c2c66affSColin Finck log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1416c2c66affSColin Finck }
1417c2c66affSColin Finck }
1418c2c66affSColin Finck
1419c2c66affSColin Finck if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1420c2c66affSColin Finck if (error_stripe == ci->num_stripes - 2) {
1421174dfab6SVincent Franchomme ERR("recovering from parity error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
1422c2c66affSColin Finck devices[error_stripe_phys]->devitem.dev_id);
1423c2c66affSColin Finck
1424c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1425c2c66affSColin Finck
1426174dfab6SVincent Franchomme RtlZeroMemory(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
1427c2c66affSColin Finck
1428c2c66affSColin Finck for (j = 0; j < ci->num_stripes - 2; j++) {
1429c2c66affSColin Finck if (j == stripe) {
1430174dfab6SVincent Franchomme do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift),
1431c2c66affSColin Finck Vcb->superblock.sector_size);
1432c2c66affSColin Finck } else {
1433174dfab6SVincent Franchomme do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift),
1434c2c66affSColin Finck Vcb->superblock.sector_size);
1435c2c66affSColin Finck }
1436c2c66affSColin Finck }
1437c2c66affSColin Finck } else {
1438318da0c1SPierre Schweitzer ERR("recovering from checksum error at %I64x, device %I64x\n",
1439174dfab6SVincent Franchomme addr + ((uint64_t)i << Vcb->sector_shift) + ((error_stripe - stripe) * ci->stripe_length),
1440c2c66affSColin Finck devices[error_stripe_phys]->devitem.dev_id);
1441c2c66affSColin Finck
1442c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1443c2c66affSColin Finck
1444174dfab6SVincent Franchomme RtlCopyMemory(sector + ((unsigned int)error_stripe << Vcb->sector_shift),
1445174dfab6SVincent Franchomme sector + ((unsigned int)(ci->num_stripes + 1) << Vcb->sector_shift), Vcb->superblock.sector_size);
1446c2c66affSColin Finck }
1447c2c66affSColin Finck }
1448c2c66affSColin Finck
1449c2c66affSColin Finck if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1450318da0c1SPierre Schweitzer Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1451174dfab6SVincent Franchomme sector + ((unsigned int)error_stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1452c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1453194ea909SVictor Perevertkin WARN("write_data_phys returned %08lx\n", Status);
1454c2c66affSColin Finck log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1455c2c66affSColin Finck }
1456c2c66affSColin Finck }
1457c2c66affSColin Finck }
1458c2c66affSColin Finck }
1459c2c66affSColin Finck }
1460c2c66affSColin Finck
1461c2c66affSColin Finck if (!recovered) {
1462174dfab6SVincent Franchomme ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
1463c2c66affSColin Finck ExFreePool(sector);
1464c2c66affSColin Finck return STATUS_CRC_ERROR;
1465c2c66affSColin Finck }
1466c2c66affSColin Finck }
1467194ea909SVictor Perevertkin
1468194ea909SVictor Perevertkin if (ptr)
1469194ea909SVictor Perevertkin ptr = (uint8_t*)ptr + Vcb->csum_size;
1470c2c66affSColin Finck }
1471c2c66affSColin Finck
1472c2c66affSColin Finck ExFreePool(sector);
1473c2c66affSColin Finck }
1474c2c66affSColin Finck
1475c2c66affSColin Finck return STATUS_SUCCESS;
1476c2c66affSColin Finck }
1477c2c66affSColin Finck
1478194ea909SVictor Perevertkin NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) void* csum,
1479318da0c1SPierre Schweitzer _In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read,
1480c2c66affSColin Finck _In_ ULONG priority) {
1481c2c66affSColin Finck CHUNK_ITEM* ci;
1482c2c66affSColin Finck CHUNK_ITEM_STRIPE* cis;
1483c2c66affSColin Finck read_data_context context;
1484318da0c1SPierre Schweitzer uint64_t type, offset, total_reading = 0;
1485c2c66affSColin Finck NTSTATUS Status;
1486c2c66affSColin Finck device** devices = NULL;
1487318da0c1SPierre Schweitzer uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1488318da0c1SPierre Schweitzer uint8_t* dummypage = NULL;
1489c2c66affSColin Finck PMDL dummy_mdl = NULL;
1490318da0c1SPierre Schweitzer bool need_to_wait;
1491318da0c1SPierre Schweitzer uint64_t lockaddr, locklen;
1492c2c66affSColin Finck
1493c2c66affSColin Finck if (Vcb->log_to_phys_loaded) {
1494c2c66affSColin Finck if (!c) {
1495c2c66affSColin Finck c = get_chunk_from_address(Vcb, addr);
1496c2c66affSColin Finck
1497c2c66affSColin Finck if (!c) {
1498c2c66affSColin Finck ERR("get_chunk_from_address failed\n");
1499c2c66affSColin Finck return STATUS_INTERNAL_ERROR;
1500c2c66affSColin Finck }
1501c2c66affSColin Finck }
1502c2c66affSColin Finck
1503c2c66affSColin Finck ci = c->chunk_item;
1504c2c66affSColin Finck offset = c->offset;
1505c2c66affSColin Finck devices = c->devices;
1506c2c66affSColin Finck
1507c2c66affSColin Finck if (pc)
1508c2c66affSColin Finck *pc = c;
1509c2c66affSColin Finck } else {
1510c2c66affSColin Finck LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1511c2c66affSColin Finck
1512c2c66affSColin Finck ci = NULL;
1513c2c66affSColin Finck
1514c2c66affSColin Finck c = NULL;
1515c2c66affSColin Finck while (le != &Vcb->sys_chunks) {
1516c2c66affSColin Finck sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1517c2c66affSColin Finck
1518c2c66affSColin Finck if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1519c2c66affSColin Finck CHUNK_ITEM* chunk_item = sc->data;
1520c2c66affSColin Finck
1521c2c66affSColin Finck if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1522c2c66affSColin Finck ci = chunk_item;
1523c2c66affSColin Finck offset = sc->key.offset;
1524c2c66affSColin Finck cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1525c2c66affSColin Finck
1526318da0c1SPierre Schweitzer devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1527c2c66affSColin Finck if (!devices) {
1528c2c66affSColin Finck ERR("out of memory\n");
1529c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
1530c2c66affSColin Finck }
1531c2c66affSColin Finck
1532c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
1533c2c66affSColin Finck devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1534c2c66affSColin Finck }
1535c2c66affSColin Finck
1536c2c66affSColin Finck break;
1537c2c66affSColin Finck }
1538c2c66affSColin Finck }
1539c2c66affSColin Finck
1540c2c66affSColin Finck le = le->Flink;
1541c2c66affSColin Finck }
1542c2c66affSColin Finck
1543c2c66affSColin Finck if (!ci) {
1544318da0c1SPierre Schweitzer ERR("could not find chunk for %I64x in bootstrap\n", addr);
1545c2c66affSColin Finck return STATUS_INTERNAL_ERROR;
1546c2c66affSColin Finck }
1547c2c66affSColin Finck
1548c2c66affSColin Finck if (pc)
1549c2c66affSColin Finck *pc = NULL;
1550c2c66affSColin Finck }
1551c2c66affSColin Finck
1552c2c66affSColin Finck if (ci->type & BLOCK_FLAG_DUPLICATE) {
1553c2c66affSColin Finck type = BLOCK_FLAG_DUPLICATE;
1554c2c66affSColin Finck allowed_missing = ci->num_stripes - 1;
1555c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID0) {
1556c2c66affSColin Finck type = BLOCK_FLAG_RAID0;
1557c2c66affSColin Finck allowed_missing = 0;
1558c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID1) {
1559c2c66affSColin Finck type = BLOCK_FLAG_DUPLICATE;
1560c2c66affSColin Finck allowed_missing = 1;
1561c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID10) {
1562c2c66affSColin Finck type = BLOCK_FLAG_RAID10;
1563c2c66affSColin Finck allowed_missing = 1;
1564c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID5) {
1565c2c66affSColin Finck type = BLOCK_FLAG_RAID5;
1566c2c66affSColin Finck allowed_missing = 1;
1567c2c66affSColin Finck } else if (ci->type & BLOCK_FLAG_RAID6) {
1568c2c66affSColin Finck type = BLOCK_FLAG_RAID6;
1569c2c66affSColin Finck allowed_missing = 2;
1570194ea909SVictor Perevertkin } else if (ci->type & BLOCK_FLAG_RAID1C3) {
1571194ea909SVictor Perevertkin type = BLOCK_FLAG_DUPLICATE;
1572194ea909SVictor Perevertkin allowed_missing = 2;
1573194ea909SVictor Perevertkin } else if (ci->type & BLOCK_FLAG_RAID1C4) {
1574194ea909SVictor Perevertkin type = BLOCK_FLAG_DUPLICATE;
1575194ea909SVictor Perevertkin allowed_missing = 3;
1576c2c66affSColin Finck } else { // SINGLE
1577c2c66affSColin Finck type = BLOCK_FLAG_DUPLICATE;
1578c2c66affSColin Finck allowed_missing = 0;
1579c2c66affSColin Finck }
1580c2c66affSColin Finck
1581c2c66affSColin Finck cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1582c2c66affSColin Finck
1583c2c66affSColin Finck RtlZeroMemory(&context, sizeof(read_data_context));
1584318da0c1SPierre Schweitzer KeInitializeEvent(&context.Event, NotificationEvent, false);
1585c2c66affSColin Finck
1586c2c66affSColin Finck context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1587c2c66affSColin Finck if (!context.stripes) {
1588c2c66affSColin Finck ERR("out of memory\n");
1589c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
1590c2c66affSColin Finck }
1591c2c66affSColin Finck
1592c2c66affSColin Finck if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1593c2c66affSColin Finck get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1594c2c66affSColin Finck chunk_lock_range(Vcb, c, lockaddr, locklen);
1595c2c66affSColin Finck }
1596c2c66affSColin Finck
1597c2c66affSColin Finck RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1598c2c66affSColin Finck
1599c2c66affSColin Finck context.buflen = length;
1600c2c66affSColin Finck context.num_stripes = ci->num_stripes;
1601c2c66affSColin Finck context.stripes_left = context.num_stripes;
1602c2c66affSColin Finck context.sector_size = Vcb->superblock.sector_size;
1603c2c66affSColin Finck context.csum = csum;
1604c2c66affSColin Finck context.tree = is_tree;
1605c2c66affSColin Finck context.type = type;
1606c2c66affSColin Finck
1607c2c66affSColin Finck if (type == BLOCK_FLAG_RAID0) {
1608318da0c1SPierre Schweitzer uint64_t startoff, endoff;
1609318da0c1SPierre Schweitzer uint16_t endoffstripe, stripe;
1610318da0c1SPierre Schweitzer uint32_t *stripeoff, pos;
1611c2c66affSColin Finck PMDL master_mdl;
1612c2c66affSColin Finck PFN_NUMBER* pfns;
1613c2c66affSColin Finck
1614c2c66affSColin Finck // FIXME - test this still works if page size isn't the same as sector size
1615c2c66affSColin Finck
1616c2c66affSColin Finck // This relies on the fact that MDLs are followed in memory by the page frame numbers,
1617c2c66affSColin Finck // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1618c2c66affSColin Finck // data for you without doing a memcpy yourself.
1619c2c66affSColin Finck // MDLs are officially opaque, so this might very well break in future versions of Windows.
1620c2c66affSColin Finck
1621c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1622c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1623c2c66affSColin Finck
1624c2c66affSColin Finck if (file_read) {
1625c2c66affSColin Finck // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1626c2c66affSColin Finck // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1627c2c66affSColin Finck // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1628c2c66affSColin Finck
1629c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1630c2c66affSColin Finck
1631c2c66affSColin Finck if (!context.va) {
1632c2c66affSColin Finck ERR("out of memory\n");
1633c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1634c2c66affSColin Finck goto exit;
1635c2c66affSColin Finck }
1636c2c66affSColin Finck } else
1637c2c66affSColin Finck context.va = buf;
1638c2c66affSColin Finck
1639318da0c1SPierre Schweitzer master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1640c2c66affSColin Finck if (!master_mdl) {
1641c2c66affSColin Finck ERR("out of memory\n");
1642c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1643c2c66affSColin Finck goto exit;
1644c2c66affSColin Finck }
1645c2c66affSColin Finck
1646c2c66affSColin Finck Status = STATUS_SUCCESS;
1647c2c66affSColin Finck
1648c2c66affSColin Finck _SEH2_TRY {
1649c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)1650c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1651c2c66affSColin Finck Status = _SEH2_GetExceptionCode();
1652c2c66affSColin Finck } _SEH2_END;
1653c2c66affSColin Finck
1654c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1655194ea909SVictor Perevertkin ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1656c2c66affSColin Finck IoFreeMdl(master_mdl);
1657c2c66affSColin Finck goto exit;
1658c2c66affSColin Finck }
1659c2c66affSColin Finck
1660c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1);
1661c2c66affSColin Finck
1662c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
1663c2c66affSColin Finck if (startoffstripe > i)
1664c2c66affSColin Finck context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1665c2c66affSColin Finck else if (startoffstripe == i)
1666c2c66affSColin Finck context.stripes[i].stripestart = startoff;
1667c2c66affSColin Finck else
1668c2c66affSColin Finck context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1669c2c66affSColin Finck
1670c2c66affSColin Finck if (endoffstripe > i)
1671c2c66affSColin Finck context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1672c2c66affSColin Finck else if (endoffstripe == i)
1673c2c66affSColin Finck context.stripes[i].stripeend = endoff + 1;
1674c2c66affSColin Finck else
1675c2c66affSColin Finck context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1676c2c66affSColin Finck
1677c2c66affSColin Finck if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1678318da0c1SPierre Schweitzer context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1679c2c66affSColin Finck
1680c2c66affSColin Finck if (!context.stripes[i].mdl) {
1681c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
1682eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
1683eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
1684c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1685c2c66affSColin Finck goto exit;
1686c2c66affSColin Finck }
1687c2c66affSColin Finck }
1688c2c66affSColin Finck }
1689c2c66affSColin Finck
1690318da0c1SPierre Schweitzer stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1691c2c66affSColin Finck if (!stripeoff) {
1692c2c66affSColin Finck ERR("out of memory\n");
1693eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
1694eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
1695c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1696c2c66affSColin Finck goto exit;
1697c2c66affSColin Finck }
1698c2c66affSColin Finck
1699318da0c1SPierre Schweitzer RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1700c2c66affSColin Finck
1701c2c66affSColin Finck pos = 0;
1702c2c66affSColin Finck stripe = startoffstripe;
1703c2c66affSColin Finck while (pos < length) {
1704c2c66affSColin Finck PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1705c2c66affSColin Finck
1706c2c66affSColin Finck if (pos == 0) {
1707318da0c1SPierre Schweitzer uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1708c2c66affSColin Finck
1709c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1710c2c66affSColin Finck
1711c2c66affSColin Finck stripeoff[stripe] += readlen;
1712c2c66affSColin Finck pos += readlen;
1713c2c66affSColin Finck } else if (length - pos < ci->stripe_length) {
1714c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1715c2c66affSColin Finck
1716c2c66affSColin Finck pos = length;
1717c2c66affSColin Finck } else {
1718c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1719c2c66affSColin Finck
1720318da0c1SPierre Schweitzer stripeoff[stripe] += (uint32_t)ci->stripe_length;
1721318da0c1SPierre Schweitzer pos += (uint32_t)ci->stripe_length;
1722c2c66affSColin Finck }
1723c2c66affSColin Finck
1724c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
1725c2c66affSColin Finck }
1726c2c66affSColin Finck
1727c2c66affSColin Finck MmUnlockPages(master_mdl);
1728c2c66affSColin Finck IoFreeMdl(master_mdl);
1729c2c66affSColin Finck
1730c2c66affSColin Finck ExFreePool(stripeoff);
1731c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID10) {
1732318da0c1SPierre Schweitzer uint64_t startoff, endoff;
1733318da0c1SPierre Schweitzer uint16_t endoffstripe, j, stripe;
1734c2c66affSColin Finck ULONG orig_ls;
1735c2c66affSColin Finck PMDL master_mdl;
1736c2c66affSColin Finck PFN_NUMBER* pfns;
1737318da0c1SPierre Schweitzer uint32_t* stripeoff, pos;
1738c2c66affSColin Finck read_data_stripe** stripes;
1739c2c66affSColin Finck
1740c2c66affSColin Finck if (c)
1741c2c66affSColin Finck orig_ls = c->last_stripe;
1742c2c66affSColin Finck else
1743c2c66affSColin Finck orig_ls = 0;
1744c2c66affSColin Finck
1745c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1746c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1747c2c66affSColin Finck
1748c2c66affSColin Finck if ((ci->num_stripes % ci->sub_stripes) != 0) {
1749318da0c1SPierre Schweitzer ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1750c2c66affSColin Finck Status = STATUS_INTERNAL_ERROR;
1751c2c66affSColin Finck goto exit;
1752c2c66affSColin Finck }
1753c2c66affSColin Finck
1754c2c66affSColin Finck if (file_read) {
1755c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1756c2c66affSColin Finck
1757c2c66affSColin Finck if (!context.va) {
1758c2c66affSColin Finck ERR("out of memory\n");
1759c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1760c2c66affSColin Finck goto exit;
1761c2c66affSColin Finck }
1762c2c66affSColin Finck } else
1763c2c66affSColin Finck context.va = buf;
1764c2c66affSColin Finck
1765174dfab6SVincent Franchomme context.firstoff = (uint16_t)((startoff % ci->stripe_length) >> Vcb->sector_shift);
1766c2c66affSColin Finck context.startoffstripe = startoffstripe;
1767174dfab6SVincent Franchomme context.sectors_per_stripe = (uint16_t)(ci->stripe_length >> Vcb->sector_shift);
1768c2c66affSColin Finck
1769c2c66affSColin Finck startoffstripe *= ci->sub_stripes;
1770c2c66affSColin Finck endoffstripe *= ci->sub_stripes;
1771c2c66affSColin Finck
1772c2c66affSColin Finck if (c)
1773c2c66affSColin Finck c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1774c2c66affSColin Finck
1775318da0c1SPierre Schweitzer master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1776c2c66affSColin Finck if (!master_mdl) {
1777c2c66affSColin Finck ERR("out of memory\n");
1778c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1779c2c66affSColin Finck goto exit;
1780c2c66affSColin Finck }
1781c2c66affSColin Finck
1782c2c66affSColin Finck Status = STATUS_SUCCESS;
1783c2c66affSColin Finck
1784c2c66affSColin Finck _SEH2_TRY {
1785c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)1786c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1787c2c66affSColin Finck Status = _SEH2_GetExceptionCode();
1788c2c66affSColin Finck } _SEH2_END;
1789c2c66affSColin Finck
1790c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1791194ea909SVictor Perevertkin ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1792c2c66affSColin Finck IoFreeMdl(master_mdl);
1793c2c66affSColin Finck goto exit;
1794c2c66affSColin Finck }
1795c2c66affSColin Finck
1796c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1);
1797c2c66affSColin Finck
1798c2c66affSColin Finck stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1799c2c66affSColin Finck if (!stripes) {
1800c2c66affSColin Finck ERR("out of memory\n");
1801eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
1802eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
1803c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1804c2c66affSColin Finck goto exit;
1805c2c66affSColin Finck }
1806c2c66affSColin Finck
1807c2c66affSColin Finck RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1808c2c66affSColin Finck
1809c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1810318da0c1SPierre Schweitzer uint64_t sstart, send;
1811318da0c1SPierre Schweitzer bool stripeset = false;
1812c2c66affSColin Finck
1813c2c66affSColin Finck if (startoffstripe > i)
1814c2c66affSColin Finck sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1815c2c66affSColin Finck else if (startoffstripe == i)
1816c2c66affSColin Finck sstart = startoff;
1817c2c66affSColin Finck else
1818c2c66affSColin Finck sstart = startoff - (startoff % ci->stripe_length);
1819c2c66affSColin Finck
1820c2c66affSColin Finck if (endoffstripe > i)
1821c2c66affSColin Finck send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1822c2c66affSColin Finck else if (endoffstripe == i)
1823c2c66affSColin Finck send = endoff + 1;
1824c2c66affSColin Finck else
1825c2c66affSColin Finck send = endoff - (endoff % ci->stripe_length);
1826c2c66affSColin Finck
1827c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) {
1828c2c66affSColin Finck if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1829c2c66affSColin Finck context.stripes[i+j].stripestart = sstart;
1830c2c66affSColin Finck context.stripes[i+j].stripeend = send;
1831c2c66affSColin Finck stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1832c2c66affSColin Finck
1833c2c66affSColin Finck if (sstart != send) {
1834318da0c1SPierre Schweitzer context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1835c2c66affSColin Finck
1836c2c66affSColin Finck if (!context.stripes[i+j].mdl) {
1837c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
1838eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
1839eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
1840c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1841c2c66affSColin Finck goto exit;
1842c2c66affSColin Finck }
1843c2c66affSColin Finck }
1844c2c66affSColin Finck
1845318da0c1SPierre Schweitzer stripeset = true;
1846c2c66affSColin Finck } else
1847c2c66affSColin Finck context.stripes[i+j].status = ReadDataStatus_Skip;
1848c2c66affSColin Finck }
1849c2c66affSColin Finck
1850c2c66affSColin Finck if (!stripeset) {
1851c2c66affSColin Finck for (j = 0; j < ci->sub_stripes; j++) {
1852c2c66affSColin Finck if (devices[i+j] && devices[i+j]->devobj) {
1853c2c66affSColin Finck context.stripes[i+j].stripestart = sstart;
1854c2c66affSColin Finck context.stripes[i+j].stripeend = send;
1855c2c66affSColin Finck context.stripes[i+j].status = ReadDataStatus_Pending;
1856c2c66affSColin Finck stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1857c2c66affSColin Finck
1858c2c66affSColin Finck if (sstart != send) {
1859318da0c1SPierre Schweitzer context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1860c2c66affSColin Finck
1861c2c66affSColin Finck if (!context.stripes[i+j].mdl) {
1862c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
1863eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
1864eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
1865c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1866c2c66affSColin Finck goto exit;
1867c2c66affSColin Finck }
1868c2c66affSColin Finck }
1869c2c66affSColin Finck
1870318da0c1SPierre Schweitzer stripeset = true;
1871c2c66affSColin Finck break;
1872c2c66affSColin Finck }
1873c2c66affSColin Finck }
1874c2c66affSColin Finck
1875c2c66affSColin Finck if (!stripeset) {
1876c2c66affSColin Finck ERR("could not find stripe to read\n");
1877c2c66affSColin Finck Status = STATUS_DEVICE_NOT_READY;
1878c2c66affSColin Finck goto exit;
1879c2c66affSColin Finck }
1880c2c66affSColin Finck }
1881c2c66affSColin Finck }
1882c2c66affSColin Finck
1883318da0c1SPierre Schweitzer stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1884c2c66affSColin Finck if (!stripeoff) {
1885c2c66affSColin Finck ERR("out of memory\n");
1886eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
1887eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
1888c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1889c2c66affSColin Finck goto exit;
1890c2c66affSColin Finck }
1891c2c66affSColin Finck
1892318da0c1SPierre Schweitzer RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1893c2c66affSColin Finck
1894c2c66affSColin Finck pos = 0;
1895c2c66affSColin Finck stripe = startoffstripe / ci->sub_stripes;
1896c2c66affSColin Finck while (pos < length) {
1897c2c66affSColin Finck PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1898c2c66affSColin Finck
1899c2c66affSColin Finck if (pos == 0) {
1900318da0c1SPierre Schweitzer uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1901c2c66affSColin Finck ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1902c2c66affSColin Finck
1903c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1904c2c66affSColin Finck
1905c2c66affSColin Finck stripeoff[stripe] += readlen;
1906c2c66affSColin Finck pos += readlen;
1907c2c66affSColin Finck } else if (length - pos < ci->stripe_length) {
1908c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1909c2c66affSColin Finck
1910c2c66affSColin Finck pos = length;
1911c2c66affSColin Finck } else {
1912c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1913c2c66affSColin Finck
1914c2c66affSColin Finck stripeoff[stripe] += (ULONG)ci->stripe_length;
1915c2c66affSColin Finck pos += (ULONG)ci->stripe_length;
1916c2c66affSColin Finck }
1917c2c66affSColin Finck
1918c2c66affSColin Finck stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1919c2c66affSColin Finck }
1920c2c66affSColin Finck
1921c2c66affSColin Finck MmUnlockPages(master_mdl);
1922c2c66affSColin Finck IoFreeMdl(master_mdl);
1923c2c66affSColin Finck
1924c2c66affSColin Finck ExFreePool(stripeoff);
1925c2c66affSColin Finck ExFreePool(stripes);
1926c2c66affSColin Finck } else if (type == BLOCK_FLAG_DUPLICATE) {
1927318da0c1SPierre Schweitzer uint64_t orig_ls;
1928c2c66affSColin Finck
1929c2c66affSColin Finck if (c)
1930c2c66affSColin Finck orig_ls = i = c->last_stripe;
1931c2c66affSColin Finck else
1932c2c66affSColin Finck orig_ls = i = 0;
1933c2c66affSColin Finck
1934c2c66affSColin Finck while (!devices[i] || !devices[i]->devobj) {
1935c2c66affSColin Finck i = (i + 1) % ci->num_stripes;
1936c2c66affSColin Finck
1937c2c66affSColin Finck if (i == orig_ls) {
1938c2c66affSColin Finck ERR("no devices available to service request\n");
1939c2c66affSColin Finck Status = STATUS_DEVICE_NOT_READY;
1940c2c66affSColin Finck goto exit;
1941c2c66affSColin Finck }
1942c2c66affSColin Finck }
1943c2c66affSColin Finck
1944c2c66affSColin Finck if (c)
1945c2c66affSColin Finck c->last_stripe = (i + 1) % ci->num_stripes;
1946c2c66affSColin Finck
1947c2c66affSColin Finck context.stripes[i].stripestart = addr - offset;
1948c2c66affSColin Finck context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1949c2c66affSColin Finck
1950c2c66affSColin Finck if (file_read) {
1951c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1952c2c66affSColin Finck
1953c2c66affSColin Finck if (!context.va) {
1954c2c66affSColin Finck ERR("out of memory\n");
1955c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1956c2c66affSColin Finck goto exit;
1957c2c66affSColin Finck }
1958c2c66affSColin Finck
1959318da0c1SPierre Schweitzer context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1960c2c66affSColin Finck if (!context.stripes[i].mdl) {
1961c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
1962c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1963c2c66affSColin Finck goto exit;
1964c2c66affSColin Finck }
1965c2c66affSColin Finck
1966c2c66affSColin Finck MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1967c2c66affSColin Finck } else {
1968318da0c1SPierre Schweitzer context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
1969c2c66affSColin Finck
1970c2c66affSColin Finck if (!context.stripes[i].mdl) {
1971c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
1972c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
1973c2c66affSColin Finck goto exit;
1974c2c66affSColin Finck }
1975c2c66affSColin Finck
1976c2c66affSColin Finck Status = STATUS_SUCCESS;
1977c2c66affSColin Finck
1978c2c66affSColin Finck _SEH2_TRY {
1979c2c66affSColin Finck MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)1980c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1981c2c66affSColin Finck Status = _SEH2_GetExceptionCode();
1982c2c66affSColin Finck } _SEH2_END;
1983c2c66affSColin Finck
1984c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
1985194ea909SVictor Perevertkin ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1986c2c66affSColin Finck goto exit;
1987c2c66affSColin Finck }
1988c2c66affSColin Finck }
1989c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID5) {
1990318da0c1SPierre Schweitzer uint64_t startoff, endoff;
1991318da0c1SPierre Schweitzer uint16_t endoffstripe, parity;
1992318da0c1SPierre Schweitzer uint32_t *stripeoff, pos;
1993c2c66affSColin Finck PMDL master_mdl;
199406042735SVincent Franchomme PFN_NUMBER *pfns, dummy = 0;
1995318da0c1SPierre Schweitzer bool need_dummy = false;
1996c2c66affSColin Finck
1997c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1998c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1999c2c66affSColin Finck
2000c2c66affSColin Finck if (file_read) {
2001c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2002c2c66affSColin Finck
2003c2c66affSColin Finck if (!context.va) {
2004c2c66affSColin Finck ERR("out of memory\n");
2005c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2006c2c66affSColin Finck goto exit;
2007c2c66affSColin Finck }
2008c2c66affSColin Finck } else
2009c2c66affSColin Finck context.va = buf;
2010c2c66affSColin Finck
2011318da0c1SPierre Schweitzer master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2012c2c66affSColin Finck if (!master_mdl) {
2013c2c66affSColin Finck ERR("out of memory\n");
2014c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2015c2c66affSColin Finck goto exit;
2016c2c66affSColin Finck }
2017c2c66affSColin Finck
2018c2c66affSColin Finck Status = STATUS_SUCCESS;
2019c2c66affSColin Finck
2020c2c66affSColin Finck _SEH2_TRY {
2021c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)2022c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2023c2c66affSColin Finck Status = _SEH2_GetExceptionCode();
2024c2c66affSColin Finck } _SEH2_END;
2025c2c66affSColin Finck
2026c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2027194ea909SVictor Perevertkin ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2028c2c66affSColin Finck IoFreeMdl(master_mdl);
2029c2c66affSColin Finck goto exit;
2030c2c66affSColin Finck }
2031c2c66affSColin Finck
2032c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1);
2033c2c66affSColin Finck
2034c2c66affSColin Finck pos = 0;
2035c2c66affSColin Finck while (pos < length) {
2036c2c66affSColin Finck parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2037c2c66affSColin Finck
2038c2c66affSColin Finck if (pos == 0) {
2039318da0c1SPierre Schweitzer uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2040c2c66affSColin Finck ULONG skip, readlen;
2041c2c66affSColin Finck
2042c2c66affSColin Finck i = startoffstripe;
2043c2c66affSColin Finck while (stripe != parity) {
2044c2c66affSColin Finck if (i == startoffstripe) {
2045c2c66affSColin Finck readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2046c2c66affSColin Finck
2047c2c66affSColin Finck context.stripes[stripe].stripestart = startoff;
2048c2c66affSColin Finck context.stripes[stripe].stripeend = startoff + readlen;
2049c2c66affSColin Finck
2050c2c66affSColin Finck pos += readlen;
2051c2c66affSColin Finck
2052c2c66affSColin Finck if (pos == length)
2053c2c66affSColin Finck break;
2054c2c66affSColin Finck } else {
2055c2c66affSColin Finck readlen = min(length - pos, (ULONG)ci->stripe_length);
2056c2c66affSColin Finck
2057c2c66affSColin Finck context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2058c2c66affSColin Finck context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2059c2c66affSColin Finck
2060c2c66affSColin Finck pos += readlen;
2061c2c66affSColin Finck
2062c2c66affSColin Finck if (pos == length)
2063c2c66affSColin Finck break;
2064c2c66affSColin Finck }
2065c2c66affSColin Finck
2066c2c66affSColin Finck i++;
2067c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2068c2c66affSColin Finck }
2069c2c66affSColin Finck
2070c2c66affSColin Finck if (pos == length)
2071c2c66affSColin Finck break;
2072c2c66affSColin Finck
2073c2c66affSColin Finck for (i = 0; i < startoffstripe; i++) {
2074318da0c1SPierre Schweitzer uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2075c2c66affSColin Finck
2076c2c66affSColin Finck context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2077c2c66affSColin Finck }
2078c2c66affSColin Finck
2079c2c66affSColin Finck context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2080c2c66affSColin Finck
2081c2c66affSColin Finck if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2082c2c66affSColin Finck skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2083c2c66affSColin Finck
2084c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2085c2c66affSColin Finck context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2086c2c66affSColin Finck }
2087c2c66affSColin Finck
2088318da0c1SPierre Schweitzer pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2089318da0c1SPierre Schweitzer need_dummy = true;
2090c2c66affSColin Finck }
2091c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2092c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2093c2c66affSColin Finck context.stripes[i].stripeend += ci->stripe_length;
2094c2c66affSColin Finck }
2095c2c66affSColin Finck
2096318da0c1SPierre Schweitzer pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2097318da0c1SPierre Schweitzer need_dummy = true;
2098c2c66affSColin Finck } else {
2099318da0c1SPierre Schweitzer uint16_t stripe = (parity + 1) % ci->num_stripes;
2100c2c66affSColin Finck
2101c2c66affSColin Finck i = 0;
2102c2c66affSColin Finck while (stripe != parity) {
2103c2c66affSColin Finck if (endoffstripe == i) {
2104c2c66affSColin Finck context.stripes[stripe].stripeend = endoff + 1;
2105c2c66affSColin Finck break;
2106c2c66affSColin Finck } else if (endoffstripe > i)
2107c2c66affSColin Finck context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2108c2c66affSColin Finck
2109c2c66affSColin Finck i++;
2110c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2111c2c66affSColin Finck }
2112c2c66affSColin Finck
2113c2c66affSColin Finck break;
2114c2c66affSColin Finck }
2115c2c66affSColin Finck }
2116c2c66affSColin Finck
2117c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2118c2c66affSColin Finck if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2119c2c66affSColin Finck context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2120318da0c1SPierre Schweitzer false, false, NULL);
2121c2c66affSColin Finck
2122c2c66affSColin Finck if (!context.stripes[i].mdl) {
2123c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
2124eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2125eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2126c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2127c2c66affSColin Finck goto exit;
2128c2c66affSColin Finck }
2129c2c66affSColin Finck }
2130c2c66affSColin Finck }
2131c2c66affSColin Finck
2132c2c66affSColin Finck if (need_dummy) {
2133c2c66affSColin Finck dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2134c2c66affSColin Finck if (!dummypage) {
2135c2c66affSColin Finck ERR("out of memory\n");
2136eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2137eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2138c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2139c2c66affSColin Finck goto exit;
2140c2c66affSColin Finck }
2141c2c66affSColin Finck
2142318da0c1SPierre Schweitzer dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2143c2c66affSColin Finck if (!dummy_mdl) {
2144c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
2145eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2146eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2147c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2148c2c66affSColin Finck goto exit;
2149c2c66affSColin Finck }
2150c2c66affSColin Finck
2151c2c66affSColin Finck MmBuildMdlForNonPagedPool(dummy_mdl);
2152c2c66affSColin Finck
2153c2c66affSColin Finck dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2154c2c66affSColin Finck }
2155c2c66affSColin Finck
2156318da0c1SPierre Schweitzer stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2157c2c66affSColin Finck if (!stripeoff) {
2158c2c66affSColin Finck ERR("out of memory\n");
2159eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2160eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2161c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2162c2c66affSColin Finck goto exit;
2163c2c66affSColin Finck }
2164c2c66affSColin Finck
2165318da0c1SPierre Schweitzer RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2166c2c66affSColin Finck
2167c2c66affSColin Finck pos = 0;
2168c2c66affSColin Finck
2169c2c66affSColin Finck while (pos < length) {
2170c2c66affSColin Finck PFN_NUMBER* stripe_pfns;
2171c2c66affSColin Finck
2172c2c66affSColin Finck parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2173c2c66affSColin Finck
2174c2c66affSColin Finck if (pos == 0) {
2175318da0c1SPierre Schweitzer uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2176318da0c1SPierre Schweitzer uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2177c2c66affSColin Finck ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2178c2c66affSColin Finck
2179c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2180c2c66affSColin Finck
2181c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2182c2c66affSColin Finck
2183c2c66affSColin Finck stripeoff[stripe] = readlen;
2184c2c66affSColin Finck pos += readlen;
2185c2c66affSColin Finck
2186c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2187c2c66affSColin Finck
2188c2c66affSColin Finck while (stripe != parity) {
2189c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2190318da0c1SPierre Schweitzer readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2191c2c66affSColin Finck
2192c2c66affSColin Finck if (readlen == 0)
2193c2c66affSColin Finck break;
2194c2c66affSColin Finck
2195c2c66affSColin Finck RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2196c2c66affSColin Finck
2197c2c66affSColin Finck stripeoff[stripe] = readlen;
2198c2c66affSColin Finck pos += readlen;
2199c2c66affSColin Finck
2200c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2201c2c66affSColin Finck }
2202c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2203318da0c1SPierre Schweitzer uint16_t stripe = (parity + 1) % ci->num_stripes;
2204c2c66affSColin Finck ULONG k;
2205c2c66affSColin Finck
2206c2c66affSColin Finck while (stripe != parity) {
2207c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2208c2c66affSColin Finck
2209c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2210c2c66affSColin Finck
2211318da0c1SPierre Schweitzer stripeoff[stripe] += (uint32_t)ci->stripe_length;
2212318da0c1SPierre Schweitzer pos += (uint32_t)ci->stripe_length;
2213c2c66affSColin Finck
2214c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2215c2c66affSColin Finck }
2216c2c66affSColin Finck
2217c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2218c2c66affSColin Finck
2219c2c66affSColin Finck for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2220c2c66affSColin Finck stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2221c2c66affSColin Finck stripeoff[parity] += PAGE_SIZE;
2222c2c66affSColin Finck }
2223c2c66affSColin Finck } else {
2224318da0c1SPierre Schweitzer uint16_t stripe = (parity + 1) % ci->num_stripes;
2225318da0c1SPierre Schweitzer uint32_t readlen;
2226c2c66affSColin Finck
2227c2c66affSColin Finck while (pos < length) {
2228c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2229c2c66affSColin Finck readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2230c2c66affSColin Finck
2231c2c66affSColin Finck if (readlen == 0)
2232c2c66affSColin Finck break;
2233c2c66affSColin Finck
2234c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2235c2c66affSColin Finck
2236c2c66affSColin Finck stripeoff[stripe] += readlen;
2237c2c66affSColin Finck pos += readlen;
2238c2c66affSColin Finck
2239c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2240c2c66affSColin Finck }
2241c2c66affSColin Finck }
2242c2c66affSColin Finck }
2243c2c66affSColin Finck
2244c2c66affSColin Finck MmUnlockPages(master_mdl);
2245c2c66affSColin Finck IoFreeMdl(master_mdl);
2246c2c66affSColin Finck
2247c2c66affSColin Finck ExFreePool(stripeoff);
2248c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID6) {
2249318da0c1SPierre Schweitzer uint64_t startoff, endoff;
2250318da0c1SPierre Schweitzer uint16_t endoffstripe, parity1;
2251318da0c1SPierre Schweitzer uint32_t *stripeoff, pos;
2252c2c66affSColin Finck PMDL master_mdl;
225306042735SVincent Franchomme PFN_NUMBER *pfns, dummy = 0;
2254318da0c1SPierre Schweitzer bool need_dummy = false;
2255c2c66affSColin Finck
2256c2c66affSColin Finck get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2257c2c66affSColin Finck get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2258c2c66affSColin Finck
2259c2c66affSColin Finck if (file_read) {
2260c2c66affSColin Finck context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2261c2c66affSColin Finck
2262c2c66affSColin Finck if (!context.va) {
2263c2c66affSColin Finck ERR("out of memory\n");
2264c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2265c2c66affSColin Finck goto exit;
2266c2c66affSColin Finck }
2267c2c66affSColin Finck } else
2268c2c66affSColin Finck context.va = buf;
2269c2c66affSColin Finck
2270318da0c1SPierre Schweitzer master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2271c2c66affSColin Finck if (!master_mdl) {
2272c2c66affSColin Finck ERR("out of memory\n");
2273c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2274c2c66affSColin Finck goto exit;
2275c2c66affSColin Finck }
2276c2c66affSColin Finck
2277c2c66affSColin Finck Status = STATUS_SUCCESS;
2278c2c66affSColin Finck
2279c2c66affSColin Finck _SEH2_TRY {
2280c2c66affSColin Finck MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)2281c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2282c2c66affSColin Finck Status = _SEH2_GetExceptionCode();
2283c2c66affSColin Finck } _SEH2_END;
2284c2c66affSColin Finck
2285c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2286194ea909SVictor Perevertkin ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2287c2c66affSColin Finck IoFreeMdl(master_mdl);
2288c2c66affSColin Finck goto exit;
2289c2c66affSColin Finck }
2290c2c66affSColin Finck
2291c2c66affSColin Finck pfns = (PFN_NUMBER*)(master_mdl + 1);
2292c2c66affSColin Finck
2293c2c66affSColin Finck pos = 0;
2294c2c66affSColin Finck while (pos < length) {
2295c2c66affSColin Finck parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2296c2c66affSColin Finck
2297c2c66affSColin Finck if (pos == 0) {
2298318da0c1SPierre Schweitzer uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2299c2c66affSColin Finck ULONG skip, readlen;
2300c2c66affSColin Finck
2301c2c66affSColin Finck i = startoffstripe;
2302c2c66affSColin Finck while (stripe != parity1) {
2303c2c66affSColin Finck if (i == startoffstripe) {
2304c2c66affSColin Finck readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2305c2c66affSColin Finck
2306c2c66affSColin Finck context.stripes[stripe].stripestart = startoff;
2307c2c66affSColin Finck context.stripes[stripe].stripeend = startoff + readlen;
2308c2c66affSColin Finck
2309c2c66affSColin Finck pos += readlen;
2310c2c66affSColin Finck
2311c2c66affSColin Finck if (pos == length)
2312c2c66affSColin Finck break;
2313c2c66affSColin Finck } else {
2314c2c66affSColin Finck readlen = min(length - pos, (ULONG)ci->stripe_length);
2315c2c66affSColin Finck
2316c2c66affSColin Finck context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2317c2c66affSColin Finck context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2318c2c66affSColin Finck
2319c2c66affSColin Finck pos += readlen;
2320c2c66affSColin Finck
2321c2c66affSColin Finck if (pos == length)
2322c2c66affSColin Finck break;
2323c2c66affSColin Finck }
2324c2c66affSColin Finck
2325c2c66affSColin Finck i++;
2326c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2327c2c66affSColin Finck }
2328c2c66affSColin Finck
2329c2c66affSColin Finck if (pos == length)
2330c2c66affSColin Finck break;
2331c2c66affSColin Finck
2332c2c66affSColin Finck for (i = 0; i < startoffstripe; i++) {
2333318da0c1SPierre Schweitzer uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2334c2c66affSColin Finck
2335c2c66affSColin Finck context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2336c2c66affSColin Finck }
2337c2c66affSColin Finck
2338c2c66affSColin Finck context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2339c2c66affSColin Finck
2340c2c66affSColin Finck parity2 = (parity1 + 1) % ci->num_stripes;
2341c2c66affSColin Finck context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2342c2c66affSColin Finck
2343c2c66affSColin Finck if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2344c2c66affSColin Finck skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2345c2c66affSColin Finck
2346c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2347c2c66affSColin Finck context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2348c2c66affSColin Finck }
2349c2c66affSColin Finck
2350318da0c1SPierre Schweitzer pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2351318da0c1SPierre Schweitzer need_dummy = true;
2352c2c66affSColin Finck }
2353c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2354c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2355c2c66affSColin Finck context.stripes[i].stripeend += ci->stripe_length;
2356c2c66affSColin Finck }
2357c2c66affSColin Finck
2358318da0c1SPierre Schweitzer pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2359318da0c1SPierre Schweitzer need_dummy = true;
2360c2c66affSColin Finck } else {
2361318da0c1SPierre Schweitzer uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2362c2c66affSColin Finck
2363c2c66affSColin Finck i = 0;
2364c2c66affSColin Finck while (stripe != parity1) {
2365c2c66affSColin Finck if (endoffstripe == i) {
2366c2c66affSColin Finck context.stripes[stripe].stripeend = endoff + 1;
2367c2c66affSColin Finck break;
2368c2c66affSColin Finck } else if (endoffstripe > i)
2369c2c66affSColin Finck context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2370c2c66affSColin Finck
2371c2c66affSColin Finck i++;
2372c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2373c2c66affSColin Finck }
2374c2c66affSColin Finck
2375c2c66affSColin Finck break;
2376c2c66affSColin Finck }
2377c2c66affSColin Finck }
2378c2c66affSColin Finck
2379c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2380c2c66affSColin Finck if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2381318da0c1SPierre Schweitzer context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2382c2c66affSColin Finck
2383c2c66affSColin Finck if (!context.stripes[i].mdl) {
2384c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
2385eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2386eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2387c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2388c2c66affSColin Finck goto exit;
2389c2c66affSColin Finck }
2390c2c66affSColin Finck }
2391c2c66affSColin Finck }
2392c2c66affSColin Finck
2393c2c66affSColin Finck if (need_dummy) {
2394c2c66affSColin Finck dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2395c2c66affSColin Finck if (!dummypage) {
2396c2c66affSColin Finck ERR("out of memory\n");
2397eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2398eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2399c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2400c2c66affSColin Finck goto exit;
2401c2c66affSColin Finck }
2402c2c66affSColin Finck
2403318da0c1SPierre Schweitzer dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2404c2c66affSColin Finck if (!dummy_mdl) {
2405c2c66affSColin Finck ERR("IoAllocateMdl failed\n");
2406eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2407eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2408c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2409c2c66affSColin Finck goto exit;
2410c2c66affSColin Finck }
2411c2c66affSColin Finck
2412c2c66affSColin Finck MmBuildMdlForNonPagedPool(dummy_mdl);
2413c2c66affSColin Finck
2414c2c66affSColin Finck dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2415c2c66affSColin Finck }
2416c2c66affSColin Finck
2417318da0c1SPierre Schweitzer stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2418c2c66affSColin Finck if (!stripeoff) {
2419c2c66affSColin Finck ERR("out of memory\n");
2420eb7fbc25SPierre Schweitzer MmUnlockPages(master_mdl);
2421eb7fbc25SPierre Schweitzer IoFreeMdl(master_mdl);
2422c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2423c2c66affSColin Finck goto exit;
2424c2c66affSColin Finck }
2425c2c66affSColin Finck
2426318da0c1SPierre Schweitzer RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2427c2c66affSColin Finck
2428c2c66affSColin Finck pos = 0;
2429c2c66affSColin Finck
2430c2c66affSColin Finck while (pos < length) {
2431c2c66affSColin Finck PFN_NUMBER* stripe_pfns;
2432c2c66affSColin Finck
2433c2c66affSColin Finck parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2434c2c66affSColin Finck
2435c2c66affSColin Finck if (pos == 0) {
2436318da0c1SPierre Schweitzer uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2437318da0c1SPierre Schweitzer uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2438c2c66affSColin Finck ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2439c2c66affSColin Finck
2440c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2441c2c66affSColin Finck
2442c2c66affSColin Finck RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2443c2c66affSColin Finck
2444c2c66affSColin Finck stripeoff[stripe] = readlen;
2445c2c66affSColin Finck pos += readlen;
2446c2c66affSColin Finck
2447c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2448c2c66affSColin Finck
2449c2c66affSColin Finck while (stripe != parity1) {
2450c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2451318da0c1SPierre Schweitzer readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2452c2c66affSColin Finck
2453c2c66affSColin Finck if (readlen == 0)
2454c2c66affSColin Finck break;
2455c2c66affSColin Finck
2456c2c66affSColin Finck RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2457c2c66affSColin Finck
2458c2c66affSColin Finck stripeoff[stripe] = readlen;
2459c2c66affSColin Finck pos += readlen;
2460c2c66affSColin Finck
2461c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2462c2c66affSColin Finck }
2463c2c66affSColin Finck } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2464318da0c1SPierre Schweitzer uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2465318da0c1SPierre Schweitzer uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2466c2c66affSColin Finck ULONG k;
2467c2c66affSColin Finck
2468c2c66affSColin Finck while (stripe != parity1) {
2469c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2470c2c66affSColin Finck
2471c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2472c2c66affSColin Finck
2473318da0c1SPierre Schweitzer stripeoff[stripe] += (uint32_t)ci->stripe_length;
2474318da0c1SPierre Schweitzer pos += (uint32_t)ci->stripe_length;
2475c2c66affSColin Finck
2476c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2477c2c66affSColin Finck }
2478c2c66affSColin Finck
2479c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2480c2c66affSColin Finck
2481c2c66affSColin Finck for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2482c2c66affSColin Finck stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2483c2c66affSColin Finck stripeoff[parity1] += PAGE_SIZE;
2484c2c66affSColin Finck }
2485c2c66affSColin Finck
2486c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2487c2c66affSColin Finck
2488c2c66affSColin Finck for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2489c2c66affSColin Finck stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2490c2c66affSColin Finck stripeoff[parity2] += PAGE_SIZE;
2491c2c66affSColin Finck }
2492c2c66affSColin Finck } else {
2493318da0c1SPierre Schweitzer uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2494318da0c1SPierre Schweitzer uint32_t readlen;
2495c2c66affSColin Finck
2496c2c66affSColin Finck while (pos < length) {
2497c2c66affSColin Finck stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2498318da0c1SPierre Schweitzer readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2499c2c66affSColin Finck
2500c2c66affSColin Finck if (readlen == 0)
2501c2c66affSColin Finck break;
2502c2c66affSColin Finck
2503c2c66affSColin Finck RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2504c2c66affSColin Finck
2505c2c66affSColin Finck stripeoff[stripe] += readlen;
2506c2c66affSColin Finck pos += readlen;
2507c2c66affSColin Finck
2508c2c66affSColin Finck stripe = (stripe + 1) % ci->num_stripes;
2509c2c66affSColin Finck }
2510c2c66affSColin Finck }
2511c2c66affSColin Finck }
2512c2c66affSColin Finck
2513c2c66affSColin Finck MmUnlockPages(master_mdl);
2514c2c66affSColin Finck IoFreeMdl(master_mdl);
2515c2c66affSColin Finck
2516c2c66affSColin Finck ExFreePool(stripeoff);
2517c2c66affSColin Finck }
2518c2c66affSColin Finck
2519c2c66affSColin Finck context.address = addr;
2520c2c66affSColin Finck
2521c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2522c2c66affSColin Finck if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2523c2c66affSColin Finck context.stripes[i].status = ReadDataStatus_MissingDevice;
2524c2c66affSColin Finck context.stripes_left--;
2525c2c66affSColin Finck
2526c2c66affSColin Finck if (!devices[i] || !devices[i]->devobj)
2527c2c66affSColin Finck missing_devices++;
2528c2c66affSColin Finck }
2529c2c66affSColin Finck }
2530c2c66affSColin Finck
2531c2c66affSColin Finck if (missing_devices > allowed_missing) {
2532c2c66affSColin Finck ERR("not enough devices to service request (%u missing)\n", missing_devices);
2533c2c66affSColin Finck Status = STATUS_UNEXPECTED_IO_ERROR;
2534c2c66affSColin Finck goto exit;
2535c2c66affSColin Finck }
2536c2c66affSColin Finck
2537c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2538c2c66affSColin Finck PIO_STACK_LOCATION IrpSp;
2539c2c66affSColin Finck
2540c2c66affSColin Finck if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2541c2c66affSColin Finck context.stripes[i].context = (struct read_data_context*)&context;
2542c2c66affSColin Finck
2543c2c66affSColin Finck if (type == BLOCK_FLAG_RAID10) {
2544c2c66affSColin Finck context.stripes[i].stripenum = i / ci->sub_stripes;
2545c2c66affSColin Finck }
2546c2c66affSColin Finck
2547c2c66affSColin Finck if (!Irp) {
2548318da0c1SPierre Schweitzer context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2549c2c66affSColin Finck
2550c2c66affSColin Finck if (!context.stripes[i].Irp) {
2551c2c66affSColin Finck ERR("IoAllocateIrp failed\n");
2552c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2553c2c66affSColin Finck goto exit;
2554c2c66affSColin Finck }
2555c2c66affSColin Finck } else {
2556c2c66affSColin Finck context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2557c2c66affSColin Finck
2558c2c66affSColin Finck if (!context.stripes[i].Irp) {
2559c2c66affSColin Finck ERR("IoMakeAssociatedIrp failed\n");
2560c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2561c2c66affSColin Finck goto exit;
2562c2c66affSColin Finck }
2563c2c66affSColin Finck }
2564c2c66affSColin Finck
2565c2c66affSColin Finck IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2566c2c66affSColin Finck IrpSp->MajorFunction = IRP_MJ_READ;
2567318da0c1SPierre Schweitzer IrpSp->MinorFunction = IRP_MN_NORMAL;
2568318da0c1SPierre Schweitzer IrpSp->FileObject = devices[i]->fileobj;
2569c2c66affSColin Finck
2570c2c66affSColin Finck if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2571c2c66affSColin Finck context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2572c2c66affSColin Finck if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2573c2c66affSColin Finck ERR("out of memory\n");
2574c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2575c2c66affSColin Finck goto exit;
2576c2c66affSColin Finck }
2577c2c66affSColin Finck
2578c2c66affSColin Finck context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2579c2c66affSColin Finck
2580c2c66affSColin Finck context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2581c2c66affSColin Finck } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2582c2c66affSColin Finck context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2583c2c66affSColin Finck else
2584c2c66affSColin Finck context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2585c2c66affSColin Finck
2586c2c66affSColin Finck IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2587c2c66affSColin Finck IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2588c2c66affSColin Finck
2589c2c66affSColin Finck total_reading += IrpSp->Parameters.Read.Length;
2590c2c66affSColin Finck
2591c2c66affSColin Finck context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2592c2c66affSColin Finck
2593318da0c1SPierre Schweitzer IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2594c2c66affSColin Finck
2595c2c66affSColin Finck context.stripes[i].status = ReadDataStatus_Pending;
2596c2c66affSColin Finck }
2597c2c66affSColin Finck }
2598c2c66affSColin Finck
2599318da0c1SPierre Schweitzer need_to_wait = false;
2600c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2601c2c66affSColin Finck if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2602c2c66affSColin Finck IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2603318da0c1SPierre Schweitzer need_to_wait = true;
2604c2c66affSColin Finck }
2605c2c66affSColin Finck }
2606c2c66affSColin Finck
2607c2c66affSColin Finck if (need_to_wait)
2608318da0c1SPierre Schweitzer KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2609c2c66affSColin Finck
2610c2c66affSColin Finck if (diskacc)
2611c2c66affSColin Finck fFsRtlUpdateDiskCounters(total_reading, 0);
2612c2c66affSColin Finck
2613c2c66affSColin Finck // check if any of the devices return a "user-induced" error
2614c2c66affSColin Finck
2615c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2616c2c66affSColin Finck if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2617c2c66affSColin Finck Status = context.stripes[i].iosb.Status;
2618c2c66affSColin Finck goto exit;
2619c2c66affSColin Finck }
2620c2c66affSColin Finck }
2621c2c66affSColin Finck
2622c2c66affSColin Finck if (type == BLOCK_FLAG_RAID0) {
2623c2c66affSColin Finck Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2624c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2625194ea909SVictor Perevertkin ERR("read_data_raid0 returned %08lx\n", Status);
2626c2c66affSColin Finck
2627c2c66affSColin Finck if (file_read)
2628c2c66affSColin Finck ExFreePool(context.va);
2629c2c66affSColin Finck
2630c2c66affSColin Finck goto exit;
2631c2c66affSColin Finck }
2632c2c66affSColin Finck
2633c2c66affSColin Finck if (file_read) {
2634c2c66affSColin Finck RtlCopyMemory(buf, context.va, length);
2635c2c66affSColin Finck ExFreePool(context.va);
2636c2c66affSColin Finck }
2637c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID10) {
2638c2c66affSColin Finck Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2639c2c66affSColin Finck
2640c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2641194ea909SVictor Perevertkin ERR("read_data_raid10 returned %08lx\n", Status);
2642c2c66affSColin Finck
2643c2c66affSColin Finck if (file_read)
2644c2c66affSColin Finck ExFreePool(context.va);
2645c2c66affSColin Finck
2646c2c66affSColin Finck goto exit;
2647c2c66affSColin Finck }
2648c2c66affSColin Finck
2649c2c66affSColin Finck if (file_read) {
2650c2c66affSColin Finck RtlCopyMemory(buf, context.va, length);
2651c2c66affSColin Finck ExFreePool(context.va);
2652c2c66affSColin Finck }
2653c2c66affSColin Finck } else if (type == BLOCK_FLAG_DUPLICATE) {
2654c2c66affSColin Finck Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2655c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2656194ea909SVictor Perevertkin ERR("read_data_dup returned %08lx\n", Status);
2657c2c66affSColin Finck
2658c2c66affSColin Finck if (file_read)
2659c2c66affSColin Finck ExFreePool(context.va);
2660c2c66affSColin Finck
2661c2c66affSColin Finck goto exit;
2662c2c66affSColin Finck }
2663c2c66affSColin Finck
2664c2c66affSColin Finck if (file_read) {
2665c2c66affSColin Finck RtlCopyMemory(buf, context.va, length);
2666c2c66affSColin Finck ExFreePool(context.va);
2667c2c66affSColin Finck }
2668c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID5) {
2669318da0c1SPierre Schweitzer Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2670c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2671194ea909SVictor Perevertkin ERR("read_data_raid5 returned %08lx\n", Status);
2672c2c66affSColin Finck
2673c2c66affSColin Finck if (file_read)
2674c2c66affSColin Finck ExFreePool(context.va);
2675c2c66affSColin Finck
2676c2c66affSColin Finck goto exit;
2677c2c66affSColin Finck }
2678c2c66affSColin Finck
2679c2c66affSColin Finck if (file_read) {
2680c2c66affSColin Finck RtlCopyMemory(buf, context.va, length);
2681c2c66affSColin Finck ExFreePool(context.va);
2682c2c66affSColin Finck }
2683c2c66affSColin Finck } else if (type == BLOCK_FLAG_RAID6) {
2684318da0c1SPierre Schweitzer Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2685c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2686194ea909SVictor Perevertkin ERR("read_data_raid6 returned %08lx\n", Status);
2687c2c66affSColin Finck
2688c2c66affSColin Finck if (file_read)
2689c2c66affSColin Finck ExFreePool(context.va);
2690c2c66affSColin Finck
2691c2c66affSColin Finck goto exit;
2692c2c66affSColin Finck }
2693c2c66affSColin Finck
2694c2c66affSColin Finck if (file_read) {
2695c2c66affSColin Finck RtlCopyMemory(buf, context.va, length);
2696c2c66affSColin Finck ExFreePool(context.va);
2697c2c66affSColin Finck }
2698c2c66affSColin Finck }
2699c2c66affSColin Finck
2700c2c66affSColin Finck exit:
2701c2c66affSColin Finck if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2702c2c66affSColin Finck chunk_unlock_range(Vcb, c, lockaddr, locklen);
2703c2c66affSColin Finck
2704c2c66affSColin Finck if (dummy_mdl)
2705c2c66affSColin Finck IoFreeMdl(dummy_mdl);
2706c2c66affSColin Finck
2707c2c66affSColin Finck if (dummypage)
2708c2c66affSColin Finck ExFreePool(dummypage);
2709c2c66affSColin Finck
2710c2c66affSColin Finck for (i = 0; i < ci->num_stripes; i++) {
2711c2c66affSColin Finck if (context.stripes[i].mdl) {
2712c2c66affSColin Finck if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2713c2c66affSColin Finck MmUnlockPages(context.stripes[i].mdl);
2714c2c66affSColin Finck
2715c2c66affSColin Finck IoFreeMdl(context.stripes[i].mdl);
2716c2c66affSColin Finck }
2717c2c66affSColin Finck
2718c2c66affSColin Finck if (context.stripes[i].Irp)
2719c2c66affSColin Finck IoFreeIrp(context.stripes[i].Irp);
2720c2c66affSColin Finck }
2721c2c66affSColin Finck
2722c2c66affSColin Finck ExFreePool(context.stripes);
2723c2c66affSColin Finck
2724c2c66affSColin Finck if (!Vcb->log_to_phys_loaded)
2725c2c66affSColin Finck ExFreePool(devices);
2726c2c66affSColin Finck
2727c2c66affSColin Finck return Status;
2728c2c66affSColin Finck }
2729c2c66affSColin Finck
2730174dfab6SVincent Franchomme __attribute__((nonnull(1, 2)))
read_stream(fcb * fcb,uint8_t * data,uint64_t start,ULONG length,ULONG * pbr)2731318da0c1SPierre Schweitzer NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
2732c2c66affSColin Finck ULONG readlen;
2733c2c66affSColin Finck
2734194ea909SVictor Perevertkin TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr);
2735c2c66affSColin Finck
2736c2c66affSColin Finck if (pbr) *pbr = 0;
2737c2c66affSColin Finck
2738c2c66affSColin Finck if (start >= fcb->adsdata.Length) {
2739c2c66affSColin Finck TRACE("tried to read beyond end of stream\n");
2740c2c66affSColin Finck return STATUS_END_OF_FILE;
2741c2c66affSColin Finck }
2742c2c66affSColin Finck
2743c2c66affSColin Finck if (length == 0) {
2744c2c66affSColin Finck WARN("tried to read zero bytes\n");
2745c2c66affSColin Finck return STATUS_SUCCESS;
2746c2c66affSColin Finck }
2747c2c66affSColin Finck
2748c2c66affSColin Finck if (start + length < fcb->adsdata.Length)
2749c2c66affSColin Finck readlen = length;
2750c2c66affSColin Finck else
2751c2c66affSColin Finck readlen = fcb->adsdata.Length - (ULONG)start;
2752c2c66affSColin Finck
2753c2c66affSColin Finck if (readlen > 0)
2754194ea909SVictor Perevertkin RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);
2755c2c66affSColin Finck
2756c2c66affSColin Finck if (pbr) *pbr = readlen;
2757c2c66affSColin Finck
2758c2c66affSColin Finck return STATUS_SUCCESS;
2759c2c66affSColin Finck }
2760c2c66affSColin Finck
/* One entry of the extents[] array embedded in read_part.
 * NOTE(review): the ed_* names suggest these are copied from an on-disk
 * extent item's size / offset / num_bytes fields - confirm against the code
 * that fills them in. */
typedef struct {
    uint64_t off,           // presumably an offset within the read - TODO confirm
             ed_size,
             ed_offset,
             ed_num_bytes;
} read_part_extent;
2767194ea909SVictor Perevertkin
2768194ea909SVictor Perevertkin typedef struct {
2769194ea909SVictor Perevertkin LIST_ENTRY list_entry;
2770194ea909SVictor Perevertkin uint64_t addr;
2771194ea909SVictor Perevertkin chunk* c;
2772194ea909SVictor Perevertkin uint32_t read;
2773194ea909SVictor Perevertkin uint32_t to_read;
2774194ea909SVictor Perevertkin void* csum;
2775194ea909SVictor Perevertkin bool csum_free;
2776194ea909SVictor Perevertkin uint8_t* buf;
2777194ea909SVictor Perevertkin bool buf_free;
2778194ea909SVictor Perevertkin uint32_t bumpoff;
2779194ea909SVictor Perevertkin bool mdl;
2780194ea909SVictor Perevertkin void* data;
2781194ea909SVictor Perevertkin uint8_t compression;
2782194ea909SVictor Perevertkin unsigned int num_extents;
2783194ea909SVictor Perevertkin read_part_extent extents[1];
2784194ea909SVictor Perevertkin } read_part;
2785194ea909SVictor Perevertkin
2786194ea909SVictor Perevertkin typedef struct {
2787194ea909SVictor Perevertkin LIST_ENTRY list_entry;
2788194ea909SVictor Perevertkin calc_job* cj;
2789194ea909SVictor Perevertkin void* decomp;
2790194ea909SVictor Perevertkin void* data;
2791194ea909SVictor Perevertkin unsigned int offset;
2792194ea909SVictor Perevertkin size_t length;
2793194ea909SVictor Perevertkin } comp_calc_job;
2794194ea909SVictor Perevertkin
2795174dfab6SVincent Franchomme __attribute__((nonnull(1, 2)))
read_file(fcb * fcb,uint8_t * data,uint64_t start,uint64_t length,ULONG * pbr,PIRP Irp)2796318da0c1SPierre Schweitzer NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
2797c2c66affSColin Finck NTSTATUS Status;
2798318da0c1SPierre Schweitzer uint32_t bytes_read = 0;
2799318da0c1SPierre Schweitzer uint64_t last_end;
2800c2c66affSColin Finck LIST_ENTRY* le;
2801318da0c1SPierre Schweitzer POOL_TYPE pool_type;
2802194ea909SVictor Perevertkin LIST_ENTRY read_parts, calc_jobs;
2803c2c66affSColin Finck
2804318da0c1SPierre Schweitzer TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2805c2c66affSColin Finck
2806c2c66affSColin Finck if (pbr)
2807c2c66affSColin Finck *pbr = 0;
2808c2c66affSColin Finck
2809c2c66affSColin Finck if (start >= fcb->inode_item.st_size) {
2810c2c66affSColin Finck WARN("Tried to read beyond end of file\n");
2811194ea909SVictor Perevertkin return STATUS_END_OF_FILE;
2812c2c66affSColin Finck }
2813c2c66affSColin Finck
2814194ea909SVictor Perevertkin InitializeListHead(&read_parts);
2815194ea909SVictor Perevertkin InitializeListHead(&calc_jobs);
2816194ea909SVictor Perevertkin
2817318da0c1SPierre Schweitzer pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2818c2c66affSColin Finck
2819c2c66affSColin Finck le = fcb->extents.Flink;
2820c2c66affSColin Finck
2821c2c66affSColin Finck last_end = start;
2822c2c66affSColin Finck
2823c2c66affSColin Finck while (le != &fcb->extents) {
2824c2c66affSColin Finck extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2825c2c66affSColin Finck
2826c2c66affSColin Finck if (!ext->ignore) {
2827194ea909SVictor Perevertkin EXTENT_DATA* ed = &ext->extent_data;
2828174dfab6SVincent Franchomme uint64_t len;
2829c2c66affSColin Finck
2830174dfab6SVincent Franchomme if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC)
2831174dfab6SVincent Franchomme len = ((EXTENT_DATA2*)ed->data)->num_bytes;
2832174dfab6SVincent Franchomme else
2833174dfab6SVincent Franchomme len = ed->decoded_size;
2834c2c66affSColin Finck
2835c2c66affSColin Finck if (ext->offset + len <= start) {
2836c2c66affSColin Finck last_end = ext->offset + len;
2837c2c66affSColin Finck goto nextitem;
2838c2c66affSColin Finck }
2839c2c66affSColin Finck
2840c2c66affSColin Finck if (ext->offset > last_end && ext->offset > start + bytes_read) {
2841318da0c1SPierre Schweitzer uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2842c2c66affSColin Finck
2843c2c66affSColin Finck RtlZeroMemory(data + bytes_read, read);
2844c2c66affSColin Finck bytes_read += read;
2845c2c66affSColin Finck length -= read;
2846c2c66affSColin Finck }
2847c2c66affSColin Finck
2848c2c66affSColin Finck if (length == 0 || ext->offset > start + bytes_read + length)
2849c2c66affSColin Finck break;
2850c2c66affSColin Finck
2851c2c66affSColin Finck if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2852c2c66affSColin Finck WARN("Encryption not supported\n");
2853c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED;
2854c2c66affSColin Finck goto exit;
2855c2c66affSColin Finck }
2856c2c66affSColin Finck
2857c2c66affSColin Finck if (ed->encoding != BTRFS_ENCODING_NONE) {
2858c2c66affSColin Finck WARN("Other encodings not supported\n");
2859c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED;
2860c2c66affSColin Finck goto exit;
2861c2c66affSColin Finck }
2862c2c66affSColin Finck
2863c2c66affSColin Finck switch (ed->type) {
2864c2c66affSColin Finck case EXTENT_TYPE_INLINE:
2865c2c66affSColin Finck {
2866318da0c1SPierre Schweitzer uint64_t off = start + bytes_read - ext->offset;
2867318da0c1SPierre Schweitzer uint32_t read;
2868c2c66affSColin Finck
2869c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_NONE) {
2870318da0c1SPierre Schweitzer read = (uint32_t)min(min(len, ext->datalen) - off, length);
2871c2c66affSColin Finck
2872c2c66affSColin Finck RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2873eb7fbc25SPierre Schweitzer } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
2874318da0c1SPierre Schweitzer uint8_t* decomp;
2875318da0c1SPierre Schweitzer bool decomp_alloc;
2876318da0c1SPierre Schweitzer uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2877c2c66affSColin Finck
2878c2c66affSColin Finck if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2879318da0c1SPierre Schweitzer ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2880c2c66affSColin Finck Status = STATUS_INTERNAL_ERROR;
2881c2c66affSColin Finck goto exit;
2882c2c66affSColin Finck }
2883c2c66affSColin Finck
2884318da0c1SPierre Schweitzer read = (uint32_t)min(ed->decoded_size - off, length);
2885c2c66affSColin Finck
2886c2c66affSColin Finck if (off > 0) {
2887318da0c1SPierre Schweitzer decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG);
2888c2c66affSColin Finck if (!decomp) {
2889c2c66affSColin Finck ERR("out of memory\n");
2890c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2891c2c66affSColin Finck goto exit;
2892c2c66affSColin Finck }
2893c2c66affSColin Finck
2894318da0c1SPierre Schweitzer decomp_alloc = true;
2895c2c66affSColin Finck } else {
2896c2c66affSColin Finck decomp = data + bytes_read;
2897318da0c1SPierre Schweitzer decomp_alloc = false;
2898c2c66affSColin Finck }
2899c2c66affSColin Finck
2900c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2901318da0c1SPierre Schweitzer Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2902c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2903194ea909SVictor Perevertkin ERR("zlib_decompress returned %08lx\n", Status);
2904c2c66affSColin Finck if (decomp_alloc) ExFreePool(decomp);
2905c2c66affSColin Finck goto exit;
2906c2c66affSColin Finck }
2907c2c66affSColin Finck } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2908318da0c1SPierre Schweitzer if (inlen < sizeof(uint32_t)) {
2909c2c66affSColin Finck ERR("extent data was truncated\n");
2910c2c66affSColin Finck Status = STATUS_INTERNAL_ERROR;
2911c2c66affSColin Finck if (decomp_alloc) ExFreePool(decomp);
2912c2c66affSColin Finck goto exit;
2913c2c66affSColin Finck } else
2914318da0c1SPierre Schweitzer inlen -= sizeof(uint32_t);
2915c2c66affSColin Finck
2916318da0c1SPierre Schweitzer Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2917c2c66affSColin Finck if (!NT_SUCCESS(Status)) {
2918194ea909SVictor Perevertkin ERR("lzo_decompress returned %08lx\n", Status);
2919c2c66affSColin Finck if (decomp_alloc) ExFreePool(decomp);
2920c2c66affSColin Finck goto exit;
2921c2c66affSColin Finck }
2922eb7fbc25SPierre Schweitzer } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2923318da0c1SPierre Schweitzer Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2924eb7fbc25SPierre Schweitzer if (!NT_SUCCESS(Status)) {
2925194ea909SVictor Perevertkin ERR("zstd_decompress returned %08lx\n", Status);
2926eb7fbc25SPierre Schweitzer if (decomp_alloc) ExFreePool(decomp);
2927eb7fbc25SPierre Schweitzer goto exit;
2928eb7fbc25SPierre Schweitzer }
2929c2c66affSColin Finck }
2930c2c66affSColin Finck
2931c2c66affSColin Finck if (decomp_alloc) {
2932c2c66affSColin Finck RtlCopyMemory(data + bytes_read, decomp + off, read);
2933c2c66affSColin Finck ExFreePool(decomp);
2934c2c66affSColin Finck }
2935c2c66affSColin Finck } else {
2936c2c66affSColin Finck ERR("unhandled compression type %x\n", ed->compression);
2937c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED;
2938c2c66affSColin Finck goto exit;
2939c2c66affSColin Finck }
2940c2c66affSColin Finck
2941c2c66affSColin Finck bytes_read += read;
2942c2c66affSColin Finck length -= read;
2943c2c66affSColin Finck
2944c2c66affSColin Finck break;
2945c2c66affSColin Finck }
2946c2c66affSColin Finck
2947c2c66affSColin Finck case EXTENT_TYPE_REGULAR:
2948c2c66affSColin Finck {
2949174dfab6SVincent Franchomme EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
2950194ea909SVictor Perevertkin read_part* rp;
2951c2c66affSColin Finck
2952194ea909SVictor Perevertkin rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG);
2953194ea909SVictor Perevertkin if (!rp) {
2954c2c66affSColin Finck ERR("out of memory\n");
2955c2c66affSColin Finck Status = STATUS_INSUFFICIENT_RESOURCES;
2956c2c66affSColin Finck goto exit;
2957c2c66affSColin Finck }
2958c2c66affSColin Finck
2959194ea909SVictor Perevertkin rp->mdl = (Irp && Irp->MdlAddress) ? true : false;
2960194ea909SVictor Perevertkin rp->extents[0].off = start + bytes_read - ext->offset;
2961194ea909SVictor Perevertkin rp->bumpoff = 0;
2962194ea909SVictor Perevertkin rp->num_extents = 1;
2963194ea909SVictor Perevertkin rp->csum_free = false;
2964194ea909SVictor Perevertkin
2965194ea909SVictor Perevertkin rp->read = (uint32_t)(len - rp->extents[0].off);
2966194ea909SVictor Perevertkin if (rp->read > length) rp->read = (uint32_t)length;
2967194ea909SVictor Perevertkin
2968194ea909SVictor Perevertkin if (ed->compression == BTRFS_COMPRESSION_NONE) {
2969194ea909SVictor Perevertkin rp->addr = ed2->address + ed2->offset + rp->extents[0].off;
2970194ea909SVictor Perevertkin rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size);
2971194ea909SVictor Perevertkin
2972174dfab6SVincent Franchomme if (rp->addr & (fcb->Vcb->superblock.sector_size - 1)) {
2973174dfab6SVincent Franchomme rp->bumpoff = rp->addr & (fcb->Vcb->superblock.sector_size - 1);
2974194ea909SVictor Perevertkin rp->addr -= rp->bumpoff;
2975194ea909SVictor Perevertkin rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size);
2976194ea909SVictor Perevertkin }
2977194ea909SVictor Perevertkin } else {
2978194ea909SVictor Perevertkin rp->addr = ed2->address;
2979194ea909SVictor Perevertkin rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2980c2c66affSColin Finck }
2981c2c66affSColin Finck
2982174dfab6SVincent Franchomme if (ed->compression == BTRFS_COMPRESSION_NONE && (start & (fcb->Vcb->superblock.sector_size - 1)) == 0 &&
2983174dfab6SVincent Franchomme (length & (fcb->Vcb->superblock.sector_size - 1)) == 0) {
2984194ea909SVictor Perevertkin rp->buf = data + bytes_read;
2985194ea909SVictor Perevertkin rp->buf_free = false;
2986194ea909SVictor Perevertkin } else {
2987194ea909SVictor Perevertkin rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG);
2988194ea909SVictor Perevertkin rp->buf_free = true;
2989c2c66affSColin Finck
2990194ea909SVictor Perevertkin if (!rp->buf) {
2991194ea909SVictor Perevertkin ERR("out of memory\n");
2992194ea909SVictor Perevertkin Status = STATUS_INSUFFICIENT_RESOURCES;
2993194ea909SVictor Perevertkin ExFreePool(rp);
2994194ea909SVictor Perevertkin goto exit;
2995194ea909SVictor Perevertkin }
2996c2c66affSColin Finck
2997194ea909SVictor Perevertkin rp->mdl = false;
2998194ea909SVictor Perevertkin }
2999194ea909SVictor Perevertkin
3000194ea909SVictor Perevertkin rp->c = get_chunk_from_address(fcb->Vcb, rp->addr);
3001194ea909SVictor Perevertkin
3002194ea909SVictor Perevertkin if (!rp->c) {
3003194ea909SVictor Perevertkin ERR("get_chunk_from_address(%I64x) failed\n", rp->addr);
3004194ea909SVictor Perevertkin
3005194ea909SVictor Perevertkin if (rp->buf_free)
3006194ea909SVictor Perevertkin ExFreePool(rp->buf);
3007194ea909SVictor Perevertkin
3008194ea909SVictor Perevertkin ExFreePool(rp);
3009c2c66affSColin Finck
3010174dfab6SVincent Franchomme Status = STATUS_INTERNAL_ERROR;
3011c2c66affSColin Finck goto exit;
3012c2c66affSColin Finck }
3013c2c66affSColin Finck
3014c2c66affSColin Finck if (ext->csum) {
3015c2c66affSColin Finck if (ed->compression == BTRFS_COMPRESSION_NONE) {
3016174dfab6SVincent Franchomme rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off >> fcb->Vcb->sector_shift));
3017c2c66affSColin Finck } else
3018194ea909SVictor Perevertkin rp->csum = ext->csum;
3019c2c66affSColin Finck } else
3020194ea909SVictor Perevertkin rp->csum = NULL;
3021c2c66affSColin Finck
3022194ea909SVictor Perevertkin rp->data = data + bytes_read;
3023194ea909SVictor Perevertkin rp->compression = ed->compression;
3024194ea909SVictor Perevertkin rp->extents[0].ed_offset = ed2->offset;
3025194ea909SVictor Perevertkin rp->extents[0].ed_size = ed2->size;
3026194ea909SVictor Perevertkin rp->extents[0].ed_num_bytes = ed2->num_bytes;
3027c2c66affSColin Finck
3028194ea909SVictor Perevertkin InsertTailList(&read_parts, &rp->list_entry);
3029c2c66affSColin Finck
3030194ea909SVictor Perevertkin bytes_read += rp->read;
3031194ea909SVictor Perevertkin length -= rp->read;
3032c2c66affSColin Finck
3033c2c66affSColin Finck break;
3034c2c66affSColin Finck }
3035c2c66affSColin Finck
3036c2c66affSColin Finck case EXTENT_TYPE_PREALLOC:
3037c2c66affSColin Finck {
3038318da0c1SPierre Schweitzer uint64_t off = start + bytes_read - ext->offset;
3039318da0c1SPierre Schweitzer uint32_t read = (uint32_t)(len - off);
3040c2c66affSColin Finck
3041318da0c1SPierre Schweitzer if (read > length) read = (uint32_t)length;
3042c2c66affSColin Finck
3043c2c66affSColin Finck RtlZeroMemory(data + bytes_read, read);
3044c2c66affSColin Finck
3045c2c66affSColin Finck bytes_read += read;
3046c2c66affSColin Finck length -= read;
3047c2c66affSColin Finck
3048c2c66affSColin Finck break;
3049c2c66affSColin Finck }
3050c2c66affSColin Finck
3051c2c66affSColin Finck default:
3052c2c66affSColin Finck WARN("Unsupported extent data type %u\n", ed->type);
3053c2c66affSColin Finck Status = STATUS_NOT_IMPLEMENTED;
3054c2c66affSColin Finck goto exit;
3055c2c66affSColin Finck }
3056c2c66affSColin Finck
3057c2c66affSColin Finck last_end = ext->offset + len;
3058c2c66affSColin Finck
3059c2c66affSColin Finck if (length == 0)
3060c2c66affSColin Finck break;
3061c2c66affSColin Finck }
3062c2c66affSColin Finck
3063c2c66affSColin Finck nextitem:
3064c2c66affSColin Finck le = le->Flink;
3065c2c66affSColin Finck }
3066c2c66affSColin Finck
3067194ea909SVictor Perevertkin if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list
3068194ea909SVictor Perevertkin read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry);
3069194ea909SVictor Perevertkin
3070194ea909SVictor Perevertkin le = read_parts.Flink->Flink;
3071194ea909SVictor Perevertkin while (le != &read_parts) {
3072194ea909SVictor Perevertkin LIST_ENTRY* le2 = le->Flink;
3073194ea909SVictor Perevertkin read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3074194ea909SVictor Perevertkin
3075194ea909SVictor Perevertkin // merge together runs
3076194ea909SVictor Perevertkin if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read &&
3077194ea909SVictor Perevertkin rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) {
3078194ea909SVictor Perevertkin read_part* rp2;
3079194ea909SVictor Perevertkin
3080194ea909SVictor Perevertkin rp2 = ExAllocatePoolWithTag(pool_type, offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG);
3081194ea909SVictor Perevertkin
3082194ea909SVictor Perevertkin rp2->addr = last_rp->addr;
3083194ea909SVictor Perevertkin rp2->c = last_rp->c;
3084194ea909SVictor Perevertkin rp2->read = last_rp->read + rp->read;
3085194ea909SVictor Perevertkin rp2->to_read = last_rp->to_read + rp->to_read;
3086194ea909SVictor Perevertkin rp2->csum_free = false;
3087194ea909SVictor Perevertkin
3088194ea909SVictor Perevertkin if (last_rp->csum) {
3089174dfab6SVincent Franchomme uint32_t sectors = (last_rp->to_read + rp->to_read) >> fcb->Vcb->sector_shift;
3090194ea909SVictor Perevertkin
3091194ea909SVictor Perevertkin rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG);
3092194ea909SVictor Perevertkin if (!rp2->csum) {
3093194ea909SVictor Perevertkin ERR("out of memory\n");
3094194ea909SVictor Perevertkin ExFreePool(rp2);
3095194ea909SVictor Perevertkin Status = STATUS_INSUFFICIENT_RESOURCES;
3096194ea909SVictor Perevertkin goto exit;
3097194ea909SVictor Perevertkin }
3098194ea909SVictor Perevertkin
3099174dfab6SVincent Franchomme RtlCopyMemory(rp2->csum, last_rp->csum, (last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
3100174dfab6SVincent Franchomme RtlCopyMemory((uint8_t*)rp2->csum + ((last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), rp->csum,
3101174dfab6SVincent Franchomme (rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
3102194ea909SVictor Perevertkin
3103194ea909SVictor Perevertkin rp2->csum_free = true;
3104194ea909SVictor Perevertkin } else
3105194ea909SVictor Perevertkin rp2->csum = NULL;
3106194ea909SVictor Perevertkin
3107194ea909SVictor Perevertkin rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG);
3108194ea909SVictor Perevertkin if (!rp2->buf) {
3109194ea909SVictor Perevertkin ERR("out of memory\n");
3110194ea909SVictor Perevertkin
3111194ea909SVictor Perevertkin if (rp2->csum)
3112194ea909SVictor Perevertkin ExFreePool(rp2->csum);
3113194ea909SVictor Perevertkin
3114194ea909SVictor Perevertkin ExFreePool(rp2);
3115194ea909SVictor Perevertkin Status = STATUS_INSUFFICIENT_RESOURCES;
3116194ea909SVictor Perevertkin goto exit;
3117194ea909SVictor Perevertkin }
3118194ea909SVictor Perevertkin
3119194ea909SVictor Perevertkin rp2->buf_free = true;
3120194ea909SVictor Perevertkin rp2->bumpoff = 0;
3121194ea909SVictor Perevertkin rp2->mdl = false;
3122194ea909SVictor Perevertkin rp2->data = last_rp->data;
3123194ea909SVictor Perevertkin rp2->compression = last_rp->compression;
3124194ea909SVictor Perevertkin rp2->num_extents = last_rp->num_extents + 1;
3125194ea909SVictor Perevertkin
3126194ea909SVictor Perevertkin RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent));
3127194ea909SVictor Perevertkin RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent));
3128194ea909SVictor Perevertkin
3129194ea909SVictor Perevertkin InsertHeadList(le->Blink, &rp2->list_entry);
3130194ea909SVictor Perevertkin
3131194ea909SVictor Perevertkin if (rp->buf_free)
3132194ea909SVictor Perevertkin ExFreePool(rp->buf);
3133194ea909SVictor Perevertkin
3134194ea909SVictor Perevertkin if (rp->csum_free)
3135194ea909SVictor Perevertkin ExFreePool(rp->csum);
3136194ea909SVictor Perevertkin
3137194ea909SVictor Perevertkin RemoveEntryList(&rp->list_entry);
3138194ea909SVictor Perevertkin
3139194ea909SVictor Perevertkin ExFreePool(rp);
3140194ea909SVictor Perevertkin
3141194ea909SVictor Perevertkin if (last_rp->buf_free)
3142194ea909SVictor Perevertkin ExFreePool(last_rp->buf);
3143194ea909SVictor Perevertkin
3144194ea909SVictor Perevertkin if (last_rp->csum_free)
3145194ea909SVictor Perevertkin ExFreePool(last_rp->csum);
3146194ea909SVictor Perevertkin
3147194ea909SVictor Perevertkin RemoveEntryList(&last_rp->list_entry);
3148194ea909SVictor Perevertkin
3149194ea909SVictor Perevertkin ExFreePool(last_rp);
3150194ea909SVictor Perevertkin
3151194ea909SVictor Perevertkin last_rp = rp2;
3152194ea909SVictor Perevertkin } else
3153194ea909SVictor Perevertkin last_rp = rp;
3154194ea909SVictor Perevertkin
3155194ea909SVictor Perevertkin le = le2;
3156194ea909SVictor Perevertkin }
3157194ea909SVictor Perevertkin }
3158194ea909SVictor Perevertkin
3159194ea909SVictor Perevertkin le = read_parts.Flink;
3160194ea909SVictor Perevertkin while (le != &read_parts) {
3161194ea909SVictor Perevertkin read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3162194ea909SVictor Perevertkin
3163194ea909SVictor Perevertkin Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl,
3164174dfab6SVincent Franchomme fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3165194ea909SVictor Perevertkin if (!NT_SUCCESS(Status)) {
3166194ea909SVictor Perevertkin ERR("read_data returned %08lx\n", Status);
3167194ea909SVictor Perevertkin goto exit;
3168194ea909SVictor Perevertkin }
3169194ea909SVictor Perevertkin
3170194ea909SVictor Perevertkin if (rp->compression == BTRFS_COMPRESSION_NONE) {
3171194ea909SVictor Perevertkin if (rp->buf_free)
3172194ea909SVictor Perevertkin RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read);
3173194ea909SVictor Perevertkin } else {
3174194ea909SVictor Perevertkin uint8_t* buf = rp->buf;
3175*6e0cf03dSVincent Franchomme
3176194ea909SVictor Perevertkin for (unsigned int i = 0; i < rp->num_extents; i++) {
3177194ea909SVictor Perevertkin uint8_t *decomp = NULL, *buf2;
3178194ea909SVictor Perevertkin ULONG outlen, inlen, off2;
3179194ea909SVictor Perevertkin uint32_t inpageoff = 0;
3180194ea909SVictor Perevertkin comp_calc_job* ccj;
3181194ea909SVictor Perevertkin
3182194ea909SVictor Perevertkin off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off);
3183194ea909SVictor Perevertkin buf2 = buf;
3184194ea909SVictor Perevertkin inlen = (ULONG)rp->extents[i].ed_size;
3185194ea909SVictor Perevertkin
3186194ea909SVictor Perevertkin if (rp->compression == BTRFS_COMPRESSION_LZO) {
3187194ea909SVictor Perevertkin ULONG inoff = sizeof(uint32_t);
3188194ea909SVictor Perevertkin
3189194ea909SVictor Perevertkin inlen -= sizeof(uint32_t);
3190194ea909SVictor Perevertkin
3191194ea909SVictor Perevertkin // If reading a few sectors in, skip to the interesting bit
3192194ea909SVictor Perevertkin while (off2 > LZO_PAGE_SIZE) {
3193194ea909SVictor Perevertkin uint32_t partlen;
3194194ea909SVictor Perevertkin
3195194ea909SVictor Perevertkin if (inlen < sizeof(uint32_t))
3196194ea909SVictor Perevertkin break;
3197194ea909SVictor Perevertkin
3198194ea909SVictor Perevertkin partlen = *(uint32_t*)(buf2 + inoff);
3199194ea909SVictor Perevertkin
3200194ea909SVictor Perevertkin if (partlen < inlen) {
3201194ea909SVictor Perevertkin off2 -= LZO_PAGE_SIZE;
3202194ea909SVictor Perevertkin inoff += partlen + sizeof(uint32_t);
3203194ea909SVictor Perevertkin inlen -= partlen + sizeof(uint32_t);
3204194ea909SVictor Perevertkin
3205194ea909SVictor Perevertkin if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
3206194ea909SVictor Perevertkin inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
3207194ea909SVictor Perevertkin } else
3208194ea909SVictor Perevertkin break;
3209194ea909SVictor Perevertkin }
3210194ea909SVictor Perevertkin
3211194ea909SVictor Perevertkin buf2 = &buf2[inoff];
3212194ea909SVictor Perevertkin inpageoff = inoff % LZO_PAGE_SIZE;
3213194ea909SVictor Perevertkin }
3214194ea909SVictor Perevertkin
321506042735SVincent Franchomme /* Previous versions of this code decompressed directly into the destination buffer,
321606042735SVincent Franchomme * but unfortunately that can't be relied on - Windows likes to use dummy pages sometimes
321706042735SVincent Franchomme * when mmap-ing, which breaks the backtracking used by e.g. zstd. */
321806042735SVincent Franchomme
321906042735SVincent Franchomme if (off2 != 0)
3220194ea909SVictor Perevertkin outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
322106042735SVincent Franchomme else
322206042735SVincent Franchomme outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3223194ea909SVictor Perevertkin
3224194ea909SVictor Perevertkin decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
3225194ea909SVictor Perevertkin if (!decomp) {
3226194ea909SVictor Perevertkin ERR("out of memory\n");
3227194ea909SVictor Perevertkin Status = STATUS_INSUFFICIENT_RESOURCES;
3228194ea909SVictor Perevertkin goto exit;
3229194ea909SVictor Perevertkin }
3230194ea909SVictor Perevertkin
3231194ea909SVictor Perevertkin ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG);
3232194ea909SVictor Perevertkin if (!ccj) {
3233194ea909SVictor Perevertkin ERR("out of memory\n");
3234194ea909SVictor Perevertkin
3235194ea909SVictor Perevertkin ExFreePool(decomp);
3236194ea909SVictor Perevertkin
3237194ea909SVictor Perevertkin Status = STATUS_INSUFFICIENT_RESOURCES;
3238194ea909SVictor Perevertkin goto exit;
3239194ea909SVictor Perevertkin }
3240194ea909SVictor Perevertkin
3241194ea909SVictor Perevertkin ccj->data = rp->data;
3242194ea909SVictor Perevertkin ccj->decomp = decomp;
3243194ea909SVictor Perevertkin
3244194ea909SVictor Perevertkin ccj->offset = off2;
3245194ea909SVictor Perevertkin ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off);
3246194ea909SVictor Perevertkin
324706042735SVincent Franchomme Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp, outlen,
324806042735SVincent Franchomme inpageoff, &ccj->cj);
324906042735SVincent Franchomme if (!NT_SUCCESS(Status)) {
325006042735SVincent Franchomme ERR("add_calc_job_decomp returned %08lx\n", Status);
325106042735SVincent Franchomme
325206042735SVincent Franchomme ExFreePool(decomp);
325306042735SVincent Franchomme ExFreePool(ccj);
325406042735SVincent Franchomme
325506042735SVincent Franchomme goto exit;
325606042735SVincent Franchomme }
325706042735SVincent Franchomme
3258194ea909SVictor Perevertkin InsertTailList(&calc_jobs, &ccj->list_entry);
3259194ea909SVictor Perevertkin
3260194ea909SVictor Perevertkin buf += rp->extents[i].ed_size;
3261194ea909SVictor Perevertkin rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off;
3262194ea909SVictor Perevertkin rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off);
3263194ea909SVictor Perevertkin }
3264194ea909SVictor Perevertkin }
3265194ea909SVictor Perevertkin
3266194ea909SVictor Perevertkin le = le->Flink;
3267194ea909SVictor Perevertkin }
3268194ea909SVictor Perevertkin
3269c2c66affSColin Finck if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3270318da0c1SPierre Schweitzer uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3271c2c66affSColin Finck
3272c2c66affSColin Finck RtlZeroMemory(data + bytes_read, read);
3273c2c66affSColin Finck
3274c2c66affSColin Finck bytes_read += read;
3275c2c66affSColin Finck length -= read;
3276c2c66affSColin Finck }
3277c2c66affSColin Finck
3278c2c66affSColin Finck Status = STATUS_SUCCESS;
3279194ea909SVictor Perevertkin
3280194ea909SVictor Perevertkin while (!IsListEmpty(&calc_jobs)) {
3281194ea909SVictor Perevertkin comp_calc_job* ccj = CONTAINING_RECORD(RemoveTailList(&calc_jobs), comp_calc_job, list_entry);
3282194ea909SVictor Perevertkin
3283194ea909SVictor Perevertkin calc_thread_main(fcb->Vcb, ccj->cj);
3284194ea909SVictor Perevertkin
3285194ea909SVictor Perevertkin KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
3286194ea909SVictor Perevertkin
3287194ea909SVictor Perevertkin if (!NT_SUCCESS(ccj->cj->Status))
3288194ea909SVictor Perevertkin Status = ccj->cj->Status;
3289194ea909SVictor Perevertkin
3290194ea909SVictor Perevertkin RtlCopyMemory(ccj->data, (uint8_t*)ccj->decomp + ccj->offset, ccj->length);
3291194ea909SVictor Perevertkin ExFreePool(ccj->decomp);
3292194ea909SVictor Perevertkin
3293194ea909SVictor Perevertkin ExFreePool(ccj);
3294194ea909SVictor Perevertkin }
3295194ea909SVictor Perevertkin
3296c2c66affSColin Finck if (pbr)
3297c2c66affSColin Finck *pbr = bytes_read;
3298c2c66affSColin Finck
3299c2c66affSColin Finck exit:
3300194ea909SVictor Perevertkin while (!IsListEmpty(&read_parts)) {
3301194ea909SVictor Perevertkin read_part* rp = CONTAINING_RECORD(RemoveHeadList(&read_parts), read_part, list_entry);
3302194ea909SVictor Perevertkin
3303194ea909SVictor Perevertkin if (rp->buf_free)
3304194ea909SVictor Perevertkin ExFreePool(rp->buf);
3305194ea909SVictor Perevertkin
3306194ea909SVictor Perevertkin if (rp->csum_free)
3307194ea909SVictor Perevertkin ExFreePool(rp->csum);
3308194ea909SVictor Perevertkin
3309194ea909SVictor Perevertkin ExFreePool(rp);
3310194ea909SVictor Perevertkin }
3311194ea909SVictor Perevertkin
3312194ea909SVictor Perevertkin while (!IsListEmpty(&calc_jobs)) {
3313194ea909SVictor Perevertkin comp_calc_job* ccj = CONTAINING_RECORD(RemoveHeadList(&calc_jobs), comp_calc_job, list_entry);
3314194ea909SVictor Perevertkin
3315194ea909SVictor Perevertkin KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
3316194ea909SVictor Perevertkin
3317194ea909SVictor Perevertkin if (ccj->decomp)
3318194ea909SVictor Perevertkin ExFreePool(ccj->decomp);
3319194ea909SVictor Perevertkin
3320194ea909SVictor Perevertkin ExFreePool(ccj->cj);
3321194ea909SVictor Perevertkin
3322194ea909SVictor Perevertkin ExFreePool(ccj);
3323194ea909SVictor Perevertkin }
3324194ea909SVictor Perevertkin
3325c2c66affSColin Finck return Status;
3326c2c66affSColin Finck }
3327c2c66affSColin Finck
do_read(PIRP Irp,bool wait,ULONG * bytes_read)3328318da0c1SPierre Schweitzer NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
3329c2c66affSColin Finck PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3330c2c66affSColin Finck PFILE_OBJECT FileObject = IrpSp->FileObject;
3331c2c66affSColin Finck fcb* fcb = FileObject->FsContext;
3332318da0c1SPierre Schweitzer uint8_t* data = NULL;
3333c2c66affSColin Finck ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3334318da0c1SPierre Schweitzer uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3335c2c66affSColin Finck
3336c2c66affSColin Finck *bytes_read = 0;
3337c2c66affSColin Finck
3338c2c66affSColin Finck if (!fcb || !fcb->Vcb || !fcb->subvol)
3339c2c66affSColin Finck return STATUS_INTERNAL_ERROR;
3340c2c66affSColin Finck
334162e630deSPierre Schweitzer TRACE("fcb = %p\n", fcb);
3342194ea909SVictor Perevertkin TRACE("offset = %I64x, length = %lx\n", start, length);
3343318da0c1SPierre Schweitzer TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");
3344c2c66affSColin Finck
3345c2c66affSColin Finck if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3346c2c66affSColin Finck return STATUS_INVALID_DEVICE_REQUEST;
3347c2c66affSColin Finck
3348c2c66affSColin Finck if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3349c2c66affSColin Finck WARN("tried to read locked region\n");
3350c2c66affSColin Finck return STATUS_FILE_LOCK_CONFLICT;
3351c2c66affSColin Finck }
3352c2c66affSColin Finck
3353c2c66affSColin Finck if (length == 0) {
3354c2c66affSColin Finck TRACE("tried to read zero bytes\n");
3355c2c66affSColin Finck return STATUS_SUCCESS;
3356c2c66affSColin Finck }
3357c2c66affSColin Finck
3358318da0c1SPierre Schweitzer if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
3359318da0c1SPierre Schweitzer TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
3360c2c66affSColin Finck return STATUS_END_OF_FILE;
3361c2c66affSColin Finck }
3362c2c66affSColin Finck
3363318da0c1SPierre Schweitzer TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3364c2c66affSColin Finck
3365174dfab6SVincent Franchomme if (!(Irp->Flags & IRP_NOCACHE) && IrpSp->MinorFunction & IRP_MN_MDL) {
3366174dfab6SVincent Franchomme NTSTATUS Status = STATUS_SUCCESS;
3367174dfab6SVincent Franchomme
3368174dfab6SVincent Franchomme _SEH2_TRY {
3369174dfab6SVincent Franchomme if (!FileObject->PrivateCacheMap) {
3370174dfab6SVincent Franchomme CC_FILE_SIZES ccfs;
3371174dfab6SVincent Franchomme
3372174dfab6SVincent Franchomme ccfs.AllocationSize = fcb->Header.AllocationSize;
3373174dfab6SVincent Franchomme ccfs.FileSize = fcb->Header.FileSize;
3374174dfab6SVincent Franchomme ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3375174dfab6SVincent Franchomme
3376174dfab6SVincent Franchomme init_file_cache(FileObject, &ccfs);
3377174dfab6SVincent Franchomme }
3378174dfab6SVincent Franchomme
3379174dfab6SVincent Franchomme CcMdlRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3380174dfab6SVincent Franchomme } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3381174dfab6SVincent Franchomme Status = _SEH2_GetExceptionCode();
3382174dfab6SVincent Franchomme } _SEH2_END;
3383174dfab6SVincent Franchomme
3384174dfab6SVincent Franchomme if (NT_SUCCESS(Status)) {
3385174dfab6SVincent Franchomme Status = Irp->IoStatus.Status;
3386174dfab6SVincent Franchomme Irp->IoStatus.Information += addon;
3387174dfab6SVincent Franchomme *bytes_read = (ULONG)Irp->IoStatus.Information;
3388174dfab6SVincent Franchomme } else
3389174dfab6SVincent Franchomme ERR("EXCEPTION - %08lx\n", Status);
3390174dfab6SVincent Franchomme
3391174dfab6SVincent Franchomme return Status;
3392174dfab6SVincent Franchomme }
3393174dfab6SVincent Franchomme
3394c2c66affSColin Finck data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3395c2c66affSColin Finck
3396c2c66affSColin Finck if (Irp->MdlAddress && !data) {
3397c2c66affSColin Finck ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3398c2c66affSColin Finck return STATUS_INSUFFICIENT_RESOURCES;
3399c2c66affSColin Finck }
3400c2c66affSColin Finck
3401318da0c1SPierre Schweitzer if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3402318da0c1SPierre Schweitzer length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3403c2c66affSColin Finck RtlZeroMemory(data, length);
3404c2c66affSColin Finck Irp->IoStatus.Information = *bytes_read = length;
3405c2c66affSColin Finck return STATUS_SUCCESS;
3406c2c66affSColin Finck }
3407c2c66affSColin Finck
3408318da0c1SPierre Schweitzer if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3409318da0c1SPierre Schweitzer addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3410c2c66affSColin Finck RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3411c2c66affSColin Finck length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3412c2c66affSColin Finck }
3413c2c66affSColin Finck
3414c2c66affSColin Finck if (!(Irp->Flags & IRP_NOCACHE)) {
3415c2c66affSColin Finck NTSTATUS Status = STATUS_SUCCESS;
3416c2c66affSColin Finck
3417c2c66affSColin Finck _SEH2_TRY {
3418c2c66affSColin Finck if (!FileObject->PrivateCacheMap) {
3419c2c66affSColin Finck CC_FILE_SIZES ccfs;
3420c2c66affSColin Finck
3421c2c66affSColin Finck ccfs.AllocationSize = fcb->Header.AllocationSize;
3422c2c66affSColin Finck ccfs.FileSize = fcb->Header.FileSize;
3423c2c66affSColin Finck ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3424c2c66affSColin Finck
3425c2c66affSColin Finck init_file_cache(FileObject, &ccfs);
3426c2c66affSColin Finck }
3427c2c66affSColin Finck
3428c2c66affSColin Finck if (fCcCopyReadEx) {
3429194ea909SVictor Perevertkin TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3430c2c66affSColin Finck length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3431194ea909SVictor Perevertkin TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3432c2c66affSColin Finck if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3433c2c66affSColin Finck TRACE("CcCopyReadEx could not wait\n");
3434c2c66affSColin Finck
3435c2c66affSColin Finck IoMarkIrpPending(Irp);
3436c2c66affSColin Finck return STATUS_PENDING;
3437c2c66affSColin Finck }
3438c2c66affSColin Finck TRACE("CcCopyReadEx finished\n");
3439c2c66affSColin Finck } else {
3440194ea909SVictor Perevertkin TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3441194ea909SVictor Perevertkin TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3442c2c66affSColin Finck if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3443c2c66affSColin Finck TRACE("CcCopyRead could not wait\n");
3444c2c66affSColin Finck
3445c2c66affSColin Finck IoMarkIrpPending(Irp);
3446c2c66affSColin Finck return STATUS_PENDING;
3447c2c66affSColin Finck }
3448c2c66affSColin Finck TRACE("CcCopyRead finished\n");
3449c2c66affSColin Finck }
3450c2c66affSColin Finck } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3451c2c66affSColin Finck Status = _SEH2_GetExceptionCode();
3452c2c66affSColin Finck } _SEH2_END;
3453c2c66affSColin Finck
3454c2c66affSColin Finck if (NT_SUCCESS(Status)) {
3455c2c66affSColin Finck Status = Irp->IoStatus.Status;
3456c2c66affSColin Finck Irp->IoStatus.Information += addon;
3457c2c66affSColin Finck *bytes_read = (ULONG)Irp->IoStatus.Information;
3458c2c66affSColin Finck } else
3459194ea909SVictor Perevertkin ERR("EXCEPTION - %08lx\n", Status);
3460c2c66affSColin Finck
3461c2c66affSColin Finck return Status;
3462c2c66affSColin Finck } else {
3463c2c66affSColin Finck NTSTATUS Status;
3464c2c66affSColin Finck
3465c2c66affSColin Finck if (!wait) {
3466c2c66affSColin Finck IoMarkIrpPending(Irp);
3467c2c66affSColin Finck return STATUS_PENDING;
3468c2c66affSColin Finck }
3469c2c66affSColin Finck
3470194ea909SVictor Perevertkin if (fcb->ads) {
3471c2c66affSColin Finck Status = read_stream(fcb, data, start, length, bytes_read);
3472194ea909SVictor Perevertkin
3473194ea909SVictor Perevertkin if (!NT_SUCCESS(Status))
3474194ea909SVictor Perevertkin ERR("read_stream returned %08lx\n", Status);
3475194ea909SVictor Perevertkin } else {
3476c2c66affSColin Finck Status = read_file(fcb, data, start, length, bytes_read, Irp);
3477c2c66affSColin Finck
3478194ea909SVictor Perevertkin if (!NT_SUCCESS(Status))
3479194ea909SVictor Perevertkin ERR("read_file returned %08lx\n", Status);
3480194ea909SVictor Perevertkin }
3481194ea909SVictor Perevertkin
3482c2c66affSColin Finck *bytes_read += addon;
3483194ea909SVictor Perevertkin TRACE("read %lu bytes\n", *bytes_read);
3484c2c66affSColin Finck
3485c2c66affSColin Finck Irp->IoStatus.Information = *bytes_read;
3486c2c66affSColin Finck
3487c2c66affSColin Finck if (diskacc && Status != STATUS_PENDING) {
3488c2c66affSColin Finck PETHREAD thread = NULL;
3489c2c66affSColin Finck
3490c2c66affSColin Finck if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3491c2c66affSColin Finck thread = Irp->Tail.Overlay.Thread;
3492c2c66affSColin Finck else if (!IoIsSystemThread(PsGetCurrentThread()))
3493c2c66affSColin Finck thread = PsGetCurrentThread();
3494c2c66affSColin Finck else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
3495c2c66affSColin Finck thread = PsGetCurrentThread();
3496c2c66affSColin Finck
3497c2c66affSColin Finck if (thread)
3498c2c66affSColin Finck fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3499c2c66affSColin Finck }
3500c2c66affSColin Finck
3501c2c66affSColin Finck return Status;
3502c2c66affSColin Finck }
3503c2c66affSColin Finck }
3504c2c66affSColin Finck
3505c2c66affSColin Finck _Dispatch_type_(IRP_MJ_READ)
_Function_class_(DRIVER_DISPATCH)3506c2c66affSColin Finck _Function_class_(DRIVER_DISPATCH)
3507318da0c1SPierre Schweitzer NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3508c2c66affSColin Finck device_extension* Vcb = DeviceObject->DeviceExtension;
3509c2c66affSColin Finck PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3510c2c66affSColin Finck PFILE_OBJECT FileObject = IrpSp->FileObject;
3511c2c66affSColin Finck ULONG bytes_read = 0;
3512c2c66affSColin Finck NTSTATUS Status;
3513318da0c1SPierre Schweitzer bool top_level;
3514c2c66affSColin Finck fcb* fcb;
3515c2c66affSColin Finck ccb* ccb;
3516318da0c1SPierre Schweitzer bool acquired_fcb_lock = false, wait;
3517c2c66affSColin Finck
3518c2c66affSColin Finck FsRtlEnterFileSystem();
3519c2c66affSColin Finck
3520c2c66affSColin Finck top_level = is_top_level(Irp);
3521c2c66affSColin Finck
3522c2c66affSColin Finck TRACE("read\n");
3523c2c66affSColin Finck
3524c2c66affSColin Finck if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3525c2c66affSColin Finck Status = vol_read(DeviceObject, Irp);
3526c2c66affSColin Finck goto exit2;
3527c2c66affSColin Finck } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3528c2c66affSColin Finck Status = STATUS_INVALID_PARAMETER;
3529c2c66affSColin Finck goto end;
3530c2c66affSColin Finck }
3531c2c66affSColin Finck
3532c2c66affSColin Finck Irp->IoStatus.Information = 0;
3533c2c66affSColin Finck
3534c2c66affSColin Finck if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3535c2c66affSColin Finck CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3536c2c66affSColin Finck
3537c2c66affSColin Finck Irp->MdlAddress = NULL;
3538c2c66affSColin Finck Status = STATUS_SUCCESS;
3539c2c66affSColin Finck
3540c2c66affSColin Finck goto exit;
3541c2c66affSColin Finck }
3542c2c66affSColin Finck
3543c2c66affSColin Finck fcb = FileObject->FsContext;
3544c2c66affSColin Finck
3545c2c66affSColin Finck if (!fcb) {
3546c2c66affSColin Finck ERR("fcb was NULL\n");
3547c2c66affSColin Finck Status = STATUS_INVALID_PARAMETER;
3548c2c66affSColin Finck goto exit;
3549c2c66affSColin Finck }
3550c2c66affSColin Finck
3551c2c66affSColin Finck ccb = FileObject->FsContext2;
3552c2c66affSColin Finck
3553c2c66affSColin Finck if (!ccb) {
3554c2c66affSColin Finck ERR("ccb was NULL\n");
3555c2c66affSColin Finck Status = STATUS_INVALID_PARAMETER;
3556c2c66affSColin Finck goto exit;
3557c2c66affSColin Finck }
3558c2c66affSColin Finck
3559c2c66affSColin Finck if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3560c2c66affSColin Finck WARN("insufficient privileges\n");
3561c2c66affSColin Finck Status = STATUS_ACCESS_DENIED;
3562c2c66affSColin Finck goto exit;
3563c2c66affSColin Finck }
3564c2c66affSColin Finck
3565c2c66affSColin Finck if (fcb == Vcb->volume_fcb) {
3566c2c66affSColin Finck TRACE("reading volume FCB\n");
3567c2c66affSColin Finck
3568c2c66affSColin Finck IoSkipCurrentIrpStackLocation(Irp);
3569c2c66affSColin Finck
3570c2c66affSColin Finck Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3571c2c66affSColin Finck
3572c2c66affSColin Finck goto exit2;
3573c2c66affSColin Finck }
3574c2c66affSColin Finck
357562e630deSPierre Schweitzer if (!(Irp->Flags & IRP_PAGING_IO))
357662e630deSPierre Schweitzer FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
357762e630deSPierre Schweitzer
3578c2c66affSColin Finck wait = IoIsOperationSynchronous(Irp);
3579c2c66affSColin Finck
3580c2c66affSColin Finck // Don't offload jobs when doing paging IO - otherwise this can lead to
3581c2c66affSColin Finck // deadlocks in CcCopyRead.
3582c2c66affSColin Finck if (Irp->Flags & IRP_PAGING_IO)
3583318da0c1SPierre Schweitzer wait = true;
3584c2c66affSColin Finck
3585883b1f31SPierre Schweitzer if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) {
35864672b2baSPierre Schweitzer IO_STATUS_BLOCK iosb;
35874672b2baSPierre Schweitzer
35884672b2baSPierre Schweitzer CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
35894672b2baSPierre Schweitzer if (!NT_SUCCESS(iosb.Status)) {
3590194ea909SVictor Perevertkin ERR("CcFlushCache returned %08lx\n", iosb.Status);
35914672b2baSPierre Schweitzer return iosb.Status;
35924672b2baSPierre Schweitzer }
35934672b2baSPierre Schweitzer }
35944672b2baSPierre Schweitzer
3595c2c66affSColin Finck if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3596c2c66affSColin Finck if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3597c2c66affSColin Finck Status = STATUS_PENDING;
3598c2c66affSColin Finck IoMarkIrpPending(Irp);
3599c2c66affSColin Finck goto exit;
3600c2c66affSColin Finck }
3601c2c66affSColin Finck
3602318da0c1SPierre Schweitzer acquired_fcb_lock = true;
3603c2c66affSColin Finck }
3604c2c66affSColin Finck
3605c2c66affSColin Finck Status = do_read(Irp, wait, &bytes_read);
3606c2c66affSColin Finck
3607318da0c1SPierre Schweitzer if (acquired_fcb_lock)
3608c2c66affSColin Finck ExReleaseResourceLite(fcb->Header.Resource);
3609c2c66affSColin Finck
3610c2c66affSColin Finck exit:
3611c2c66affSColin Finck if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3612c2c66affSColin Finck FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3613c2c66affSColin Finck
3614c2c66affSColin Finck end:
3615c2c66affSColin Finck Irp->IoStatus.Status = Status;
3616c2c66affSColin Finck
3617194ea909SVictor Perevertkin TRACE("Irp->IoStatus.Status = %08lx\n", Irp->IoStatus.Status);
3618194ea909SVictor Perevertkin TRACE("Irp->IoStatus.Information = %Iu\n", Irp->IoStatus.Information);
3619194ea909SVictor Perevertkin TRACE("returning %08lx\n", Status);
3620c2c66affSColin Finck
3621c2c66affSColin Finck if (Status != STATUS_PENDING)
3622c2c66affSColin Finck IoCompleteRequest(Irp, IO_NO_INCREMENT);
3623c2c66affSColin Finck else {
3624c2c66affSColin Finck if (!add_thread_job(Vcb, Irp))
3625318da0c1SPierre Schweitzer Status = do_read_job(Irp);
3626c2c66affSColin Finck }
3627c2c66affSColin Finck
3628c2c66affSColin Finck exit2:
3629c2c66affSColin Finck if (top_level)
3630c2c66affSColin Finck IoSetTopLevelIrp(NULL);
3631c2c66affSColin Finck
3632c2c66affSColin Finck FsRtlExitFileSystem();
3633c2c66affSColin Finck
3634c2c66affSColin Finck return Status;
3635c2c66affSColin Finck }
3636