xref: /reactos/drivers/filesystems/btrfs/scrub.c (revision 58588b76)
1 /* Copyright (c) Mark Harmstone 2017
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
// Size constant used by the scrub code: 0x100000 bytes.
#define SCRUB_UNIT 0x100000 // 1 MB

// Forward declaration so that scrub_context_stripe can point back at the
// scrub_context that owns it.
struct _scrub_context;
23 
/* State for one stripe (one device's copy) of a region being scrubbed. */
typedef struct {
    struct _scrub_context* context; // owning context; the read completion routine reaches the shared counter and event through this
    PIRP Irp;                       // IRP associated with this stripe (presumably the read request - confirm against the issuing code)
    UINT64 start;                   // NOTE(review): presumably the start offset of the read - not used in the code visible here
    UINT32 length;                  // number of bytes in buf that are compared / checksummed
    IO_STATUS_BLOCK iosb;           // copy of Irp->IoStatus, filled in by scrub_read_completion
    UINT8* buf;                     // data for this stripe
    BOOL csum_error;                // set to TRUE when this stripe fails its checksum or comparison check
    UINT32* bad_csums;              // checksums recomputed from buf once an error has been seen (one per sector for data, one per node for metadata)
} scrub_context_stripe;
34 
/* Shared state for a group of stripe reads that complete together. */
typedef struct _scrub_context {
    KEVENT Event;                   // signalled by scrub_read_completion when stripes_left reaches zero
    scrub_context_stripe* stripes;  // array of per-stripe state, indexed by stripe number
    LONG stripes_left;              // outstanding reads; decremented atomically by each completion
} scrub_context;
40 
/* One component of a file path, collected while walking from an inode up
 * towards the root in log_file_checksum_error. */
typedef struct {
    ANSI_STRING name;       // component name; Buffer points into tree item data and is not owned by this struct
    BOOL orig_subvol;       // TRUE if the component was found in the subvolume the walk started in
    LIST_ENTRY list_entry;  // links the component into the caller's list of parts
} path_part;
46 
47 static void log_file_checksum_error(device_extension* Vcb, UINT64 addr, UINT64 devid, UINT64 subvol, UINT64 inode, UINT64 offset) {
48     LIST_ENTRY *le, parts;
49     root* r = NULL;
50     KEY searchkey;
51     traverse_ptr tp;
52     UINT64 dir;
53     BOOL orig_subvol = TRUE, not_in_tree = FALSE;
54     ANSI_STRING fn;
55     scrub_error* err;
56     NTSTATUS Status;
57     ULONG utf16len;
58 
59     le = Vcb->roots.Flink;
60     while (le != &Vcb->roots) {
61         root* r2 = CONTAINING_RECORD(le, root, list_entry);
62 
63         if (r2->id == subvol) {
64             r = r2;
65             break;
66         }
67 
68         le = le->Flink;
69     }
70 
71     if (!r) {
72         ERR("could not find subvol %llx\n", subvol);
73         return;
74     }
75 
76     InitializeListHead(&parts);
77 
78     dir = inode;
79 
80     while (TRUE) {
81         if (dir == r->root_item.objid) {
82             if (r == Vcb->root_fileref->fcb->subvol)
83                 break;
84 
85             searchkey.obj_id = r->id;
86             searchkey.obj_type = TYPE_ROOT_BACKREF;
87             searchkey.offset = 0xffffffffffffffff;
88 
89             Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
90             if (!NT_SUCCESS(Status)) {
91                 ERR("find_item returned %08x\n", Status);
92                 goto end;
93             }
94 
95             if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
96                 ROOT_REF* rr = (ROOT_REF*)tp.item->data;
97                 path_part* pp;
98 
99                 if (tp.item->size < sizeof(ROOT_REF)) {
100                     ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
101                     goto end;
102                 }
103 
104                 if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
105                     ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
106                         tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
107                     goto end;
108                 }
109 
110                 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
111                 if (!pp) {
112                     ERR("out of memory\n");
113                     goto end;
114                 }
115 
116                 pp->name.Buffer = rr->name;
117                 pp->name.Length = pp->name.MaximumLength = rr->n;
118                 pp->orig_subvol = FALSE;
119 
120                 InsertTailList(&parts, &pp->list_entry);
121 
122                 r = NULL;
123 
124                 le = Vcb->roots.Flink;
125                 while (le != &Vcb->roots) {
126                     root* r2 = CONTAINING_RECORD(le, root, list_entry);
127 
128                     if (r2->id == tp.item->key.offset) {
129                         r = r2;
130                         break;
131                     }
132 
133                     le = le->Flink;
134                 }
135 
136                 if (!r) {
137                     ERR("could not find subvol %llx\n", tp.item->key.offset);
138                     goto end;
139                 }
140 
141                 dir = rr->dir;
142                 orig_subvol = FALSE;
143             } else {
144                 not_in_tree = TRUE;
145                 break;
146             }
147         } else {
148             searchkey.obj_id = dir;
149             searchkey.obj_type = TYPE_INODE_EXTREF;
150             searchkey.offset = 0xffffffffffffffff;
151 
152             Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
153             if (!NT_SUCCESS(Status)) {
154                 ERR("find_item returned %08x\n", Status);
155                 goto end;
156             }
157 
158             if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
159                 INODE_REF* ir = (INODE_REF*)tp.item->data;
160                 path_part* pp;
161 
162                 if (tp.item->size < sizeof(INODE_REF)) {
163                     ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
164                     goto end;
165                 }
166 
167                 if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
168                     ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
169                         tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
170                     goto end;
171                 }
172 
173                 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
174                 if (!pp) {
175                     ERR("out of memory\n");
176                     goto end;
177                 }
178 
179                 pp->name.Buffer = ir->name;
180                 pp->name.Length = pp->name.MaximumLength = ir->n;
181                 pp->orig_subvol = orig_subvol;
182 
183                 InsertTailList(&parts, &pp->list_entry);
184 
185                 if (dir == tp.item->key.offset)
186                     break;
187 
188                 dir = tp.item->key.offset;
189             } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
190                 INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data;
191                 path_part* pp;
192 
193                 if (tp.item->size < sizeof(INODE_EXTREF)) {
194                     ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
195                                                                                tp.item->size, sizeof(INODE_EXTREF));
196                     goto end;
197                 }
198 
199                 if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
200                     ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
201                         tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
202                     goto end;
203                 }
204 
205                 pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
206                 if (!pp) {
207                     ERR("out of memory\n");
208                     goto end;
209                 }
210 
211                 pp->name.Buffer = ier->name;
212                 pp->name.Length = pp->name.MaximumLength = ier->n;
213                 pp->orig_subvol = orig_subvol;
214 
215                 InsertTailList(&parts, &pp->list_entry);
216 
217                 if (dir == ier->dir)
218                     break;
219 
220                 dir = ier->dir;
221             } else {
222                 ERR("could not find INODE_REF for inode %llx in subvol %llx\n", dir, r->id);
223                 goto end;
224             }
225         }
226     }
227 
228     fn.MaximumLength = 0;
229 
230     if (not_in_tree) {
231         le = parts.Blink;
232         while (le != &parts) {
233             path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);
234             LIST_ENTRY* le2 = le->Blink;
235 
236             if (pp->orig_subvol)
237                 break;
238 
239             RemoveTailList(&parts);
240             ExFreePool(pp);
241 
242             le = le2;
243         }
244     }
245 
246     le = parts.Flink;
247     while (le != &parts) {
248         path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);
249 
250         fn.MaximumLength += pp->name.Length + 1;
251 
252         le = le->Flink;
253     }
254 
255     fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
256     if (!fn.Buffer) {
257         ERR("out of memory\n");
258         goto end;
259     }
260 
261     fn.Length = 0;
262 
263     le = parts.Blink;
264     while (le != &parts) {
265         path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);
266 
267         fn.Buffer[fn.Length] = '\\';
268         fn.Length++;
269 
270         RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
271         fn.Length += pp->name.Length;
272 
273         le = le->Blink;
274     }
275 
276     if (not_in_tree)
277         ERR("subvol %llx, %.*s, offset %llx\n", subvol, fn.Length, fn.Buffer, offset);
278     else
279         ERR("%.*s, offset %llx\n", fn.Length, fn.Buffer, offset);
280 
281     Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, fn.Buffer, fn.Length);
282     if (!NT_SUCCESS(Status)) {
283         ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
284         ExFreePool(fn.Buffer);
285         goto end;
286     }
287 
288     err = ExAllocatePoolWithTag(PagedPool, offsetof(scrub_error, data.filename[0]) + utf16len, ALLOC_TAG);
289     if (!err) {
290         ERR("out of memory\n");
291         ExFreePool(fn.Buffer);
292         goto end;
293     }
294 
295     err->address = addr;
296     err->device = devid;
297     err->recovered = FALSE;
298     err->is_metadata = FALSE;
299     err->parity = FALSE;
300 
301     err->data.subvol = not_in_tree ? subvol : 0;
302     err->data.offset = offset;
303     err->data.filename_length = (UINT16)utf16len;
304 
305     Status = RtlUTF8ToUnicodeN(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
306     if (!NT_SUCCESS(Status)) {
307         ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
308         ExFreePool(fn.Buffer);
309         ExFreePool(err);
310         goto end;
311     }
312 
313     ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
314 
315     Vcb->scrub.num_errors++;
316     InsertTailList(&Vcb->scrub.errors, &err->list_entry);
317 
318     ExReleaseResourceLite(&Vcb->scrub.stats_lock);
319 
320     ExFreePool(fn.Buffer);
321 
322 end:
323     while (!IsListEmpty(&parts)) {
324         path_part* pp = CONTAINING_RECORD(RemoveHeadList(&parts), path_part, list_entry);
325 
326         ExFreePool(pp);
327     }
328 }
329 
330 static void log_file_checksum_error_shared(device_extension* Vcb, UINT64 treeaddr, UINT64 addr, UINT64 devid, UINT64 extent) {
331     tree_header* tree;
332     NTSTATUS Status;
333     leaf_node* ln;
334     ULONG i;
335 
336     tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
337     if (!tree) {
338         ERR("out of memory\n");
339         return;
340     }
341 
342     Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
343     if (!NT_SUCCESS(Status)) {
344         ERR("read_data returned %08x\n", Status);
345         goto end;
346     }
347 
348     if (tree->level != 0) {
349         ERR("tree level was %x, expected 0\n", tree->level);
350         goto end;
351     }
352 
353     ln = (leaf_node*)&tree[1];
354 
355     for (i = 0; i < tree->num_items; i++) {
356         if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
357             EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)tree + sizeof(tree_header) + ln[i].offset);
358             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
359 
360             if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr)
361                 log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent);
362         }
363     }
364 
365 end:
366     ExFreePool(tree);
367 }
368 
369 static void log_tree_checksum_error(device_extension* Vcb, UINT64 addr, UINT64 devid, UINT64 root, UINT8 level, KEY* firstitem) {
370     scrub_error* err;
371 
372     err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG);
373     if (!err) {
374         ERR("out of memory\n");
375         return;
376     }
377 
378     err->address = addr;
379     err->device = devid;
380     err->recovered = FALSE;
381     err->is_metadata = TRUE;
382     err->parity = FALSE;
383 
384     err->metadata.root = root;
385     err->metadata.level = level;
386 
387     if (firstitem) {
388         ERR("root %llx, level %u, first item (%llx,%x,%llx)\n", root, level, firstitem->obj_id,
389                                                                 firstitem->obj_type, firstitem->offset);
390 
391         err->metadata.firstitem = *firstitem;
392     } else {
393         ERR("root %llx, level %u\n", root, level);
394 
395         RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY));
396     }
397 
398     ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
399 
400     Vcb->scrub.num_errors++;
401     InsertTailList(&Vcb->scrub.errors, &err->list_entry);
402 
403     ExReleaseResourceLite(&Vcb->scrub.stats_lock);
404 }
405 
406 static void log_tree_checksum_error_shared(device_extension* Vcb, UINT64 offset, UINT64 address, UINT64 devid) {
407     tree_header* tree;
408     NTSTATUS Status;
409     internal_node* in;
410     ULONG i;
411 
412     tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
413     if (!tree) {
414         ERR("out of memory\n");
415         return;
416     }
417 
418     Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
419     if (!NT_SUCCESS(Status)) {
420         ERR("read_data returned %08x\n", Status);
421         goto end;
422     }
423 
424     if (tree->level == 0) {
425         ERR("tree level was 0\n");
426         goto end;
427     }
428 
429     in = (internal_node*)&tree[1];
430 
431     for (i = 0; i < tree->num_items; i++) {
432         if (in[i].address == address) {
433             log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key);
434             break;
435         }
436     }
437 
438 end:
439     ExFreePool(tree);
440 }
441 
442 static void log_unrecoverable_error(device_extension* Vcb, UINT64 address, UINT64 devid) {
443     KEY searchkey;
444     traverse_ptr tp;
445     NTSTATUS Status;
446     EXTENT_ITEM* ei;
447     EXTENT_ITEM2* ei2 = NULL;
448     UINT8* ptr;
449     ULONG len;
450     UINT64 rc;
451 
452     // FIXME - still log even if rest of this function fails
453 
454     searchkey.obj_id = address;
455     searchkey.obj_type = TYPE_METADATA_ITEM;
456     searchkey.offset = 0xffffffffffffffff;
457 
458     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
459     if (!NT_SUCCESS(Status)) {
460         ERR("find_item returned %08x\n", Status);
461         return;
462     }
463 
464     if ((tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM) ||
465         tp.item->key.obj_id >= address + Vcb->superblock.sector_size ||
466         (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) ||
467         (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address)
468     )
469         return;
470 
471     if (tp.item->size < sizeof(EXTENT_ITEM)) {
472         ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
473         return;
474     }
475 
476     ei = (EXTENT_ITEM*)tp.item->data;
477     ptr = (UINT8*)&ei[1];
478     len = tp.item->size - sizeof(EXTENT_ITEM);
479 
480     if (tp.item->key.obj_id == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
481         if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
482             ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
483                                                                        tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
484             return;
485         }
486 
487         ei2 = (EXTENT_ITEM2*)ptr;
488 
489         ptr += sizeof(EXTENT_ITEM2);
490         len -= sizeof(EXTENT_ITEM2);
491     }
492 
493     rc = 0;
494 
495     while (len > 0) {
496         UINT8 type = *ptr;
497 
498         ptr++;
499         len--;
500 
501         if (type == TYPE_TREE_BLOCK_REF) {
502             TREE_BLOCK_REF* tbr;
503 
504             if (len < sizeof(TREE_BLOCK_REF)) {
505                 ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF), len);
506                 break;
507             }
508 
509             tbr = (TREE_BLOCK_REF*)ptr;
510 
511             log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
512 
513             rc++;
514 
515             ptr += sizeof(TREE_BLOCK_REF);
516             len -= sizeof(TREE_BLOCK_REF);
517         } else if (type == TYPE_EXTENT_DATA_REF) {
518             EXTENT_DATA_REF* edr;
519 
520             if (len < sizeof(EXTENT_DATA_REF)) {
521                 ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF), len);
522                 break;
523             }
524 
525             edr = (EXTENT_DATA_REF*)ptr;
526 
527             log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
528 
529             rc += edr->count;
530 
531             ptr += sizeof(EXTENT_DATA_REF);
532             len -= sizeof(EXTENT_DATA_REF);
533         } else if (type == TYPE_SHARED_BLOCK_REF) {
534             SHARED_BLOCK_REF* sbr;
535 
536             if (len < sizeof(SHARED_BLOCK_REF)) {
537                 ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF), len);
538                 break;
539             }
540 
541             sbr = (SHARED_BLOCK_REF*)ptr;
542 
543             log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid);
544 
545             rc++;
546 
547             ptr += sizeof(SHARED_BLOCK_REF);
548             len -= sizeof(SHARED_BLOCK_REF);
549         } else if (type == TYPE_SHARED_DATA_REF) {
550             SHARED_DATA_REF* sdr;
551 
552             if (len < sizeof(SHARED_DATA_REF)) {
553                 ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF), len);
554                 break;
555             }
556 
557             sdr = (SHARED_DATA_REF*)ptr;
558 
559             log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, tp.item->key.obj_id);
560 
561             rc += sdr->count;
562 
563             ptr += sizeof(SHARED_DATA_REF);
564             len -= sizeof(SHARED_DATA_REF);
565         } else {
566             ERR("unknown extent type %x\n", type);
567             break;
568         }
569     }
570 
571     if (rc < ei->refcount) {
572         do {
573             traverse_ptr next_tp;
574 
575             if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL))
576                 tp = next_tp;
577             else
578                 break;
579 
580             if (tp.item->key.obj_id == address) {
581                 if (tp.item->key.obj_type == TYPE_TREE_BLOCK_REF)
582                     log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
583                 else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
584                     EXTENT_DATA_REF* edr;
585 
586                     if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
587                         ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
588                                                                           tp.item->size, sizeof(EXTENT_DATA_REF));
589                         break;
590                     }
591 
592                     edr = (EXTENT_DATA_REF*)tp.item->data;
593 
594                     log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
595                 } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF)
596                     log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid);
597                 else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF)
598                     log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id);
599             } else
600                 break;
601         } while (TRUE);
602     }
603 }
604 
605 static void log_error(device_extension* Vcb, UINT64 addr, UINT64 devid, BOOL metadata, BOOL recoverable, BOOL parity) {
606     if (recoverable) {
607         scrub_error* err;
608 
609         if (parity) {
610             ERR("recovering from parity error at %llx on device %llx\n", addr, devid);
611         } else {
612             if (metadata)
613                 ERR("recovering from metadata checksum error at %llx on device %llx\n", addr, devid);
614             else
615                 ERR("recovering from data checksum error at %llx on device %llx\n", addr, devid);
616         }
617 
618         err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG);
619         if (!err) {
620             ERR("out of memory\n");
621             return;
622         }
623 
624         err->address = addr;
625         err->device = devid;
626         err->recovered = TRUE;
627         err->is_metadata = metadata;
628         err->parity = parity;
629 
630         if (metadata)
631             RtlZeroMemory(&err->metadata, sizeof(err->metadata));
632         else
633             RtlZeroMemory(&err->data, sizeof(err->data));
634 
635         ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
636 
637         Vcb->scrub.num_errors++;
638         InsertTailList(&Vcb->scrub.errors, &err->list_entry);
639 
640         ExReleaseResourceLite(&Vcb->scrub.stats_lock);
641     } else {
642         if (metadata)
643             ERR("unrecoverable metadata checksum error at %llx\n", addr);
644         else
645             ERR("unrecoverable data checksum error at %llx\n", addr);
646 
647         log_unrecoverable_error(Vcb, addr, devid);
648     }
649 }
650 
651 _Function_class_(IO_COMPLETION_ROUTINE)
652 #ifdef __REACTOS__
653 static NTSTATUS NTAPI scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
654 #else
655 static NTSTATUS scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
656 #endif
657     scrub_context_stripe* stripe = conptr;
658     scrub_context* context = (scrub_context*)stripe->context;
659     ULONG left = InterlockedDecrement(&context->stripes_left);
660 
661     UNUSED(DeviceObject);
662 
663     stripe->iosb = Irp->IoStatus;
664 
665     if (left == 0)
666         KeSetEvent(&context->Event, 0, FALSE);
667 
668     return STATUS_MORE_PROCESSING_REQUIRED;
669 }
670 
671 static NTSTATUS scrub_extent_dup(device_extension* Vcb, chunk* c, UINT64 offset, UINT32* csum, scrub_context* context) {
672     NTSTATUS Status;
673     BOOL csum_error = FALSE;
674     ULONG i;
675     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
676     UINT16 present_devices = 0;
677 
678     if (csum) {
679         ULONG good_stripe = 0xffffffff;
680 
681         for (i = 0; i < c->chunk_item->num_stripes; i++) {
682             if (c->devices[i]->devobj) {
683                 present_devices++;
684 
685                 // if first stripe is okay, we only need to check that the others are identical to it
686                 if (good_stripe != 0xffffffff) {
687                     if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
688                                         context->stripes[good_stripe].length) != context->stripes[i].length) {
689                         context->stripes[i].csum_error = TRUE;
690                         csum_error = TRUE;
691                         log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
692                     }
693                 } else {
694                     Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum);
695                     if (Status == STATUS_CRC_ERROR) {
696                         context->stripes[i].csum_error = TRUE;
697                         csum_error = TRUE;
698                         log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
699                     } else if (!NT_SUCCESS(Status)) {
700                         ERR("check_csum returned %08x\n", Status);
701                         return Status;
702                     } else
703                         good_stripe = i;
704                 }
705             }
706         }
707     } else {
708         ULONG good_stripe = 0xffffffff;
709 
710         for (i = 0; i < c->chunk_item->num_stripes; i++) {
711             ULONG j;
712 
713             if (c->devices[i]->devobj) {
714                 // if first stripe is okay, we only need to check that the others are identical to it
715                 if (good_stripe != 0xffffffff) {
716                     if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
717                                          context->stripes[good_stripe].length) != context->stripes[i].length) {
718                         context->stripes[i].csum_error = TRUE;
719                         csum_error = TRUE;
720                         log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
721                     }
722                 } else {
723                     for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
724                         tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
725                         UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
726 
727                         if (crc32 != *((UINT32*)th->csum) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) {
728                             context->stripes[i].csum_error = TRUE;
729                             csum_error = TRUE;
730                             log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
731                         }
732                     }
733 
734                     if (!context->stripes[i].csum_error)
735                         good_stripe = i;
736                 }
737             }
738         }
739     }
740 
741     if (!csum_error)
742         return STATUS_SUCCESS;
743 
744     // handle checksum error
745 
746     for (i = 0; i < c->chunk_item->num_stripes; i++) {
747         if (context->stripes[i].csum_error) {
748             if (csum) {
749                 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
750                 if (!context->stripes[i].bad_csums) {
751                     ERR("out of memory\n");
752                     return STATUS_INSUFFICIENT_RESOURCES;
753                 }
754 
755                 Status = calc_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums);
756                 if (!NT_SUCCESS(Status)) {
757                     ERR("calc_csum returned %08x\n", Status);
758                     return Status;
759                 }
760             } else {
761                 ULONG j;
762 
763                 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG);
764                 if (!context->stripes[i].bad_csums) {
765                     ERR("out of memory\n");
766                     return STATUS_INSUFFICIENT_RESOURCES;
767                 }
768 
769                 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
770                     tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
771                     UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
772 
773                     context->stripes[i].bad_csums[j] = crc32;
774                 }
775             }
776         }
777     }
778 
779     if (present_devices > 1) {
780         ULONG good_stripe = 0xffffffff;
781 
782         for (i = 0; i < c->chunk_item->num_stripes; i++) {
783             if (c->devices[i]->devobj && !context->stripes[i].csum_error) {
784                 good_stripe = i;
785                 break;
786             }
787         }
788 
789         if (good_stripe != 0xffffffff) {
790             // log
791 
792             for (i = 0; i < c->chunk_item->num_stripes; i++) {
793                 if (context->stripes[i].csum_error) {
794                     ULONG j;
795 
796                     if (csum) {
797                         for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
798                             if (context->stripes[i].bad_csums[j] != csum[j]) {
799                                 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
800 
801                                 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE);
802                                 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
803                             }
804                         }
805                     } else {
806                         for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
807                             tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
808                             UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
809 
810                             if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) {
811                                 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE);
812                                 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
813                             }
814                         }
815                     }
816                 }
817             }
818 
819             // write good data over bad
820 
821             for (i = 0; i < c->chunk_item->num_stripes; i++) {
822                 if (context->stripes[i].csum_error && !c->devices[i]->readonly) {
823                     Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset,
824                                              context->stripes[good_stripe].buf, context->stripes[i].length);
825 
826                     if (!NT_SUCCESS(Status)) {
827                         ERR("write_data_phys returned %08x\n", Status);
828                         log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS);
829                         return Status;
830                     }
831                 }
832             }
833 
834             return STATUS_SUCCESS;
835         }
836 
837         // if csum errors on all stripes, check sector by sector
838 
839         for (i = 0; i < c->chunk_item->num_stripes; i++) {
840             ULONG j;
841 
842             if (c->devices[i]->devobj) {
843                 if (csum) {
844                     for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
845                         if (context->stripes[i].bad_csums[j] != csum[j]) {
846                             ULONG k;
847                             UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
848                             BOOL recovered = FALSE;
849 
850                             for (k = 0; k < c->chunk_item->num_stripes; k++) {
851                                 if (i != k && c->devices[k]->devobj && context->stripes[k].bad_csums[j] == csum[j]) {
852                                     log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE);
853                                     log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
854 
855                                     RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size),
856                                                   context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
857 
858                                     recovered = TRUE;
859                                     break;
860                                 }
861                             }
862 
863                             if (!recovered) {
864                                 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE);
865                                 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
866                             }
867                         }
868                     }
869                 } else {
870                     for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
871                         tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
872                         UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
873 
874                         if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) {
875                             ULONG k;
876                             BOOL recovered = FALSE;
877 
878                             for (k = 0; k < c->chunk_item->num_stripes; k++) {
879                                 if (i != k && c->devices[k]->devobj) {
880                                     tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size];
881 
882                                     if (context->stripes[k].bad_csums[j] == *((UINT32*)th2->csum) && th2->address == addr) {
883                                         log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE);
884                                         log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
885 
886                                         RtlCopyMemory(th, th2, Vcb->superblock.node_size);
887 
888                                         recovered = TRUE;
889                                         break;
890                                     }
891                                 }
892                             }
893 
894                             if (!recovered) {
895                                 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE);
896                                 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
897                             }
898                         }
899                     }
900                 }
901             }
902         }
903 
904         // write good data over bad
905 
906         for (i = 0; i < c->chunk_item->num_stripes; i++) {
907             if (c->devices[i]->devobj && !c->devices[i]->readonly) {
908                 Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset,
909                                          context->stripes[i].buf, context->stripes[i].length);
910                 if (!NT_SUCCESS(Status)) {
911                     ERR("write_data_phys returned %08x\n", Status);
912                     log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
913                     return Status;
914                 }
915             }
916         }
917 
918         return STATUS_SUCCESS;
919     }
920 
921     for (i = 0; i < c->chunk_item->num_stripes; i++) {
922         if (c->devices[i]->devobj) {
923             ULONG j;
924 
925             if (csum) {
926                 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
927                     if (context->stripes[i].bad_csums[j] != csum[j]) {
928                         UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
929 
930                         log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE);
931                     }
932                 }
933             } else {
934                 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
935                     tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
936                     UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
937 
938                     if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr)
939                         log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE);
940                 }
941             }
942         }
943     }
944 
945     return STATUS_SUCCESS;
946 }
947 
948 static NTSTATUS scrub_extent_raid0(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, UINT16 startoffstripe, UINT32* csum, scrub_context* context) {
949     ULONG j;
950     UINT16 stripe;
951     UINT32 pos, *stripeoff;
952 
953     pos = 0;
954     stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes, ALLOC_TAG);
955     if (!stripeoff) {
956         ERR("out of memory\n");
957         return STATUS_INSUFFICIENT_RESOURCES;
958     }
959 
960     RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes);
961 
962     stripe = startoffstripe;
963     while (pos < length) {
964         UINT32 readlen;
965 
966         if (pos == 0)
967             readlen = (UINT32)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length));
968         else
969             readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
970 
971         if (csum) {
972             for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
973                 UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + stripeoff[stripe], Vcb->superblock.sector_size);
974 
975                 if (crc32 != csum[pos / Vcb->superblock.sector_size]) {
976                     UINT64 addr = offset + pos;
977 
978                     log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
979                     log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
980                 }
981 
982                 pos += Vcb->superblock.sector_size;
983                 stripeoff[stripe] += Vcb->superblock.sector_size;
984             }
985         } else {
986             for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
987                 tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]);
988                 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
989                 UINT64 addr = offset + pos;
990 
991                 if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
992                     log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
993                     log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
994                 }
995 
996                 pos += Vcb->superblock.node_size;
997                 stripeoff[stripe] += Vcb->superblock.node_size;
998             }
999         }
1000 
1001         stripe = (stripe + 1) % c->chunk_item->num_stripes;
1002     }
1003 
1004     ExFreePool(stripeoff);
1005 
1006     return STATUS_SUCCESS;
1007 }
1008 
1009 static NTSTATUS scrub_extent_raid10(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, UINT16 startoffstripe, UINT32* csum, scrub_context* context) {
1010     ULONG j;
1011     UINT16 stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1012     UINT32 pos, *stripeoff;
1013     BOOL csum_error = FALSE;
1014     NTSTATUS Status;
1015 
1016     pos = 0;
1017     stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
1018     if (!stripeoff) {
1019         ERR("out of memory\n");
1020         return STATUS_INSUFFICIENT_RESOURCES;
1021     }
1022 
1023     RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes);
1024 
1025     stripe = startoffstripe;
1026     while (pos < length) {
1027         UINT32 readlen;
1028 
1029         if (pos == 0)
1030             readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1031                                   c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1032         else
1033             readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1034 
1035         if (csum) {
1036             ULONG good_stripe = 0xffffffff;
1037             UINT16 k;
1038 
1039             for (k = 0; k < sub_stripes; k++) {
1040                 if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1041                     // if first stripe is okay, we only need to check that the others are identical to it
1042                     if (good_stripe != 0xffffffff) {
1043                         if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1044                                             context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1045                                             readlen) != readlen) {
1046                             context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1047                             csum_error = TRUE;
1048                             log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1049                         }
1050                     } else {
1051                         for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
1052                             UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, Vcb->superblock.sector_size);
1053 
1054                             if (crc32 != csum[(pos + j) / Vcb->superblock.sector_size]) {
1055                                 csum_error = TRUE;
1056                                 context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1057                                 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1058                                 break;
1059                             }
1060                         }
1061 
1062                         if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1063                             good_stripe = k;
1064                     }
1065                 }
1066             }
1067 
1068             pos += readlen;
1069             stripeoff[stripe] += readlen;
1070         } else {
1071             ULONG good_stripe = 0xffffffff;
1072             UINT16 k;
1073 
1074             for (k = 0; k < sub_stripes; k++) {
1075                 if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1076                     // if first stripe is okay, we only need to check that the others are identical to it
1077                     if (good_stripe != 0xffffffff) {
1078                         if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1079                                             context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1080                                             readlen) != readlen) {
1081                             context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1082                             csum_error = TRUE;
1083                             log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1084                         }
1085                     } else {
1086                         for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
1087                             tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
1088                             UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1089                             UINT64 addr = offset + pos + j;
1090 
1091                             if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
1092                                 csum_error = TRUE;
1093                                 context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1094                                 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1095                                 break;
1096                             }
1097                         }
1098 
1099                         if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1100                             good_stripe = k;
1101                     }
1102                 }
1103             }
1104 
1105             pos += readlen;
1106             stripeoff[stripe] += readlen;
1107         }
1108 
1109         stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1110     }
1111 
1112     if (!csum_error) {
1113         Status = STATUS_SUCCESS;
1114         goto end;
1115     }
1116 
1117     for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
1118         ULONG goodstripe = 0xffffffff;
1119         UINT16 k;
1120         BOOL hasbadstripe = FALSE;
1121 
1122         if (context->stripes[j].length == 0)
1123             continue;
1124 
1125         for (k = 0; k < sub_stripes; k++) {
1126             if (c->devices[j + k]->devobj) {
1127                 if (!context->stripes[j + k].csum_error)
1128                     goodstripe = k;
1129                 else
1130                     hasbadstripe = TRUE;
1131             }
1132         }
1133 
1134         if (hasbadstripe) {
1135             if (goodstripe != 0xffffffff) {
1136                 for (k = 0; k < sub_stripes; k++) {
1137                     if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
1138                         UINT32 so = 0;
1139                         BOOL recovered = FALSE;
1140 
1141                         pos = 0;
1142 
1143                         stripe = startoffstripe;
1144                         while (pos < length) {
1145                             UINT32 readlen;
1146 
1147                             if (pos == 0)
1148                                 readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1149                                               c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1150                             else
1151                                 readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1152 
1153                             if (stripe == j / sub_stripes) {
1154                                 if (csum) {
1155                                     ULONG l;
1156 
1157                                     for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1158                                         if (RtlCompareMemory(context->stripes[j + k].buf + so,
1159                                                              context->stripes[j + goodstripe].buf + so,
1160                                                              Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
1161                                             UINT64 addr = offset + pos;
1162 
1163                                             log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE);
1164 
1165                                             recovered = TRUE;
1166                                         }
1167 
1168                                         pos += Vcb->superblock.sector_size;
1169                                         so += Vcb->superblock.sector_size;
1170                                     }
1171                                 } else {
1172                                     ULONG l;
1173 
1174                                     for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1175                                         if (RtlCompareMemory(context->stripes[j + k].buf + so,
1176                                                             context->stripes[j + goodstripe].buf + so,
1177                                                             Vcb->superblock.node_size) != Vcb->superblock.node_size) {
1178                                             UINT64 addr = offset + pos;
1179 
1180                                             log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);
1181 
1182                                             recovered = TRUE;
1183                                         }
1184 
1185                                         pos += Vcb->superblock.node_size;
1186                                         so += Vcb->superblock.node_size;
1187                                     }
1188                                 }
1189                             } else
1190                                 pos += readlen;
1191 
1192                             stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1193                         }
1194 
1195                         if (recovered) {
1196                             // write good data over bad
1197 
1198                             if (!c->devices[j + k]->readonly) {
1199                                 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1200 
1201                                 Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
1202                                                          context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length);
1203 
1204                                 if (!NT_SUCCESS(Status)) {
1205                                     ERR("write_data_phys returned %08x\n", Status);
1206                                     log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS);
1207                                     goto end;
1208                                 }
1209                             }
1210                         }
1211                     }
1212                 }
1213             } else {
1214                 UINT32 so = 0;
1215                 BOOL recovered = FALSE;
1216 
1217                 if (csum) {
1218                     for (k = 0; k < sub_stripes; k++) {
1219                         if (c->devices[j + k]->devobj) {
1220                             context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
1221                             if (!context->stripes[j + k].bad_csums) {
1222                                 ERR("out of memory\n");
1223                                 Status = STATUS_INSUFFICIENT_RESOURCES;
1224                                 goto end;
1225                             }
1226 
1227                             Status = calc_csum(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums);
1228                             if (!NT_SUCCESS(Status)) {
1229                                 ERR("calc_csum returned %08x\n", Status);
1230                                 goto end;
1231                             }
1232                         }
1233                     }
1234                 } else {
1235                     for (k = 0; k < sub_stripes; k++) {
1236                         if (c->devices[j + k]->devobj) {
1237                             ULONG l;
1238 
1239                             context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG);
1240                             if (!context->stripes[j + k].bad_csums) {
1241                                 ERR("out of memory\n");
1242                                 Status = STATUS_INSUFFICIENT_RESOURCES;
1243                                 goto end;
1244                             }
1245 
1246                             for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) {
1247                                 tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size];
1248                                 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1249 
1250                                 context->stripes[j + k].bad_csums[l] = crc32;
1251                             }
1252                         }
1253                     }
1254                 }
1255 
1256                 pos = 0;
1257 
1258                 stripe = startoffstripe;
1259                 while (pos < length) {
1260                     UINT32 readlen;
1261 
1262                     if (pos == 0)
1263                         readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1264                                       c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1265                     else
1266                         readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1267 
1268                     if (stripe == j / sub_stripes) {
1269                         ULONG l;
1270 
1271                         if (csum) {
1272                             for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1273                                 UINT32 crc32 = csum[pos / Vcb->superblock.sector_size];
1274                                 BOOL has_error = FALSE;
1275 
1276                                 goodstripe = 0xffffffff;
1277                                 for (k = 0; k < sub_stripes; k++) {
1278                                     if (c->devices[j + k]->devobj) {
1279                                         if (context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32)
1280                                             has_error = TRUE;
1281                                         else
1282                                             goodstripe = k;
1283                                     }
1284                                 }
1285 
1286                                 if (has_error) {
1287                                     if (goodstripe != 0xffffffff) {
1288                                         for (k = 0; k < sub_stripes; k++) {
1289                                             if (c->devices[j + k]->devobj && context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) {
1290                                                 UINT64 addr = offset + pos;
1291 
1292                                                 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE);
1293 
1294                                                 recovered = TRUE;
1295 
1296                                                 RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so,
1297                                                               Vcb->superblock.sector_size);
1298                                             }
1299                                         }
1300                                     } else {
1301                                         UINT64 addr = offset + pos;
1302 
1303                                         for (k = 0; k < sub_stripes; k++) {
1304                                             if (c->devices[j + j]->devobj) {
1305                                                 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, FALSE, FALSE);
1306                                                 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1307                                             }
1308                                         }
1309                                     }
1310                                 }
1311 
1312                                 pos += Vcb->superblock.sector_size;
1313                                 so += Vcb->superblock.sector_size;
1314                             }
1315                         } else {
1316                             for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1317                                 for (k = 0; k < sub_stripes; k++) {
1318                                     if (c->devices[j + k]->devobj) {
1319                                         tree_header* th = (tree_header*)&context->stripes[j + k].buf[so];
1320                                         UINT64 addr = offset + pos;
1321 
1322                                         if (context->stripes[j + k].bad_csums[so / Vcb->superblock.node_size] != *((UINT32*)th->csum) || th->address != addr) {
1323                                             ULONG m;
1324 
1325                                             recovered = FALSE;
1326 
1327                                             for (m = 0; m < sub_stripes; m++) {
1328                                                 if (m != k) {
1329                                                     tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so];
1330 
1331                                                     if (context->stripes[j + m].bad_csums[so / Vcb->superblock.node_size] == *((UINT32*)th2->csum) && th2->address == addr) {
1332                                                         log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);
1333 
1334                                                         RtlCopyMemory(th, th2, Vcb->superblock.node_size);
1335 
1336                                                         recovered = TRUE;
1337                                                         break;
1338                                                     } else
1339                                                         log_device_error(Vcb, c->devices[j + m], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1340                                                 }
1341                                             }
1342 
1343                                             if (!recovered)
1344                                                 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, FALSE, FALSE);
1345                                         }
1346                                     }
1347                                 }
1348 
1349                                 pos += Vcb->superblock.node_size;
1350                                 so += Vcb->superblock.node_size;
1351                             }
1352                         }
1353                     } else
1354                         pos += readlen;
1355 
1356                     stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1357                 }
1358 
1359                 if (recovered) {
1360                     // write good data over bad
1361 
1362                     for (k = 0; k < sub_stripes; k++) {
1363                         if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) {
1364                             CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1365 
1366                             Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
1367                                                      context->stripes[j + k].buf, context->stripes[j + k].length);
1368 
1369                             if (!NT_SUCCESS(Status)) {
1370                                 ERR("write_data_phys returned %08x\n", Status);
1371                                 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS);
1372                                 goto end;
1373                             }
1374                         }
1375                     }
1376                 }
1377             }
1378         }
1379     }
1380 
1381     Status = STATUS_SUCCESS;
1382 
1383 end:
1384     ExFreePool(stripeoff);
1385 
1386     return Status;
1387 }
1388 
1389 static NTSTATUS scrub_extent(device_extension* Vcb, chunk* c, ULONG type, UINT64 offset, UINT32 size, UINT32* csum) {
1390     ULONG i;
1391     scrub_context context;
1392     CHUNK_ITEM_STRIPE* cis;
1393     NTSTATUS Status;
1394     UINT16 startoffstripe, num_missing, allowed_missing;
1395 
1396     TRACE("(%p, %p, %llx, %llx, %p)\n", Vcb, c, offset, size, csum);
1397 
1398     context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1399     if (!context.stripes) {
1400         ERR("out of memory\n");
1401         Status = STATUS_INSUFFICIENT_RESOURCES;
1402         goto end;
1403     }
1404 
1405     RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes);
1406 
1407     context.stripes_left = 0;
1408 
1409     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1410 
1411     if (type == BLOCK_FLAG_RAID0) {
1412         UINT64 startoff, endoff;
1413         UINT16 endoffstripe;
1414 
1415         get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
1416         get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
1417 
1418         for (i = 0; i < c->chunk_item->num_stripes; i++) {
1419             if (startoffstripe > i)
1420                 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1421             else if (startoffstripe == i)
1422                 context.stripes[i].start = startoff;
1423             else
1424                 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1425 
1426             if (endoffstripe > i)
1427                 context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1428             else if (endoffstripe == i)
1429                 context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
1430             else
1431                 context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1432         }
1433 
1434         allowed_missing = 0;
1435     } else if (type == BLOCK_FLAG_RAID10) {
1436         UINT64 startoff, endoff;
1437         UINT16 endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1438 
1439         get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
1440         get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
1441 
1442         if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
1443             ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
1444             Status = STATUS_INTERNAL_ERROR;
1445             goto end;
1446         }
1447 
1448         startoffstripe *= sub_stripes;
1449         endoffstripe *= sub_stripes;
1450 
1451         for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
1452             if (startoffstripe > i)
1453                 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1454             else if (startoffstripe == i)
1455                 context.stripes[i].start = startoff;
1456             else
1457                 context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1458 
1459             if (endoffstripe > i)
1460                 context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1461             else if (endoffstripe == i)
1462                 context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
1463             else
1464                 context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1465 
1466             for (j = 1; j < sub_stripes; j++) {
1467                 context.stripes[i+j].start = context.stripes[i].start;
1468                 context.stripes[i+j].length = context.stripes[i].length;
1469             }
1470         }
1471 
1472         startoffstripe /= sub_stripes;
1473         allowed_missing = 1;
1474     } else
1475         allowed_missing = c->chunk_item->num_stripes - 1;
1476 
1477     num_missing = 0;
1478 
1479     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1480         PIO_STACK_LOCATION IrpSp;
1481 
1482         context.stripes[i].context = (struct _scrub_context*)&context;
1483 
1484         if (type == BLOCK_FLAG_DUPLICATE) {
1485             context.stripes[i].start = offset - c->offset;
1486             context.stripes[i].length = size;
1487         } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
1488             ERR("unexpected chunk type %x\n", type);
1489             Status = STATUS_INTERNAL_ERROR;
1490             goto end;
1491         }
1492 
1493         if (!c->devices[i]->devobj) {
1494             num_missing++;
1495 
1496             if (num_missing > allowed_missing) {
1497                 ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
1498                 Status = STATUS_INTERNAL_ERROR;
1499                 goto end;
1500             }
1501         } else if (context.stripes[i].length > 0) {
1502             context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1503 
1504             if (!context.stripes[i].buf) {
1505                 ERR("out of memory\n");
1506                 Status = STATUS_INSUFFICIENT_RESOURCES;
1507                 goto end;
1508             }
1509 
1510             context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);
1511 
1512             if (!context.stripes[i].Irp) {
1513                 ERR("IoAllocateIrp failed\n");
1514                 Status = STATUS_INSUFFICIENT_RESOURCES;
1515                 goto end;
1516             }
1517 
1518             IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
1519             IrpSp->MajorFunction = IRP_MJ_READ;
1520 
1521             if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
1522                 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1523                 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
1524                     ERR("out of memory\n");
1525                     Status = STATUS_INSUFFICIENT_RESOURCES;
1526                     goto end;
1527                 }
1528 
1529                 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
1530 
1531                 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1532             } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
1533                 context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, FALSE, FALSE, NULL);
1534                 if (!context.stripes[i].Irp->MdlAddress) {
1535                     ERR("IoAllocateMdl failed\n");
1536                     Status = STATUS_INSUFFICIENT_RESOURCES;
1537                     goto end;
1538                 }
1539 
1540                 Status = STATUS_SUCCESS;
1541 
1542                 _SEH2_TRY {
1543                     MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
1544                 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1545                     Status = _SEH2_GetExceptionCode();
1546                 } _SEH2_END;
1547 
1548                 if (!NT_SUCCESS(Status)) {
1549                     ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1550                     IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1551                     context.stripes[i].Irp->MdlAddress = NULL;
1552                     goto end;
1553                 }
1554             } else
1555                 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1556 
1557             IrpSp->Parameters.Read.Length = context.stripes[i].length;
1558             IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;
1559 
1560             context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
1561 
1562             IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], TRUE, TRUE, TRUE);
1563 
1564             context.stripes_left++;
1565 
1566             Vcb->scrub.data_scrubbed += context.stripes[i].length;
1567         }
1568     }
1569 
1570     if (context.stripes_left == 0) {
1571         ERR("error - not reading any stripes\n");
1572         Status = STATUS_INTERNAL_ERROR;
1573         goto end;
1574     }
1575 
1576     KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
1577 
1578     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1579         if (c->devices[i]->devobj && context.stripes[i].length > 0)
1580             IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
1581     }
1582 
1583     KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
1584 
1585     // return an error if any of the stripes returned an error
1586     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1587         if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
1588             Status = context.stripes[i].iosb.Status;
1589             log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
1590             goto end;
1591         }
1592     }
1593 
1594     if (type == BLOCK_FLAG_DUPLICATE) {
1595         Status = scrub_extent_dup(Vcb, c, offset, csum, &context);
1596         if (!NT_SUCCESS(Status)) {
1597             ERR("scrub_extent_dup returned %08x\n", Status);
1598             goto end;
1599         }
1600     } else if (type == BLOCK_FLAG_RAID0) {
1601         Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
1602         if (!NT_SUCCESS(Status)) {
1603             ERR("scrub_extent_raid0 returned %08x\n", Status);
1604             goto end;
1605         }
1606     } else if (type == BLOCK_FLAG_RAID10) {
1607         Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
1608         if (!NT_SUCCESS(Status)) {
1609             ERR("scrub_extent_raid10 returned %08x\n", Status);
1610             goto end;
1611         }
1612     }
1613 
1614 end:
1615     if (context.stripes) {
1616         for (i = 0; i < c->chunk_item->num_stripes; i++) {
1617             if (context.stripes[i].Irp) {
1618                 if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
1619                     MmUnlockPages(context.stripes[i].Irp->MdlAddress);
1620                     IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1621                 }
1622                 IoFreeIrp(context.stripes[i].Irp);
1623             }
1624 
1625             if (context.stripes[i].buf)
1626                 ExFreePool(context.stripes[i].buf);
1627 
1628             if (context.stripes[i].bad_csums)
1629                 ExFreePool(context.stripes[i].bad_csums);
1630         }
1631 
1632         ExFreePool(context.stripes);
1633     }
1634 
1635     return Status;
1636 }
1637 
1638 static NTSTATUS scrub_data_extent(device_extension* Vcb, chunk* c, UINT64 offset, ULONG type, UINT32* csum, RTL_BITMAP* bmp) {
1639     NTSTATUS Status;
1640     ULONG runlength, index;
1641 
1642     runlength = RtlFindFirstRunClear(bmp, &index);
1643 
1644     while (runlength != 0) {
1645         do {
1646             ULONG rl;
1647 
1648             if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT)
1649                 rl = SCRUB_UNIT / Vcb->superblock.sector_size;
1650             else
1651                 rl = runlength;
1652 
1653             Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index]);
1654             if (!NT_SUCCESS(Status)) {
1655                 ERR("scrub_data_extent_dup returned %08x\n", Status);
1656                 return Status;
1657             }
1658 
1659             runlength -= rl;
1660             index += rl;
1661         } while (runlength > 0);
1662 
1663         runlength = RtlFindNextForwardRunClear(bmp, index, &index);
1664     }
1665 
1666     return STATUS_SUCCESS;
1667 }
1668 
/* Per-device state used by the RAID5/6 scrub code; one entry per stripe of the chunk
 * (see scrub_context_raid56.stripes). */
1669 typedef struct {
1670     UINT8* buf; // data for this device; the stripe checkers index it by (row number * stripe_length)
1671     PIRP Irp; // presumably the read IRP issued for this device - not referenced by the checkers; confirm against the caller
1672     void* context; // back-pointer; scrub_read_completion_raid56 casts it to scrub_context_raid56*
1673     IO_STATUS_BLOCK iosb; // copy of Irp->IoStatus, stored by scrub_read_completion_raid56
1674     UINT64 offset; // NOTE(review): not used by the code shown here - meaning to be confirmed against the caller
1675     BOOL rewrite, missing; // rewrite: set to TRUE by the checkers after they repair buf in memory; missing: presumably "device absent" - confirm
1676     RTL_BITMAP error; // one bit per sector of the row being checked; cleared at the start of each row, set for sectors that fail
1677     ULONG* errorarr; // presumably the backing storage for `error` - confirm where the bitmap is initialised
1678 } scrub_context_raid56_stripe;
1679 
/* Shared state for one RAID5/6 scrub pass: the per-device entries, the completion
 * bookkeeping, and the per-sector metadata consulted by the stripe checkers. */
1680 typedef struct {
1681     scrub_context_raid56_stripe* stripes; // indexed by stripe (device) number within the chunk
1682     LONG stripes_left; // decremented by scrub_read_completion_raid56 for each completed IRP
1683     KEVENT Event; // set by the completion routine when stripes_left reaches zero
1684     RTL_BITMAP alloc; // per-sector: sector is checked only when its bit is set
1685     RTL_BITMAP has_csum; // per-sector: sector is verified against the matching entry of csum
1686     RTL_BITMAP is_tree; // per-sector: sector is verified as the start of a tree block (header checksum and address)
1687     UINT32* csum; // expected checksums, indexed by the same sector offset as the bitmaps above
1688     UINT8* parity_scratch; // working buffer; the checkers use stripe_length bytes of it for the XOR parity
1689     UINT8* parity_scratch2; // second working buffer of stripe_length bytes, used by the RAID6 checker for its second parity
1690 } scrub_context_raid56;
1691 
1692 _Function_class_(IO_COMPLETION_ROUTINE)
1693 #ifdef __REACTOS__
1694 static NTSTATUS NTAPI scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1695 #else
1696 static NTSTATUS scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1697 #endif
1698     scrub_context_raid56_stripe* stripe = conptr;
1699     scrub_context_raid56* context = (scrub_context_raid56*)stripe->context;
1700     LONG left = InterlockedDecrement(&context->stripes_left);
1701 
1702     UNUSED(DeviceObject);
1703 
1704     stripe->iosb = Irp->IoStatus;
1705 
1706     if (left == 0)
1707         KeSetEvent(&context->Event, 0, FALSE);
1708 
1709     return STATUS_MORE_PROCESSING_REQUIRED;
1710 }
1711 
/* Checks one row (index `num` within the buffers held in context) of a RAID5 chunk and
 * repairs, in memory, whatever it can.
 *
 * Phase 1: for every data stripe of the row, each allocated sector on a present device is
 *          verified - as a tree block (header crc32c and address) when flagged in
 *          context->is_tree, otherwise against context->csum when flagged in
 *          context->has_csum. Failures set bits in that stripe's `error` bitmap and are
 *          counted with log_device_error. With no missing devices, the parity buffer and
 *          every data buffer of the row are combined into context->parity_scratch with do_xor.
 * Phase 2: (no missing devices only) any sector of parity_scratch containing a non-zero byte
 *          is flagged in the parity stripe's `error` bitmap.
 * Phase 3: (no missing devices only) per sector position: a flagged parity sector with no bad
 *          data sector is corrected in the parity buffer; exactly one bad data sector is
 *          reconstructed from parity_scratch and kept only if it then verifies; with more
 *          than one bad data sector each is passed to log_error and nothing is changed.
 *
 * Buffers that were changed get `rewrite` set; this function performs no I/O itself.
 * With missing_devices > 0 errors are reported during phase 1 and no repair is attempted.
 */
1712 static void scrub_raid5_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start,
1713                                UINT64 num, UINT16 missing_devices) {
1714     ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
     // parity: index of the stripe holding parity for this row; it depends on (bit_start + num), so it rotates from row to row
1715     UINT16 stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
1716     UINT64 stripeoff;
1717 
     // data stripes are visited starting with the one after parity, wrapping round until parity is reached again
1718     stripe = (parity + 1) % c->chunk_item->num_stripes;
     // off: sector index into context->alloc / is_tree / has_csum / csum for the first data sector of this row
1719     off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
     // stripeoff: sector index of this row inside each device buffer
1720     stripeoff = num * sectors_per_stripe;
1721 
     // seed the scratch buffer with the parity data; each data stripe is combined into it below
1722     if (missing_devices == 0)
1723         RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1724 
1725     while (stripe != parity) {
1726         RtlClearAllBits(&context->stripes[stripe].error);
1727 
1728         for (i = 0; i < sectors_per_stripe; i++) {
1729             if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1730                 if (RtlCheckBit(&context->is_tree, off)) {
                     // tree block: validate the checksum stored in the header and the address it claims to be at
1731                     tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1732                     UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1733                     UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1734 
1735                     if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
                         // flag every sector covered by the node
1736                         RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
1737                         log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1738 
                         // no repair pass will run when a device is missing, so report it here
1739                         if (missing_devices > 0)
1740                             log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
1741                     }
1742 
                     // step over the whole node; i advances by one less because the for loop's i++ supplies the last step
1743                     off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1744                     stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1745                     i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1746 
1747                     continue;
1748                 } else if (RtlCheckBit(&context->has_csum, off)) {
                     // data sector with a stored checksum
1749                     UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1750 
1751                     if (crc32 != context->csum[off]) {
1752                         RtlSetBit(&context->stripes[stripe].error, i);
1753                         log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1754 
1755                         if (missing_devices > 0) {
1756                             UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1757 
1758                             log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
1759                         }
1760                     }
1761                 }
1762             }
1763 
1764             off++;
1765             stripeoff++;
1766         }
1767 
1768         if (missing_devices == 0)
1769             do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1770 
         // next data stripe; off keeps counting across stripes, stripeoff restarts at this row's position in the buffer
1771         stripe = (stripe + 1) % c->chunk_item->num_stripes;
1772         stripeoff = num * sectors_per_stripe;
1773     }
1774 
1775     // check parity
1776 
1777     if (missing_devices == 0) {
1778         RtlClearAllBits(&context->stripes[parity].error);
1779 
         // a sector of the scratch buffer containing any non-zero byte is flagged as a parity mismatch
1780         for (i = 0; i < sectors_per_stripe; i++) {
1781             ULONG o, j;
1782 
1783             o = i * Vcb->superblock.sector_size;
1784             for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1785                 if (context->parity_scratch[o] != 0) {
1786                     RtlSetBit(&context->stripes[parity].error, i);
1787                     break;
1788                 }
1789                 o++;
1790             }
1791         }
1792     }
1793 
1794     // log and fix errors
1795 
1796     if (missing_devices > 0)
1797         return;
1798 
     // walk the row one sector position at a time, looking at that position on every data stripe
1799     for (i = 0; i < sectors_per_stripe; i++) {
         // bad_off / bad_stripe are only assigned (and only read) when at least one bad data sector was found
1800         ULONG num_errors = 0, bad_off;
1801         UINT64 bad_stripe;
1802         BOOL alloc = FALSE;
1803 
1804         stripe = (parity + 1) % c->chunk_item->num_stripes;
1805         off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1806 
1807         while (stripe != parity) {
1808             if (RtlCheckBit(&context->alloc, off)) {
1809                 alloc = TRUE;
1810 
1811                 if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1812                     bad_stripe = stripe;
1813                     bad_off = off;
1814                     num_errors++;
1815                 }
1816             }
1817 
             // the same sector position on the next data stripe is sectors_per_stripe bits further on
1818             off += sectors_per_stripe;
1819             stripe = (stripe + 1) % c->chunk_item->num_stripes;
1820         }
1821 
         // nothing allocated at this sector position on any data stripe: nothing to verify or repair
1822         if (!alloc)
1823             continue;
1824 
1825         if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
1826             continue;
1827 
1828         if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
1829             UINT64 addr;
1830 
             // all data sectors verified, so the parity sector is the wrong one: apply the scratch contents to it
1831             do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1832                    &context->parity_scratch[i * Vcb->superblock.sector_size],
1833                    Vcb->superblock.sector_size);
1834 
1835             bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1836             addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1837 
1838             context->stripes[parity].rewrite = TRUE;
1839 
1840             log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, FALSE, TRUE, TRUE);
1841             log_device_error(Vcb, c->devices[parity], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1842         } else if (num_errors == 1) {
1843             UINT32 crc32;
1844             UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1845 
1846             if (RtlCheckBit(&context->is_tree, bad_off)) {
1847                 tree_header* th;
1848 
                 // combine the bad node into the scratch once more to obtain the candidate replacement (node_size bytes)
1849                 do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1850                        &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1851                        Vcb->superblock.node_size);
1852 
1853                 th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
1854                 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1855 
                 // accept the candidate only if it passes the same header checks as in the first pass
1856                 if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
1857                     RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1858                                   &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
1859 
1860                     context->stripes[bad_stripe].rewrite = TRUE;
1861 
                     // the remaining sectors of the node have been fixed too; stop later iterations treating them as errors
1862                     RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
1863 
1864                     log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, TRUE, FALSE);
1865                 } else
1866                     log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
1867             } else {
                 // same approach for a single data sector, verified against its stored checksum
1868                 do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1869                        &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1870                        Vcb->superblock.sector_size);
1871 
1872                 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
1873 
1874                 if (crc32 == context->csum[bad_off]) {
1875                     RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1876                                   &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
1877 
1878                     context->stripes[bad_stripe].rewrite = TRUE;
1879 
1880                     log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, TRUE, FALSE);
1881                 } else
1882                     log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
1883             }
1884         } else {
             // more than one bad data sector at this position: no reconstruction is attempted, each one is just logged
1885             stripe = (parity + 1) % c->chunk_item->num_stripes;
1886             off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1887 
1888             while (stripe != parity) {
1889                 if (RtlCheckBit(&context->alloc, off)) {
1890                     if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1891                         UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1892 
1893                         log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
1894                     }
1895                 }
1896 
1897                 off += sectors_per_stripe;
1898                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1899             }
1900         }
1901     }
1902 }
1903 
1904 static void scrub_raid6_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start,
1905                                UINT64 num, UINT16 missing_devices) {
1906     ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
1907     UINT16 stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1908     UINT16 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1909     UINT64 stripeoff;
1910 
1911     stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1912     off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
1913     stripeoff = num * sectors_per_stripe;
1914 
1915     if (c->devices[parity1]->devobj)
1916         RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1917 
1918     if (c->devices[parity2]->devobj)
1919         RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);
1920 
1921     while (stripe != parity1) {
1922         RtlClearAllBits(&context->stripes[stripe].error);
1923 
1924         for (i = 0; i < sectors_per_stripe; i++) {
1925             if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1926                 if (RtlCheckBit(&context->is_tree, off)) {
1927                     tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1928                     UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1929                     UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1930 
1931                     if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
1932                         RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
1933                         log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1934 
1935                         if (missing_devices == 2)
1936                             log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
1937                     }
1938 
1939                     off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1940                     stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1941                     i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1942 
1943                     continue;
1944                 } else if (RtlCheckBit(&context->has_csum, off)) {
1945                     UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1946 
1947                     if (crc32 != context->csum[off]) {
1948                         UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1949 
1950                         RtlSetBit(&context->stripes[stripe].error, i);
1951                         log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1952 
1953                         if (missing_devices == 2)
1954                             log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
1955                     }
1956                 }
1957             }
1958 
1959             off++;
1960             stripeoff++;
1961         }
1962 
1963         if (c->devices[parity1]->devobj)
1964             do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);
1965 
1966         stripe = (stripe + 1) % c->chunk_item->num_stripes;
1967         stripeoff = num * sectors_per_stripe;
1968     }
1969 
1970     RtlClearAllBits(&context->stripes[parity1].error);
1971 
1972     if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
1973         // check parity 1
1974 
1975         for (i = 0; i < sectors_per_stripe; i++) {
1976             ULONG o, j;
1977 
1978             o = i * Vcb->superblock.sector_size;
1979             for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1980                 if (context->parity_scratch[o] != 0) {
1981                     RtlSetBit(&context->stripes[parity1].error, i);
1982                     break;
1983                 }
1984                 o++;
1985             }
1986         }
1987     }
1988 
1989     RtlClearAllBits(&context->stripes[parity2].error);
1990 
1991     if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
1992         // check parity 2
1993 
1994         stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1995 
1996         while (stripe != parity2) {
1997             galois_double(context->parity_scratch2, (UINT32)c->chunk_item->stripe_length);
1998             do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);
1999 
2000             stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2001         }
2002 
2003         for (i = 0; i < sectors_per_stripe; i++) {
2004             if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2005                                 &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size)
2006                 RtlSetBit(&context->stripes[parity2].error, i);
2007         }
2008     }
2009 
2010     if (missing_devices == 2)
2011         return;
2012 
2013     // log and fix errors
2014 
2015     for (i = 0; i < sectors_per_stripe; i++) {
2016         ULONG num_errors = 0;
2017         UINT64 bad_stripe1, bad_stripe2;
2018         ULONG bad_off1, bad_off2;
2019         BOOL alloc = FALSE;
2020 
2021         stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2022         off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2023 
2024         while (stripe != parity1) {
2025             if (RtlCheckBit(&context->alloc, off)) {
2026                 alloc = TRUE;
2027 
2028                 if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
2029                     if (num_errors == 0) {
2030                         bad_stripe1 = stripe;
2031                         bad_off1 = off;
2032                     } else if (num_errors == 1) {
2033                         bad_stripe2 = stripe;
2034                         bad_off2 = off;
2035                     }
2036                     num_errors++;
2037                 }
2038             }
2039 
2040             off += sectors_per_stripe;
2041             stripe = (stripe + 1) % c->chunk_item->num_stripes;
2042         }
2043 
2044         if (!alloc)
2045             continue;
2046 
2047         if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
2048             continue;
2049 
2050         if (num_errors == 0) { // parity error
2051             UINT64 addr;
2052 
2053             if (RtlCheckBit(&context->stripes[parity1].error, i)) {
2054                 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2055                        &context->parity_scratch[i * Vcb->superblock.sector_size],
2056                        Vcb->superblock.sector_size);
2057 
2058                 bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2059                 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2060 
2061                 context->stripes[parity1].rewrite = TRUE;
2062 
2063                 log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
2064                 log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
2065             }
2066 
2067             if (RtlCheckBit(&context->stripes[parity2].error, i)) {
2068                 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2069                               &context->parity_scratch2[i * Vcb->superblock.sector_size],
2070                               Vcb->superblock.sector_size);
2071 
2072                 bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2073                 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2074 
2075                 context->stripes[parity2].rewrite = TRUE;
2076 
2077                 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
2078                 log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
2079             }
2080         } else if (num_errors == 1) {
2081             UINT32 crc32a, crc32b, len;
2082             UINT16 stripe_num, bad_stripe_num;
2083             UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2084             UINT8* scratch;
2085 
2086             len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2087 
2088             scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
2089             if (!scratch) {
2090                 ERR("out of memory\n");
2091                 return;
2092             }
2093 
2094             RtlZeroMemory(scratch, len);
2095 
2096             do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2097                    &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2098 
2099             stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2100 
2101             if (c->devices[parity2]->devobj) {
2102                 stripe_num = c->chunk_item->num_stripes - 3;
2103                 while (stripe != parity2) {
2104                     galois_double(scratch, len);
2105 
2106                     if (stripe != bad_stripe1)
2107                         do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2108                     else
2109                         bad_stripe_num = stripe_num;
2110 
2111                     stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2112                     stripe_num--;
2113                 }
2114 
2115                 do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2116 
2117                 if (bad_stripe_num != 0)
2118                     galois_divpower(scratch, (UINT8)bad_stripe_num, len);
2119             }
2120 
2121             if (RtlCheckBit(&context->is_tree, bad_off1)) {
2122                 tree_header *th1 = NULL, *th2 = NULL;
2123 
2124                 if (c->devices[parity1]->devobj) {
2125                     th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2126                     crc32a = ~calc_crc32c(0xffffffff, (UINT8*)&th1->fs_uuid, Vcb->superblock.node_size - sizeof(th1->csum));
2127                 }
2128 
2129                 if (c->devices[parity2]->devobj) {
2130                     th2 = (tree_header*)scratch;
2131                     crc32b = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, Vcb->superblock.node_size - sizeof(th2->csum));
2132                 }
2133 
2134                 if ((c->devices[parity1]->devobj && crc32a == *((UINT32*)th1->csum) && th1->address == addr) ||
2135                     (c->devices[parity2]->devobj && crc32b == *((UINT32*)th2->csum) && th2->address == addr)) {
2136                     if (!c->devices[parity1]->devobj || crc32a != *((UINT32*)th1->csum) || th1->address != addr) {
2137                         RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2138                                       scratch, Vcb->superblock.node_size);
2139 
2140                         if (c->devices[parity1]->devobj) {
2141                             // fix parity 1
2142 
2143                             stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2144 
2145                             RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2146                                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2147                                           Vcb->superblock.node_size);
2148 
2149                             stripe = (stripe + 1) % c->chunk_item->num_stripes;
2150 
2151                             while (stripe != parity1) {
2152                                 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2153                                        &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2154                                        Vcb->superblock.node_size);
2155 
2156                                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
2157                             }
2158 
2159                             context->stripes[parity1].rewrite = TRUE;
2160 
2161                             log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
2162                             log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
2163                         }
2164                     } else {
2165                         RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2166                                       &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2167 
2168                         if (!c->devices[parity2]->devobj || crc32b != *((UINT32*)th2->csum) || th2->address != addr) {
2169                             // fix parity 2
2170                             stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2171 
2172                             if (c->devices[parity2]->devobj) {
2173                                 RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2174                                             &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2175                                             Vcb->superblock.node_size);
2176 
2177                                 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2178 
2179                                 while (stripe != parity2) {
2180                                     galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size);
2181 
2182                                     do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2183                                         &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2184                                         Vcb->superblock.node_size);
2185 
2186                                     stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2187                                 }
2188 
2189                                 context->stripes[parity2].rewrite = TRUE;
2190 
2191                                 log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
2192                                 log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
2193                             }
2194                         }
2195                     }
2196 
2197                     context->stripes[bad_stripe1].rewrite = TRUE;
2198 
2199                     RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2200 
2201                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
2202                 } else
2203                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
2204             } else {
2205                 if (c->devices[parity1]->devobj)
2206                     crc32a = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2207 
2208                 if (c->devices[parity2]->devobj)
2209                     crc32b = ~calc_crc32c(0xffffffff, scratch, Vcb->superblock.sector_size);
2210 
2211                 if ((c->devices[parity1]->devobj && crc32a == context->csum[bad_off1]) || (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1])) {
2212                     if (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1]) {
2213                         RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2214                                       scratch, Vcb->superblock.sector_size);
2215 
2216                         if (c->devices[parity1]->devobj && crc32a != context->csum[bad_off1]) {
2217                             // fix parity 1
2218 
2219                             stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2220 
2221                             RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2222                                         &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2223                                         Vcb->superblock.sector_size);
2224 
2225                             stripe = (stripe + 1) % c->chunk_item->num_stripes;
2226 
2227                             while (stripe != parity1) {
2228                                 do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2229                                     &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2230                                     Vcb->superblock.sector_size);
2231 
2232                                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
2233                             }
2234 
2235                             context->stripes[parity1].rewrite = TRUE;
2236 
2237                             log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
2238                             log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
2239                         }
2240                     } else {
2241                         RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2242                                       &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2243 
2244                         if (c->devices[parity2]->devobj && crc32b != context->csum[bad_off1]) {
2245                             // fix parity 2
2246                             stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2247 
2248                             RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2249                                         &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2250                                         Vcb->superblock.sector_size);
2251 
2252                             stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2253 
2254                             while (stripe != parity2) {
2255                                 galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
2256 
2257                                 do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2258                                        &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2259                                        Vcb->superblock.sector_size);
2260 
2261                                 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2262                             }
2263 
2264                             context->stripes[parity2].rewrite = TRUE;
2265 
2266                             log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
2267                             log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
2268                         }
2269                     }
2270 
2271                     context->stripes[bad_stripe1].rewrite = TRUE;
2272 
2273                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
2274                 } else
2275                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
2276             }
2277 
2278             ExFreePool(scratch);
2279         } else if (num_errors == 2 && missing_devices == 0) {
2280             UINT16 x, y, k;
2281             UINT64 addr;
2282             UINT32 len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2283             UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
2284             UINT32 j;
2285 
2286             stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2287 
2288             // put qxy in parity_scratch
2289             // put pxy in parity_scratch2
2290 
2291             k = c->chunk_item->num_stripes - 3;
2292             if (stripe == bad_stripe1 || stripe == bad_stripe2) {
2293                 RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2294                 RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len);
2295 
2296                 if (stripe == bad_stripe1)
2297                     x = k;
2298                 else
2299                     y = k;
2300             } else {
2301                 RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size],
2302                               &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2303                 RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size],
2304                               &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2305             }
2306 
2307             stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2308 
2309             k--;
2310             do {
2311                 galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2312 
2313                 if (stripe != bad_stripe1 && stripe != bad_stripe2) {
2314                     do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2315                            &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2316                     do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size],
2317                            &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2318                 } else if (stripe == bad_stripe1)
2319                     x = k;
2320                 else if (stripe == bad_stripe2)
2321                     y = k;
2322 
2323                 stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2324                 k--;
2325             } while (stripe != parity2);
2326 
2327             gyx = gpow2(y > x ? (y-x) : (255-x+y));
2328             gx = gpow2(255-x);
2329 
2330             denom = gdiv(1, gyx ^ 1);
2331             a = gmul(gyx, denom);
2332             b = gmul(gx, denom);
2333 
2334             p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2335             q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2336             pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size];
2337             qxy = &context->parity_scratch[i * Vcb->superblock.sector_size];
2338 
2339             for (j = 0; j < len; j++) {
2340                 *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
2341 
2342                 p++;
2343                 q++;
2344                 pxy++;
2345                 qxy++;
2346             }
2347 
2348             do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len);
2349             do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2350 
2351             addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2352 
2353             if (RtlCheckBit(&context->is_tree, bad_off1)) {
2354                 tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2355                 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
2356 
2357                 if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
2358                     RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2359                                   &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2360 
2361                     context->stripes[bad_stripe1].rewrite = TRUE;
2362 
2363                     RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2364 
2365                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
2366                 } else
2367                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
2368             } else {
2369                 UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2370 
2371                 if (crc32 == context->csum[bad_off1]) {
2372                     RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2373                                   &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2374 
2375                     context->stripes[bad_stripe1].rewrite = TRUE;
2376 
2377                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
2378                 } else
2379                     log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
2380             }
2381 
2382             addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size);
2383 
2384             if (RtlCheckBit(&context->is_tree, bad_off2)) {
2385                 tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size];
2386                 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
2387 
2388                 if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
2389                     RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2390                                   &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2391 
2392                     context->stripes[bad_stripe2].rewrite = TRUE;
2393 
2394                     RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2395 
2396                     log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, TRUE, FALSE);
2397                 } else
2398                     log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, FALSE, FALSE);
2399             } else {
2400                 UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2401 
2402                 if (crc32 == context->csum[bad_off2]) {
2403                     RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2404                                   &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2405 
2406                     context->stripes[bad_stripe2].rewrite = TRUE;
2407 
2408                     log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, TRUE, FALSE);
2409                 } else
2410                     log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, FALSE, FALSE);
2411             }
2412         } else {
2413             stripe = (parity2 + 1) % c->chunk_item->num_stripes;
2414             off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2415 
2416             while (stripe != parity1) {
2417                 if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
2418                     if (RtlCheckBit(&context->stripes[stripe].error, i)) {
2419                         UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
2420 
2421                         log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
2422                     }
2423                 }
2424 
2425                 off += sectors_per_stripe;
2426                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
2427             }
2428         }
2429     }
2430 }
2431 
2432 static NTSTATUS scrub_chunk_raid56_stripe_run(device_extension* Vcb, chunk* c, UINT64 stripe_start, UINT64 stripe_end) {
2433     NTSTATUS Status;
2434     KEY searchkey;
2435     traverse_ptr tp;
2436     BOOL b;
2437     UINT64 run_start, run_end, full_stripe_len, stripe;
2438     UINT32 max_read, num_sectors;
2439     ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2440     scrub_context_raid56 context;
2441     UINT16 i;
2442     CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2443 
2444     TRACE("(%p, %p, %llx, %llx)\n", Vcb, c, stripe_start, stripe_end);
2445 
2446     full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2447     run_start = c->offset + (stripe_start * full_stripe_len);
2448     run_end = c->offset + ((stripe_end + 1) * full_stripe_len);
2449 
2450     searchkey.obj_id = run_start;
2451     searchkey.obj_type = TYPE_METADATA_ITEM;
2452     searchkey.offset = 0xffffffffffffffff;
2453 
2454     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2455     if (!NT_SUCCESS(Status)) {
2456         ERR("find_item returned %08x\n", Status);
2457         return Status;
2458     }
2459 
2460     num_sectors = (UINT32)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size);
2461     arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG));
2462 
2463     allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2464     if (!allocarr) {
2465         ERR("out of memory\n");
2466         return STATUS_INSUFFICIENT_RESOURCES;
2467     }
2468 
2469     treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2470     if (!treearr) {
2471         ERR("out of memory\n");
2472         ExFreePool(allocarr);
2473         return STATUS_INSUFFICIENT_RESOURCES;
2474     }
2475 
2476     RtlInitializeBitMap(&context.alloc, allocarr, num_sectors);
2477     RtlClearAllBits(&context.alloc);
2478 
2479     RtlInitializeBitMap(&context.is_tree, treearr, num_sectors);
2480     RtlClearAllBits(&context.is_tree);
2481 
2482     context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2483     if (!context.parity_scratch) {
2484         ERR("out of memory\n");
2485         ExFreePool(allocarr);
2486         ExFreePool(treearr);
2487         return STATUS_INSUFFICIENT_RESOURCES;
2488     }
2489 
2490     if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2491         csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2492         if (!csumarr) {
2493             ERR("out of memory\n");
2494             ExFreePool(allocarr);
2495             ExFreePool(treearr);
2496             ExFreePool(context.parity_scratch);
2497             return STATUS_INSUFFICIENT_RESOURCES;
2498         }
2499 
2500         RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors);
2501         RtlClearAllBits(&context.has_csum);
2502 
2503         context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * sizeof(UINT32), ALLOC_TAG);
2504         if (!context.csum) {
2505             ERR("out of memory\n");
2506             ExFreePool(allocarr);
2507             ExFreePool(treearr);
2508             ExFreePool(context.parity_scratch);
2509             ExFreePool(csumarr);
2510             return STATUS_INSUFFICIENT_RESOURCES;
2511         }
2512     }
2513 
2514     if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2515         context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2516         if (!context.parity_scratch2) {
2517             ERR("out of memory\n");
2518             ExFreePool(allocarr);
2519             ExFreePool(treearr);
2520             ExFreePool(context.parity_scratch);
2521 
2522             if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2523                 ExFreePool(csumarr);
2524                 ExFreePool(context.csum);
2525             }
2526 
2527             return STATUS_INSUFFICIENT_RESOURCES;
2528         }
2529     }
2530 
2531     do {
2532         traverse_ptr next_tp;
2533 
2534         if (tp.item->key.obj_id >= run_end)
2535             break;
2536 
2537         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM) {
2538             UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2539 
2540             if (tp.item->key.obj_id + size > run_start) {
2541                 UINT64 extent_start = max(run_start, tp.item->key.obj_id);
2542                 UINT64 extent_end = min(tp.item->key.obj_id + size, run_end);
2543                 BOOL extent_is_tree = FALSE;
2544 
2545                 RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2546 
2547                 if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
2548                     extent_is_tree = TRUE;
2549                 else {
2550                     EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2551 
2552                     if (tp.item->size < sizeof(EXTENT_ITEM)) {
2553                         ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2554                         Status = STATUS_INTERNAL_ERROR;
2555                         goto end;
2556                     }
2557 
2558                     if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2559                         extent_is_tree = TRUE;
2560                 }
2561 
2562                 if (extent_is_tree)
2563                     RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2564                 else if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2565                     traverse_ptr tp2;
2566                     BOOL b2;
2567 
2568                     searchkey.obj_id = EXTENT_CSUM_ID;
2569                     searchkey.obj_type = TYPE_EXTENT_CSUM;
2570                     searchkey.offset = extent_start;
2571 
2572                     Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL);
2573                     if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
2574                         ERR("find_item returned %08x\n", Status);
2575                         goto end;
2576                     }
2577 
2578                     do {
2579                         traverse_ptr next_tp2;
2580 
2581                         if (tp2.item->key.offset >= extent_end)
2582                             break;
2583 
2584                         if (tp2.item->key.offset >= extent_start) {
2585                             UINT64 csum_start = max(extent_start, tp2.item->key.offset);
2586                             UINT64 csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
2587 
2588                             RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size));
2589 
2590                             RtlCopyMemory(&context.csum[(csum_start - run_start) / Vcb->superblock.sector_size],
2591                                           tp2.item->data + ((csum_start - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
2592                                           (ULONG)((csum_end - csum_start) * sizeof(UINT32) / Vcb->superblock.sector_size));
2593                         }
2594 
2595                         b2 = find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL);
2596 
2597                         if (b2)
2598                             tp2 = next_tp2;
2599                     } while (b2);
2600                 }
2601             }
2602         }
2603 
2604         b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2605 
2606         if (b)
2607             tp = next_tp;
2608     } while (b);
2609 
2610     context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
2611     if (!context.stripes) {
2612         ERR("out of memory\n");
2613         Status = STATUS_INSUFFICIENT_RESOURCES;
2614         goto end;
2615     }
2616 
2617     max_read = (UINT32)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time
2618 
2619     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2620         context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG);
2621         if (!context.stripes[i].buf) {
2622             UINT64 j;
2623 
2624             ERR("out of memory\n");
2625 
2626             for (j = 0; j < i; j++) {
2627                 ExFreePool(context.stripes[j].buf);
2628             }
2629             ExFreePool(context.stripes);
2630 
2631             Status = STATUS_INSUFFICIENT_RESOURCES;
2632             goto end;
2633         }
2634 
2635         context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG);
2636         if (!context.stripes[i].errorarr) {
2637             UINT64 j;
2638 
2639             ERR("out of memory\n");
2640 
2641             ExFreePool(context.stripes[i].buf);
2642 
2643             for (j = 0; j < i; j++) {
2644                 ExFreePool(context.stripes[j].buf);
2645             }
2646             ExFreePool(context.stripes);
2647 
2648             Status = STATUS_INSUFFICIENT_RESOURCES;
2649             goto end;
2650         }
2651 
2652         RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size));
2653 
2654         context.stripes[i].context = &context;
2655         context.stripes[i].rewrite = FALSE;
2656     }
2657 
2658     stripe = stripe_start;
2659 
2660     Status = STATUS_SUCCESS;
2661 
2662     chunk_lock_range(Vcb, c, run_start, run_end - run_start);
2663 
2664     do {
2665         ULONG read_stripes;
2666         UINT16 missing_devices = 0;
2667         BOOL need_wait = FALSE;
2668 
2669         if (max_read < stripe_end + 1 - stripe)
2670             read_stripes = max_read;
2671         else
2672             read_stripes = (ULONG)(stripe_end + 1 - stripe);
2673 
2674         context.stripes_left = c->chunk_item->num_stripes;
2675 
2676         // read megabyte by megabyte
2677         for (i = 0; i < c->chunk_item->num_stripes; i++) {
2678             if (c->devices[i]->devobj) {
2679                 PIO_STACK_LOCATION IrpSp;
2680 
2681                 context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);
2682 
2683                 if (!context.stripes[i].Irp) {
2684                     ERR("IoAllocateIrp failed\n");
2685                     Status = STATUS_INSUFFICIENT_RESOURCES;
2686                     goto end3;
2687                 }
2688 
2689                 context.stripes[i].Irp->MdlAddress = NULL;
2690 
2691                 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2692                 IrpSp->MajorFunction = IRP_MJ_READ;
2693 
2694                 if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2695                     context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
2696                     if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2697                         ERR("out of memory\n");
2698                         Status = STATUS_INSUFFICIENT_RESOURCES;
2699                         goto end3;
2700                     }
2701 
2702                     context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2703 
2704                     context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2705                 } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
2706                     context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), FALSE, FALSE, NULL);
2707                     if (!context.stripes[i].Irp->MdlAddress) {
2708                         ERR("IoAllocateMdl failed\n");
2709                         Status = STATUS_INSUFFICIENT_RESOURCES;
2710                         goto end3;
2711                     }
2712 
2713                     Status = STATUS_SUCCESS;
2714 
2715                     _SEH2_TRY {
2716                         MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
2717                     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2718                         Status = _SEH2_GetExceptionCode();
2719                     } _SEH2_END;
2720 
2721                     if (!NT_SUCCESS(Status)) {
2722                         ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2723                         IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2724                         goto end3;
2725                     }
2726                 } else
2727                     context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2728 
2729                 context.stripes[i].offset = stripe * c->chunk_item->stripe_length;
2730 
2731                 IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
2732                 IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;
2733 
2734                 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2735                 context.stripes[i].missing = FALSE;
2736 
2737                 IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], TRUE, TRUE, TRUE);
2738 
2739                 Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
2740                 need_wait = TRUE;
2741             } else {
2742                 context.stripes[i].Irp = NULL;
2743                 context.stripes[i].missing = TRUE;
2744                 missing_devices++;
2745                 InterlockedDecrement(&context.stripes_left);
2746             }
2747         }
2748 
2749         if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
2750             ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
2751             Status = STATUS_UNEXPECTED_IO_ERROR;
2752             goto end3;
2753         } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
2754             ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
2755             Status = STATUS_UNEXPECTED_IO_ERROR;
2756             goto end3;
2757         }
2758 
2759         if (need_wait) {
2760             KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
2761 
2762             for (i = 0; i < c->chunk_item->num_stripes; i++) {
2763                 if (c->devices[i]->devobj)
2764                     IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
2765             }
2766 
2767             KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2768         }
2769 
2770         // return an error if any of the stripes returned an error
2771         for (i = 0; i < c->chunk_item->num_stripes; i++) {
2772             if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
2773                 Status = context.stripes[i].iosb.Status;
2774                 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
2775                 goto end3;
2776             }
2777         }
2778 
2779         if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2780             for (i = 0; i < read_stripes; i++) {
2781                 scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2782             }
2783         } else {
2784             for (i = 0; i < read_stripes; i++) {
2785                 scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2786             }
2787         }
2788         stripe += read_stripes;
2789 
2790 end3:
2791         for (i = 0; i < c->chunk_item->num_stripes; i++) {
2792             if (context.stripes[i].Irp) {
2793                 if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
2794                     MmUnlockPages(context.stripes[i].Irp->MdlAddress);
2795                     IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2796                 }
2797                 IoFreeIrp(context.stripes[i].Irp);
2798                 context.stripes[i].Irp = NULL;
2799 
2800                 if (context.stripes[i].rewrite) {
2801                     Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + context.stripes[i].offset,
2802                                              context.stripes[i].buf, (UINT32)(read_stripes * c->chunk_item->stripe_length));
2803 
2804                     if (!NT_SUCCESS(Status)) {
2805                         ERR("write_data_phys returned %08x\n", Status);
2806                         log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS);
2807                         goto end2;
2808                     }
2809                 }
2810             }
2811         }
2812 
2813         if (!NT_SUCCESS(Status))
2814             break;
2815     } while (stripe < stripe_end);
2816 
2817 end2:
2818     chunk_unlock_range(Vcb, c, run_start, run_end - run_start);
2819 
2820     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2821         ExFreePool(context.stripes[i].buf);
2822         ExFreePool(context.stripes[i].errorarr);
2823     }
2824     ExFreePool(context.stripes);
2825 
2826 end:
2827     ExFreePool(treearr);
2828     ExFreePool(allocarr);
2829     ExFreePool(context.parity_scratch);
2830 
2831     if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2832         ExFreePool(context.parity_scratch2);
2833 
2834     if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2835         ExFreePool(csumarr);
2836         ExFreePool(context.csum);
2837     }
2838 
2839     return Status;
2840 }
2841 
2842 static NTSTATUS scrub_chunk_raid56(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) {
2843     NTSTATUS Status;
2844     KEY searchkey;
2845     traverse_ptr tp;
2846     BOOL b;
2847     UINT64 full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0;
2848     ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2849 
2850     full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2851     stripe = (*offset - c->offset) / full_stripe_len;
2852 
2853     *offset = c->offset + (stripe * full_stripe_len);
2854 
2855     searchkey.obj_id = *offset;
2856     searchkey.obj_type = TYPE_METADATA_ITEM;
2857     searchkey.offset = 0xffffffffffffffff;
2858 
2859     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2860     if (!NT_SUCCESS(Status)) {
2861         ERR("find_item returned %08x\n", Status);
2862         return Status;
2863     }
2864 
2865     *changed = FALSE;
2866 
2867     do {
2868         traverse_ptr next_tp;
2869 
2870         if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2871             break;
2872 
2873         if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
2874             UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2875 
2876             TRACE("%llx\n", tp.item->key.obj_id);
2877 
2878             if (size < Vcb->superblock.sector_size) {
2879                 ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
2880                 return STATUS_INTERNAL_ERROR;
2881             }
2882 
2883             stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len;
2884 
2885             if (*changed) {
2886                 if (stripe > stripe_end + 1) {
2887                     Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2888                     if (!NT_SUCCESS(Status)) {
2889                         ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2890                         return Status;
2891                     }
2892 
2893                     stripe_start = stripe;
2894                 }
2895             } else
2896                 stripe_start = stripe;
2897 
2898             stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len;
2899 
2900             *changed = TRUE;
2901 
2902             total_data += size;
2903             num_extents++;
2904 
2905             // only do so much at a time
2906             if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
2907                 break;
2908         }
2909 
2910         b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2911 
2912         if (b)
2913             tp = next_tp;
2914     } while (b);
2915 
2916     if (*changed) {
2917         Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2918         if (!NT_SUCCESS(Status)) {
2919             ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2920             return Status;
2921         }
2922 
2923         *offset = c->offset + ((stripe_end + 1) * full_stripe_len);
2924     }
2925 
2926     return STATUS_SUCCESS;
2927 }
2928 
2929 static NTSTATUS scrub_chunk(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) {
2930     NTSTATUS Status;
2931     KEY searchkey;
2932     traverse_ptr tp;
2933     BOOL b = FALSE, tree_run = FALSE;
2934     ULONG type, num_extents = 0;
2935     UINT64 total_data = 0, tree_run_start, tree_run_end;
2936 
2937     TRACE("chunk %llx\n", c->offset);
2938 
2939     ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
2940 
2941     if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
2942         type = BLOCK_FLAG_DUPLICATE;
2943     else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2944         type = BLOCK_FLAG_RAID0;
2945     else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2946         type = BLOCK_FLAG_DUPLICATE;
2947     else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2948         type = BLOCK_FLAG_RAID10;
2949     else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2950         Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2951         goto end;
2952     } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2953         Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2954         goto end;
2955     } else // SINGLE
2956         type = BLOCK_FLAG_DUPLICATE;
2957 
2958     searchkey.obj_id = *offset;
2959     searchkey.obj_type = TYPE_METADATA_ITEM;
2960     searchkey.offset = 0xffffffffffffffff;
2961 
2962     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2963     if (!NT_SUCCESS(Status)) {
2964         ERR("error - find_item returned %08x\n", Status);
2965         goto end;
2966     }
2967 
2968     do {
2969         traverse_ptr next_tp;
2970 
2971         if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2972             break;
2973 
2974         if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
2975             UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2976             BOOL is_tree;
2977             UINT32* csum = NULL;
2978             RTL_BITMAP bmp;
2979             ULONG* bmparr = NULL;
2980 
2981             TRACE("%llx\n", tp.item->key.obj_id);
2982 
2983             is_tree = FALSE;
2984 
2985             if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
2986                 is_tree = TRUE;
2987             else {
2988                 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2989 
2990                 if (tp.item->size < sizeof(EXTENT_ITEM)) {
2991                     ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2992                     Status = STATUS_INTERNAL_ERROR;
2993                     goto end;
2994                 }
2995 
2996                 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2997                     is_tree = TRUE;
2998             }
2999 
3000             if (size < Vcb->superblock.sector_size) {
3001                 ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
3002                 Status = STATUS_INTERNAL_ERROR;
3003                 goto end;
3004             }
3005 
3006             // load csum
3007             if (!is_tree) {
3008                 traverse_ptr tp2;
3009 
3010                 csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(sizeof(UINT32) * size / Vcb->superblock.sector_size), ALLOC_TAG);
3011                 if (!csum) {
3012                     ERR("out of memory\n");
3013                     Status = STATUS_INSUFFICIENT_RESOURCES;
3014                     goto end;
3015                 }
3016 
3017                 bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align(((size / Vcb->superblock.sector_size) >> 3) + 1, sizeof(ULONG))), ALLOC_TAG);
3018                 if (!bmparr) {
3019                     ERR("out of memory\n");
3020                     ExFreePool(csum);
3021                     Status = STATUS_INSUFFICIENT_RESOURCES;
3022                     goto end;
3023                 }
3024 
3025                 RtlInitializeBitMap(&bmp, bmparr, (ULONG)(size / Vcb->superblock.sector_size));
3026                 RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
3027 
3028                 searchkey.obj_id = EXTENT_CSUM_ID;
3029                 searchkey.obj_type = TYPE_EXTENT_CSUM;
3030                 searchkey.offset = tp.item->key.obj_id;
3031 
3032                 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL);
3033                 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
3034                     ERR("find_item returned %08x\n", Status);
3035                     ExFreePool(csum);
3036                     ExFreePool(bmparr);
3037                     goto end;
3038                 }
3039 
3040                 if (Status != STATUS_NOT_FOUND) {
3041                     do {
3042                         traverse_ptr next_tp2;
3043 
3044                         if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) {
3045                             if (tp2.item->key.offset >= tp.item->key.obj_id + size)
3046                                 break;
3047                             else if (tp2.item->size >= sizeof(UINT32) && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= tp.item->key.obj_id) {
3048                                 UINT64 cs = max(tp.item->key.obj_id, tp2.item->key.offset);
3049                                 UINT64 ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
3050 
3051                                 RtlCopyMemory(csum + ((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size),
3052                                               tp2.item->data + ((cs - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
3053                                               (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size));
3054 
3055                                 RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
3056 
3057                                 if (ce == tp.item->key.obj_id + size)
3058                                     break;
3059                             }
3060                         }
3061 
3062                         if (find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL))
3063                             tp2 = next_tp2;
3064                         else
3065                             break;
3066                     } while (TRUE);
3067                 }
3068             }
3069 
3070             if (tree_run) {
3071                 if (!is_tree || tp.item->key.obj_id > tree_run_end) {
3072                     Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL);
3073                     if (!NT_SUCCESS(Status)) {
3074                         ERR("scrub_extent returned %08x\n", Status);
3075                         goto end;
3076                     }
3077 
3078                     if (!is_tree)
3079                         tree_run = FALSE;
3080                     else {
3081                         tree_run_start = tp.item->key.obj_id;
3082                         tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3083                     }
3084                 } else
3085                     tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3086             } else if (is_tree) {
3087                 tree_run = TRUE;
3088                 tree_run_start = tp.item->key.obj_id;
3089                 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3090             }
3091 
3092             if (!is_tree) {
3093                 Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp);
3094                 if (!NT_SUCCESS(Status)) {
3095                     ERR("scrub_data_extent returned %08x\n", Status);
3096                     ExFreePool(csum);
3097                     ExFreePool(bmparr);
3098                     goto end;
3099                 }
3100 
3101                 ExFreePool(csum);
3102                 ExFreePool(bmparr);
3103             }
3104 
3105             *offset = tp.item->key.obj_id + size;
3106             *changed = TRUE;
3107 
3108             total_data += size;
3109             num_extents++;
3110 
3111             // only do so much at a time
3112             if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
3113                 break;
3114         }
3115 
3116         b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
3117 
3118         if (b)
3119             tp = next_tp;
3120     } while (b);
3121 
3122     if (tree_run) {
3123         Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL);
3124         if (!NT_SUCCESS(Status)) {
3125             ERR("scrub_extent returned %08x\n", Status);
3126             goto end;
3127         }
3128     }
3129 
3130     Status = STATUS_SUCCESS;
3131 
3132 end:
3133     ExReleaseResourceLite(&Vcb->tree_lock);
3134 
3135     return Status;
3136 }
3137 
3138 _Function_class_(KSTART_ROUTINE)
3139 #ifdef __REACTOS__
3140 static void NTAPI scrub_thread(void* context) {
3141 #else
3142 static void scrub_thread(void* context) {
3143 #endif
3144     device_extension* Vcb = context;
3145     LIST_ENTRY chunks, *le;
3146     NTSTATUS Status;
3147     LARGE_INTEGER time;
3148 
3149     KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, FALSE);
3150 
3151     InitializeListHead(&chunks);
3152 
3153     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
3154 
3155     if (Vcb->need_write && !Vcb->readonly)
3156         Status = do_write(Vcb, NULL);
3157     else
3158         Status = STATUS_SUCCESS;
3159 
3160     free_trees(Vcb);
3161 
3162     if (!NT_SUCCESS(Status)) {
3163         ExReleaseResourceLite(&Vcb->tree_lock);
3164         ERR("do_write returned %08x\n", Status);
3165         Vcb->scrub.error = Status;
3166         goto end;
3167     }
3168 
3169     ExConvertExclusiveToSharedLite(&Vcb->tree_lock);
3170 
3171     ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
3172 
3173     KeQuerySystemTime(&Vcb->scrub.start_time);
3174     Vcb->scrub.finish_time.QuadPart = 0;
3175     Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3176     Vcb->scrub.duration.QuadPart = 0;
3177     Vcb->scrub.total_chunks = 0;
3178     Vcb->scrub.chunks_left = 0;
3179     Vcb->scrub.data_scrubbed = 0;
3180     Vcb->scrub.num_errors = 0;
3181 
3182     while (!IsListEmpty(&Vcb->scrub.errors)) {
3183         scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry);
3184         ExFreePool(err);
3185     }
3186 
3187     ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
3188 
3189     le = Vcb->chunks.Flink;
3190     while (le != &Vcb->chunks) {
3191         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
3192 
3193         acquire_chunk_lock(c, Vcb);
3194 
3195         if (!c->readonly) {
3196             InsertTailList(&chunks, &c->list_entry_balance);
3197             Vcb->scrub.total_chunks++;
3198             Vcb->scrub.chunks_left++;
3199         }
3200 
3201         release_chunk_lock(c, Vcb);
3202 
3203         le = le->Flink;
3204     }
3205 
3206     ExReleaseResourceLite(&Vcb->chunk_lock);
3207 
3208     ExReleaseResource(&Vcb->scrub.stats_lock);
3209 
3210     ExReleaseResourceLite(&Vcb->tree_lock);
3211 
3212     while (!IsListEmpty(&chunks)) {
3213         chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
3214         UINT64 offset = c->offset;
3215         BOOL changed;
3216 
3217         c->reloc = TRUE;
3218 
3219         KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL);
3220 
3221         if (!Vcb->scrub.stopping) {
3222             do {
3223                 changed = FALSE;
3224 
3225                 Status = scrub_chunk(Vcb, c, &offset, &changed);
3226                 if (!NT_SUCCESS(Status)) {
3227                     ERR("scrub_chunk returned %08x\n", Status);
3228                     Vcb->scrub.stopping = TRUE;
3229                     Vcb->scrub.error = Status;
3230                     break;
3231                 }
3232 
3233                 if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
3234                     break;
3235 
3236                 KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL);
3237             } while (changed);
3238         }
3239 
3240         ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
3241 
3242         if (!Vcb->scrub.stopping)
3243             Vcb->scrub.chunks_left--;
3244 
3245         if (IsListEmpty(&chunks))
3246             KeQuerySystemTime(&Vcb->scrub.finish_time);
3247 
3248         ExReleaseResource(&Vcb->scrub.stats_lock);
3249 
3250         c->reloc = FALSE;
3251         c->list_entry_balance.Flink = NULL;
3252     }
3253 
3254     KeQuerySystemTime(&time);
3255     Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3256 
3257 end:
3258     ZwClose(Vcb->scrub.thread);
3259     Vcb->scrub.thread = NULL;
3260 
3261     KeSetEvent(&Vcb->scrub.finished, 0, FALSE);
3262 }
3263 
3264 NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3265     NTSTATUS Status;
3266 
3267     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3268         return STATUS_PRIVILEGE_NOT_HELD;
3269 
3270     if (Vcb->locked) {
3271         WARN("cannot start scrub while locked\n");
3272         return STATUS_DEVICE_NOT_READY;
3273     }
3274 
3275     if (Vcb->balance.thread) {
3276         WARN("cannot start scrub while balance running\n");
3277         return STATUS_DEVICE_NOT_READY;
3278     }
3279 
3280     if (Vcb->scrub.thread) {
3281         WARN("scrub already running\n");
3282         return STATUS_DEVICE_NOT_READY;
3283     }
3284 
3285     if (Vcb->readonly)
3286         return STATUS_MEDIA_WRITE_PROTECTED;
3287 
3288     Vcb->scrub.stopping = FALSE;
3289     Vcb->scrub.paused = FALSE;
3290     Vcb->scrub.error = STATUS_SUCCESS;
3291     KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused);
3292 
3293     Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, NULL, NULL, NULL, scrub_thread, Vcb);
3294     if (!NT_SUCCESS(Status)) {
3295         ERR("PsCreateSystemThread returned %08x\n", Status);
3296         return Status;
3297     }
3298 
3299     return STATUS_SUCCESS;
3300 }
3301 
3302 NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length) {
3303     btrfs_query_scrub* bqs = (btrfs_query_scrub*)data;
3304     ULONG len;
3305     NTSTATUS Status;
3306     LIST_ENTRY* le;
3307     btrfs_scrub_error* bse = NULL;
3308 
3309     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3310         return STATUS_PRIVILEGE_NOT_HELD;
3311 
3312     if (length < offsetof(btrfs_query_scrub, errors))
3313         return STATUS_BUFFER_TOO_SMALL;
3314 
3315     ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, TRUE);
3316 
3317     if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
3318         bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING;
3319     else
3320         bqs->status = BTRFS_SCRUB_STOPPED;
3321 
3322     bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3323     bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart;
3324     bqs->chunks_left = Vcb->scrub.chunks_left;
3325     bqs->total_chunks = Vcb->scrub.total_chunks;
3326     bqs->data_scrubbed = Vcb->scrub.data_scrubbed;
3327 
3328     bqs->duration = Vcb->scrub.duration.QuadPart;
3329 
3330     if (bqs->status == BTRFS_SCRUB_RUNNING) {
3331         LARGE_INTEGER time;
3332 
3333         KeQuerySystemTime(&time);
3334         bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3335     }
3336 
3337     bqs->error = Vcb->scrub.error;
3338 
3339     bqs->num_errors = Vcb->scrub.num_errors;
3340 
3341     len = length - offsetof(btrfs_query_scrub, errors);
3342 
3343     le = Vcb->scrub.errors.Flink;
3344     while (le != &Vcb->scrub.errors) {
3345         scrub_error* err = CONTAINING_RECORD(le, scrub_error, list_entry);
3346         ULONG errlen;
3347 
3348         if (err->is_metadata)
3349             errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3350         else
3351             errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;
3352 
3353         if (len < errlen) {
3354             Status = STATUS_BUFFER_OVERFLOW;
3355             goto end;
3356         }
3357 
3358         if (!bse)
3359             bse = &bqs->errors;
3360         else {
3361             ULONG lastlen;
3362 
3363             if (bse->is_metadata)
3364                 lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3365             else
3366                 lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;
3367 
3368             bse->next_entry = lastlen;
3369             bse = (btrfs_scrub_error*)(((UINT8*)bse) + lastlen);
3370         }
3371 
3372         bse->next_entry = 0;
3373         bse->address = err->address;
3374         bse->device = err->device;
3375         bse->recovered = err->recovered;
3376         bse->is_metadata = err->is_metadata;
3377         bse->parity = err->parity;
3378 
3379         if (err->is_metadata) {
3380             bse->metadata.root = err->metadata.root;
3381             bse->metadata.level = err->metadata.level;
3382             bse->metadata.firstitem = err->metadata.firstitem;
3383         } else {
3384             bse->data.subvol = err->data.subvol;
3385             bse->data.offset = err->data.offset;
3386             bse->data.filename_length = err->data.filename_length;
3387             RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
3388         }
3389 
3390         len -= errlen;
3391         le = le->Flink;
3392     }
3393 
3394     Status = STATUS_SUCCESS;
3395 
3396 end:
3397     ExReleaseResourceLite(&Vcb->scrub.stats_lock);
3398 
3399     return Status;
3400 }
3401 
3402 NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3403     LARGE_INTEGER time;
3404 
3405     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3406         return STATUS_PRIVILEGE_NOT_HELD;
3407 
3408     if (!Vcb->scrub.thread)
3409         return STATUS_DEVICE_NOT_READY;
3410 
3411     if (Vcb->scrub.paused)
3412         return STATUS_DEVICE_NOT_READY;
3413 
3414     Vcb->scrub.paused = TRUE;
3415     KeClearEvent(&Vcb->scrub.event);
3416 
3417     KeQuerySystemTime(&time);
3418     Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3419 
3420     return STATUS_SUCCESS;
3421 }
3422 
3423 NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3424     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3425         return STATUS_PRIVILEGE_NOT_HELD;
3426 
3427     if (!Vcb->scrub.thread)
3428         return STATUS_DEVICE_NOT_READY;
3429 
3430     if (!Vcb->scrub.paused)
3431         return STATUS_DEVICE_NOT_READY;
3432 
3433     Vcb->scrub.paused = FALSE;
3434     KeSetEvent(&Vcb->scrub.event, 0, FALSE);
3435 
3436     KeQuerySystemTime(&Vcb->scrub.resume_time);
3437 
3438     return STATUS_SUCCESS;
3439 }
3440 
3441 NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3442     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3443         return STATUS_PRIVILEGE_NOT_HELD;
3444 
3445     if (!Vcb->scrub.thread)
3446         return STATUS_DEVICE_NOT_READY;
3447 
3448     Vcb->scrub.paused = FALSE;
3449     Vcb->scrub.stopping = TRUE;
3450     KeSetEvent(&Vcb->scrub.event, 0, FALSE);
3451 
3452     return STATUS_SUCCESS;
3453 }
3454