xref: /reactos/drivers/filesystems/btrfs/write.c (revision cc7cf826)
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
/* Per-stripe bookkeeping filled in by the prepare_*_write helpers in this file. */
20 typedef struct {
21     uint64_t start; // offset at which this stripe's share of the write begins
22     uint64_t end; // offset one past this stripe's last byte; the stripe MDL is allocated with length end - start
23     uint8_t* data; // NOTE(review): presumably a data buffer for the stripe - not populated in the code visible here, confirm against callers
24     PMDL mdl; // MDL for this stripe; only allocated by prepare_raid0_write when start != end
25     uint64_t irp_offset; // NOTE(review): not referenced in the visible code - confirm meaning against callers
26 } write_stripe;
27 
// Forward declaration of an I/O completion routine (annotated as IO_COMPLETION_ROUTINE); the definition is not in this part of the file.
28 _Function_class_(IO_COMPLETION_ROUTINE)
29 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
30 
// Forward declaration; the definition is not in this part of the file.
31 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback);
32 
// Globals declared extern here and defined outside this section.
// NOTE(review): the f* names look like optional kernel routines held as function pointers - confirm where they are resolved.
33 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
34 extern tCcCopyWriteEx fCcCopyWriteEx;
35 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
36 extern bool diskacc; // NOTE(review): presumably enables disk-access accounting - confirm
37 
38 bool find_data_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t length, uint64_t* address) {
39     LIST_ENTRY* le;
40     space* s;
41 
42     TRACE("(%p, %I64x, %I64x, %p)\n", Vcb, c->offset, length, address);
43 
44     if (length > c->chunk_item->size - c->used)
45         return false;
46 
47     if (!c->cache_loaded) {
48         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
49 
50         if (!NT_SUCCESS(Status)) {
51             ERR("load_cache_chunk returned %08lx\n", Status);
52             return false;
53         }
54     }
55 
56     if (IsListEmpty(&c->space_size))
57         return false;
58 
59     le = c->space_size.Flink;
60     while (le != &c->space_size) {
61         s = CONTAINING_RECORD(le, space, list_entry_size);
62 
63         if (s->size == length) {
64             *address = s->address;
65             return true;
66         } else if (s->size < length) {
67             if (le == c->space_size.Flink)
68                 return false;
69 
70             s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
71 
72             *address = s->address;
73             return true;
74         }
75 
76         le = le->Flink;
77     }
78 
79     s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
80 
81     if (s->size > length) {
82         *address = s->address;
83         return true;
84     }
85 
86     return false;
87 }
88 
89 chunk* get_chunk_from_address(device_extension* Vcb, uint64_t address) {
90     LIST_ENTRY* le2;
91 
92     ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
93 
94     le2 = Vcb->chunks.Flink;
95     while (le2 != &Vcb->chunks) {
96         chunk* c = CONTAINING_RECORD(le2, chunk, list_entry);
97 
98         if (address >= c->offset && address < c->offset + c->chunk_item->size) {
99             ExReleaseResourceLite(&Vcb->chunk_lock);
100             return c;
101         }
102 
103         le2 = le2->Flink;
104     }
105 
106     ExReleaseResourceLite(&Vcb->chunk_lock);
107 
108     return NULL;
109 }
110 
/* One stripe candidate chosen by find_new_stripe / find_new_dup_stripes for a new chunk. */
111 typedef struct {
112     space* dh; // free-space entry on `device` selected to hold this stripe
113     device* device; // device the stripe will be placed on
114 } stripe;
115 
116 static uint64_t find_new_chunk_address(device_extension* Vcb, uint64_t size) {
117     uint64_t lastaddr;
118     LIST_ENTRY* le;
119 
120     lastaddr = 0xc00000;
121 
122     le = Vcb->chunks.Flink;
123     while (le != &Vcb->chunks) {
124         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
125 
126         if (c->offset >= lastaddr + size)
127             return lastaddr;
128 
129         lastaddr = c->offset + c->chunk_item->size;
130 
131         le = le->Flink;
132     }
133 
134     return lastaddr;
135 }
136 
137 static bool find_new_dup_stripes(device_extension* Vcb, stripe* stripes, uint64_t max_stripe_size, bool full_size) {
138     uint64_t devusage = 0xffffffffffffffff;
139     space *devdh1 = NULL, *devdh2 = NULL;
140     LIST_ENTRY* le;
141     device* dev2 = NULL;
142 
143     le = Vcb->devices.Flink;
144 
145     while (le != &Vcb->devices) {
146         device* dev = CONTAINING_RECORD(le, device, list_entry);
147 
148         if (!dev->readonly && !dev->reloc && dev->devobj) {
149             uint64_t usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
150 
151             // favour devices which have been used the least
152             if (usage < devusage) {
153                 if (!IsListEmpty(&dev->space)) {
154                     LIST_ENTRY* le2;
155                     space *dh1 = NULL, *dh2 = NULL;
156 
157                     le2 = dev->space.Flink;
158                     while (le2 != &dev->space) {
159                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
160 
161                         if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) {
162                             dh2 = dh1;
163                             dh1 = dh;
164                         }
165 
166                         le2 = le2->Flink;
167                     }
168 
169                     if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
170                         dev2 = dev;
171                         devusage = usage;
172                         devdh1 = dh1;
173                         devdh2 = dh2 ? dh2 : dh1;
174                     }
175                 }
176             }
177         }
178 
179         le = le->Flink;
180     }
181 
182     if (!devdh1) {
183         uint64_t size = 0;
184 
185         // Can't find hole of at least max_stripe_size; look for the largest one we can find
186 
187         if (full_size)
188             return false;
189 
190         le = Vcb->devices.Flink;
191         while (le != &Vcb->devices) {
192             device* dev = CONTAINING_RECORD(le, device, list_entry);
193 
194             if (!dev->readonly && !dev->reloc) {
195                 if (!IsListEmpty(&dev->space)) {
196                     LIST_ENTRY* le2;
197                     space *dh1 = NULL, *dh2 = NULL;
198 
199                     le2 = dev->space.Flink;
200                     while (le2 != &dev->space) {
201                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
202 
203                         if (!dh1 || !dh2 || dh->size < dh1->size) {
204                             dh2 = dh1;
205                             dh1 = dh;
206                         }
207 
208                         le2 = le2->Flink;
209                     }
210 
211                     if (dh1) {
212                         uint64_t devsize;
213 
214                         if (dh2)
215                             devsize = max(dh1->size / 2, min(dh1->size, dh2->size));
216                         else
217                             devsize = dh1->size / 2;
218 
219                         if (devsize > size) {
220                             dev2 = dev;
221                             devdh1 = dh1;
222 
223                             if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2)
224                                 devdh2 = dh2;
225                             else
226                                 devdh2 = dh1;
227 
228                             size = devsize;
229                         }
230                     }
231                 }
232             }
233 
234             le = le->Flink;
235         }
236 
237         if (!devdh1)
238             return false;
239     }
240 
241     stripes[0].device = stripes[1].device = dev2;
242     stripes[0].dh = devdh1;
243     stripes[1].dh = devdh2;
244 
245     return true;
246 }
247 
248 static bool find_new_stripe(device_extension* Vcb, stripe* stripes, uint16_t i, uint64_t max_stripe_size, bool allow_missing, bool full_size) {
249     uint64_t k, devusage = 0xffffffffffffffff;
250     space* devdh = NULL;
251     LIST_ENTRY* le;
252     device* dev2 = NULL;
253 
254     le = Vcb->devices.Flink;
255     while (le != &Vcb->devices) {
256         device* dev = CONTAINING_RECORD(le, device, list_entry);
257         uint64_t usage;
258         bool skip = false;
259 
260         if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
261             le = le->Flink;
262             continue;
263         }
264 
265         // skip this device if it already has a stripe
266         if (i > 0) {
267             for (k = 0; k < i; k++) {
268                 if (stripes[k].device == dev) {
269                     skip = true;
270                     break;
271                 }
272             }
273         }
274 
275         if (!skip) {
276             usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
277 
278             // favour devices which have been used the least
279             if (usage < devusage) {
280                 if (!IsListEmpty(&dev->space)) {
281                     LIST_ENTRY* le2;
282 
283                     le2 = dev->space.Flink;
284                     while (le2 != &dev->space) {
285                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
286 
287                         if ((dev2 != dev && dh->size >= max_stripe_size) ||
288                             (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size)
289                         ) {
290                             devdh = dh;
291                             dev2 = dev;
292                             devusage = usage;
293                         }
294 
295                         le2 = le2->Flink;
296                     }
297                 }
298             }
299         }
300 
301         le = le->Flink;
302     }
303 
304     if (!devdh) {
305         // Can't find hole of at least max_stripe_size; look for the largest one we can find
306 
307         if (full_size)
308             return false;
309 
310         le = Vcb->devices.Flink;
311         while (le != &Vcb->devices) {
312             device* dev = CONTAINING_RECORD(le, device, list_entry);
313             bool skip = false;
314 
315             if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
316                 le = le->Flink;
317                 continue;
318             }
319 
320             // skip this device if it already has a stripe
321             if (i > 0) {
322                 for (k = 0; k < i; k++) {
323                     if (stripes[k].device == dev) {
324                         skip = true;
325                         break;
326                     }
327                 }
328             }
329 
330             if (!skip) {
331                 if (!IsListEmpty(&dev->space)) {
332                     LIST_ENTRY* le2;
333 
334                     le2 = dev->space.Flink;
335                     while (le2 != &dev->space) {
336                         space* dh = CONTAINING_RECORD(le2, space, list_entry);
337 
338                         if (!devdh || devdh->size < dh->size) {
339                             devdh = dh;
340                             dev2 = dev;
341                         }
342 
343                         le2 = le2->Flink;
344                     }
345                 }
346             }
347 
348             le = le->Flink;
349         }
350 
351         if (!devdh)
352             return false;
353     }
354 
355     stripes[i].dh = devdh;
356     stripes[i].device = dev2;
357 
358     return true;
359 }
360 
361 NTSTATUS alloc_chunk(device_extension* Vcb, uint64_t flags, chunk** pc, bool full_size) {
362     NTSTATUS Status;
363     uint64_t max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
364     uint64_t total_size = 0, logaddr;
365     uint16_t i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing;
366     stripe* stripes = NULL;
367     uint16_t cisize;
368     CHUNK_ITEM_STRIPE* cis;
369     chunk* c = NULL;
370     space* s = NULL;
371     LIST_ENTRY* le;
372 
373     le = Vcb->devices.Flink;
374     while (le != &Vcb->devices) {
375         device* dev = CONTAINING_RECORD(le, device, list_entry);
376         total_size += dev->devitem.num_bytes;
377 
378         le = le->Flink;
379     }
380 
381     TRACE("total_size = %I64x\n", total_size);
382 
383     // We purposely check for DATA first - mixed blocks have the same size
384     // as DATA ones.
385     if (flags & BLOCK_FLAG_DATA) {
386         max_stripe_size = 0x40000000; // 1 GB
387         max_chunk_size = 10 * max_stripe_size;
388     } else if (flags & BLOCK_FLAG_METADATA) {
389         if (total_size > 0xC80000000) // 50 GB
390             max_stripe_size = 0x40000000; // 1 GB
391         else
392             max_stripe_size = 0x10000000; // 256 MB
393 
394         max_chunk_size = max_stripe_size;
395     } else if (flags & BLOCK_FLAG_SYSTEM) {
396         max_stripe_size = 0x2000000; // 32 MB
397         max_chunk_size = 2 * max_stripe_size;
398     } else {
399         ERR("unknown chunk type\n");
400         return STATUS_INTERNAL_ERROR;
401     }
402 
403     if (flags & BLOCK_FLAG_DUPLICATE) {
404         min_stripes = 2;
405         max_stripes = 2;
406         sub_stripes = 0;
407         type = BLOCK_FLAG_DUPLICATE;
408         allowed_missing = 0;
409     } else if (flags & BLOCK_FLAG_RAID0) {
410         min_stripes = 2;
411         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
412         sub_stripes = 0;
413         type = BLOCK_FLAG_RAID0;
414         allowed_missing = 0;
415     } else if (flags & BLOCK_FLAG_RAID1) {
416         min_stripes = 2;
417         max_stripes = 2;
418         sub_stripes = 1;
419         type = BLOCK_FLAG_RAID1;
420         allowed_missing = 1;
421     } else if (flags & BLOCK_FLAG_RAID10) {
422         min_stripes = 4;
423         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
424         sub_stripes = 2;
425         type = BLOCK_FLAG_RAID10;
426         allowed_missing = 1;
427     } else if (flags & BLOCK_FLAG_RAID5) {
428         min_stripes = 3;
429         max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
430         sub_stripes = 1;
431         type = BLOCK_FLAG_RAID5;
432         allowed_missing = 1;
433     } else if (flags & BLOCK_FLAG_RAID6) {
434         min_stripes = 4;
435         max_stripes = 257;
436         sub_stripes = 1;
437         type = BLOCK_FLAG_RAID6;
438         allowed_missing = 2;
439     } else if (flags & BLOCK_FLAG_RAID1C3) {
440         min_stripes = 3;
441         max_stripes = 3;
442         sub_stripes = 1;
443         type = BLOCK_FLAG_RAID1C3;
444         allowed_missing = 2;
445     } else if (flags & BLOCK_FLAG_RAID1C4) {
446         min_stripes = 4;
447         max_stripes = 4;
448         sub_stripes = 1;
449         type = BLOCK_FLAG_RAID1C4;
450         allowed_missing = 3;
451     } else { // SINGLE
452         min_stripes = 1;
453         max_stripes = 1;
454         sub_stripes = 1;
455         type = 0;
456         allowed_missing = 0;
457     }
458 
459     if (max_chunk_size > total_size / 10) {  // cap at 10%
460         max_chunk_size = total_size / 10;
461         max_stripe_size = max_chunk_size / min_stripes;
462     }
463 
464     if (max_stripe_size > total_size / (10 * min_stripes))
465         max_stripe_size = total_size / (10 * min_stripes);
466 
467     TRACE("would allocate a new chunk of %I64x bytes and stripe %I64x\n", max_chunk_size, max_stripe_size);
468 
469     stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
470     if (!stripes) {
471         ERR("out of memory\n");
472         Status = STATUS_INSUFFICIENT_RESOURCES;
473         goto end;
474     }
475 
476     num_stripes = 0;
477 
478     if (type == BLOCK_FLAG_DUPLICATE) {
479         if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) {
480             Status = STATUS_DISK_FULL;
481             goto end;
482         } else
483             num_stripes = max_stripes;
484     } else {
485         for (i = 0; i < max_stripes; i++) {
486             if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, false, full_size))
487                 break;
488             else
489                 num_stripes++;
490         }
491     }
492 
493     if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) {
494         uint16_t added_missing = 0;
495 
496         for (i = num_stripes; i < max_stripes; i++) {
497             if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, true, full_size))
498                 break;
499             else {
500                 added_missing++;
501                 if (added_missing >= allowed_missing)
502                     break;
503             }
504         }
505 
506         num_stripes += added_missing;
507     }
508 
509     // for RAID10, round down to an even number of stripes
510     if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
511         num_stripes -= num_stripes % sub_stripes;
512     }
513 
514     if (num_stripes < min_stripes) {
515         WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
516         Status = STATUS_DISK_FULL;
517         goto end;
518     }
519 
520     c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG);
521     if (!c) {
522         ERR("out of memory\n");
523         Status = STATUS_INSUFFICIENT_RESOURCES;
524         goto end;
525     }
526 
527     c->devices = NULL;
528 
529     cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
530     c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
531     if (!c->chunk_item) {
532         ERR("out of memory\n");
533         Status = STATUS_INSUFFICIENT_RESOURCES;
534         goto end;
535     }
536 
537     stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
538 
539     if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh)
540         stripe_size = min(stripes[0].dh->size / 2, max_stripe_size);
541     else {
542         stripe_size = max_stripe_size;
543         for (i = 0; i < num_stripes; i++) {
544             if (stripes[i].dh->size < stripe_size)
545                 stripe_size = stripes[i].dh->size;
546         }
547     }
548 
549     if (type == 0 || type == BLOCK_FLAG_DUPLICATE || type == BLOCK_FLAG_RAID1 || type == BLOCK_FLAG_RAID1C3 || type == BLOCK_FLAG_RAID1C4)
550         factor = 1;
551     else if (type == BLOCK_FLAG_RAID0)
552         factor = num_stripes;
553     else if (type == BLOCK_FLAG_RAID10)
554         factor = num_stripes / sub_stripes;
555     else if (type == BLOCK_FLAG_RAID5)
556         factor = num_stripes - 1;
557     else if (type == BLOCK_FLAG_RAID6)
558         factor = num_stripes - 2;
559 
560     if (stripe_size * factor > max_chunk_size)
561         stripe_size = max_chunk_size / factor;
562 
563     if (stripe_size % stripe_length > 0)
564         stripe_size -= stripe_size % stripe_length;
565 
566     if (stripe_size == 0) {
567         ERR("not enough free space found (stripe_size == 0)\n");
568         Status = STATUS_DISK_FULL;
569         goto end;
570     }
571 
572     c->chunk_item->size = stripe_size * factor;
573     c->chunk_item->root_id = Vcb->extent_root->id;
574     c->chunk_item->stripe_length = stripe_length;
575     c->chunk_item->type = flags;
576     c->chunk_item->opt_io_alignment = (uint32_t)c->chunk_item->stripe_length;
577     c->chunk_item->opt_io_width = (uint32_t)c->chunk_item->stripe_length;
578     c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
579     c->chunk_item->num_stripes = num_stripes;
580     c->chunk_item->sub_stripes = sub_stripes;
581 
582     c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
583     if (!c->devices) {
584         ERR("out of memory\n");
585         Status = STATUS_INSUFFICIENT_RESOURCES;
586         goto end;
587     }
588 
589     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
590     for (i = 0; i < num_stripes; i++) {
591         cis[i].dev_id = stripes[i].device->devitem.dev_id;
592 
593         if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
594             cis[i].offset = stripes[0].dh->address + stripe_size;
595         else
596             cis[i].offset = stripes[i].dh->address;
597 
598         cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;
599 
600         c->devices[i] = stripes[i].device;
601     }
602 
603     logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);
604 
605     Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
606 
607     c->size = cisize;
608     c->offset = logaddr;
609     c->used = c->oldused = 0;
610     c->cache = c->old_cache = NULL;
611     c->readonly = false;
612     c->reloc = false;
613     c->last_alloc_set = false;
614     c->last_stripe = 0;
615     c->cache_loaded = true;
616     c->changed = false;
617     c->space_changed = false;
618     c->balance_num = 0;
619 
620     InitializeListHead(&c->space);
621     InitializeListHead(&c->space_size);
622     InitializeListHead(&c->deleting);
623     InitializeListHead(&c->changed_extents);
624 
625     InitializeListHead(&c->range_locks);
626     ExInitializeResourceLite(&c->range_locks_lock);
627     KeInitializeEvent(&c->range_locks_event, NotificationEvent, false);
628 
629     InitializeListHead(&c->partial_stripes);
630     ExInitializeResourceLite(&c->partial_stripes_lock);
631 
632     ExInitializeResourceLite(&c->lock);
633     ExInitializeResourceLite(&c->changed_extents_lock);
634 
635     s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG);
636     if (!s) {
637         ERR("out of memory\n");
638         Status = STATUS_INSUFFICIENT_RESOURCES;
639         goto end;
640     }
641 
642     s->address = c->offset;
643     s->size = c->chunk_item->size;
644     InsertTailList(&c->space, &s->list_entry);
645     InsertTailList(&c->space_size, &s->list_entry_size);
646 
647     protect_superblocks(c);
648 
649     for (i = 0; i < num_stripes; i++) {
650         stripes[i].device->devitem.bytes_used += stripe_size;
651 
652         space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL);
653     }
654 
655     Status = STATUS_SUCCESS;
656 
657     if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6)
658         Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56;
659 
660 end:
661     if (stripes)
662         ExFreePool(stripes);
663 
664     if (!NT_SUCCESS(Status)) {
665         if (c) {
666             if (c->devices)
667                 ExFreePool(c->devices);
668 
669             if (c->chunk_item)
670                 ExFreePool(c->chunk_item);
671 
672             ExFreePool(c);
673         }
674 
675         if (s) ExFreePool(s);
676     } else {
677         bool done = false;
678 
679         le = Vcb->chunks.Flink;
680         while (le != &Vcb->chunks) {
681             chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
682 
683             if (c2->offset > c->offset) {
684                 InsertHeadList(le->Blink, &c->list_entry);
685                 done = true;
686                 break;
687             }
688 
689             le = le->Flink;
690         }
691 
692         if (!done)
693             InsertTailList(&Vcb->chunks, &c->list_entry);
694 
695         c->created = true;
696         c->changed = true;
697         c->space_changed = true;
698         c->list_entry_balance.Flink = NULL;
699 
700         *pc = c;
701     }
702 
703     return Status;
704 }
705 
706 static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ uint64_t address, _In_reads_bytes_(length) void* data,
707                                     _In_ uint32_t length, _In_ write_stripe* stripes, _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
708     uint64_t startoff, endoff;
709     uint16_t startoffstripe, endoffstripe, stripenum;
710     uint64_t pos, *stripeoff;
711     uint32_t i;
712     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
713     PMDL master_mdl;
714     PFN_NUMBER* pfns;
715 
716     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
717     if (!stripeoff) {
718         ERR("out of memory\n");
719         return STATUS_INSUFFICIENT_RESOURCES;
720     }
721 
722     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
723     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
724 
725     if (file_write) {
726         master_mdl = Irp->MdlAddress;
727 
728         pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
729         pfns = &pfns[irp_offset >> PAGE_SHIFT];
730     } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
731         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
732         if (!wtc->scratch) {
733             ERR("out of memory\n");
734             return STATUS_INSUFFICIENT_RESOURCES;
735         }
736 
737         RtlCopyMemory(wtc->scratch, data, length);
738 
739         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
740         if (!master_mdl) {
741             ERR("out of memory\n");
742             return STATUS_INSUFFICIENT_RESOURCES;
743         }
744 
745         MmBuildMdlForNonPagedPool(master_mdl);
746 
747         wtc->mdl = master_mdl;
748 
749         pfns = (PFN_NUMBER*)(master_mdl + 1);
750     } else {
751         NTSTATUS Status = STATUS_SUCCESS;
752 
753         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
754         if (!master_mdl) {
755             ERR("out of memory\n");
756             return STATUS_INSUFFICIENT_RESOURCES;
757         }
758 
759         _SEH2_TRY {
760             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
761         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
762             Status = _SEH2_GetExceptionCode();
763         } _SEH2_END;
764 
765         if (!NT_SUCCESS(Status)) {
766             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
767             IoFreeMdl(master_mdl);
768             return Status;
769         }
770 
771         wtc->mdl = master_mdl;
772 
773         pfns = (PFN_NUMBER*)(master_mdl + 1);
774     }
775 
776     for (i = 0; i < c->chunk_item->num_stripes; i++) {
777         if (startoffstripe > i)
778             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
779         else if (startoffstripe == i)
780             stripes[i].start = startoff;
781         else
782             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
783 
784         if (endoffstripe > i)
785             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
786         else if (endoffstripe == i)
787             stripes[i].end = endoff + 1;
788         else
789             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
790 
791         if (stripes[i].start != stripes[i].end) {
792             stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
793             if (!stripes[i].mdl) {
794                 ERR("IoAllocateMdl failed\n");
795                 ExFreePool(stripeoff);
796                 return STATUS_INSUFFICIENT_RESOURCES;
797             }
798         }
799     }
800 
801     pos = 0;
802     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
803 
804     stripenum = startoffstripe;
805 
806     while (pos < length) {
807         PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1);
808 
809         if (pos == 0) {
810             uint32_t writelen = (uint32_t)min(stripes[stripenum].end - stripes[stripenum].start,
811                                           c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
812 
813             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
814 
815             stripeoff[stripenum] += writelen;
816             pos += writelen;
817         } else if (length - pos < c->chunk_item->stripe_length) {
818             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
819             break;
820         } else {
821             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
822 
823             stripeoff[stripenum] += c->chunk_item->stripe_length;
824             pos += c->chunk_item->stripe_length;
825         }
826 
827         stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
828     }
829 
830     ExFreePool(stripeoff);
831 
832     return STATUS_SUCCESS;
833 }
834 
835 static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c,
836                                      _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_stripe* stripes,
837                                      _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
838     uint64_t startoff, endoff;
839     uint16_t startoffstripe, endoffstripe, stripenum;
840     uint64_t pos, *stripeoff;
841     uint32_t i;
842     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
843     PMDL master_mdl;
844     PFN_NUMBER* pfns;
845 
846     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
847     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
848 
849     stripenum = startoffstripe;
850     startoffstripe *= c->chunk_item->sub_stripes;
851     endoffstripe *= c->chunk_item->sub_stripes;
852 
853     if (file_write) {
854         master_mdl = Irp->MdlAddress;
855 
856         pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
857         pfns = &pfns[irp_offset >> PAGE_SHIFT];
858     } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
859         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
860         if (!wtc->scratch) {
861             ERR("out of memory\n");
862             return STATUS_INSUFFICIENT_RESOURCES;
863         }
864 
865         RtlCopyMemory(wtc->scratch, data, length);
866 
867         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
868         if (!master_mdl) {
869             ERR("out of memory\n");
870             return STATUS_INSUFFICIENT_RESOURCES;
871         }
872 
873         MmBuildMdlForNonPagedPool(master_mdl);
874 
875         wtc->mdl = master_mdl;
876 
877         pfns = (PFN_NUMBER*)(master_mdl + 1);
878     } else {
879         NTSTATUS Status = STATUS_SUCCESS;
880 
881         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
882         if (!master_mdl) {
883             ERR("out of memory\n");
884             return STATUS_INSUFFICIENT_RESOURCES;
885         }
886 
887         _SEH2_TRY {
888             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
889         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
890             Status = _SEH2_GetExceptionCode();
891         } _SEH2_END;
892 
893         if (!NT_SUCCESS(Status)) {
894             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
895             IoFreeMdl(master_mdl);
896             return Status;
897         }
898 
899         wtc->mdl = master_mdl;
900 
901         pfns = (PFN_NUMBER*)(master_mdl + 1);
902     }
903 
904     for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
905         uint16_t j;
906 
907         if (startoffstripe > i)
908             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
909         else if (startoffstripe == i)
910             stripes[i].start = startoff;
911         else
912             stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
913 
914         if (endoffstripe > i)
915             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
916         else if (endoffstripe == i)
917             stripes[i].end = endoff + 1;
918         else
919             stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
920 
921         stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
922         if (!stripes[i].mdl) {
923             ERR("IoAllocateMdl failed\n");
924             return STATUS_INSUFFICIENT_RESOURCES;
925         }
926 
927         for (j = 1; j < c->chunk_item->sub_stripes; j++) {
928             stripes[i+j].start = stripes[i].start;
929             stripes[i+j].end = stripes[i].end;
930             stripes[i+j].data = stripes[i].data;
931             stripes[i+j].mdl = stripes[i].mdl;
932         }
933     }
934 
935     pos = 0;
936 
937     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
938     if (!stripeoff) {
939         ERR("out of memory\n");
940         return STATUS_INSUFFICIENT_RESOURCES;
941     }
942 
943     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
944 
945     while (pos < length) {
946         PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1);
947 
948         if (pos == 0) {
949             uint32_t writelen = (uint32_t)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
950                                           c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
951 
952             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
953 
954             stripeoff[stripenum] += writelen;
955             pos += writelen;
956         } else if (length - pos < c->chunk_item->stripe_length) {
957             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
958             break;
959         } else {
960             RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
961 
962             stripeoff[stripenum] += c->chunk_item->stripe_length;
963             pos += c->chunk_item->stripe_length;
964         }
965 
966         stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
967     }
968 
969     ExFreePool(stripeoff);
970 
971     return STATUS_SUCCESS;
972 }
973 
974 static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk *c, uint64_t address, uint32_t length, void* data) {
975     NTSTATUS Status;
976     LIST_ENTRY* le;
977     partial_stripe* ps;
978     uint64_t stripe_addr;
979     uint16_t num_data_stripes;
980 
981     num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
982     stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
983 
984     ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
985 
986     le = c->partial_stripes.Flink;
987     while (le != &c->partial_stripes) {
988         ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
989 
990         if (ps->address == stripe_addr) {
991             // update existing entry
992 
993             RtlCopyMemory(ps->data + address - stripe_addr, data, length);
994             RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);
995 
996             // if now filled, flush
997             if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size))) {
998                 Status = flush_partial_stripe(Vcb, c, ps);
999                 if (!NT_SUCCESS(Status)) {
1000                     ERR("flush_partial_stripe returned %08lx\n", Status);
1001                     goto end;
1002                 }
1003 
1004                 RemoveEntryList(&ps->list_entry);
1005 
1006                 if (ps->bmparr)
1007                     ExFreePool(ps->bmparr);
1008 
1009                 ExFreePool(ps);
1010             }
1011 
1012             Status = STATUS_SUCCESS;
1013             goto end;
1014         } else if (ps->address > stripe_addr)
1015             break;
1016 
1017         le = le->Flink;
1018     }
1019 
1020     // add new entry
1021 
1022     ps = ExAllocatePoolWithTag(NonPagedPool, offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
1023     if (!ps) {
1024         ERR("out of memory\n");
1025         Status = STATUS_INSUFFICIENT_RESOURCES;
1026         goto end;
1027     }
1028 
1029     ps->bmplen = (ULONG)(num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size;
1030 
1031     ps->address = stripe_addr;
1032     ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, (size_t)sector_align(((ps->bmplen / 8) + 1), sizeof(ULONG)), ALLOC_TAG);
1033     if (!ps->bmparr) {
1034         ERR("out of memory\n");
1035         ExFreePool(ps);
1036         Status = STATUS_INSUFFICIENT_RESOURCES;
1037         goto end;
1038     }
1039 
1040     RtlInitializeBitMap(&ps->bmp, ps->bmparr, ps->bmplen);
1041     RtlSetAllBits(&ps->bmp);
1042 
1043     RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1044     RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);
1045 
1046     InsertHeadList(le->Blink, &ps->list_entry);
1047 
1048     Status = STATUS_SUCCESS;
1049 
1050 end:
1051     ExReleaseResourceLite(&c->partial_stripes_lock);
1052 
1053     return Status;
1054 }
1055 
1056 typedef struct {
1057     PMDL mdl;
1058     PFN_NUMBER* pfns;
1059 } log_stripe;
1060 
1061 static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1062                                     uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1063     uint64_t startoff, endoff, parity_start, parity_end;
1064     uint16_t startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1;
1065     uint64_t pos, parity_pos, *stripeoff = NULL;
1066     uint32_t i;
1067     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1068     PMDL master_mdl;
1069     NTSTATUS Status;
1070     PFN_NUMBER *pfns, *parity_pfns;
1071     log_stripe* log_stripes = NULL;
1072 
1073     if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1074         uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1075 
1076         delta = min(length, delta);
1077         Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1078         if (!NT_SUCCESS(Status)) {
1079             ERR("add_partial_stripe returned %08lx\n", Status);
1080             goto exit;
1081         }
1082 
1083         length -= (uint32_t)delta;
1084     }
1085 
1086     if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1087         uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1088 
1089         Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1090         if (!NT_SUCCESS(Status)) {
1091             ERR("add_partial_stripe returned %08lx\n", Status);
1092             goto exit;
1093         }
1094 
1095         address += delta;
1096         length -= (uint32_t)delta;
1097         irp_offset += delta;
1098         data = (uint8_t*)data + delta;
1099     }
1100 
1101     if (length == 0) {
1102         Status = STATUS_SUCCESS;
1103         goto exit;
1104     }
1105 
1106     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1107     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1108 
1109     pos = 0;
1110     while (pos < length) {
1111         parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1112 
1113         if (pos == 0) {
1114             uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1115             ULONG skip, writelen;
1116 
1117             i = startoffstripe;
1118             while (stripe != parity) {
1119                 if (i == startoffstripe) {
1120                     writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1121 
1122                     stripes[stripe].start = startoff;
1123                     stripes[stripe].end = startoff + writelen;
1124 
1125                     pos += writelen;
1126 
1127                     if (pos == length)
1128                         break;
1129                 } else {
1130                     writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1131 
1132                     stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1133                     stripes[stripe].end = stripes[stripe].start + writelen;
1134 
1135                     pos += writelen;
1136 
1137                     if (pos == length)
1138                         break;
1139                 }
1140 
1141                 i++;
1142                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1143             }
1144 
1145             if (pos == length)
1146                 break;
1147 
1148             for (i = 0; i < startoffstripe; i++) {
1149                 stripe = (parity + i + 1) % c->chunk_item->num_stripes;
1150 
1151                 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1152             }
1153 
1154             stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1155 
1156             if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1157                 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1158 
1159                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1160                     stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1161                 }
1162 
1163                 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1164             }
1165         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1166             for (i = 0; i < c->chunk_item->num_stripes; i++) {
1167                 stripes[i].end += c->chunk_item->stripe_length;
1168             }
1169 
1170             pos += c->chunk_item->stripe_length * num_data_stripes;
1171         } else {
1172             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1173 
1174             i = 0;
1175             while (stripe != parity) {
1176                 if (endoffstripe == i) {
1177                     stripes[stripe].end = endoff + 1;
1178                     break;
1179                 } else if (endoffstripe > i)
1180                     stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1181 
1182                 i++;
1183                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1184             }
1185 
1186             break;
1187         }
1188     }
1189 
1190     parity_start = 0xffffffffffffffff;
1191     parity_end = 0;
1192 
1193     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1194         if (stripes[i].start != 0 || stripes[i].end != 0) {
1195             parity_start = min(stripes[i].start, parity_start);
1196             parity_end = max(stripes[i].end, parity_end);
1197         }
1198     }
1199 
1200     if (parity_end == parity_start) {
1201         Status = STATUS_SUCCESS;
1202         goto exit;
1203     }
1204 
1205     parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1206     stripes[parity].start = parity_start;
1207 
1208     parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1209     stripes[parity].end = parity_end;
1210 
1211     log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1212     if (!log_stripes) {
1213         ERR("out of memory\n");
1214         Status = STATUS_INSUFFICIENT_RESOURCES;
1215         goto exit;
1216     }
1217 
1218     RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1219 
1220     for (i = 0; i < num_data_stripes; i++) {
1221         log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1222         if (!log_stripes[i].mdl) {
1223             ERR("out of memory\n");
1224             Status = STATUS_INSUFFICIENT_RESOURCES;
1225             goto exit;
1226         }
1227 
1228         log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1229         log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1230     }
1231 
1232     wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1233     if (!wtc->parity1) {
1234         ERR("out of memory\n");
1235         Status = STATUS_INSUFFICIENT_RESOURCES;
1236         goto exit;
1237     }
1238 
1239     wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1240     if (!wtc->parity1_mdl) {
1241         ERR("out of memory\n");
1242         Status = STATUS_INSUFFICIENT_RESOURCES;
1243         goto exit;
1244     }
1245 
1246     MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1247 
1248     if (file_write)
1249         master_mdl = Irp->MdlAddress;
1250     else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1251         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1252         if (!wtc->scratch) {
1253             ERR("out of memory\n");
1254             Status = STATUS_INSUFFICIENT_RESOURCES;
1255             goto exit;
1256         }
1257 
1258         RtlCopyMemory(wtc->scratch, data, length);
1259 
1260         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1261         if (!master_mdl) {
1262             ERR("out of memory\n");
1263             Status = STATUS_INSUFFICIENT_RESOURCES;
1264             goto exit;
1265         }
1266 
1267         MmBuildMdlForNonPagedPool(master_mdl);
1268 
1269         wtc->mdl = master_mdl;
1270     } else {
1271         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1272         if (!master_mdl) {
1273             ERR("out of memory\n");
1274             Status = STATUS_INSUFFICIENT_RESOURCES;
1275             goto exit;
1276         }
1277 
1278         Status = STATUS_SUCCESS;
1279 
1280         _SEH2_TRY {
1281             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1282         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1283             Status = _SEH2_GetExceptionCode();
1284         } _SEH2_END;
1285 
1286         if (!NT_SUCCESS(Status)) {
1287             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1288             IoFreeMdl(master_mdl);
1289             return Status;
1290         }
1291 
1292         wtc->mdl = master_mdl;
1293     }
1294 
1295     pfns = (PFN_NUMBER*)(master_mdl + 1);
1296     parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1297 
1298     if (file_write)
1299         pfns = &pfns[irp_offset >> PAGE_SHIFT];
1300 
1301     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1302         if (stripes[i].start != stripes[i].end) {
1303             stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1304             if (!stripes[i].mdl) {
1305                 ERR("IoAllocateMdl failed\n");
1306                 Status = STATUS_INSUFFICIENT_RESOURCES;
1307                 goto exit;
1308             }
1309         }
1310     }
1311 
1312     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1313     if (!stripeoff) {
1314         ERR("out of memory\n");
1315         Status = STATUS_INSUFFICIENT_RESOURCES;
1316         goto exit;
1317     }
1318 
1319     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1320 
1321     pos = 0;
1322     parity_pos = 0;
1323 
1324     while (pos < length) {
1325         PFN_NUMBER* stripe_pfns;
1326 
1327         parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1328 
1329         if (pos == 0) {
1330             uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1331             uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1332                                                             c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1333             uint32_t maxwritelen = writelen;
1334 
1335             stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1336 
1337             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1338 
1339             RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1340             log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1341 
1342             stripeoff[stripe] = writelen;
1343             pos += writelen;
1344 
1345             stripe = (stripe + 1) % c->chunk_item->num_stripes;
1346             i = startoffstripe + 1;
1347 
1348             while (stripe != parity) {
1349                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1350                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1351 
1352                 if (writelen == 0)
1353                     break;
1354 
1355                 if (writelen > maxwritelen)
1356                     maxwritelen = writelen;
1357 
1358                 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1359 
1360                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1361                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1362 
1363                 stripeoff[stripe] = writelen;
1364                 pos += writelen;
1365 
1366                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1367                 i++;
1368             }
1369 
1370             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1371 
1372             RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1373             stripeoff[parity] = maxwritelen;
1374             parity_pos = maxwritelen;
1375         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1376             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1377 
1378             i = 0;
1379             while (stripe != parity) {
1380                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1381 
1382                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1383 
1384                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1385                 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1386 
1387                 stripeoff[stripe] += c->chunk_item->stripe_length;
1388                 pos += c->chunk_item->stripe_length;
1389 
1390                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1391                 i++;
1392             }
1393 
1394             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1395 
1396             RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1397             stripeoff[parity] += c->chunk_item->stripe_length;
1398             parity_pos += c->chunk_item->stripe_length;
1399         } else {
1400             uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1401             uint32_t writelen, maxwritelen = 0;
1402 
1403             i = 0;
1404             while (pos < length) {
1405                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1406                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1407 
1408                 if (writelen == 0)
1409                     break;
1410 
1411                 if (writelen > maxwritelen)
1412                     maxwritelen = writelen;
1413 
1414                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1415 
1416                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1417                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1418 
1419                 stripeoff[stripe] += writelen;
1420                 pos += writelen;
1421 
1422                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1423                 i++;
1424             }
1425 
1426             stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1427 
1428             RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1429         }
1430     }
1431 
1432     for (i = 0; i < num_data_stripes; i++) {
1433         uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[i].mdl, priority);
1434 
1435         if (i == 0)
1436             RtlCopyMemory(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1437         else
1438             do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1439     }
1440 
1441     Status = STATUS_SUCCESS;
1442 
1443 exit:
1444     if (log_stripes) {
1445         for (i = 0; i < num_data_stripes; i++) {
1446             if (log_stripes[i].mdl)
1447                 IoFreeMdl(log_stripes[i].mdl);
1448         }
1449 
1450         ExFreePool(log_stripes);
1451     }
1452 
1453     if (stripeoff)
1454         ExFreePool(stripeoff);
1455 
1456     return Status;
1457 }
1458 
1459 static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1460                                     uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1461     uint64_t startoff, endoff, parity_start, parity_end;
1462     uint16_t startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2;
1463     uint64_t pos, parity_pos, *stripeoff = NULL;
1464     uint32_t i;
1465     bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1466     PMDL master_mdl;
1467     NTSTATUS Status;
1468     PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns;
1469     log_stripe* log_stripes = NULL;
1470 
1471     if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1472         uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1473 
1474         delta = min(length, delta);
1475         Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1476         if (!NT_SUCCESS(Status)) {
1477             ERR("add_partial_stripe returned %08lx\n", Status);
1478             goto exit;
1479         }
1480 
1481         length -= (uint32_t)delta;
1482     }
1483 
1484     if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1485         uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1486 
1487         Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1488         if (!NT_SUCCESS(Status)) {
1489             ERR("add_partial_stripe returned %08lx\n", Status);
1490             goto exit;
1491         }
1492 
1493         address += delta;
1494         length -= (uint32_t)delta;
1495         irp_offset += delta;
1496         data = (uint8_t*)data + delta;
1497     }
1498 
1499     if (length == 0) {
1500         Status = STATUS_SUCCESS;
1501         goto exit;
1502     }
1503 
1504     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1505     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1506 
1507     pos = 0;
1508     while (pos < length) {
1509         parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1510 
1511         if (pos == 0) {
1512             uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes;
1513             uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1514             ULONG skip, writelen;
1515 
1516             i = startoffstripe;
1517             while (stripe != parity1) {
1518                 if (i == startoffstripe) {
1519                     writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1520 
1521                     stripes[stripe].start = startoff;
1522                     stripes[stripe].end = startoff + writelen;
1523 
1524                     pos += writelen;
1525 
1526                     if (pos == length)
1527                         break;
1528                 } else {
1529                     writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1530 
1531                     stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1532                     stripes[stripe].end = stripes[stripe].start + writelen;
1533 
1534                     pos += writelen;
1535 
1536                     if (pos == length)
1537                         break;
1538                 }
1539 
1540                 i++;
1541                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1542             }
1543 
1544             if (pos == length)
1545                 break;
1546 
1547             for (i = 0; i < startoffstripe; i++) {
1548                 stripe = (parity1 + i + 2) % c->chunk_item->num_stripes;
1549 
1550                 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1551             }
1552 
1553             stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end =
1554                 startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1555 
1556             if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1557                 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1558 
1559                 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1560                     stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1561                 }
1562 
1563                 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1564             }
1565         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1566             for (i = 0; i < c->chunk_item->num_stripes; i++) {
1567                 stripes[i].end += c->chunk_item->stripe_length;
1568             }
1569 
1570             pos += c->chunk_item->stripe_length * num_data_stripes;
1571         } else {
1572             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1573 
1574             i = 0;
1575             while (stripe != parity1) {
1576                 if (endoffstripe == i) {
1577                     stripes[stripe].end = endoff + 1;
1578                     break;
1579                 } else if (endoffstripe > i)
1580                     stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1581 
1582                 i++;
1583                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1584             }
1585 
1586             break;
1587         }
1588     }
1589 
1590     parity_start = 0xffffffffffffffff;
1591     parity_end = 0;
1592 
1593     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1594         if (stripes[i].start != 0 || stripes[i].end != 0) {
1595             parity_start = min(stripes[i].start, parity_start);
1596             parity_end = max(stripes[i].end, parity_end);
1597         }
1598     }
1599 
1600     if (parity_end == parity_start) {
1601         Status = STATUS_SUCCESS;
1602         goto exit;
1603     }
1604 
1605     parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1606     stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start;
1607 
1608     parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1609     stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end;
1610 
1611     log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1612     if (!log_stripes) {
1613         ERR("out of memory\n");
1614         Status = STATUS_INSUFFICIENT_RESOURCES;
1615         goto exit;
1616     }
1617 
1618     RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1619 
1620     for (i = 0; i < num_data_stripes; i++) {
1621         log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1622         if (!log_stripes[i].mdl) {
1623             ERR("out of memory\n");
1624             Status = STATUS_INSUFFICIENT_RESOURCES;
1625             goto exit;
1626         }
1627 
1628         log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1629         log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1630     }
1631 
1632     wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1633     if (!wtc->parity1) {
1634         ERR("out of memory\n");
1635         Status = STATUS_INSUFFICIENT_RESOURCES;
1636         goto exit;
1637     }
1638 
1639     wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1640     if (!wtc->parity2) {
1641         ERR("out of memory\n");
1642         Status = STATUS_INSUFFICIENT_RESOURCES;
1643         goto exit;
1644     }
1645 
1646     wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1647     if (!wtc->parity1_mdl) {
1648         ERR("out of memory\n");
1649         Status = STATUS_INSUFFICIENT_RESOURCES;
1650         goto exit;
1651     }
1652 
1653     MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1654 
1655     wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), false, false, NULL);
1656     if (!wtc->parity2_mdl) {
1657         ERR("out of memory\n");
1658         Status = STATUS_INSUFFICIENT_RESOURCES;
1659         goto exit;
1660     }
1661 
1662     MmBuildMdlForNonPagedPool(wtc->parity2_mdl);
1663 
1664     if (file_write)
1665         master_mdl = Irp->MdlAddress;
1666     else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1667         wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1668         if (!wtc->scratch) {
1669             ERR("out of memory\n");
1670             Status = STATUS_INSUFFICIENT_RESOURCES;
1671             goto exit;
1672         }
1673 
1674         RtlCopyMemory(wtc->scratch, data, length);
1675 
1676         master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1677         if (!master_mdl) {
1678             ERR("out of memory\n");
1679             Status = STATUS_INSUFFICIENT_RESOURCES;
1680             goto exit;
1681         }
1682 
1683         MmBuildMdlForNonPagedPool(master_mdl);
1684 
1685         wtc->mdl = master_mdl;
1686     } else {
1687         master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1688         if (!master_mdl) {
1689             ERR("out of memory\n");
1690             Status = STATUS_INSUFFICIENT_RESOURCES;
1691             goto exit;
1692         }
1693 
1694         Status = STATUS_SUCCESS;
1695 
1696         _SEH2_TRY {
1697             MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1698         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1699             Status = _SEH2_GetExceptionCode();
1700         } _SEH2_END;
1701 
1702         if (!NT_SUCCESS(Status)) {
1703             ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1704             IoFreeMdl(master_mdl);
1705             goto exit;
1706         }
1707 
1708         wtc->mdl = master_mdl;
1709     }
1710 
1711     pfns = (PFN_NUMBER*)(master_mdl + 1);
1712     parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1713     parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1);
1714 
1715     if (file_write)
1716         pfns = &pfns[irp_offset >> PAGE_SHIFT];
1717 
1718     for (i = 0; i < c->chunk_item->num_stripes; i++) {
1719         if (stripes[i].start != stripes[i].end) {
1720             stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1721             if (!stripes[i].mdl) {
1722                 ERR("IoAllocateMdl failed\n");
1723                 Status = STATUS_INSUFFICIENT_RESOURCES;
1724                 goto exit;
1725             }
1726         }
1727     }
1728 
1729     stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1730     if (!stripeoff) {
1731         ERR("out of memory\n");
1732         Status = STATUS_INSUFFICIENT_RESOURCES;
1733         goto exit;
1734     }
1735 
1736     RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1737 
1738     pos = 0;
1739     parity_pos = 0;
1740 
1741     while (pos < length) {
1742         PFN_NUMBER* stripe_pfns;
1743 
1744         parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1745 
1746         if (pos == 0) {
1747             uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2;
1748             uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1749                                                             c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1750             uint32_t maxwritelen = writelen;
1751 
1752             stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1753 
1754             RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1755 
1756             RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1757             log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1758 
1759             stripeoff[stripe] = writelen;
1760             pos += writelen;
1761 
1762             stripe = (stripe + 1) % c->chunk_item->num_stripes;
1763             i = startoffstripe + 1;
1764 
1765             while (stripe != parity1) {
1766                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1767                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1768 
1769                 if (writelen == 0)
1770                     break;
1771 
1772                 if (writelen > maxwritelen)
1773                     maxwritelen = writelen;
1774 
1775                 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1776 
1777                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1778                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1779 
1780                 stripeoff[stripe] = writelen;
1781                 pos += writelen;
1782 
1783                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1784                 i++;
1785             }
1786 
1787             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1788             RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1789             stripeoff[parity1] = maxwritelen;
1790 
1791             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1792 
1793             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1794             RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1795             stripeoff[parity2] = maxwritelen;
1796 
1797             parity_pos = maxwritelen;
1798         } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1799             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1800 
1801             i = 0;
1802             while (stripe != parity1) {
1803                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1804 
1805                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1806 
1807                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1808                 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1809 
1810                 stripeoff[stripe] += c->chunk_item->stripe_length;
1811                 pos += c->chunk_item->stripe_length;
1812 
1813                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1814                 i++;
1815             }
1816 
1817             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1818             RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1819             stripeoff[parity1] += c->chunk_item->stripe_length;
1820 
1821             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1822 
1823             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1824             RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1825             stripeoff[parity2] += c->chunk_item->stripe_length;
1826 
1827             parity_pos += c->chunk_item->stripe_length;
1828         } else {
1829             uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1830             uint32_t writelen, maxwritelen = 0;
1831 
1832             i = 0;
1833             while (pos < length) {
1834                 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1835                 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1836 
1837                 if (writelen == 0)
1838                     break;
1839 
1840                 if (writelen > maxwritelen)
1841                     maxwritelen = writelen;
1842 
1843                 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1844 
1845                 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1846                 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1847 
1848                 stripeoff[stripe] += writelen;
1849                 pos += writelen;
1850 
1851                 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1852                 i++;
1853             }
1854 
1855             stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1856             RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1857 
1858             parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1859 
1860             stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1861             RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1862         }
1863     }
1864 
1865     for (i = 0; i < num_data_stripes; i++) {
1866         uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority);
1867 
1868         if (i == 0) {
1869             RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start));
1870             RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start));
1871         } else {
1872             do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1873 
1874             galois_double(wtc->parity2, (uint32_t)(parity_end - parity_start));
1875             do_xor(wtc->parity2, ss, (uint32_t)(parity_end - parity_start));
1876         }
1877     }
1878 
1879     Status = STATUS_SUCCESS;
1880 
1881 exit:
1882     if (log_stripes) {
1883         for (i = 0; i < num_data_stripes; i++) {
1884             if (log_stripes[i].mdl)
1885                 IoFreeMdl(log_stripes[i].mdl);
1886         }
1887 
1888         ExFreePool(log_stripes);
1889     }
1890 
1891     if (stripeoff)
1892         ExFreePool(stripeoff);
1893 
1894     return Status;
1895 }
1896 
1897 NTSTATUS write_data(_In_ device_extension* Vcb, _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_data_context* wtc,
1898                     _In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ bool file_write, _In_ uint64_t irp_offset, _In_ ULONG priority) {
1899     NTSTATUS Status;
1900     uint32_t i;
1901     CHUNK_ITEM_STRIPE* cis;
1902     write_stripe* stripes = NULL;
1903     uint64_t total_writing = 0;
1904     ULONG allowed_missing, missing;
1905 
1906     TRACE("(%p, %I64x, %p, %x)\n", Vcb, address, data, length);
1907 
1908     if (!c) {
1909         c = get_chunk_from_address(Vcb, address);
1910         if (!c) {
1911             ERR("could not get chunk for address %I64x\n", address);
1912             return STATUS_INTERNAL_ERROR;
1913         }
1914     }
1915 
1916     stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1917     if (!stripes) {
1918         ERR("out of memory\n");
1919         return STATUS_INSUFFICIENT_RESOURCES;
1920     }
1921 
1922     RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
1923 
1924     cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1925 
1926     if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
1927         Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1928         if (!NT_SUCCESS(Status)) {
1929             ERR("prepare_raid0_write returned %08lx\n", Status);
1930             goto prepare_failed;
1931         }
1932 
1933         allowed_missing = 0;
1934     } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
1935         Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1936         if (!NT_SUCCESS(Status)) {
1937             ERR("prepare_raid10_write returned %08lx\n", Status);
1938             goto prepare_failed;
1939         }
1940 
1941         allowed_missing = 1;
1942     } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
1943         Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1944         if (!NT_SUCCESS(Status)) {
1945             ERR("prepare_raid5_write returned %08lx\n", Status);
1946             goto prepare_failed;
1947         }
1948 
1949         allowed_missing = 1;
1950     } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
1951         Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1952         if (!NT_SUCCESS(Status)) {
1953             ERR("prepare_raid6_write returned %08lx\n", Status);
1954             goto prepare_failed;
1955         }
1956 
1957         allowed_missing = 2;
1958     } else {  // write same data to every location - SINGLE, DUP, RAID1, RAID1C3, RAID1C4
1959         for (i = 0; i < c->chunk_item->num_stripes; i++) {
1960             stripes[i].start = address - c->offset;
1961             stripes[i].end = stripes[i].start + length;
1962             stripes[i].data = data;
1963             stripes[i].irp_offset = irp_offset;
1964 
1965             if (c->devices[i]->devobj) {
1966                 if (file_write) {
1967                     uint8_t* va;
1968                     ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start);
1969 
1970                     va = (uint8_t*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset;
1971 
1972                     stripes[i].mdl = IoAllocateMdl(va, writelen, false, false, NULL);
1973                     if (!stripes[i].mdl) {
1974                         ERR("IoAllocateMdl failed\n");
1975                         Status = STATUS_INSUFFICIENT_RESOURCES;
1976                         goto prepare_failed;
1977                     }
1978 
1979                     IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen);
1980                 } else {
1981                     stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1982                     if (!stripes[i].mdl) {
1983                         ERR("IoAllocateMdl failed\n");
1984                         Status = STATUS_INSUFFICIENT_RESOURCES;
1985                         goto prepare_failed;
1986                     }
1987 
1988                     Status = STATUS_SUCCESS;
1989 
1990                     _SEH2_TRY {
1991                         MmProbeAndLockPages(stripes[i].mdl, KernelMode, IoReadAccess);
1992                     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1993                         Status = _SEH2_GetExceptionCode();
1994                     } _SEH2_END;
1995 
1996                     if (!NT_SUCCESS(Status)) {
1997                         ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1998                         IoFreeMdl(stripes[i].mdl);
1999                         stripes[i].mdl = NULL;
2000                         goto prepare_failed;
2001                     }
2002                 }
2003             }
2004         }
2005 
2006         allowed_missing = c->chunk_item->num_stripes - 1;
2007     }
2008 
2009     missing = 0;
2010     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2011         if (!c->devices[i]->devobj)
2012             missing++;
2013     }
2014 
2015     if (missing > allowed_missing) {
2016         ERR("cannot write as %lu missing devices (maximum %lu)\n", missing, allowed_missing);
2017         Status = STATUS_DEVICE_NOT_READY;
2018         goto prepare_failed;
2019     }
2020 
2021     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2022         write_data_stripe* stripe;
2023         PIO_STACK_LOCATION IrpSp;
2024 
2025         stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
2026         if (!stripe) {
2027             ERR("out of memory\n");
2028             Status = STATUS_INSUFFICIENT_RESOURCES;
2029             goto end;
2030         }
2031 
2032         if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) {
2033             stripe->status = WriteDataStatus_Ignore;
2034             stripe->Irp = NULL;
2035             stripe->buf = stripes[i].data;
2036             stripe->mdl = NULL;
2037         } else {
2038             stripe->context = (struct _write_data_context*)wtc;
2039             stripe->buf = stripes[i].data;
2040             stripe->device = c->devices[i];
2041             RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
2042             stripe->status = WriteDataStatus_Pending;
2043             stripe->mdl = stripes[i].mdl;
2044 
2045             if (!Irp) {
2046                 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, false);
2047 
2048                 if (!stripe->Irp) {
2049                     ERR("IoAllocateIrp failed\n");
2050                     ExFreePool(stripe);
2051                     Status = STATUS_INSUFFICIENT_RESOURCES;
2052                     goto end;
2053                 }
2054             } else {
2055                 stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
2056 
2057                 if (!stripe->Irp) {
2058                     ERR("IoMakeAssociatedIrp failed\n");
2059                     ExFreePool(stripe);
2060                     Status = STATUS_INSUFFICIENT_RESOURCES;
2061                     goto end;
2062                 }
2063             }
2064 
2065             IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2066             IrpSp->MajorFunction = IRP_MJ_WRITE;
2067             IrpSp->FileObject = stripe->device->fileobj;
2068 
2069             if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
2070                 stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2071 
2072                 stripe->Irp->Flags = IRP_BUFFERED_IO;
2073             } else if (stripe->device->devobj->Flags & DO_DIRECT_IO)
2074                 stripe->Irp->MdlAddress = stripe->mdl;
2075             else
2076                 stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2077 
2078 #ifdef DEBUG_PARANOID
2079             if (stripes[i].end < stripes[i].start) {
2080                 ERR("trying to write stripe with negative length (%I64x < %I64x)\n", stripes[i].end, stripes[i].start);
2081                 int3;
2082             }
2083 #endif
2084 
2085             IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start);
2086             IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset;
2087 
2088             total_writing += IrpSp->Parameters.Write.Length;
2089 
2090             stripe->Irp->UserIosb = &stripe->iosb;
2091             wtc->stripes_left++;
2092 
2093             IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, true, true, true);
2094         }
2095 
2096         InsertTailList(&wtc->stripes, &stripe->list_entry);
2097     }
2098 
2099     if (diskacc)
2100         fFsRtlUpdateDiskCounters(0, total_writing);
2101 
2102     Status = STATUS_SUCCESS;
2103 
2104 end:
2105 
2106     if (stripes) ExFreePool(stripes);
2107 
2108     if (!NT_SUCCESS(Status))
2109         free_write_data_stripes(wtc);
2110 
2111     return Status;
2112 
2113 prepare_failed:
2114     for (i = 0; i < c->chunk_item->num_stripes; i++) {
2115         if (stripes[i].mdl && (i == 0 || stripes[i].mdl != stripes[i-1].mdl)) {
2116             if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2117                 MmUnlockPages(stripes[i].mdl);
2118 
2119             IoFreeMdl(stripes[i].mdl);
2120         }
2121     }
2122 
2123     if (wtc->parity1_mdl) {
2124         if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2125             MmUnlockPages(wtc->parity1_mdl);
2126 
2127         IoFreeMdl(wtc->parity1_mdl);
2128         wtc->parity1_mdl = NULL;
2129     }
2130 
2131     if (wtc->parity2_mdl) {
2132         if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2133             MmUnlockPages(wtc->parity2_mdl);
2134 
2135         IoFreeMdl(wtc->parity2_mdl);
2136         wtc->parity2_mdl = NULL;
2137     }
2138 
2139     if (wtc->mdl) {
2140         if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2141             MmUnlockPages(wtc->mdl);
2142 
2143         IoFreeMdl(wtc->mdl);
2144         wtc->mdl = NULL;
2145     }
2146 
2147     if (wtc->parity1) {
2148         ExFreePool(wtc->parity1);
2149         wtc->parity1 = NULL;
2150     }
2151 
2152     if (wtc->parity2) {
2153         ExFreePool(wtc->parity2);
2154         wtc->parity2 = NULL;
2155     }
2156 
2157     if (wtc->scratch) {
2158         ExFreePool(wtc->scratch);
2159         wtc->scratch = NULL;
2160     }
2161 
2162     ExFreePool(stripes);
2163     return Status;
2164 }
2165 
2166 void get_raid56_lock_range(chunk* c, uint64_t address, uint64_t length, uint64_t* lockaddr, uint64_t* locklen) {
2167     uint64_t startoff, endoff;
2168     uint16_t startoffstripe, endoffstripe, datastripes;
2169 
2170     datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
2171 
2172     get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe);
2173     get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe);
2174 
2175     startoff -= startoff % c->chunk_item->stripe_length;
2176     endoff = sector_align(endoff, c->chunk_item->stripe_length);
2177 
2178     *lockaddr = c->offset + (startoff * datastripes);
2179     *locklen = (endoff - startoff) * datastripes;
2180 }
2181 
2182 NTSTATUS write_data_complete(device_extension* Vcb, uint64_t address, void* data, uint32_t length, PIRP Irp, chunk* c, bool file_write, uint64_t irp_offset, ULONG priority) {
2183     write_data_context wtc;
2184     NTSTATUS Status;
2185     uint64_t lockaddr, locklen;
2186 
2187     KeInitializeEvent(&wtc.Event, NotificationEvent, false);
2188     InitializeListHead(&wtc.stripes);
2189     wtc.stripes_left = 0;
2190     wtc.parity1 = wtc.parity2 = wtc.scratch = NULL;
2191     wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL;
2192 
2193     if (!c) {
2194         c = get_chunk_from_address(Vcb, address);
2195         if (!c) {
2196             ERR("could not get chunk for address %I64x\n", address);
2197             return STATUS_INTERNAL_ERROR;
2198         }
2199     }
2200 
2201     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
2202         get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
2203         chunk_lock_range(Vcb, c, lockaddr, locklen);
2204     }
2205 
2206     _SEH2_TRY {
2207         Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority);
2208     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2209         Status = _SEH2_GetExceptionCode();
2210     } _SEH2_END;
2211 
2212     if (!NT_SUCCESS(Status)) {
2213         ERR("write_data returned %08lx\n", Status);
2214 
2215         if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2216             chunk_unlock_range(Vcb, c, lockaddr, locklen);
2217 
2218         free_write_data_stripes(&wtc);
2219         return Status;
2220     }
2221 
2222     if (wtc.stripes.Flink != &wtc.stripes) {
2223         // launch writes and wait
2224         LIST_ENTRY* le = wtc.stripes.Flink;
2225         bool no_wait = true;
2226 
2227         while (le != &wtc.stripes) {
2228             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2229 
2230             if (stripe->status != WriteDataStatus_Ignore) {
2231                 IoCallDriver(stripe->device->devobj, stripe->Irp);
2232                 no_wait = false;
2233             }
2234 
2235             le = le->Flink;
2236         }
2237 
2238         if (!no_wait)
2239             KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, false, NULL);
2240 
2241         le = wtc.stripes.Flink;
2242         while (le != &wtc.stripes) {
2243             write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2244 
2245             if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
2246                 Status = stripe->iosb.Status;
2247 
2248                 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2249                 break;
2250             }
2251 
2252             le = le->Flink;
2253         }
2254 
2255         free_write_data_stripes(&wtc);
2256     }
2257 
2258     if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2259         chunk_unlock_range(Vcb, c, lockaddr, locklen);
2260 
2261     return Status;
2262 }
2263 
2264 _Function_class_(IO_COMPLETION_ROUTINE)
2265 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2266     write_data_stripe* stripe = conptr;
2267     write_data_context* context = (write_data_context*)stripe->context;
2268     LIST_ENTRY* le;
2269 
2270     UNUSED(DeviceObject);
2271 
2272     // FIXME - we need a lock here
2273 
2274     if (stripe->status == WriteDataStatus_Cancelling) {
2275         stripe->status = WriteDataStatus_Cancelled;
2276         goto end;
2277     }
2278 
2279     stripe->iosb = Irp->IoStatus;
2280 
2281     if (NT_SUCCESS(Irp->IoStatus.Status)) {
2282         stripe->status = WriteDataStatus_Success;
2283     } else {
2284         le = context->stripes.Flink;
2285 
2286         stripe->status = WriteDataStatus_Error;
2287 
2288         while (le != &context->stripes) {
2289             write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2290 
2291             if (s2->status == WriteDataStatus_Pending) {
2292                 s2->status = WriteDataStatus_Cancelling;
2293                 IoCancelIrp(s2->Irp);
2294             }
2295 
2296             le = le->Flink;
2297         }
2298     }
2299 
2300 end:
2301     if (InterlockedDecrement(&context->stripes_left) == 0)
2302         KeSetEvent(&context->Event, 0, false);
2303 
2304     return STATUS_MORE_PROCESSING_REQUIRED;
2305 }
2306 
2307 void free_write_data_stripes(write_data_context* wtc) {
2308     LIST_ENTRY* le;
2309     PMDL last_mdl = NULL;
2310 
2311     if (wtc->parity1_mdl) {
2312         if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2313             MmUnlockPages(wtc->parity1_mdl);
2314 
2315         IoFreeMdl(wtc->parity1_mdl);
2316     }
2317 
2318     if (wtc->parity2_mdl) {
2319         if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2320             MmUnlockPages(wtc->parity2_mdl);
2321 
2322         IoFreeMdl(wtc->parity2_mdl);
2323     }
2324 
2325     if (wtc->mdl) {
2326         if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2327             MmUnlockPages(wtc->mdl);
2328 
2329         IoFreeMdl(wtc->mdl);
2330     }
2331 
2332     if (wtc->parity1)
2333         ExFreePool(wtc->parity1);
2334 
2335     if (wtc->parity2)
2336         ExFreePool(wtc->parity2);
2337 
2338     if (wtc->scratch)
2339         ExFreePool(wtc->scratch);
2340 
2341     le = wtc->stripes.Flink;
2342     while (le != &wtc->stripes) {
2343         write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2344 
2345         if (stripe->mdl && stripe->mdl != last_mdl) {
2346             if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2347                 MmUnlockPages(stripe->mdl);
2348 
2349             IoFreeMdl(stripe->mdl);
2350         }
2351 
2352         last_mdl = stripe->mdl;
2353 
2354         if (stripe->Irp)
2355             IoFreeIrp(stripe->Irp);
2356 
2357         le = le->Flink;
2358     }
2359 
2360     while (!IsListEmpty(&wtc->stripes)) {
2361         write_data_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&wtc->stripes), write_data_stripe, list_entry);
2362 
2363         ExFreePool(stripe);
2364     }
2365 }
2366 
2367 void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext) {
2368     LIST_ENTRY* le = prevextle->Flink;
2369 
2370     while (le != &fcb->extents) {
2371         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2372 
2373         if (ext->offset >= newext->offset) {
2374             InsertHeadList(ext->list_entry.Blink, &newext->list_entry);
2375             return;
2376         }
2377 
2378         le = le->Flink;
2379     }
2380 
2381     InsertTailList(&fcb->extents, &newext->list_entry);
2382 }
2383 
2384 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t end_data, PIRP Irp, LIST_ENTRY* rollback) {
2385     NTSTATUS Status;
2386     LIST_ENTRY* le;
2387 
2388     le = fcb->extents.Flink;
2389 
2390     while (le != &fcb->extents) {
2391         LIST_ENTRY* le2 = le->Flink;
2392         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2393         EXTENT_DATA* ed = &ext->extent_data;
2394         EXTENT_DATA2* ed2 = NULL;
2395         uint64_t len;
2396 
2397         if (!ext->ignore) {
2398             if (ed->type != EXTENT_TYPE_INLINE)
2399                 ed2 = (EXTENT_DATA2*)ed->data;
2400 
2401             len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
2402 
2403             if (ext->offset < end_data && ext->offset + len > start_data) {
2404                 if (ed->type == EXTENT_TYPE_INLINE) {
2405                     if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2406                         remove_fcb_extent(fcb, ext, rollback);
2407 
2408                         fcb->inode_item.st_blocks -= len;
2409                         fcb->inode_item_changed = true;
2410                     } else {
2411                         ERR("trying to split inline extent\n");
2412 #ifdef DEBUG_PARANOID
2413                         int3;
2414 #endif
2415                         return STATUS_INTERNAL_ERROR;
2416                     }
2417                 } else if (ed->type != EXTENT_TYPE_INLINE) {
2418                     if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2419                         if (ed2->size != 0) {
2420                             chunk* c;
2421 
2422                             fcb->inode_item.st_blocks -= len;
2423                             fcb->inode_item_changed = true;
2424 
2425                             c = get_chunk_from_address(Vcb, ed2->address);
2426 
2427                             if (!c) {
2428                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2429                             } else {
2430                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
2431                                                                    fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2432                                 if (!NT_SUCCESS(Status)) {
2433                                     ERR("update_changed_extent_ref returned %08lx\n", Status);
2434                                     goto end;
2435                                 }
2436                             }
2437                         }
2438 
2439                         remove_fcb_extent(fcb, ext, rollback);
2440                     } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
2441                         EXTENT_DATA2* ned2;
2442                         extent* newext;
2443 
2444                         if (ed2->size != 0) {
2445                             fcb->inode_item.st_blocks -= end_data - ext->offset;
2446                             fcb->inode_item_changed = true;
2447                         }
2448 
2449                         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2450                         if (!newext) {
2451                             ERR("out of memory\n");
2452                             Status = STATUS_INSUFFICIENT_RESOURCES;
2453                             goto end;
2454                         }
2455 
2456                         ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2457 
2458                         newext->extent_data.generation = Vcb->superblock.generation;
2459                         newext->extent_data.decoded_size = ed->decoded_size;
2460                         newext->extent_data.compression = ed->compression;
2461                         newext->extent_data.encryption = ed->encryption;
2462                         newext->extent_data.encoding = ed->encoding;
2463                         newext->extent_data.type = ed->type;
2464                         ned2->address = ed2->address;
2465                         ned2->size = ed2->size;
2466                         ned2->offset = ed2->offset + (end_data - ext->offset);
2467                         ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
2468 
2469                         newext->offset = end_data;
2470                         newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2471                         newext->unique = ext->unique;
2472                         newext->ignore = false;
2473                         newext->inserted = true;
2474 
2475                         if (ext->csum) {
2476                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2477                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2478                                 if (!newext->csum) {
2479                                     ERR("out of memory\n");
2480                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2481                                     ExFreePool(newext);
2482                                     goto end;
2483                                 }
2484 
2485                                 RtlCopyMemory(newext->csum, (uint8_t*)ext->csum + ((end_data - ext->offset) * Vcb->csum_size / Vcb->superblock.sector_size),
2486                                               (ULONG)(ned2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size));
2487                             } else {
2488                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2489                                 if (!newext->csum) {
2490                                     ERR("out of memory\n");
2491                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2492                                     ExFreePool(newext);
2493                                     goto end;
2494                                 }
2495 
2496                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size));
2497                             }
2498                         } else
2499                             newext->csum = NULL;
2500 
2501                         add_extent(fcb, &ext->list_entry, newext);
2502 
2503                         remove_fcb_extent(fcb, ext, rollback);
2504                     } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
2505                         EXTENT_DATA2* ned2;
2506                         extent* newext;
2507 
2508                         if (ed2->size != 0) {
2509                             fcb->inode_item.st_blocks -= ext->offset + len - start_data;
2510                             fcb->inode_item_changed = true;
2511                         }
2512 
2513                         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2514                         if (!newext) {
2515                             ERR("out of memory\n");
2516                             Status = STATUS_INSUFFICIENT_RESOURCES;
2517                             goto end;
2518                         }
2519 
2520                         ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2521 
2522                         newext->extent_data.generation = Vcb->superblock.generation;
2523                         newext->extent_data.decoded_size = ed->decoded_size;
2524                         newext->extent_data.compression = ed->compression;
2525                         newext->extent_data.encryption = ed->encryption;
2526                         newext->extent_data.encoding = ed->encoding;
2527                         newext->extent_data.type = ed->type;
2528                         ned2->address = ed2->address;
2529                         ned2->size = ed2->size;
2530                         ned2->offset = ed2->offset;
2531                         ned2->num_bytes = start_data - ext->offset;
2532 
2533                         newext->offset = ext->offset;
2534                         newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2535                         newext->unique = ext->unique;
2536                         newext->ignore = false;
2537                         newext->inserted = true;
2538 
2539                         if (ext->csum) {
2540                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2541                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2542                                 if (!newext->csum) {
2543                                     ERR("out of memory\n");
2544                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2545                                     ExFreePool(newext);
2546                                     goto end;
2547                                 }
2548 
2549                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ned2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size));
2550                             } else {
2551                                 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2552                                 if (!newext->csum) {
2553                                     ERR("out of memory\n");
2554                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2555                                     ExFreePool(newext);
2556                                     goto end;
2557                                 }
2558 
2559                                 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size));
2560                             }
2561                         } else
2562                             newext->csum = NULL;
2563 
2564                         InsertHeadList(&ext->list_entry, &newext->list_entry);
2565 
2566                         remove_fcb_extent(fcb, ext, rollback);
2567                     } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
2568                         EXTENT_DATA2 *neda2, *nedb2;
2569                         extent *newext1, *newext2;
2570 
2571                         if (ed2->size != 0) {
2572                             chunk* c;
2573 
2574                             fcb->inode_item.st_blocks -= end_data - start_data;
2575                             fcb->inode_item_changed = true;
2576 
2577                             c = get_chunk_from_address(Vcb, ed2->address);
2578 
2579                             if (!c) {
2580                                 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2581                             } else {
2582                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
2583                                                                    fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2584                                 if (!NT_SUCCESS(Status)) {
2585                                     ERR("update_changed_extent_ref returned %08lx\n", Status);
2586                                     goto end;
2587                                 }
2588                             }
2589                         }
2590 
2591                         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2592                         if (!newext1) {
2593                             ERR("out of memory\n");
2594                             Status = STATUS_INSUFFICIENT_RESOURCES;
2595                             goto end;
2596                         }
2597 
2598                         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2599                         if (!newext2) {
2600                             ERR("out of memory\n");
2601                             Status = STATUS_INSUFFICIENT_RESOURCES;
2602                             ExFreePool(newext1);
2603                             goto end;
2604                         }
2605 
2606                         neda2 = (EXTENT_DATA2*)newext1->extent_data.data;
2607 
2608                         newext1->extent_data.generation = Vcb->superblock.generation;
2609                         newext1->extent_data.decoded_size = ed->decoded_size;
2610                         newext1->extent_data.compression = ed->compression;
2611                         newext1->extent_data.encryption = ed->encryption;
2612                         newext1->extent_data.encoding = ed->encoding;
2613                         newext1->extent_data.type = ed->type;
2614                         neda2->address = ed2->address;
2615                         neda2->size = ed2->size;
2616                         neda2->offset = ed2->offset;
2617                         neda2->num_bytes = start_data - ext->offset;
2618 
2619                         nedb2 = (EXTENT_DATA2*)newext2->extent_data.data;
2620 
2621                         newext2->extent_data.generation = Vcb->superblock.generation;
2622                         newext2->extent_data.decoded_size = ed->decoded_size;
2623                         newext2->extent_data.compression = ed->compression;
2624                         newext2->extent_data.encryption = ed->encryption;
2625                         newext2->extent_data.encoding = ed->encoding;
2626                         newext2->extent_data.type = ed->type;
2627                         nedb2->address = ed2->address;
2628                         nedb2->size = ed2->size;
2629                         nedb2->offset = ed2->offset + (end_data - ext->offset);
2630                         nedb2->num_bytes = ext->offset + len - end_data;
2631 
2632                         newext1->offset = ext->offset;
2633                         newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2634                         newext1->unique = ext->unique;
2635                         newext1->ignore = false;
2636                         newext1->inserted = true;
2637 
2638                         newext2->offset = end_data;
2639                         newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2640                         newext2->unique = ext->unique;
2641                         newext2->ignore = false;
2642                         newext2->inserted = true;
2643 
2644                         if (ext->csum) {
2645                             if (ed->compression == BTRFS_COMPRESSION_NONE) {
2646                                 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(neda2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2647                                 if (!newext1->csum) {
2648                                     ERR("out of memory\n");
2649                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2650                                     ExFreePool(newext1);
2651                                     ExFreePool(newext2);
2652                                     goto end;
2653                                 }
2654 
2655                                 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(nedb2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2656                                 if (!newext2->csum) {
2657                                     ERR("out of memory\n");
2658                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2659                                     ExFreePool(newext1->csum);
2660                                     ExFreePool(newext1);
2661                                     ExFreePool(newext2);
2662                                     goto end;
2663                                 }
2664 
2665                                 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(neda2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size));
2666                                 RtlCopyMemory(newext2->csum, (uint8_t*)ext->csum + ((end_data - ext->offset) * Vcb->csum_size / Vcb->superblock.sector_size),
2667                                               (ULONG)(nedb2->num_bytes * Vcb->csum_size / Vcb->superblock.sector_size));
2668                             } else {
2669                                 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2670                                 if (!newext1->csum) {
2671                                     ERR("out of memory\n");
2672                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2673                                     ExFreePool(newext1);
2674                                     ExFreePool(newext2);
2675                                     goto end;
2676                                 }
2677 
2678                                 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size), ALLOC_TAG);
2679                                 if (!newext2->csum) {
2680                                     ERR("out of memory\n");
2681                                     Status = STATUS_INSUFFICIENT_RESOURCES;
2682                                     ExFreePool(newext1->csum);
2683                                     ExFreePool(newext1);
2684                                     ExFreePool(newext2);
2685                                     goto end;
2686                                 }
2687 
2688                                 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size));
2689                                 RtlCopyMemory(newext2->csum, ext->csum, (ULONG)(ed2->size * Vcb->csum_size / Vcb->superblock.sector_size));
2690                             }
2691                         } else {
2692                             newext1->csum = NULL;
2693                             newext2->csum = NULL;
2694                         }
2695 
2696                         InsertHeadList(&ext->list_entry, &newext1->list_entry);
2697                         add_extent(fcb, &newext1->list_entry, newext2);
2698 
2699                         remove_fcb_extent(fcb, ext, rollback);
2700                     }
2701                 }
2702             }
2703         }
2704 
2705         le = le2;
2706     }
2707 
2708     Status = STATUS_SUCCESS;
2709 
2710 end:
2711     fcb->extents_changed = true;
2712     mark_fcb_dirty(fcb);
2713 
2714     return Status;
2715 }
2716 
2717 void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
2718     rollback_extent* re;
2719 
2720     re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2721     if (!re) {
2722         ERR("out of memory\n");
2723         return;
2724     }
2725 
2726     re->fcb = fcb;
2727     re->ext = ext;
2728 
2729     add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re);
2730 }
2731 
2732 #ifdef _MSC_VER
2733 #pragma warning(push)
2734 #pragma warning(suppress: 28194)
2735 #endif
2736 NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ uint64_t offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ uint16_t edsize,
2737                            _In_ bool unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* csum, _In_ LIST_ENTRY* rollback) {
2738     extent* ext;
2739     LIST_ENTRY* le;
2740 
2741     ext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG);
2742     if (!ext) {
2743         ERR("out of memory\n");
2744         return STATUS_INSUFFICIENT_RESOURCES;
2745     }
2746 
2747     ext->offset = offset;
2748     ext->datalen = edsize;
2749     ext->unique = unique;
2750     ext->ignore = false;
2751     ext->inserted = true;
2752     ext->csum = csum;
2753 
2754     RtlCopyMemory(&ext->extent_data, ed, edsize);
2755 
2756     le = fcb->extents.Flink;
2757     while (le != &fcb->extents) {
2758         extent* oldext = CONTAINING_RECORD(le, extent, list_entry);
2759 
2760         if (oldext->offset >= offset) {
2761             InsertHeadList(le->Blink, &ext->list_entry);
2762             goto end;
2763         }
2764 
2765         le = le->Flink;
2766     }
2767 
2768     InsertTailList(&fcb->extents, &ext->list_entry);
2769 
2770 end:
2771     add_insert_extent_rollback(rollback, fcb, ext);
2772 
2773     return STATUS_SUCCESS;
2774 }
2775 #ifdef _MSC_VER
2776 #pragma warning(pop)
2777 #endif
2778 
2779 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
2780     if (!ext->ignore) {
2781         rollback_extent* re;
2782 
2783         ext->ignore = true;
2784 
2785         re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2786         if (!re) {
2787             ERR("out of memory\n");
2788             return;
2789         }
2790 
2791         re->fcb = fcb;
2792         re->ext = ext;
2793 
2794         add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re);
2795     }
2796 }
2797 
2798 _Requires_lock_held_(c->lock)
2799 _When_(return != 0, _Releases_lock_(c->lock))
2800 bool insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ uint64_t start_data, _In_ uint64_t length, _In_ bool prealloc, _In_opt_ void* data,
2801                          _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ uint8_t compression, _In_ uint64_t decoded_size, _In_ bool file_write, _In_ uint64_t irp_offset) {
2802     uint64_t address;
2803     NTSTATUS Status;
2804     EXTENT_DATA* ed;
2805     EXTENT_DATA2* ed2;
2806     uint16_t edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
2807     void* csum = NULL;
2808 
2809     TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %I64x, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback);
2810 
2811     if (!find_data_address_in_chunk(Vcb, c, length, &address))
2812         return false;
2813 
2814     // add extent data to inode
2815     ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2816     if (!ed) {
2817         ERR("out of memory\n");
2818         return false;
2819     }
2820 
2821     ed->generation = Vcb->superblock.generation;
2822     ed->decoded_size = decoded_size;
2823     ed->compression = compression;
2824     ed->encryption = BTRFS_ENCRYPTION_NONE;
2825     ed->encoding = BTRFS_ENCODING_NONE;
2826     ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;
2827 
2828     ed2 = (EXTENT_DATA2*)ed->data;
2829     ed2->address = address;
2830     ed2->size = length;
2831     ed2->offset = 0;
2832     ed2->num_bytes = decoded_size;
2833 
2834     if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
2835         ULONG sl = (ULONG)(length / Vcb->superblock.sector_size);
2836 
2837         csum = ExAllocatePoolWithTag(PagedPool, sl * Vcb->csum_size, ALLOC_TAG);
2838         if (!csum) {
2839             ERR("out of memory\n");
2840             ExFreePool(ed);
2841             return false;
2842         }
2843 
2844         do_calc_job(Vcb, data, sl, csum);
2845     }
2846 
2847     Status = add_extent_to_fcb(fcb, start_data, ed, edsize, true, csum, rollback);
2848     if (!NT_SUCCESS(Status)) {
2849         ERR("add_extent_to_fcb returned %08lx\n", Status);
2850         if (csum) ExFreePool(csum);
2851         ExFreePool(ed);
2852         return false;
2853     }
2854 
2855     ExFreePool(ed);
2856 
2857     c->used += length;
2858     space_list_subtract(c, false, address, length, rollback);
2859 
2860     fcb->inode_item.st_blocks += decoded_size;
2861 
2862     fcb->extents_changed = true;
2863     fcb->inode_item_changed = true;
2864     mark_fcb_dirty(fcb);
2865 
2866     ExAcquireResourceExclusiveLite(&c->changed_extents_lock, true);
2867 
2868     add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
2869 
2870     ExReleaseResourceLite(&c->changed_extents_lock);
2871 
2872     release_chunk_lock(c, Vcb);
2873 
2874     if (data) {
2875         Status = write_data_complete(Vcb, address, data, (uint32_t)length, Irp, NULL, file_write, irp_offset,
2876                                      fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2877         if (!NT_SUCCESS(Status))
2878             ERR("write_data_complete returned %08lx\n", Status);
2879     }
2880 
2881     return true;
2882 }
2883 
2884 static bool try_extend_data(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
2885                             PIRP Irp, uint64_t* written, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
2886     bool success = false;
2887     EXTENT_DATA* ed;
2888     EXTENT_DATA2* ed2;
2889     chunk* c;
2890     LIST_ENTRY* le;
2891     extent* ext = NULL;
2892 
2893     le = fcb->extents.Flink;
2894 
2895     while (le != &fcb->extents) {
2896         extent* nextext = CONTAINING_RECORD(le, extent, list_entry);
2897 
2898         if (!nextext->ignore) {
2899             if (nextext->offset == start_data) {
2900                 ext = nextext;
2901                 break;
2902             } else if (nextext->offset > start_data)
2903                 break;
2904 
2905             ext = nextext;
2906         }
2907 
2908         le = le->Flink;
2909     }
2910 
2911     if (!ext)
2912         return false;
2913 
2914     ed = &ext->extent_data;
2915 
2916     if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
2917         TRACE("not extending extent which is not regular or prealloc\n");
2918         return false;
2919     }
2920 
2921     ed2 = (EXTENT_DATA2*)ed->data;
2922 
2923     if (ext->offset + ed2->num_bytes != start_data) {
2924         TRACE("last EXTENT_DATA does not run up to start_data (%I64x + %I64x != %I64x)\n", ext->offset, ed2->num_bytes, start_data);
2925         return false;
2926     }
2927 
2928     c = get_chunk_from_address(Vcb, ed2->address);
2929 
2930     if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags)
2931         return false;
2932 
2933     acquire_chunk_lock(c, Vcb);
2934 
2935     if (length > c->chunk_item->size - c->used) {
2936         release_chunk_lock(c, Vcb);
2937         return false;
2938     }
2939 
2940     if (!c->cache_loaded) {
2941         NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
2942 
2943         if (!NT_SUCCESS(Status)) {
2944             ERR("load_cache_chunk returned %08lx\n", Status);
2945             release_chunk_lock(c, Vcb);
2946             return false;
2947         }
2948     }
2949 
2950     le = c->space.Flink;
2951     while (le != &c->space) {
2952         space* s = CONTAINING_RECORD(le, space, list_entry);
2953 
2954         if (s->address == ed2->address + ed2->size) {
2955             uint64_t newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
2956 
2957             success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset);
2958 
2959             if (success)
2960                 *written += newlen;
2961             else
2962                 release_chunk_lock(c, Vcb);
2963 
2964             return success;
2965         } else if (s->address > ed2->address + ed2->size)
2966             break;
2967 
2968         le = le->Flink;
2969     }
2970 
2971     release_chunk_lock(c, Vcb);
2972 
2973     return false;
2974 }
2975 
2976 static NTSTATUS insert_chunk_fragmented(fcb* fcb, uint64_t start, uint64_t length, uint8_t* data, bool prealloc, LIST_ENTRY* rollback) {
2977     LIST_ENTRY* le;
2978     uint64_t flags = fcb->Vcb->data_flags;
2979     bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
2980     NTSTATUS Status;
2981     chunk* c;
2982 
2983     ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
2984 
2985     // first create as many chunks as we can
2986     do {
2987         Status = alloc_chunk(fcb->Vcb, flags, &c, false);
2988     } while (NT_SUCCESS(Status));
2989 
2990     if (Status != STATUS_DISK_FULL) {
2991         ERR("alloc_chunk returned %08lx\n", Status);
2992         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
2993         return Status;
2994     }
2995 
2996     le = fcb->Vcb->chunks.Flink;
2997     while (le != &fcb->Vcb->chunks) {
2998         c = CONTAINING_RECORD(le, chunk, list_entry);
2999 
3000         if (!c->readonly && !c->reloc) {
3001             acquire_chunk_lock(c, fcb->Vcb);
3002 
3003             if (c->chunk_item->type == flags) {
3004                 while (!IsListEmpty(&c->space_size) && length > 0) {
3005                     space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size);
3006                     uint64_t extlen = min(length, s->size);
3007 
3008                     if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3009                         start += extlen;
3010                         length -= extlen;
3011                         if (data) data += extlen;
3012 
3013                         acquire_chunk_lock(c, fcb->Vcb);
3014                     }
3015                 }
3016             }
3017 
3018             release_chunk_lock(c, fcb->Vcb);
3019 
3020             if (length == 0)
3021                 break;
3022         }
3023 
3024         le = le->Flink;
3025     }
3026 
3027     ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3028 
3029     return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL;
3030 }
3031 
3032 static NTSTATUS insert_prealloc_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
3033     LIST_ENTRY* le;
3034     chunk* c;
3035     uint64_t flags;
3036     NTSTATUS Status;
3037     bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3038 
3039     flags = fcb->Vcb->data_flags;
3040 
3041     do {
3042         uint64_t extlen = min(MAX_EXTENT_SIZE, length);
3043 
3044         ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3045 
3046         le = fcb->Vcb->chunks.Flink;
3047         while (le != &fcb->Vcb->chunks) {
3048             c = CONTAINING_RECORD(le, chunk, list_entry);
3049 
3050             if (!c->readonly && !c->reloc) {
3051                 acquire_chunk_lock(c, fcb->Vcb);
3052 
3053                 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3054                     if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3055                         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3056                         goto cont;
3057                     }
3058                 }
3059 
3060                 release_chunk_lock(c, fcb->Vcb);
3061             }
3062 
3063             le = le->Flink;
3064         }
3065 
3066         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3067 
3068         ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, true);
3069 
3070         Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3071 
3072         ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3073 
3074         if (!NT_SUCCESS(Status)) {
3075             ERR("alloc_chunk returned %08lx\n", Status);
3076             goto end;
3077         }
3078 
3079         acquire_chunk_lock(c, fcb->Vcb);
3080 
3081         if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3082             if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0))
3083                 goto cont;
3084         }
3085 
3086         release_chunk_lock(c, fcb->Vcb);
3087 
3088         Status = insert_chunk_fragmented(fcb, start, length, NULL, true, rollback);
3089         if (!NT_SUCCESS(Status))
3090             ERR("insert_chunk_fragmented returned %08lx\n", Status);
3091 
3092         goto end;
3093 
3094 cont:
3095         length -= extlen;
3096         start += extlen;
3097     } while (length > 0);
3098 
3099     Status = STATUS_SUCCESS;
3100 
3101 end:
3102     return Status;
3103 }
3104 
3105 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
3106                               PIRP Irp, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
3107     NTSTATUS Status;
3108     LIST_ENTRY* le;
3109     chunk* c;
3110     uint64_t flags, orig_length = length, written = 0;
3111 
3112     TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data);
3113 
3114     if (start_data > 0) {
3115         try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback);
3116 
3117         if (written == length)
3118             return STATUS_SUCCESS;
3119         else if (written > 0) {
3120             start_data += written;
3121             irp_offset += written;
3122             length -= written;
3123             data = &((uint8_t*)data)[written];
3124         }
3125     }
3126 
3127     flags = Vcb->data_flags;
3128 
3129     while (written < orig_length) {
3130         uint64_t newlen = min(length, MAX_EXTENT_SIZE);
3131         bool done = false;
3132 
3133         // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
3134         // First, see if we can write the extent part to an existing chunk.
3135 
3136         ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3137 
3138         le = Vcb->chunks.Flink;
3139         while (le != &Vcb->chunks) {
3140             c = CONTAINING_RECORD(le, chunk, list_entry);
3141 
3142             if (!c->readonly && !c->reloc) {
3143                 acquire_chunk_lock(c, Vcb);
3144 
3145                 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3146                     insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3147                     written += newlen;
3148 
3149                     if (written == orig_length) {
3150                         ExReleaseResourceLite(&Vcb->chunk_lock);
3151                         return STATUS_SUCCESS;
3152                     } else {
3153                         done = true;
3154                         start_data += newlen;
3155                         irp_offset += newlen;
3156                         length -= newlen;
3157                         data = &((uint8_t*)data)[newlen];
3158                         break;
3159                     }
3160                 } else
3161                     release_chunk_lock(c, Vcb);
3162             }
3163 
3164             le = le->Flink;
3165         }
3166 
3167         ExReleaseResourceLite(&Vcb->chunk_lock);
3168 
3169         if (done) continue;
3170 
3171         // Otherwise, see if we can put it in a new chunk.
3172 
3173         ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
3174 
3175         Status = alloc_chunk(Vcb, flags, &c, false);
3176 
3177         ExReleaseResourceLite(&Vcb->chunk_lock);
3178 
3179         if (!NT_SUCCESS(Status)) {
3180             ERR("alloc_chunk returned %08lx\n", Status);
3181             return Status;
3182         }
3183 
3184         if (c) {
3185             acquire_chunk_lock(c, Vcb);
3186 
3187             if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3188                 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3189                 written += newlen;
3190 
3191                 if (written == orig_length)
3192                     return STATUS_SUCCESS;
3193                 else {
3194                     done = true;
3195                     start_data += newlen;
3196                     irp_offset += newlen;
3197                     length -= newlen;
3198                     data = &((uint8_t*)data)[newlen];
3199                 }
3200             } else
3201                 release_chunk_lock(c, Vcb);
3202         }
3203 
3204         if (!done) {
3205             Status = insert_chunk_fragmented(fcb, start_data, length, data, false, rollback);
3206             if (!NT_SUCCESS(Status))
3207                 ERR("insert_chunk_fragmented returned %08lx\n", Status);
3208 
3209             return Status;
3210         }
3211     }
3212 
3213     return STATUS_DISK_FULL;
3214 }
3215 
3216 NTSTATUS truncate_file(fcb* fcb, uint64_t end, PIRP Irp, LIST_ENTRY* rollback) {
3217     NTSTATUS Status;
3218 
3219     // FIXME - convert into inline extent if short enough
3220 
3221     if (end > 0 && fcb_is_inline(fcb)) {
3222         uint8_t* buf;
3223         bool make_inline = end <= fcb->Vcb->options.max_inline;
3224 
3225         buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? (offsetof(EXTENT_DATA, data[0]) + end) : sector_align(end, fcb->Vcb->superblock.sector_size)), ALLOC_TAG);
3226         if (!buf) {
3227             ERR("out of memory\n");
3228             return STATUS_INSUFFICIENT_RESOURCES;
3229         }
3230 
3231         Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp);
3232         if (!NT_SUCCESS(Status)) {
3233             ERR("read_file returned %08lx\n", Status);
3234             ExFreePool(buf);
3235             return Status;
3236         }
3237 
3238         Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3239         if (!NT_SUCCESS(Status)) {
3240             ERR("excise_extents returned %08lx\n", Status);
3241             ExFreePool(buf);
3242             return Status;
3243         }
3244 
3245         if (!make_inline) {
3246             RtlZeroMemory(buf + end, (ULONG)(sector_align(end, fcb->Vcb->superblock.sector_size) - end));
3247 
3248             Status = do_write_file(fcb, 0, sector_align(end, fcb->Vcb->superblock.sector_size), buf, Irp, false, 0, rollback);
3249             if (!NT_SUCCESS(Status)) {
3250                 ERR("do_write_file returned %08lx\n", Status);
3251                 ExFreePool(buf);
3252                 return Status;
3253             }
3254         } else {
3255             EXTENT_DATA* ed = (EXTENT_DATA*)buf;
3256 
3257             ed->generation = fcb->Vcb->superblock.generation;
3258             ed->decoded_size = end;
3259             ed->compression = BTRFS_COMPRESSION_NONE;
3260             ed->encryption = BTRFS_ENCRYPTION_NONE;
3261             ed->encoding = BTRFS_ENCODING_NONE;
3262             ed->type = EXTENT_TYPE_INLINE;
3263 
3264             Status = add_extent_to_fcb(fcb, 0, ed, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end), false, NULL, rollback);
3265             if (!NT_SUCCESS(Status)) {
3266                 ERR("add_extent_to_fcb returned %08lx\n", Status);
3267                 ExFreePool(buf);
3268                 return Status;
3269             }
3270 
3271             fcb->inode_item.st_blocks += end;
3272         }
3273 
3274         ExFreePool(buf);
3275         return STATUS_SUCCESS;
3276     }
3277 
3278     Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
3279                             sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
3280     if (!NT_SUCCESS(Status)) {
3281         ERR("excise_extents returned %08lx\n", Status);
3282         return Status;
3283     }
3284 
3285     fcb->inode_item.st_size = end;
3286     fcb->inode_item_changed = true;
3287     TRACE("setting st_size to %I64x\n", end);
3288 
3289     fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3290     fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3291     fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3292     // FIXME - inform cache manager of this
3293 
3294     TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
3295 
3296     return STATUS_SUCCESS;
3297 }
3298 
3299 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, uint64_t end, bool prealloc, PIRP Irp, LIST_ENTRY* rollback) {
3300     uint64_t oldalloc, newalloc;
3301     bool cur_inline;
3302     NTSTATUS Status;
3303 
3304     TRACE("(%p, %p, %I64x, %u)\n", fcb, fileref, end, prealloc);
3305 
3306     if (fcb->ads) {
3307         if (end > 0xffff)
3308             return STATUS_DISK_FULL;
3309 
3310         return stream_set_end_of_file_information(fcb->Vcb, (uint16_t)end, fcb, fileref, false);
3311     } else {
3312         extent* ext = NULL;
3313         LIST_ENTRY* le;
3314 
3315         le = fcb->extents.Blink;
3316         while (le != &fcb->extents) {
3317             extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
3318 
3319             if (!ext2->ignore) {
3320                 ext = ext2;
3321                 break;
3322             }
3323 
3324             le = le->Blink;
3325         }
3326 
3327         oldalloc = 0;
3328         if (ext) {
3329             EXTENT_DATA* ed = &ext->extent_data;
3330             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3331 
3332             oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
3333             cur_inline = ed->type == EXTENT_TYPE_INLINE;
3334 
3335             if (cur_inline && end > fcb->Vcb->options.max_inline) {
3336                 uint64_t origlength, length;
3337                 uint8_t* data;
3338 
3339                 TRACE("giving inline file proper extents\n");
3340 
3341                 origlength = ed->decoded_size;
3342 
3343                 cur_inline = false;
3344 
3345                 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
3346 
3347                 data = ExAllocatePoolWithTag(PagedPool, (ULONG)length, ALLOC_TAG);
3348                 if (!data) {
3349                     ERR("could not allocate %I64x bytes for data\n", length);
3350                     return STATUS_INSUFFICIENT_RESOURCES;
3351                 }
3352 
3353                 Status = read_file(fcb, data, 0, origlength, NULL, Irp);
3354                 if (!NT_SUCCESS(Status)) {
3355                     ERR("read_file returned %08lx\n", Status);
3356                     ExFreePool(data);
3357                     return Status;
3358                 }
3359 
3360                 RtlZeroMemory(data + origlength, (ULONG)(length - origlength));
3361 
3362                 Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3363                 if (!NT_SUCCESS(Status)) {
3364                     ERR("excise_extents returned %08lx\n", Status);
3365                     ExFreePool(data);
3366                     return Status;
3367                 }
3368 
3369                 Status = do_write_file(fcb, 0, length, data, Irp, false, 0, rollback);
3370                 if (!NT_SUCCESS(Status)) {
3371                     ERR("do_write_file returned %08lx\n", Status);
3372                     ExFreePool(data);
3373                     return Status;
3374                 }
3375 
3376                 oldalloc = ext->offset + length;
3377 
3378                 ExFreePool(data);
3379             }
3380 
3381             if (cur_inline) {
3382                 uint16_t edsize;
3383 
3384                 if (end > oldalloc) {
3385                     edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset);
3386                     ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3387 
3388                     if (!ed) {
3389                         ERR("out of memory\n");
3390                         return STATUS_INSUFFICIENT_RESOURCES;
3391                     }
3392 
3393                     ed->generation = fcb->Vcb->superblock.generation;
3394                     ed->decoded_size = end - ext->offset;
3395                     ed->compression = BTRFS_COMPRESSION_NONE;
3396                     ed->encryption = BTRFS_ENCRYPTION_NONE;
3397                     ed->encoding = BTRFS_ENCODING_NONE;
3398                     ed->type = EXTENT_TYPE_INLINE;
3399 
3400                     Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp);
3401                     if (!NT_SUCCESS(Status)) {
3402                         ERR("read_file returned %08lx\n", Status);
3403                         ExFreePool(ed);
3404                         return Status;
3405                     }
3406 
3407                     RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc));
3408 
3409                     remove_fcb_extent(fcb, ext, rollback);
3410 
3411                     Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback);
3412                     if (!NT_SUCCESS(Status)) {
3413                         ERR("add_extent_to_fcb returned %08lx\n", Status);
3414                         ExFreePool(ed);
3415                         return Status;
3416                     }
3417 
3418                     ExFreePool(ed);
3419 
3420                     fcb->extents_changed = true;
3421                     mark_fcb_dirty(fcb);
3422                 }
3423 
3424                 TRACE("extending inline file (oldalloc = %I64x, end = %I64x)\n", oldalloc, end);
3425 
3426                 fcb->inode_item.st_size = end;
3427                 TRACE("setting st_size to %I64x\n", end);
3428 
3429                 fcb->inode_item.st_blocks = end;
3430 
3431                 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3432             } else {
3433                 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3434 
3435                 if (newalloc > oldalloc) {
3436                     if (prealloc) {
3437                         // FIXME - try and extend previous extent first
3438 
3439                         Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
3440 
3441                         if (!NT_SUCCESS(Status)) {
3442                             ERR("insert_prealloc_extent returned %08lx\n", Status);
3443                             return Status;
3444                         }
3445                     }
3446 
3447                     fcb->extents_changed = true;
3448                 }
3449 
3450                 fcb->inode_item.st_size = end;
3451                 fcb->inode_item_changed = true;
3452                 mark_fcb_dirty(fcb);
3453 
3454                 TRACE("setting st_size to %I64x\n", end);
3455 
3456                 TRACE("newalloc = %I64x\n", newalloc);
3457 
3458                 fcb->Header.AllocationSize.QuadPart = newalloc;
3459                 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3460             }
3461         } else {
3462             if (end > fcb->Vcb->options.max_inline) {
3463                 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3464 
3465                 if (prealloc) {
3466                     Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
3467 
3468                     if (!NT_SUCCESS(Status)) {
3469                         ERR("insert_prealloc_extent returned %08lx\n", Status);
3470                         return Status;
3471                     }
3472                 }
3473 
3474                 fcb->extents_changed = true;
3475                 fcb->inode_item_changed = true;
3476                 mark_fcb_dirty(fcb);
3477 
3478                 fcb->inode_item.st_size = end;
3479                 TRACE("setting st_size to %I64x\n", end);
3480 
3481                 TRACE("newalloc = %I64x\n", newalloc);
3482 
3483                 fcb->Header.AllocationSize.QuadPart = newalloc;
3484                 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3485             } else {
3486                 EXTENT_DATA* ed;
3487                 uint16_t edsize;
3488 
3489                 edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end);
3490                 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3491 
3492                 if (!ed) {
3493                     ERR("out of memory\n");
3494                     return STATUS_INSUFFICIENT_RESOURCES;
3495                 }
3496 
3497                 ed->generation = fcb->Vcb->superblock.generation;
3498                 ed->decoded_size = end;
3499                 ed->compression = BTRFS_COMPRESSION_NONE;
3500                 ed->encryption = BTRFS_ENCRYPTION_NONE;
3501                 ed->encoding = BTRFS_ENCODING_NONE;
3502                 ed->type = EXTENT_TYPE_INLINE;
3503 
3504                 RtlZeroMemory(ed->data, (ULONG)end);
3505 
3506                 Status = add_extent_to_fcb(fcb, 0, ed, edsize, false, NULL, rollback);
3507                 if (!NT_SUCCESS(Status)) {
3508                     ERR("add_extent_to_fcb returned %08lx\n", Status);
3509                     ExFreePool(ed);
3510                     return Status;
3511                 }
3512 
3513                 ExFreePool(ed);
3514 
3515                 fcb->extents_changed = true;
3516                 fcb->inode_item_changed = true;
3517                 mark_fcb_dirty(fcb);
3518 
3519                 fcb->inode_item.st_size = end;
3520                 TRACE("setting st_size to %I64x\n", end);
3521 
3522                 fcb->inode_item.st_blocks = end;
3523 
3524                 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3525             }
3526         }
3527     }
3528 
3529     return STATUS_SUCCESS;
3530 }
3531 
3532 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, uint64_t start_data, uint64_t end_data, void* data, uint64_t* written,
3533                                        PIRP Irp, bool file_write, uint64_t irp_offset, ULONG priority, LIST_ENTRY* rollback) {
3534     EXTENT_DATA* ed = &ext->extent_data;
3535     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3536     NTSTATUS Status;
3537     chunk* c = NULL;
3538 
3539     if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
3540         extent* newext;
3541 
3542         newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3543         if (!newext) {
3544             ERR("out of memory\n");
3545             return STATUS_INSUFFICIENT_RESOURCES;
3546         }
3547 
3548         RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen);
3549 
3550         newext->extent_data.type = EXTENT_TYPE_REGULAR;
3551 
3552         Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)ed2->num_bytes, Irp,
3553                                      NULL, file_write, irp_offset + ext->offset - start_data, priority);
3554         if (!NT_SUCCESS(Status)) {
3555             ERR("write_data_complete returned %08lx\n", Status);
3556             return Status;
3557         }
3558 
3559         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3560             ULONG sl = (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size);
3561             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3562 
3563             if (!csum) {
3564                 ERR("out of memory\n");
3565                 ExFreePool(newext);
3566                 return STATUS_INSUFFICIENT_RESOURCES;
3567             }
3568 
3569             do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3570 
3571             newext->csum = csum;
3572         } else
3573             newext->csum = NULL;
3574 
3575         *written = ed2->num_bytes;
3576 
3577         newext->offset = ext->offset;
3578         newext->datalen = ext->datalen;
3579         newext->unique = ext->unique;
3580         newext->ignore = false;
3581         newext->inserted = true;
3582         InsertHeadList(&ext->list_entry, &newext->list_entry);
3583 
3584         add_insert_extent_rollback(rollback, fcb, newext);
3585 
3586         remove_fcb_extent(fcb, ext, rollback);
3587 
3588         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3589     } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
3590         EXTENT_DATA2* ned2;
3591         extent *newext1, *newext2;
3592 
3593         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3594         if (!newext1) {
3595             ERR("out of memory\n");
3596             return STATUS_INSUFFICIENT_RESOURCES;
3597         }
3598 
3599         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3600         if (!newext2) {
3601             ERR("out of memory\n");
3602             ExFreePool(newext1);
3603             return STATUS_INSUFFICIENT_RESOURCES;
3604         }
3605 
3606         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3607         newext1->extent_data.type = EXTENT_TYPE_REGULAR;
3608         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3609         ned2->num_bytes = end_data - ext->offset;
3610 
3611         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3612         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3613         ned2->offset += end_data - ext->offset;
3614         ned2->num_bytes -= end_data - ext->offset;
3615 
3616         Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)(end_data - ext->offset),
3617                                      Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority);
3618         if (!NT_SUCCESS(Status)) {
3619             ERR("write_data_complete returned %08lx\n", Status);
3620             ExFreePool(newext1);
3621             ExFreePool(newext2);
3622             return Status;
3623         }
3624 
3625         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3626             ULONG sl = (ULONG)((end_data - ext->offset) / fcb->Vcb->superblock.sector_size);
3627             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3628 
3629             if (!csum) {
3630                 ERR("out of memory\n");
3631                 ExFreePool(newext1);
3632                 ExFreePool(newext2);
3633                 return STATUS_INSUFFICIENT_RESOURCES;
3634             }
3635 
3636             do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3637 
3638             newext1->csum = csum;
3639         } else
3640             newext1->csum = NULL;
3641 
3642         *written = end_data - ext->offset;
3643 
3644         newext1->offset = ext->offset;
3645         newext1->datalen = ext->datalen;
3646         newext1->unique = ext->unique;
3647         newext1->ignore = false;
3648         newext1->inserted = true;
3649         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3650 
3651         add_insert_extent_rollback(rollback, fcb, newext1);
3652 
3653         newext2->offset = end_data;
3654         newext2->datalen = ext->datalen;
3655         newext2->unique = ext->unique;
3656         newext2->ignore = false;
3657         newext2->inserted = true;
3658         newext2->csum = NULL;
3659         add_extent(fcb, &newext1->list_entry, newext2);
3660 
3661         add_insert_extent_rollback(rollback, fcb, newext2);
3662 
3663         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3664 
3665         if (!c)
3666             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3667         else {
3668             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3669                                                 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3670 
3671             if (!NT_SUCCESS(Status)) {
3672                 ERR("update_changed_extent_ref returned %08lx\n", Status);
3673                 return Status;
3674             }
3675         }
3676 
3677         remove_fcb_extent(fcb, ext, rollback);
3678     } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
3679         EXTENT_DATA2* ned2;
3680         extent *newext1, *newext2;
3681 
3682         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3683         if (!newext1) {
3684             ERR("out of memory\n");
3685             return STATUS_INSUFFICIENT_RESOURCES;
3686         }
3687 
3688         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3689         if (!newext2) {
3690             ERR("out of memory\n");
3691             ExFreePool(newext1);
3692             return STATUS_INSUFFICIENT_RESOURCES;
3693         }
3694 
3695         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3696 
3697         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3698         ned2->num_bytes = start_data - ext->offset;
3699 
3700         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3701 
3702         newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3703         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3704         ned2->offset += start_data - ext->offset;
3705         ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;
3706 
3707         Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority);
3708         if (!NT_SUCCESS(Status)) {
3709             ERR("write_data_complete returned %08lx\n", Status);
3710             ExFreePool(newext1);
3711             ExFreePool(newext2);
3712             return Status;
3713         }
3714 
3715         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3716             ULONG sl = (ULONG)(ned2->num_bytes / fcb->Vcb->superblock.sector_size);
3717             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3718 
3719             if (!csum) {
3720                 ERR("out of memory\n");
3721                 ExFreePool(newext1);
3722                 ExFreePool(newext2);
3723                 return STATUS_INSUFFICIENT_RESOURCES;
3724             }
3725 
3726             do_calc_job(fcb->Vcb, data, sl, csum);
3727 
3728             newext2->csum = csum;
3729         } else
3730             newext2->csum = NULL;
3731 
3732         *written = ned2->num_bytes;
3733 
3734         newext1->offset = ext->offset;
3735         newext1->datalen = ext->datalen;
3736         newext1->unique = ext->unique;
3737         newext1->ignore = false;
3738         newext1->inserted = true;
3739         newext1->csum = NULL;
3740         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3741 
3742         add_insert_extent_rollback(rollback, fcb, newext1);
3743 
3744         newext2->offset = start_data;
3745         newext2->datalen = ext->datalen;
3746         newext2->unique = ext->unique;
3747         newext2->ignore = false;
3748         newext2->inserted = true;
3749         add_extent(fcb, &newext1->list_entry, newext2);
3750 
3751         add_insert_extent_rollback(rollback, fcb, newext2);
3752 
3753         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3754 
3755         if (!c)
3756             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3757         else {
3758             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3759                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3760 
3761             if (!NT_SUCCESS(Status)) {
3762                 ERR("update_changed_extent_ref returned %08lx\n", Status);
3763                 return Status;
3764             }
3765         }
3766 
3767         remove_fcb_extent(fcb, ext, rollback);
3768     } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
3769         EXTENT_DATA2* ned2;
3770         extent *newext1, *newext2, *newext3;
3771 
3772         newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3773         if (!newext1) {
3774             ERR("out of memory\n");
3775             return STATUS_INSUFFICIENT_RESOURCES;
3776         }
3777 
3778         newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3779         if (!newext2) {
3780             ERR("out of memory\n");
3781             ExFreePool(newext1);
3782             return STATUS_INSUFFICIENT_RESOURCES;
3783         }
3784 
3785         newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3786         if (!newext3) {
3787             ERR("out of memory\n");
3788             ExFreePool(newext1);
3789             ExFreePool(newext2);
3790             return STATUS_INSUFFICIENT_RESOURCES;
3791         }
3792 
3793         RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3794         RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3795         RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen);
3796 
3797         ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3798         ned2->num_bytes = start_data - ext->offset;
3799 
3800         newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3801         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3802         ned2->offset += start_data - ext->offset;
3803         ned2->num_bytes = end_data - start_data;
3804 
3805         ned2 = (EXTENT_DATA2*)newext3->extent_data.data;
3806         ned2->offset += end_data - ext->offset;
3807         ned2->num_bytes -= end_data - ext->offset;
3808 
3809         ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3810         Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority);
3811         if (!NT_SUCCESS(Status)) {
3812             ERR("write_data_complete returned %08lx\n", Status);
3813             ExFreePool(newext1);
3814             ExFreePool(newext2);
3815             ExFreePool(newext3);
3816             return Status;
3817         }
3818 
3819         if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3820             ULONG sl = (ULONG)((end_data - start_data) / fcb->Vcb->superblock.sector_size);
3821             void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3822 
3823             if (!csum) {
3824                 ERR("out of memory\n");
3825                 ExFreePool(newext1);
3826                 ExFreePool(newext2);
3827                 ExFreePool(newext3);
3828                 return STATUS_INSUFFICIENT_RESOURCES;
3829             }
3830 
3831             do_calc_job(fcb->Vcb, data, sl, csum);
3832 
3833             newext2->csum = csum;
3834         } else
3835             newext2->csum = NULL;
3836 
3837         *written = end_data - start_data;
3838 
3839         newext1->offset = ext->offset;
3840         newext1->datalen = ext->datalen;
3841         newext1->unique = ext->unique;
3842         newext1->ignore = false;
3843         newext1->inserted = true;
3844         newext1->csum = NULL;
3845         InsertHeadList(&ext->list_entry, &newext1->list_entry);
3846 
3847         add_insert_extent_rollback(rollback, fcb, newext1);
3848 
3849         newext2->offset = start_data;
3850         newext2->datalen = ext->datalen;
3851         newext2->unique = ext->unique;
3852         newext2->ignore = false;
3853         newext2->inserted = true;
3854         add_extent(fcb, &newext1->list_entry, newext2);
3855 
3856         add_insert_extent_rollback(rollback, fcb, newext2);
3857 
3858         newext3->offset = end_data;
3859         newext3->datalen = ext->datalen;
3860         newext3->unique = ext->unique;
3861         newext3->ignore = false;
3862         newext3->inserted = true;
3863         newext3->csum = NULL;
3864         add_extent(fcb, &newext2->list_entry, newext3);
3865 
3866         add_insert_extent_rollback(rollback, fcb, newext3);
3867 
3868         c = get_chunk_from_address(fcb->Vcb, ed2->address);
3869 
3870         if (!c)
3871             ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3872         else {
3873             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
3874                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3875 
3876             if (!NT_SUCCESS(Status)) {
3877                 ERR("update_changed_extent_ref returned %08lx\n", Status);
3878                 return Status;
3879             }
3880         }
3881 
3882         remove_fcb_extent(fcb, ext, rollback);
3883     }
3884 
3885     if (c)
3886         c->changed = true;
3887 
3888     return STATUS_SUCCESS;
3889 }
3890 
3891 NTSTATUS do_write_file(fcb* fcb, uint64_t start, uint64_t end_data, void* data, PIRP Irp, bool file_write, uint32_t irp_offset, LIST_ENTRY* rollback) {
3892     NTSTATUS Status;
3893     LIST_ENTRY *le, *le2;
3894     uint64_t written = 0, length = end_data - start;
3895     uint64_t last_cow_start;
3896     ULONG priority = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority;
3897 #ifdef DEBUG_PARANOID
3898     uint64_t last_off;
3899 #endif
3900     bool extents_changed = false;
3901 
3902     last_cow_start = 0;
3903 
3904     le = fcb->extents.Flink;
3905     while (le != &fcb->extents) {
3906         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
3907 
3908         le2 = le->Flink;
3909 
3910         if (!ext->ignore) {
3911             EXTENT_DATA* ed = &ext->extent_data;
3912             EXTENT_DATA2* ed2 = ed->type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ed->data;
3913             uint64_t len;
3914 
3915             len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
3916 
3917             if (ext->offset + len <= start)
3918                 goto nextitem;
3919 
3920             if (ext->offset > start + written + length)
3921                 break;
3922 
3923             if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique && ed->compression == BTRFS_COMPRESSION_NONE) {
3924                 if (max(last_cow_start, start + written) < ext->offset) {
3925                     uint64_t start_write = max(last_cow_start, start + written);
3926 
3927                     extents_changed = true;
3928 
3929                     Status = excise_extents(fcb->Vcb, fcb, start_write, ext->offset, Irp, rollback);
3930                     if (!NT_SUCCESS(Status)) {
3931                         ERR("excise_extents returned %08lx\n", Status);
3932                         return Status;
3933                     }
3934 
3935                     Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
3936                     if (!NT_SUCCESS(Status)) {
3937                         ERR("insert_extent returned %08lx\n", Status);
3938                         return Status;
3939                     }
3940 
3941                     written += ext->offset - start_write;
3942                     length -= ext->offset - start_write;
3943 
3944                     if (length == 0)
3945                         break;
3946                 }
3947 
3948                 if (ed->type == EXTENT_TYPE_REGULAR) {
3949                     uint64_t writeaddr = ed2->address + ed2->offset + start + written - ext->offset;
3950                     uint64_t write_len = min(len, length);
3951                     chunk* c;
3952 
3953                     TRACE("doing non-COW write to %I64x\n", writeaddr);
3954 
3955                     Status = write_data_complete(fcb->Vcb, writeaddr, (uint8_t*)data + written, (uint32_t)write_len, Irp, NULL, file_write, irp_offset + written, priority);
3956                     if (!NT_SUCCESS(Status)) {
3957                         ERR("write_data_complete returned %08lx\n", Status);
3958                         return Status;
3959                     }
3960 
3961                     c = get_chunk_from_address(fcb->Vcb, writeaddr);
3962                     if (c)
3963                         c->changed = true;
3964 
3965                     // This shouldn't ever get called - nocow files should always also be nosum.
3966                     if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3967                         do_calc_job(fcb->Vcb, (uint8_t*)data + written, (uint32_t)(write_len / fcb->Vcb->superblock.sector_size),
3968                                     (uint8_t*)ext->csum + ((start + written - ext->offset) * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size));
3969 
3970                         ext->inserted = true;
3971                         extents_changed = true;
3972                     }
3973 
3974                     written += write_len;
3975                     length -= write_len;
3976 
3977                     if (length == 0)
3978                         break;
3979                 } else if (ed->type == EXTENT_TYPE_PREALLOC) {
3980                     uint64_t write_len;
3981 
3982                     Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (uint8_t*)data + written, &write_len,
3983                                                     Irp, file_write, irp_offset + written, priority, rollback);
3984                     if (!NT_SUCCESS(Status)) {
3985                         ERR("do_write_file_prealloc returned %08lx\n", Status);
3986                         return Status;
3987                     }
3988 
3989                     extents_changed = true;
3990 
3991                     written += write_len;
3992                     length -= write_len;
3993 
3994                     if (length == 0)
3995                         break;
3996                 }
3997 
3998                 last_cow_start = ext->offset + len;
3999             }
4000         }
4001 
4002 nextitem:
4003         le = le2;
4004     }
4005 
4006     if (length > 0) {
4007         uint64_t start_write = max(last_cow_start, start + written);
4008 
4009         extents_changed = true;
4010 
4011         Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback);
4012         if (!NT_SUCCESS(Status)) {
4013             ERR("excise_extents returned %08lx\n", Status);
4014             return Status;
4015         }
4016 
4017         Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
4018         if (!NT_SUCCESS(Status)) {
4019             ERR("insert_extent returned %08lx\n", Status);
4020             return Status;
4021         }
4022     }
4023 
4024 #ifdef DEBUG_PARANOID
4025     last_off = 0xffffffffffffffff;
4026 
4027     le = fcb->extents.Flink;
4028     while (le != &fcb->extents) {
4029         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4030 
4031         if (!ext->ignore) {
4032             if (ext->offset == last_off) {
4033                 ERR("offset %I64x duplicated\n", ext->offset);
4034                 int3;
4035             } else if (ext->offset < last_off && last_off != 0xffffffffffffffff) {
4036                 ERR("offsets out of order\n");
4037                 int3;
4038             }
4039 
4040             last_off = ext->offset;
4041         }
4042 
4043         le = le->Flink;
4044     }
4045 #endif
4046 
4047     if (extents_changed) {
4048         fcb->extents_changed = true;
4049         mark_fcb_dirty(fcb);
4050     }
4051 
4052     return STATUS_SUCCESS;
4053 }
4054 
4055 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, bool paging_io, bool no_cache,
4056                      bool wait, bool deferred_write, bool write_irp, LIST_ENTRY* rollback) {
4057     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4058     PFILE_OBJECT FileObject = IrpSp->FileObject;
4059     EXTENT_DATA* ed2;
4060     uint64_t off64, newlength, start_data, end_data;
4061     uint32_t bufhead;
4062     bool make_inline;
4063     INODE_ITEM* origii;
4064     bool changed_length = false;
4065     NTSTATUS Status;
4066     LARGE_INTEGER time;
4067     BTRFS_TIME now;
4068     fcb* fcb;
4069     ccb* ccb;
4070     file_ref* fileref;
4071     bool paging_lock = false, acquired_fcb_lock = false, acquired_tree_lock = false, pagefile;
4072     ULONG filter = 0;
4073 
4074     TRACE("(%p, %p, %I64x, %p, %lx, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
4075 
4076     if (*length == 0) {
4077         TRACE("returning success for zero-length write\n");
4078         return STATUS_SUCCESS;
4079     }
4080 
4081     if (!FileObject) {
4082         ERR("error - FileObject was NULL\n");
4083         return STATUS_ACCESS_DENIED;
4084     }
4085 
4086     fcb = FileObject->FsContext;
4087     ccb = FileObject->FsContext2;
4088     fileref = ccb ? ccb->fileref : NULL;
4089 
4090     if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
4091         WARN("tried to write to something other than a file or symlink (inode %I64x, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
4092         return STATUS_INVALID_DEVICE_REQUEST;
4093     }
4094 
4095     if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1)
4096         offset = fcb->Header.FileSize;
4097 
4098     off64 = offset.QuadPart;
4099 
4100     TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
4101 
4102     if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write))
4103         return STATUS_PENDING;
4104 
4105     if (!wait && no_cache)
4106         return STATUS_PENDING;
4107 
4108     if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
4109         IO_STATUS_BLOCK iosb;
4110 
4111         ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, true);
4112 
4113         CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
4114 
4115         if (!NT_SUCCESS(iosb.Status)) {
4116             ExReleaseResourceLite(fcb->Header.PagingIoResource);
4117             ERR("CcFlushCache returned %08lx\n", iosb.Status);
4118             return iosb.Status;
4119         }
4120 
4121         paging_lock = true;
4122 
4123         CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, false);
4124     }
4125 
4126     if (paging_io) {
4127         if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) {
4128             Status = STATUS_PENDING;
4129             goto end;
4130         } else
4131             paging_lock = true;
4132     }
4133 
4134     pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io;
4135 
4136     if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
4137         if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4138             Status = STATUS_PENDING;
4139             goto end;
4140         } else
4141             acquired_tree_lock = true;
4142     }
4143 
4144     if (pagefile) {
4145         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
4146             Status = STATUS_PENDING;
4147             goto end;
4148         } else
4149             acquired_fcb_lock = true;
4150     } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
4151         if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
4152             Status = STATUS_PENDING;
4153             goto end;
4154         } else
4155             acquired_fcb_lock = true;
4156     }
4157 
4158     newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size;
4159 
4160     if (fcb->deleted)
4161         newlength = 0;
4162 
4163     TRACE("newlength = %I64x\n", newlength);
4164 
4165     if (off64 + *length > newlength) {
4166         if (paging_io) {
4167             if (off64 >= newlength) {
4168                 TRACE("paging IO tried to write beyond end of file (file size = %I64x, offset = %I64x, length = %lx)\n", newlength, off64, *length);
4169                 TRACE("FileObject: AllocationSize = %I64x, FileSize = %I64x, ValidDataLength = %I64x\n",
4170                     fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
4171                 Irp->IoStatus.Information = 0;
4172                 Status = STATUS_SUCCESS;
4173                 goto end;
4174             }
4175 
4176             *length = (ULONG)(newlength - off64);
4177         } else {
4178             newlength = off64 + *length;
4179             changed_length = true;
4180 
4181             TRACE("extending length to %I64x\n", newlength);
4182         }
4183     }
4184 
4185     if (fcb->ads)
4186         make_inline = false;
4187     else if (fcb->type == BTRFS_TYPE_SYMLINK)
4188         make_inline = newlength <= (Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(EXTENT_DATA, data[0]));
4189     else
4190         make_inline = newlength <= fcb->Vcb->options.max_inline;
4191 
4192     if (changed_length) {
4193         if (newlength > (uint64_t)fcb->Header.AllocationSize.QuadPart) {
4194             if (!acquired_tree_lock) {
4195                 // We need to acquire the tree lock if we don't have it already -
4196                 // we can't give an inline file proper extents at the same time as we're
4197                 // doing a flush.
4198                 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4199                     Status = STATUS_PENDING;
4200                     goto end;
4201                 } else
4202                     acquired_tree_lock = true;
4203             }
4204 
4205             Status = extend_file(fcb, fileref, newlength, false, Irp, rollback);
4206             if (!NT_SUCCESS(Status)) {
4207                 ERR("extend_file returned %08lx\n", Status);
4208                 goto end;
4209             }
4210         } else if (!fcb->ads)
4211             fcb->inode_item.st_size = newlength;
4212 
4213         fcb->Header.FileSize.QuadPart = newlength;
4214         fcb->Header.ValidDataLength.QuadPart = newlength;
4215 
4216         TRACE("AllocationSize = %I64x\n", fcb->Header.AllocationSize.QuadPart);
4217         TRACE("FileSize = %I64x\n", fcb->Header.FileSize.QuadPart);
4218         TRACE("ValidDataLength = %I64x\n", fcb->Header.ValidDataLength.QuadPart);
4219     }
4220 
4221     if (!no_cache) {
4222         Status = STATUS_SUCCESS;
4223 
4224         _SEH2_TRY {
4225             if (!FileObject->PrivateCacheMap || changed_length) {
4226                 CC_FILE_SIZES ccfs;
4227 
4228                 ccfs.AllocationSize = fcb->Header.AllocationSize;
4229                 ccfs.FileSize = fcb->Header.FileSize;
4230                 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4231 
4232                 if (!FileObject->PrivateCacheMap)
4233                     init_file_cache(FileObject, &ccfs);
4234 
4235                 CcSetFileSizes(FileObject, &ccfs);
4236             }
4237 
4238             if (IrpSp->MinorFunction & IRP_MN_MDL) {
4239                 CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
4240 
4241                 Status = Irp->IoStatus.Status;
4242                 goto end;
4243             } else {
4244                 if (fCcCopyWriteEx) {
4245                     TRACE("CcCopyWriteEx(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, off64, *length, wait, buf, Irp->Tail.Overlay.Thread);
4246                     if (!fCcCopyWriteEx(FileObject, &offset, *length, wait, buf, Irp->Tail.Overlay.Thread)) {
4247                         Status = STATUS_PENDING;
4248                         goto end;
4249                     }
4250                     TRACE("CcCopyWriteEx finished\n");
4251                 } else {
4252                     TRACE("CcCopyWrite(%p, %I64x, %lx, %u, %p)\n", FileObject, off64, *length, wait, buf);
4253                     if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
4254                         Status = STATUS_PENDING;
4255                         goto end;
4256                     }
4257                     TRACE("CcCopyWrite finished\n");
4258                 }
4259 
4260                 Irp->IoStatus.Information = *length;
4261             }
4262         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4263             Status = _SEH2_GetExceptionCode();
4264         } _SEH2_END;
4265 
4266         if (changed_length) {
4267             queue_notification_fcb(fcb->ads ? fileref->parent : fileref, fcb->ads ? FILE_NOTIFY_CHANGE_STREAM_SIZE : FILE_NOTIFY_CHANGE_SIZE,
4268                                    fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED, fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4269         }
4270 
4271         goto end;
4272     }
4273 
4274     if (fcb->ads) {
4275         if (changed_length) {
4276             char* data2;
4277 
4278             if (newlength > fcb->adsmaxlen) {
4279                 ERR("error - xattr too long (%I64u > %lu)\n", newlength, fcb->adsmaxlen);
4280                 Status = STATUS_DISK_FULL;
4281                 goto end;
4282             }
4283 
4284             data2 = ExAllocatePoolWithTag(PagedPool, (ULONG)newlength, ALLOC_TAG);
4285             if (!data2) {
4286                 ERR("out of memory\n");
4287                 Status = STATUS_INSUFFICIENT_RESOURCES;
4288                 goto end;
4289             }
4290 
4291             if (fcb->adsdata.Buffer) {
4292                 RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length);
4293                 ExFreePool(fcb->adsdata.Buffer);
4294             }
4295 
4296             if (newlength > fcb->adsdata.Length)
4297                 RtlZeroMemory(&data2[fcb->adsdata.Length], (ULONG)(newlength - fcb->adsdata.Length));
4298 
4299 
4300             fcb->adsdata.Buffer = data2;
4301             fcb->adsdata.Length = fcb->adsdata.MaximumLength = (USHORT)newlength;
4302 
4303             fcb->Header.AllocationSize.QuadPart = newlength;
4304             fcb->Header.FileSize.QuadPart = newlength;
4305             fcb->Header.ValidDataLength.QuadPart = newlength;
4306         }
4307 
4308         if (*length > 0)
4309             RtlCopyMemory(&fcb->adsdata.Buffer[off64], buf, *length);
4310 
4311         fcb->Header.ValidDataLength.QuadPart = newlength;
4312 
4313         mark_fcb_dirty(fcb);
4314 
4315         if (fileref)
4316             mark_fileref_dirty(fileref);
4317     } else {
4318         bool compress = write_fcb_compressed(fcb), no_buf = false;
4319         uint8_t* data;
4320 
4321         if (make_inline) {
4322             start_data = 0;
4323             end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
4324             bufhead = sizeof(EXTENT_DATA) - 1;
4325         } else if (compress) {
4326             start_data = off64 & ~(uint64_t)(COMPRESSED_EXTENT_SIZE - 1);
4327             end_data = min(sector_align(off64 + *length, COMPRESSED_EXTENT_SIZE),
4328                            sector_align(newlength, fcb->Vcb->superblock.sector_size));
4329             bufhead = 0;
4330         } else {
4331             start_data = off64 & ~(uint64_t)(fcb->Vcb->superblock.sector_size - 1);
4332             end_data = sector_align(off64 + *length, fcb->Vcb->superblock.sector_size);
4333             bufhead = 0;
4334         }
4335 
4336         if (fcb_is_inline(fcb))
4337             end_data = max(end_data, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size));
4338 
4339         fcb->Header.ValidDataLength.QuadPart = newlength;
4340         TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
4341 
4342         if (!make_inline && !compress && off64 == start_data && off64 + *length == end_data) {
4343             data = buf;
4344             no_buf = true;
4345         } else {
4346             data = ExAllocatePoolWithTag(PagedPool, (ULONG)(end_data - start_data + bufhead), ALLOC_TAG);
4347             if (!data) {
4348                 ERR("out of memory\n");
4349                 Status = STATUS_INSUFFICIENT_RESOURCES;
4350                 goto end;
4351             }
4352 
4353             RtlZeroMemory(data + bufhead, (ULONG)(end_data - start_data));
4354 
4355             TRACE("start_data = %I64x\n", start_data);
4356             TRACE("end_data = %I64x\n", end_data);
4357 
4358             if (off64 > start_data || off64 + *length < end_data) {
4359                 if (changed_length) {
4360                     if (fcb->inode_item.st_size > start_data)
4361                         Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp);
4362                     else
4363                         Status = STATUS_SUCCESS;
4364                 } else
4365                     Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp);
4366 
4367                 if (!NT_SUCCESS(Status)) {
4368                     ERR("read_file returned %08lx\n", Status);
4369                     ExFreePool(data);
4370                     goto end;
4371                 }
4372             }
4373 
4374             RtlCopyMemory(data + bufhead + off64 - start_data, buf, *length);
4375         }
4376 
4377         if (make_inline) {
4378             Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback);
4379             if (!NT_SUCCESS(Status)) {
4380                 ERR("error - excise_extents returned %08lx\n", Status);
4381                 ExFreePool(data);
4382                 goto end;
4383             }
4384 
4385             ed2 = (EXTENT_DATA*)data;
4386             ed2->generation = fcb->Vcb->superblock.generation;
4387             ed2->decoded_size = newlength;
4388             ed2->compression = BTRFS_COMPRESSION_NONE;
4389             ed2->encryption = BTRFS_ENCRYPTION_NONE;
4390             ed2->encoding = BTRFS_ENCODING_NONE;
4391             ed2->type = EXTENT_TYPE_INLINE;
4392 
4393             Status = add_extent_to_fcb(fcb, 0, ed2, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + newlength), false, NULL, rollback);
4394             if (!NT_SUCCESS(Status)) {
4395                 ERR("add_extent_to_fcb returned %08lx\n", Status);
4396                 ExFreePool(data);
4397                 goto end;
4398             }
4399 
4400             fcb->inode_item.st_blocks += newlength;
4401         } else if (compress) {
4402             Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback);
4403 
4404             if (!NT_SUCCESS(Status)) {
4405                 ERR("write_compressed returned %08lx\n", Status);
4406                 ExFreePool(data);
4407                 goto end;
4408             }
4409         } else {
4410             if (write_irp && Irp->MdlAddress && no_buf) {
4411                 bool locked = Irp->MdlAddress->MdlFlags & (MDL_PAGES_LOCKED | MDL_PARTIAL);
4412 
4413                 if (!locked) {
4414                     Status = STATUS_SUCCESS;
4415 
4416                     _SEH2_TRY {
4417                         MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
4418                     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4419                         Status = _SEH2_GetExceptionCode();
4420                     } _SEH2_END;
4421 
4422                     if (!NT_SUCCESS(Status)) {
4423                         ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
4424                         goto end;
4425                     }
4426                 }
4427 
4428                 _SEH2_TRY {
4429                     Status = do_write_file(fcb, start_data, end_data, data, Irp, true, 0, rollback);
4430                 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4431                     Status = _SEH2_GetExceptionCode();
4432                 } _SEH2_END;
4433 
4434                 if (!locked)
4435                     MmUnlockPages(Irp->MdlAddress);
4436             } else {
4437                 _SEH2_TRY {
4438                     Status = do_write_file(fcb, start_data, end_data, data, Irp, false, 0, rollback);
4439                 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4440                     Status = _SEH2_GetExceptionCode();
4441                 } _SEH2_END;
4442             }
4443 
4444             if (!NT_SUCCESS(Status)) {
4445                 ERR("do_write_file returned %08lx\n", Status);
4446                 if (!no_buf) ExFreePool(data);
4447                 goto end;
4448             }
4449         }
4450 
4451         if (!no_buf)
4452             ExFreePool(data);
4453     }
4454 
4455     KeQuerySystemTime(&time);
4456     win_time_to_unix(time, &now);
4457 
4458     if (!pagefile) {
4459         if (fcb->ads) {
4460             if (fileref && fileref->parent)
4461                 origii = &fileref->parent->fcb->inode_item;
4462             else {
4463                 ERR("no parent fcb found for stream\n");
4464                 Status = STATUS_INTERNAL_ERROR;
4465                 goto end;
4466             }
4467         } else
4468             origii = &fcb->inode_item;
4469 
4470         origii->transid = Vcb->superblock.generation;
4471         origii->sequence++;
4472 
4473         if (!ccb->user_set_change_time)
4474             origii->st_ctime = now;
4475 
4476         if (!fcb->ads) {
4477             if (changed_length) {
4478                 TRACE("setting st_size to %I64x\n", newlength);
4479                 origii->st_size = newlength;
4480                 filter |= FILE_NOTIFY_CHANGE_SIZE;
4481             }
4482 
4483             fcb->inode_item_changed = true;
4484         } else {
4485             fileref->parent->fcb->inode_item_changed = true;
4486 
4487             if (changed_length)
4488                 filter |= FILE_NOTIFY_CHANGE_STREAM_SIZE;
4489 
4490             filter |= FILE_NOTIFY_CHANGE_STREAM_WRITE;
4491         }
4492 
4493         if (!ccb->user_set_write_time) {
4494             origii->st_mtime = now;
4495             filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
4496         }
4497 
4498         mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
4499     }
4500 
4501     if (changed_length) {
4502         CC_FILE_SIZES ccfs;
4503 
4504         ccfs.AllocationSize = fcb->Header.AllocationSize;
4505         ccfs.FileSize = fcb->Header.FileSize;
4506         ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4507 
4508         _SEH2_TRY {
4509             CcSetFileSizes(FileObject, &ccfs);
4510         } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4511             Status = _SEH2_GetExceptionCode();
4512             goto end;
4513         } _SEH2_END;
4514     }
4515 
4516     fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
4517     fcb->subvol->root_item.ctime = now;
4518 
4519     Status = STATUS_SUCCESS;
4520     Irp->IoStatus.Information = *length;
4521 
4522     if (filter != 0)
4523         queue_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED,
4524                                fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4525 
4526 end:
4527     if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
4528         TRACE("CurrentByteOffset was: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4529         FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
4530         TRACE("CurrentByteOffset now: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4531     }
4532 
4533     if (acquired_fcb_lock)
4534         ExReleaseResourceLite(fcb->Header.Resource);
4535 
4536     if (acquired_tree_lock)
4537         ExReleaseResourceLite(&Vcb->tree_lock);
4538 
4539     if (paging_lock)
4540         ExReleaseResourceLite(fcb->Header.PagingIoResource);
4541 
4542     return Status;
4543 }
4544 
4545 NTSTATUS write_file(device_extension* Vcb, PIRP Irp, bool wait, bool deferred_write) {
4546     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4547     void* buf;
4548     NTSTATUS Status;
4549     LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
4550     PFILE_OBJECT FileObject = IrpSp->FileObject;
4551     fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4552     LIST_ENTRY rollback;
4553 
4554     InitializeListHead(&rollback);
4555 
4556     TRACE("write\n");
4557 
4558     Irp->IoStatus.Information = 0;
4559 
4560     TRACE("offset = %I64x\n", offset.QuadPart);
4561     TRACE("length = %lx\n", IrpSp->Parameters.Write.Length);
4562 
4563     if (!Irp->AssociatedIrp.SystemBuffer) {
4564         buf = map_user_buffer(Irp, fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
4565 
4566         if (Irp->MdlAddress && !buf) {
4567             ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
4568             Status = STATUS_INSUFFICIENT_RESOURCES;
4569             goto exit;
4570         }
4571     } else
4572         buf = Irp->AssociatedIrp.SystemBuffer;
4573 
4574     TRACE("buf = %p\n", buf);
4575 
4576     if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
4577         WARN("tried to write to locked region\n");
4578         Status = STATUS_FILE_LOCK_CONFLICT;
4579         goto exit;
4580     }
4581 
4582     Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE,
4583                          wait, deferred_write, true, &rollback);
4584 
4585     if (Status == STATUS_PENDING)
4586         goto exit;
4587     else if (!NT_SUCCESS(Status)) {
4588         ERR("write_file2 returned %08lx\n", Status);
4589         goto exit;
4590     }
4591 
4592     if (NT_SUCCESS(Status)) {
4593         if (diskacc && Status != STATUS_PENDING && Irp->Flags & IRP_NOCACHE) {
4594             PETHREAD thread = NULL;
4595 
4596             if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
4597                 thread = Irp->Tail.Overlay.Thread;
4598             else if (!IoIsSystemThread(PsGetCurrentThread()))
4599                 thread = PsGetCurrentThread();
4600             else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
4601                 thread = PsGetCurrentThread();
4602 
4603             if (thread)
4604                 fPsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0);
4605         }
4606     }
4607 
4608 exit:
4609     if (NT_SUCCESS(Status))
4610         clear_rollback(&rollback);
4611     else
4612         do_rollback(Vcb, &rollback);
4613 
4614     return Status;
4615 }
4616 
4617 _Dispatch_type_(IRP_MJ_WRITE)
4618 _Function_class_(DRIVER_DISPATCH)
4619 NTSTATUS __stdcall drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
4620     NTSTATUS Status;
4621     bool top_level;
4622     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4623     device_extension* Vcb = DeviceObject->DeviceExtension;
4624     PFILE_OBJECT FileObject = IrpSp->FileObject;
4625     fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4626     ccb* ccb = FileObject ? FileObject->FsContext2 : NULL;
4627     bool wait = FileObject ? IoIsOperationSynchronous(Irp) : true;
4628 
4629     FsRtlEnterFileSystem();
4630 
4631     top_level = is_top_level(Irp);
4632 
4633     if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
4634         Status = vol_write(DeviceObject, Irp);
4635         goto exit;
4636     } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
4637         Status = STATUS_INVALID_PARAMETER;
4638         goto end;
4639     }
4640 
4641     if (!fcb) {
4642         ERR("fcb was NULL\n");
4643         Status = STATUS_INVALID_PARAMETER;
4644         goto end;
4645     }
4646 
4647     if (!ccb) {
4648         ERR("ccb was NULL\n");
4649         Status = STATUS_INVALID_PARAMETER;
4650         goto end;
4651     }
4652 
4653     if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) {
4654         WARN("insufficient permissions\n");
4655         Status = STATUS_ACCESS_DENIED;
4656         goto end;
4657     }
4658 
4659     if (fcb == Vcb->volume_fcb) {
4660         if (!Vcb->locked || Vcb->locked_fileobj != FileObject) {
4661             ERR("trying to write to volume when not locked, or locked with another FileObject\n");
4662             Status = STATUS_ACCESS_DENIED;
4663             goto end;
4664         }
4665 
4666         TRACE("writing directly to volume\n");
4667 
4668         IoSkipCurrentIrpStackLocation(Irp);
4669 
4670         Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
4671         goto exit;
4672     }
4673 
4674     if (is_subvol_readonly(fcb->subvol, Irp)) {
4675         Status = STATUS_ACCESS_DENIED;
4676         goto end;
4677     }
4678 
4679     if (Vcb->readonly) {
4680         Status = STATUS_MEDIA_WRITE_PROTECTED;
4681         goto end;
4682     }
4683 
4684     _SEH2_TRY {
4685         if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
4686             CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress);
4687 
4688             Irp->MdlAddress = NULL;
4689             Status = STATUS_SUCCESS;
4690         } else {
4691             if (!(Irp->Flags & IRP_PAGING_IO))
4692                 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
4693 
4694             // Don't offload jobs when doing paging IO - otherwise this can lead to
4695             // deadlocks in CcCopyWrite.
4696             if (Irp->Flags & IRP_PAGING_IO)
4697                 wait = true;
4698 
4699             Status = write_file(Vcb, Irp, wait, false);
4700         }
4701     } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4702         Status = _SEH2_GetExceptionCode();
4703     } _SEH2_END;
4704 
4705 end:
4706     Irp->IoStatus.Status = Status;
4707 
4708     TRACE("wrote %Iu bytes\n", Irp->IoStatus.Information);
4709 
4710     if (Status != STATUS_PENDING)
4711         IoCompleteRequest(Irp, IO_NO_INCREMENT);
4712     else {
4713         IoMarkIrpPending(Irp);
4714 
4715         if (!add_thread_job(Vcb, Irp))
4716             Status = do_write_job(Vcb, Irp);
4717     }
4718 
4719 exit:
4720     if (top_level)
4721         IoSetTopLevelIrp(NULL);
4722 
4723     TRACE("returning %08lx\n", Status);
4724 
4725     FsRtlExitFileSystem();
4726 
4727     return Status;
4728 }
4729